Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion charts/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ data:

otel:
enabled: {{ .Values.config.logging.otel.enabled }}
sampling_rate: {{ .Values.config.logging.otel.sampling_rate }}

masking:
enabled: {{ .Values.config.logging.masking.enabled }}
Expand Down
6 changes: 4 additions & 2 deletions charts/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,11 @@ config:
format: json
output: stdout

# OpenTelemetry tracing (HyperFleet standard)
# Configuration via standard environment variables.
# See: https://github.com/openshift-hyperfleet/architecture/blob/main/hyperfleet/standards/tracing.md#configuration
otel:
enabled: false
sampling_rate: 1.0
enabled: true

masking:
enabled: true
Expand Down
42 changes: 37 additions & 5 deletions cmd/hyperfleet-api/servecmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"log/slog"
"os"
"os/signal"
"strconv"
"syscall"

"github.com/spf13/cobra"
Expand Down Expand Up @@ -69,14 +70,45 @@ func runServe(cmd *cobra.Command, args []string) {
logger.Info(ctx, config.DumpConfig(environments.Environment().Config))

var tp *trace.TracerProvider
if environments.Environment().Config.Logging.OTel.Enabled {
samplingRate := environments.Environment().Config.Logging.OTel.SamplingRate
traceProvider, err := telemetry.InitTraceProvider(ctx, "hyperfleet-api", api.Version, samplingRate)

// Check for deprecated HYPERFLEET_LOGGING_OTEL_ENABLED variable
if deprecatedEnv := os.Getenv("HYPERFLEET_LOGGING_OTEL_ENABLED"); deprecatedEnv != "" {
logger.With(ctx,
"deprecated_variable", "HYPERFLEET_LOGGING_OTEL_ENABLED",
"replacement", "TRACING_ENABLED",
).Warn("HYPERFLEET_LOGGING_OTEL_ENABLED is deprecated and ignored. Please use TRACING_ENABLED instead.")
}

// Determine if tracing is enabled using TRACING_ENABLED (tracing standard)
var tracingEnabled bool
if tracingEnv := os.Getenv("TRACING_ENABLED"); tracingEnv != "" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we align on using a single variable for enabling tracing?

I see the standard says TRACING_ENABLED, and this may predate standardising environment variables with the HYPERFLEET_ prefix.

Then, the docs mention HYPERFLEET_LOGGING_OTEL_ENABLED

I think we should consolidate in a single one and keep it consistent across repositories.

AFAIK OTEL_* variables do not have the HYPERFLEET_ prefix since they are used directly by the OTEL libraries, but any other variable that we introduce should be prefixed by our standard convention.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it! I’ll remove HYPERFLEET_LOGGING_OTEL_ENABLED then. I’ll keep the OTEL_* variables for consistency with hyperfleet-sentinel. Does that make sense?

Copy link
Contributor

@rh-amarin rh-amarin Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, to clarify: I meant to use HYPERFLEET_TRACING_ENABLED instead of plain TRACING_ENABLED.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HYPERFLEET_TRACING_ENABLED is not compliant with the tracing standards — we should use TRACING_ENABLED instead. See: tracing standards

if enabled, err := strconv.ParseBool(tracingEnv); err == nil {
tracingEnabled = enabled
} else {
logger.With(ctx,
logger.FieldTracingEnabled, tracingEnv,
"falling_back_to", environments.Environment().Config.Logging.OTel.Enabled).
WithError(err).Warn("Invalid TRACING_ENABLED value, falling back to config")
tracingEnabled = environments.Environment().Config.Logging.OTel.Enabled
}
} else {
// Use config default if TRACING_ENABLED not set
tracingEnabled = environments.Environment().Config.Logging.OTel.Enabled
}

if tracingEnabled {
// OpenTelemetry configuration is driven entirely by standard environment variables:
serviceName := "hyperfleet-api"
if svcName := os.Getenv("OTEL_SERVICE_NAME"); svcName != "" {
serviceName = svcName
}

traceProvider, err := telemetry.InitTraceProvider(ctx, serviceName, api.Version)
Comment on lines +101 to +106
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both sentinel and adapter have a name field
I wonder if we should add it to the API to avoid having this constant "hyperfleet-api" here

Also, here we are reading a variable that is OTel-specific. I wonder if that should be done inside the telemetry.InitTraceProvider function, so that main can become completely unaware of this setting.

if err != nil {
logger.WithError(ctx, err).Warn("Failed to initialize OpenTelemetry")
} else {
tp = traceProvider
logger.With(ctx, logger.FieldSamplingRate, samplingRate).Info("OpenTelemetry initialized")
logger.With(ctx, logger.FieldServiceName, serviceName).Info("OpenTelemetry initialized")
}
} else {
logger.With(ctx, logger.FieldOTelEnabled, false).Info("OpenTelemetry disabled")
Expand All @@ -89,7 +121,7 @@ func runServe(cmd *cobra.Command, args []string) {
"masking_enabled", environments.Environment().Config.Logging.Masking.Enabled,
).Info("Logger initialized")

apiServer := server.NewAPIServer()
apiServer := server.NewAPIServer(tracingEnabled)
go apiServer.Start()

metricsServer := server.NewMetricsServer()
Expand Down
4 changes: 2 additions & 2 deletions cmd/hyperfleet-api/server/api_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ func env() *environments.Env {
return environments.Environment()
}

func NewAPIServer() Server {
func NewAPIServer(tracingEnabled bool) Server {
s := &apiServer{}

mainRouter := s.routes()
mainRouter := s.routes(tracingEnabled)

// referring to the router as type http.Handler allows us to add middleware via more handlers
var mainHandler http.Handler = mainRouter
Expand Down
4 changes: 2 additions & 2 deletions cmd/hyperfleet-api/server/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func LoadDiscoveredRoutes(
}
}

func (s *apiServer) routes() *mux.Router {
func (s *apiServer) routes(tracingEnabled bool) *mux.Router {
services := &env().Services

metadataHandler := handlers.NewMetadataHandler()
Expand Down Expand Up @@ -79,7 +79,7 @@ func (s *apiServer) routes() *mux.Router {

// OpenTelemetry middleware (conditionally enabled)
// Extracts trace_id/span_id from traceparent header and adds to logger context
if env().Config.Logging.OTel.Enabled {
if tracingEnabled {
mainRouter.Use(middleware.OTelMiddleware)
}

Expand Down
46 changes: 39 additions & 7 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,16 @@ export HYPERFLEET_DATABASE_PASSWORD=secret-password
# Result: Uses "secret-password" (env var wins)
```

**Special Case - OpenTelemetry Tracing:**

`TRACING_ENABLED` (Tracing standard) has special precedence for cross-component consistency:

```text
TRACING_ENABLED > config file (logging.otel.enabled) > default (true)
```

See [OpenTelemetry Configuration](#opentelemetry-configuration) for details.

---

## Configuration File Locations
Expand Down Expand Up @@ -180,8 +190,7 @@ Logging behavior and output settings.
| `logging.level` | string | `info` | Log level: `debug`, `info`, `warn`, `error` |
| `logging.format` | string | `json` | Log format: `json`, `text` |
| `logging.output` | string | `stdout` | Log output: `stdout`, `stderr` |
| `logging.otel.enabled` | bool | `false` | Enable OpenTelemetry tracing |
| `logging.otel.sampling_rate` | float | `1.0` | OTEL sampling rate (0.0-1.0) |
| `logging.otel.enabled` | bool | `true` | Enable OpenTelemetry tracing (see [OpenTelemetry Configuration](#opentelemetry-configuration)) |
| `logging.masking.enabled` | bool | `true` | Enable sensitive data masking in logs |

**Example:**
Expand All @@ -200,6 +209,31 @@ logging:
- token
```

### OpenTelemetry Configuration

OpenTelemetry tracing is configured via standard environment variables following the [HyperFleet Tracing Standard](https://github.com/openshift-hyperfleet/architecture/blob/main/hyperfleet/standards/tracing.md).

**Enabling Tracing:**

| Property | Environment Variable | Type | Default | Description |
|----------|---------------------|------|---------|-------------|
| `logging.otel.enabled` | `TRACING_ENABLED` | bool | `true` | Enable OpenTelemetry tracing (HyperFleet standard) |

**Standard OpenTelemetry Environment Variables:**

Once enabled, tracing is configured using standard OpenTelemetry variables:

| Variable | Description | Default |
|----------|-------------|---------|
| `OTEL_SERVICE_NAME` | Service name in traces | `hyperfleet-api` |
| `OTEL_EXPORTER_OTLP_ENDPOINT` | OTLP collector endpoint | stdout exporter |
| `OTEL_EXPORTER_OTLP_PROTOCOL` | Export protocol (`grpc` or `http/protobuf`) | `grpc` |
| `OTEL_TRACES_SAMPLER` | Sampler type | `parentbased_traceidratio` |
| `OTEL_TRACES_SAMPLER_ARG` | Sampling rate (0.0-1.0) | `1.0` |
| `OTEL_RESOURCE_ATTRIBUTES` | Additional resource attributes (k=v,k2=v2) | - |

**See:** [Logging Documentation](logging.md#opentelemetry-integration) for tracing configuration details and [Tracing Standard](https://github.com/openshift-hyperfleet/architecture/blob/main/hyperfleet/standards/tracing.md#configuration) for complete reference.

---

## Advanced Configuration
Expand Down Expand Up @@ -360,8 +394,7 @@ Complete table of all configuration properties, their environment variables, and
| `logging.level` | `HYPERFLEET_LOGGING_LEVEL` | string | `info` |
| `logging.format` | `HYPERFLEET_LOGGING_FORMAT` | string | `json` |
| `logging.output` | `HYPERFLEET_LOGGING_OUTPUT` | string | `stdout` |
| `logging.otel.enabled` | `HYPERFLEET_LOGGING_OTEL_ENABLED` | bool | `false` |
| `logging.otel.sampling_rate` | `HYPERFLEET_LOGGING_OTEL_SAMPLING_RATE` | float | `1.0` |
| `logging.otel.enabled` | `TRACING_ENABLED` | bool | `true` |
| `logging.masking.enabled` | `HYPERFLEET_LOGGING_MASKING_ENABLED` | bool | `true` |
| `logging.masking.headers` | `HYPERFLEET_LOGGING_MASKING_HEADERS` | csv | `Authorization,Cookie` |
| `logging.masking.fields` | `HYPERFLEET_LOGGING_MASKING_FIELDS` | csv | `password,token` |
Expand Down Expand Up @@ -422,8 +455,6 @@ All CLI flags and their corresponding configuration paths.
| `--log-level`, `-l` | `logging.level` | string |
| `--log-format` | `logging.format` | string |
| `--log-output` | `logging.output` | string |
| `--log-otel-enabled` | `logging.otel.enabled` | bool |
| `--log-otel-sampling-rate` | `logging.otel.sampling_rate` | float |
| **OCM** | | |
| `--ocm-base-url` | `ocm.base_url` | string |
| `--ocm-client-id` | `ocm.client_id` | string |
Expand Down Expand Up @@ -507,7 +538,6 @@ The application performs comprehensive validation at startup.
**Logging**:
- `logging.level`: must be `debug`, `info`, `warn`, or `error`
- `logging.format`: must be `json` or `text`
- `logging.otel.sampling_rate`: 0.0-1.0

**Adapters**:
- `adapters.required.cluster`: must be array of strings
Expand Down Expand Up @@ -611,3 +641,5 @@ Before deploying to production, verify:
- ✅ CLI flags (--kebab-case)
- ✅ Configuration files (YAML snake_case)
- ✅ Default values
- ✅ OpenTelemetry tracing variables (TRACING_ENABLED, OTEL_*) if tracing is enabled

81 changes: 56 additions & 25 deletions docs/logging.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,36 @@ export HYPERFLEET_LOGGING_LEVEL=debug
# Structured JSON format with info level
export HYPERFLEET_LOGGING_FORMAT=json
export HYPERFLEET_LOGGING_LEVEL=info
export HYPERFLEET_LOGGING_OTEL_ENABLED=true
export HYPERFLEET_LOGGING_OTEL_SAMPLING_RATE=0.1

# OpenTelemetry tracing (Tracing standard)
export TRACING_ENABLED=true
export OTEL_TRACES_SAMPLER=parentbased_traceidratio
export OTEL_TRACES_SAMPLER_ARG=0.1
export OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317
```

**For complete configuration reference**, including all logging settings (levels, formats, OpenTelemetry, masking), see:
- **[Configuration Guide](config.md)** - All logging environment variables and defaults

### OpenTelemetry Environment Variables

HyperFleet uses standard OpenTelemetry environment variables for tracing configuration:

| Variable | Description | Default | Example |
|----------|-------------|---------|---------|
| `TRACING_ENABLED` | Enable/disable tracing (Tracing standard, overrides config) | - | `true`, `false` |
| `OTEL_SERVICE_NAME` | Service name in traces | `hyperfleet-api` | `hyperfleet-api-prod` |
| `OTEL_EXPORTER_OTLP_ENDPOINT` | OTLP collector endpoint (if not set, uses stdout) | - | `http://otel-collector:4317` |
| `OTEL_EXPORTER_OTLP_PROTOCOL` | OTLP protocol | `grpc` | `grpc`, `http/protobuf` |
| `OTEL_TRACES_SAMPLER` | Sampler type | `parentbased_traceidratio` | `always_on`, `traceidratio` |
| `OTEL_TRACES_SAMPLER_ARG` | Sampling rate (0.0-1.0) | `1.0` | `0.1` (10%) |
| `OTEL_RESOURCE_ATTRIBUTES` | Additional resource attributes | - | `env=prod,region=us-east` |

**Variable Precedence (highest to lowest):**
1. `TRACING_ENABLED` - Tracing standard (env var)
2. `config.yaml: logging.otel.enabled` - Config file
3. Default (`true`)

## Usage

### Basic Logging
Expand Down Expand Up @@ -346,24 +369,16 @@ logger.With(ctx, "host", "postgres.svc").WithError(err).Error("Failed to connect

### Initialization

OpenTelemetry is initialized in `cmd/hyperfleet-api/servecmd/cmd.go`:
OpenTelemetry is initialized in `cmd/hyperfleet-api/servecmd/cmd.go` (see `runServe()` function, lines ~74-110).

```go
if environments.Environment().Config.Logging.OTel.Enabled {
samplingRate := environments.Environment().Config.Logging.OTel.SamplingRate
tp, err := telemetry.InitTraceProvider(ctx, "hyperfleet-api", api.Version, samplingRate)
if err != nil {
logger.WithError(ctx, err).Warn("Failed to initialize OpenTelemetry")
} else {
defer func() {
if err := tp.Shutdown(context.Background()); err != nil {
logger.WithError(ctx, err).Error("Error shutting down tracer provider")
}
}()
logger.With(ctx, logger.FieldSamplingRate, samplingRate).Info("OpenTelemetry initialized")
}
}
```
**Key behavior:**
- Checks `TRACING_ENABLED` environment variable first (tracing standard)
- Falls back to config file setting if not set
- Uses `OTEL_SERVICE_NAME` if set, otherwise defaults to `"hyperfleet-api"`
- Initializes trace provider via `telemetry.InitTraceProvider(ctx, serviceName, api.Version)`
- Shuts down with timeout during graceful shutdown

See the actual implementation for complete error handling and shutdown logic.

### Trace Propagation

Expand All @@ -375,10 +390,26 @@ The OTel middleware automatically:

### Sampling

Configure sampling rate to control trace volume:
Configure sampling using standard OpenTelemetry environment variables:

```bash
# Sampler type (default: parentbased_traceidratio)
export OTEL_TRACES_SAMPLER=parentbased_traceidratio

# Sampling rate: 0.0-1.0 (default: 1.0)
export OTEL_TRACES_SAMPLER_ARG=0.1 # 10% of requests traced
```

**Sampling rate examples:**
- `0.0`: No traces (disabled)
- `0.1`: 10% of requests traced
- `1.0`: All requests traced (use in development only)
- `0.1`: 10% of requests traced (recommended for production)
- `1.0`: All requests traced (development only)

**Sampler types:**
- `always_on`: Sample all requests
- `always_off`: Sample no requests
- `traceidratio`: Sample based on trace ID ratio (use with OTEL_TRACES_SAMPLER_ARG)
- `parentbased_traceidratio`: Respect parent decision, otherwise use trace ID ratio (default)

## Data Masking

Expand Down Expand Up @@ -443,9 +474,9 @@ mainRouter.Use(logging.RequestLoggingMiddleware)

### Missing trace_id/span_id

1. Check OTel is enabled: `export HYPERFLEET_LOGGING_OTEL_ENABLED=true`
1. Check tracing is enabled: `export TRACING_ENABLED=true`
2. Verify middleware order: `OTelMiddleware` must be after `RequestIDMiddleware`
3. Check sampling rate: `export HYPERFLEET_LOGGING_OTEL_SAMPLING_RATE=1.0` (for testing)
3. Check sampling rate: `export OTEL_TRACES_SAMPLER_ARG=1.0` (for testing - trace all requests)

### Data Not Masked

Expand Down Expand Up @@ -502,7 +533,7 @@ func TestLogging(t *testing.T) {
HYPERFLEET_LOGGING_LEVEL=debug OCM_ENV=integration_testing go test ./test/integration/...

# Run tests without OTel
HYPERFLEET_LOGGING_OTEL_ENABLED=false OCM_ENV=integration_testing go test ./...
TRACING_ENABLED=false OCM_ENV=integration_testing go test ./...
```

## References
Expand Down
32 changes: 19 additions & 13 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ require (
github.com/testcontainers/testcontainers-go/modules/postgres v0.33.0
github.com/yaacov/tree-search-language v0.0.0-20190923184055-1c2dad2e354b
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0
go.opentelemetry.io/otel v1.38.0
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0
go.opentelemetry.io/otel/sdk v1.38.0
go.opentelemetry.io/otel/trace v1.38.0
go.opentelemetry.io/otel v1.40.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.40.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.40.0
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.40.0
go.opentelemetry.io/otel/sdk v1.40.0
go.opentelemetry.io/otel/trace v1.40.0
go.uber.org/mock v0.6.0
gopkg.in/resty.v1 v1.12.0
gorm.io/datatypes v1.2.7
Expand All @@ -53,6 +55,7 @@ require (
github.com/aymerick/douceur v0.2.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/containerd/errdefs v1.0.0 // indirect
github.com/containerd/errdefs/pkg v0.3.0 // indirect
Expand Down Expand Up @@ -80,6 +83,7 @@ require (
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/gorilla/css v1.0.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
Expand Down Expand Up @@ -133,17 +137,19 @@ require (
github.com/woodsbury/decimal128 v1.3.0 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 // indirect
go.opentelemetry.io/otel/metric v1.38.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0 // indirect
go.opentelemetry.io/otel/metric v1.40.0 // indirect
go.opentelemetry.io/proto/otlp v1.9.0 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/crypto v0.46.0 // indirect
golang.org/x/net v0.47.0 // indirect
golang.org/x/crypto v0.47.0 // indirect
golang.org/x/net v0.49.0 // indirect
golang.org/x/sync v0.19.0 // indirect
golang.org/x/sys v0.39.0 // indirect
golang.org/x/text v0.32.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect
google.golang.org/protobuf v1.36.10 // indirect
golang.org/x/sys v0.40.0 // indirect
golang.org/x/text v0.33.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 // indirect
google.golang.org/grpc v1.78.0 // indirect
google.golang.org/protobuf v1.36.11 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
gorm.io/driver/mysql v1.5.6 // indirect
)
Loading