diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index 2842704bb..a7625ac3b 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -106,6 +106,7 @@ var ( modelServerMetricsScheme = flag.String("model-server-metrics-scheme", "http", "Scheme to scrape metrics from pods") modelServerMetricsHttpsInsecureSkipVerify = flag.Bool("model-server-metrics-https-insecure-skip-verify", true, "When using 'https' scheme for 'model-server-metrics-scheme', configure 'InsecureSkipVerify' (default to true)") haEnableLeaderElection = flag.Bool("ha-enable-leader-election", false, "Enables leader election for high availability. When enabled, readiness probes will only pass on the leader.") + tracing = flag.Bool("tracing", true, "Enables emitting traces") setupLog = ctrl.Log.WithName("setup") ) @@ -141,6 +142,13 @@ func (r *Runner) Run(ctx context.Context) error { flag.Parse() initLogging(&opts) + if *tracing { + err := common.InitTracing(ctx, setupLog) + if err != nil { + return err + } + } + setupLog.Info("GIE build", "commit-sha", version.CommitSHA, "build-ref", version.BuildRef) // Validate flags diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index 41fee834d..f6354bfee 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -166,30 +166,34 @@ $ helm uninstall pool-1 The following table list the configurable parameters of the chart. -| **Parameter Name** | **Description** | -|---------------------------------------------|------------------------------------------------------------------------------------------------------------------------| -| `inferencePool.apiVersion` | The API version of the InferencePool resource. Defaults to `inference.networking.k8s.io/v1`. This can be changed to `inference.networking.x-k8s.io/v1alpha2` to support older API versions. 
| -| `inferencePool.targetPortNumber` | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000. | -| `inferencePool.modelServerType` | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm. | -| `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. | -| `inferenceExtension.replicas` | Number of replicas for the endpoint picker extension service. If More than one replica is used, EPP will run in HA active-passive mode. Defaults to `1`. | -| `inferenceExtension.image.name` | Name of the container image used for the endpoint picker. | -| `inferenceExtension.image.hub` | Registry URL where the endpoint picker image is hosted. | -| `inferenceExtension.image.tag` | Image tag of the endpoint picker. | -| `inferenceExtension.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. | -| `inferenceExtension.env` | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`. | -| `inferenceExtension.extraContainerPorts` | List of additional container ports to expose. Defaults to `[]`. | -| `inferenceExtension.extraServicePorts` | List of additional service ports to expose. Defaults to `[]`. | -| `inferenceExtension.flags` | List of flags which are passed through to endpoint picker. Example flags, enable-pprof, grpc-port etc. Refer [runner.go](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/cmd/epp/runner/runner.go) for complete list. | -| `inferenceExtension.affinity` | Affinity for the endpoint picker. Defaults to `{}`. | -| `inferenceExtension.tolerations` | Tolerations for the endpoint picker. Defaults to `[]`. | | -| `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. 
| -| `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. | -| `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. | -| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. | -| `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. | -| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`. | -| `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. | +| **Parameter Name** | **Description** | +|----------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `inferencePool.apiVersion` | The API version of the InferencePool resource. Defaults to `inference.networking.k8s.io/v1`. This can be changed to `inference.networking.x-k8s.io/v1alpha2` to support older API versions. | +| `inferencePool.targetPortNumber` | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000. | +| `inferencePool.modelServerType` | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm. | +| `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. 
| +| `inferenceExtension.replicas` | Number of replicas for the endpoint picker extension service. If More than one replica is used, EPP will run in HA active-passive mode. Defaults to `1`. | +| `inferenceExtension.image.name` | Name of the container image used for the endpoint picker. | +| `inferenceExtension.image.hub` | Registry URL where the endpoint picker image is hosted. | +| `inferenceExtension.image.tag` | Image tag of the endpoint picker. | +| `inferenceExtension.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. | +| `inferenceExtension.env` | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`. | +| `inferenceExtension.extraContainerPorts` | List of additional container ports to expose. Defaults to `[]`. | +| `inferenceExtension.extraServicePorts` | List of additional service ports to expose. Defaults to `[]`. | +| `inferenceExtension.flags` | List of flags which are passed through to endpoint picker. Example flags, enable-pprof, grpc-port etc. Refer [runner.go](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/cmd/epp/runner/runner.go) for complete list. | +| `inferenceExtension.affinity` | Affinity for the endpoint picker. Defaults to `{}`. | +| `inferenceExtension.tolerations` | Tolerations for the endpoint picker. Defaults to `[]`. | +| `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. | +| `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. | +| `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. | +| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. 
| +| `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. | +| `inferenceExtension.tracing.enabled` | Enables or disables OpenTelemetry tracing globally for the EndpointPicker. | +| `inferenceExtension.tracing.otelExporterEndpoint` | OpenTelemetry collector endpoint. | +| `inferenceExtension.tracing.sampling.sampler` | The trace sampler to use. Currently, only `parentbased_traceidratio` is supported. This sampler respects the parent span’s sampling decision when present, and applies the configured ratio for root spans. | +| `inferenceExtension.tracing.sampling.samplerArg` | Sampler-specific argument. For `parentbased_traceidratio`, this defines the base sampling rate for new traces (root spans), as a float string in the range [0.0, 1.0]. For example, "0.1" enables 10% sampling. | +| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`. | +| `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. | ### Provider Specific Configuration @@ -214,6 +218,21 @@ These are the options available to you with `provider.name` set to `istio`: | `istio.destinationRule.host` | Custom host value for the destination rule. If not set this will use the default value which is derrived from the epp service name and release namespace to gerenate a valid service address. | | `istio.destinationRule.trafficPolicy.connectionPool` | Configure the connectionPool level settings of the traffic policy | +#### OpenTelemetry + +The EndpointPicker supports OpenTelemetry-based tracing. 
To enable trace collection, use the following configuration: +```yaml +inferenceExtension: + tracing: + enabled: true + otelExporterEndpoint: "http://localhost:4317" + sampling: + sampler: "parentbased_traceidratio" + samplerArg: "0.1" +``` +Make sure that the `otelExporterEndpoint` points to your OpenTelemetry collector endpoint. +Currently, only the `parentbased_traceidratio` sampler is supported. You can adjust the base sampling ratio using the `samplerArg` (e.g., 0.1 means 10% of traces will be sampled). + ## Notes This chart will only deploy an InferencePool and its corresponding EndpointPicker extension. Before install the chart, please make sure that the inference extension CRDs are installed in the cluster. For more details, please refer to the [getting started guide](https://gateway-api-inference-extension.sigs.k8s.io/guides/). diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml index f012c2e47..55eb16776 100644 --- a/config/charts/inferencepool/templates/epp-deployment.yaml +++ b/config/charts/inferencepool/templates/epp-deployment.yaml @@ -62,6 +62,12 @@ spec: - "--{{ .name }}" - "{{ .value }}" {{- end }} + # Go's flag package only accepts the -flag=value form for boolean flags. + {{- if .Values.inferenceExtension.tracing.enabled }} + - "--tracing=true" + {{- else }} + - "--tracing=false" + {{- end }} ports: - name: grpc containerPort: 9002 @@ -101,6 +107,30 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace + {{- if .Values.inferenceExtension.tracing.enabled }} + - name: OTEL_SERVICE_NAME + value: "gateway-api-inference-extension" + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: {{ .Values.inferenceExtension.tracing.otelExporterEndpoint | default "http://localhost:4317" | quote }} + - name: OTEL_TRACES_EXPORTER + value: "otlp" + - name: OTEL_RESOURCE_ATTRIBUTES_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: OTEL_RESOURCE_ATTRIBUTES_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name
+ - name: OTEL_RESOURCE_ATTRIBUTES + value: 'k8s.namespace.name=$(NAMESPACE),k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME)' + - name: OTEL_TRACES_SAMPLER + value: {{ .Values.inferenceExtension.tracing.sampling.sampler | default "parentbased_traceidratio" | quote }} + - name: OTEL_TRACES_SAMPLER_ARG + value: {{ .Values.inferenceExtension.tracing.sampling.samplerArg | default "0.1" | quote }} + {{- end }} {{- if .Values.inferenceExtension.env }} {{- toYaml .Values.inferenceExtension.env | nindent 8 }} {{- end }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 91d6a48e6..f901f7f0f 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -53,6 +53,12 @@ inferenceExtension: gke: enabled: false + tracing: + enabled: false + otelExporterEndpoint: "http://localhost:4317" + sampling: + sampler: "parentbased_traceidratio" + samplerArg: "0.1" inferencePool: targetPorts: @@ -85,4 +91,4 @@ istio: trafficPolicy: {} # connectionPool: # http: - # maxRequestsPerConnection: 256000 + # maxRequestsPerConnection: 256000 \ No newline at end of file diff --git a/go.mod b/go.mod index 1a60c36ce..d5c0eb2dd 100644 --- a/go.mod +++ b/go.mod @@ -17,6 +17,10 @@ require ( github.com/prometheus/common v0.66.1 github.com/prometheus/prometheus v0.305.0 github.com/stretchr/testify v1.11.1 + go.opentelemetry.io/otel v1.38.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.36.0 + go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0 + go.opentelemetry.io/otel/sdk v1.38.0 go.uber.org/multierr v1.11.0 go.uber.org/zap v1.27.0 golang.org/x/sync v0.17.0 @@ -95,12 +99,9 @@ require ( github.com/x448/float16 v0.8.4 // indirect go.opentelemetry.io/auto/sdk v1.1.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect - go.opentelemetry.io/otel v1.37.0 // indirect 
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.36.0 // indirect - go.opentelemetry.io/otel/metric v1.37.0 // indirect - go.opentelemetry.io/otel/sdk v1.37.0 // indirect - go.opentelemetry.io/otel/trace v1.37.0 // indirect + go.opentelemetry.io/otel/metric v1.38.0 // indirect + go.opentelemetry.io/otel/trace v1.38.0 // indirect go.opentelemetry.io/proto/otlp v1.6.0 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/automaxprocs v1.6.0 // indirect diff --git a/go.sum b/go.sum index 817b54b73..b5ac9dfce 100644 --- a/go.sum +++ b/go.sum @@ -252,20 +252,22 @@ go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJyS go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= -go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= -go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0 h1:dNzwXjZKpMpE2JhmO+9HsPl42NIXFIFSUSSs0fiqra0= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0/go.mod h1:90PoxvaEB5n6AOdZvi+yWJQoE95U8Dhhw2bSyRqnTD0= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.36.0 h1:JgtbA0xkWHnTmYk7YusopJFX6uleBmAuZ8n05NEh8nQ= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.36.0/go.mod h1:179AK5aar5R3eS9FucPy6rggvU0g52cvKId8pv4+v0c= -go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= 
-go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= -go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI= -go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg= -go.opentelemetry.io/otel/sdk/metric v1.37.0 h1:90lI228XrB9jCMuSdA0673aubgRobVZFhbjxHHspCPc= -go.opentelemetry.io/otel/sdk/metric v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps= -go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= -go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= +go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0 h1:kJxSDN4SgWWTjG/hPp3O7LCGLcHXFlvS2/FFOrwL+SE= +go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0/go.mod h1:mgIOzS7iZeKJdeB8/NYHrJ48fdGc71Llo5bJ1J4DWUE= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= +go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= +go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= go.opentelemetry.io/proto/otlp v1.6.0 h1:jQjP+AQyTf+Fe7OKj/MfkDrmK4MNVtw2NpXsf9fefDI= go.opentelemetry.io/proto/otlp v1.6.0/go.mod h1:cicgGehlFuNdgZkcALOCh3VE6K/u2tAjzlRhDwmVpZc= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= diff --git a/pkg/common/telemetry.go b/pkg/common/telemetry.go new file mode 100644 index 000000000..3723e0f7d --- /dev/null 
+++ b/pkg/common/telemetry.go
@@ -0,0 +1,212 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package common
+
+import (
+	"context"
+	"fmt"
+	"math"
+	"os"
+	"strconv"
+	"time"
+
+	"github.com/go-logr/logr"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
+	"go.opentelemetry.io/otel/exporters/stdout/stdouttrace"
+	"go.opentelemetry.io/otel/propagation"
+	"go.opentelemetry.io/otel/sdk/resource"
+	sdktrace "go.opentelemetry.io/otel/sdk/trace"
+	semconv "go.opentelemetry.io/otel/semconv/v1.37.0"
+
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
+	"sigs.k8s.io/gateway-api-inference-extension/version"
+)
+
+const (
+	// Defaults applied when the corresponding OTEL_* variable is unset.
+	defaultServiceName      = "gateway-api-inference-extension"
+	defaultExporterEndpoint = "http://localhost:4317"
+	defaultSamplerType      = "parentbased_traceidratio"
+	defaultSamplingRatio    = 0.1
+
+	// Values of OTEL_TRACES_EXPORTER understood by initTraceExporter.
+	exporterConsole = "console"
+	exporterOTLP    = "otlp"
+
+	// shutdownTimeout bounds the final span flush so that an unreachable
+	// collector cannot stall the shutdown goroutine indefinitely.
+	shutdownTimeout = 10 * time.Second
+)
+
+// errorHandler forwards errors reported by the OpenTelemetry SDK to a logger.
+type errorHandler struct {
+	logger logr.Logger
+}
+
+// errorHandler is installed via otel.SetErrorHandler, so it must satisfy
+// otel.ErrorHandler.
+var _ otel.ErrorHandler = (*errorHandler)(nil)
+
+// Handle logs err at error level.
+func (h *errorHandler) Handle(err error) {
+	h.logger.V(logging.DEFAULT).Error(err, "trace error occurred")
+}
+
+// InitTracing installs a process-wide OpenTelemetry tracer provider, text map
+// propagator and error handler.
+//
+// Configuration is read from the environment:
+//   - OTEL_SERVICE_NAME and OTEL_EXPORTER_OTLP_ENDPOINT are set to the project
+//     defaults when absent.
+//   - OTEL_TRACES_EXPORTER selects the exporter, see initTraceExporter.
+//   - OTEL_TRACES_SAMPLER and OTEL_TRACES_SAMPLER_ARG select the sampler, see
+//     samplerFromEnv.
+//
+// The tracer provider is shut down from a background goroutine once ctx is
+// done. An error is returned when an environment default cannot be applied or
+// the exporter cannot be created.
+func InitTracing(ctx context.Context, logger logr.Logger) error {
+	logger = logger.WithName("trace")
+	loggerWrap := &errorHandler{logger: logger}
+
+	if err := setEnvDefault("OTEL_SERVICE_NAME", defaultServiceName); err != nil {
+		return err
+	}
+	if err := setEnvDefault("OTEL_EXPORTER_OTLP_ENDPOINT", defaultExporterEndpoint); err != nil {
+		return err
+	}
+
+	traceExporter, err := initTraceExporter(ctx, logger)
+	if err != nil {
+		// Return the wrapped error instead of also logging it here, so it is
+		// reported exactly once by the caller.
+		return fmt.Errorf("init trace exporter failed: %w", err)
+	}
+
+	tracerProvider := sdktrace.NewTracerProvider(
+		sdktrace.WithBatcher(traceExporter),
+		sdktrace.WithSampler(samplerFromEnv(loggerWrap)),
+		sdktrace.WithResource(resource.NewWithAttributes(
+			semconv.SchemaURL,
+			semconv.ServiceVersionKey.String(version.BuildRef),
+		)),
+	)
+	otel.SetTracerProvider(tracerProvider)
+	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(propagation.TraceContext{}, propagation.Baggage{}))
+	otel.SetErrorHandler(loggerWrap)
+
+	go func() {
+		<-ctx.Done()
+		logger.V(logging.DEFAULT).Info("trace provider shutting down")
+
+		// ctx is already done at this point, so flush with a fresh, bounded
+		// context.
+		shutdownCtx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
+		defer cancel()
+		if err := tracerProvider.Shutdown(shutdownCtx); err != nil {
+			loggerWrap.Handle(fmt.Errorf("%s: %w", "failed to shutdown TraceProvider", err))
+		}
+	}()
+
+	return nil
+}
+
+// setEnvDefault sets the environment variable key to value unless key is
+// already present in the environment, even with an empty value.
+func setEnvDefault(key, value string) error {
+	if _, ok := os.LookupEnv(key); ok {
+		return nil
+	}
+	if err := os.Setenv(key, value); err != nil {
+		return fmt.Errorf("set default for %s: %w", key, err)
+	}
+	return nil
+}
+
+// samplerFromEnv builds the sampler selected by OTEL_TRACES_SAMPLER and
+// OTEL_TRACES_SAMPLER_ARG.
+//
+// Only parentbased_traceidratio is supported. An unsupported sampler type, or
+// a ratio that is not a number, is reported through h and replaced by the
+// default parentbased_traceidratio sampler with a ratio of 0.1.
+func samplerFromEnv(h otel.ErrorHandler) sdktrace.Sampler {
+	samplerType, ok := os.LookupEnv("OTEL_TRACES_SAMPLER")
+	if !ok {
+		samplerType = defaultSamplerType
+	}
+	if samplerType != defaultSamplerType {
+		h.Handle(fmt.Errorf("unsupported sampler type: %s, fallback to parentbased_traceidratio with 0.1 Ratio", samplerType))
+		return sdktrace.ParentBased(sdktrace.TraceIDRatioBased(defaultSamplingRatio))
+	}
+
+	fraction := defaultSamplingRatio
+	if samplerArg, ok := os.LookupEnv("OTEL_TRACES_SAMPLER_ARG"); ok {
+		parsed, err := strconv.ParseFloat(samplerArg, 64)
+		switch {
+		case err != nil:
+			h.Handle(fmt.Errorf("invalid OTEL_TRACES_SAMPLER_ARG %q, fallback to 0.1 ratio: %w", samplerArg, err))
+		case math.IsNaN(parsed):
+			// ParseFloat accepts "NaN" without error, but NaN is not a usable
+			// sampling ratio.
+			h.Handle(fmt.Errorf("invalid OTEL_TRACES_SAMPLER_ARG %q, fallback to 0.1 ratio", samplerArg))
+		default:
+			fraction = parsed
+		}
+	}
+
+	return sdktrace.ParentBased(sdktrace.TraceIDRatioBased(fraction))
+}
+
+// initTraceExporter creates the SpanExporter selected by OTEL_TRACES_EXPORTER.
+//
+// Supported exporter types:
+//   - console: exports pretty-printed spans to the console for development
+//     use. This is the default when the variable is unset.
+//   - otlp: exports spans over gRPC to an OpenTelemetry collector.
+//
+// Any other value is logged and treated as console.
+func initTraceExporter(ctx context.Context, logger logr.Logger) (sdktrace.SpanExporter, error) {
+	exporterType, ok := os.LookupEnv("OTEL_TRACES_EXPORTER")
+	if !ok {
+		exporterType = exporterConsole
+	}
+	if exporterType != exporterConsole && exporterType != exporterOTLP {
+		logger.Info("unsupported OTel trace exporter type, falling back to console", "type", exporterType)
+		exporterType = exporterConsole
+	}
+
+	logger.Info("init OTel trace exporter", "type", exporterType)
+	if exporterType == exporterOTLP {
+		// NOTE(review): WithInsecure disables transport security for the
+		// collector connection; confirm this is intended outside of
+		// in-cluster or localhost collectors.
+		otlpExporter, err := otlptracegrpc.New(ctx, otlptracegrpc.WithInsecure())
+		if err != nil {
+			return nil, fmt.Errorf("failed to create otlp-grpc exporter: %w", err)
+		}
+		return otlpExporter, nil
+	}
+
+	// Only build the console exporter when it is actually used; creating it
+	// unconditionally left an unused exporter behind in the otlp case.
+	consoleExporter, err := stdouttrace.New(stdouttrace.WithPrettyPrint())
+	if err != nil {
+		return nil, fmt.Errorf("failed to create stdouttrace exporter: %w", err)
+	}
+	return consoleExporter, nil
+}