apply review's suggestion

Frapschen · Frapschen · commit 10dfec1de415 · 2025-09-24T17:06:42.000+08:00
diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md
@@ -166,30 +166,31 @@ $ helm uninstall pool-1
 
 The following table list the configurable parameters of the chart.
 
-| **Parameter Name**                          | **Description**                                                                                                        |
-|---------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
-| `inferencePool.apiVersion`                  | The API version of the InferencePool resource. Defaults to `inference.networking.k8s.io/v1`. This can be changed to `inference.networking.x-k8s.io/v1alpha2` to support older API versions. |
-| `inferencePool.targetPortNumber`            | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000. |
-| `inferencePool.modelServerType`            | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm. |
-| `inferencePool.modelServers.matchLabels`    | Label selector to match vllm backends managed by the inference pool.                                                   |
-| `inferenceExtension.replicas`               | Number of replicas for the endpoint picker extension service. If More than one replica is used, EPP will run in HA active-passive mode. Defaults to `1`.                                         |
-| `inferenceExtension.image.name`             | Name of the container image used for the endpoint picker.                                                              |
-| `inferenceExtension.image.hub`              | Registry URL where the endpoint picker image is hosted.                                                                |
-| `inferenceExtension.image.tag`              | Image tag of the endpoint picker.                                                                                      |
-| `inferenceExtension.image.pullPolicy`       | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`.      |
-| `inferenceExtension.env`                    | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`.             |
-| `inferenceExtension.extraContainerPorts`    | List of additional container ports to expose. Defaults to `[]`.                                                        |
-| `inferenceExtension.extraServicePorts`      | List of additional service ports to expose. Defaults to `[]`.                                                          |
-| `inferenceExtension.flags`                  | List of flags which are passed through to endpoint picker. Example flags, enable-pprof, grpc-port etc. Refer [runner.go](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/cmd/epp/runner/runner.go) for complete list.                                                            |
-| `inferenceExtension.affinity`               | Affinity for the endpoint picker. Defaults to `{}`.                                                                    |
-| `inferenceExtension.tolerations`            | Tolerations for the endpoint picker. Defaults to `[]`.                                                                 |   |
-| `inferenceExtension.monitoring.interval`   | Metrics scraping interval for monitoring. Defaults to `10s`.                                                           |
-| `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. |
-| `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`.                      |
-| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. |
-| `inferenceExtension.pluginsCustomConfig`    | Custom config that is passed to EPP as inline yaml.      |
-| `provider.name`                             | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`.                   |
-| `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. |
+| **Parameter Name**                                      | **Description**                                                                                                        |
+|---------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
+| `inferencePool.apiVersion`                              | The API version of the InferencePool resource. Defaults to `inference.networking.k8s.io/v1`. This can be changed to `inference.networking.x-k8s.io/v1alpha2` to support older API versions. |
+| `inferencePool.targetPortNumber`                        | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000. |
+| `inferencePool.modelServerType`                         | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm. |
+| `inferencePool.modelServers.matchLabels`                | Label selector to match vllm backends managed by the inference pool.                                                   |
+| `inferenceExtension.replicas`                           | Number of replicas for the endpoint picker extension service. If more than one replica is used, EPP will run in HA active-passive mode. Defaults to `1`.                                         |
+| `inferenceExtension.image.name`                         | Name of the container image used for the endpoint picker.                                                              |
+| `inferenceExtension.image.hub`                          | Registry URL where the endpoint picker image is hosted.                                                                |
+| `inferenceExtension.image.tag`                          | Image tag of the endpoint picker.                                                                                      |
+| `inferenceExtension.image.pullPolicy`                   | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`.      |
+| `inferenceExtension.env`                                | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`.             |
+| `inferenceExtension.extraContainerPorts`                | List of additional container ports to expose. Defaults to `[]`.                                                        |
+| `inferenceExtension.extraServicePorts`                  | List of additional service ports to expose. Defaults to `[]`.                                                          |
+| `inferenceExtension.flags`                              | List of flags which are passed through to the endpoint picker. Example flags: `enable-pprof`, `grpc-port`, etc. Refer to [runner.go](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/cmd/epp/runner/runner.go) for the complete list.                                                            |
+| `inferenceExtension.affinity`                           | Affinity for the endpoint picker. Defaults to `{}`.                                                                    |
+| `inferenceExtension.tolerations`                        | Tolerations for the endpoint picker. Defaults to `[]`.                                                                 |
+| `inferenceExtension.monitoring.interval`                | Metrics scraping interval for monitoring. Defaults to `10s`.                                                           |
+| `inferenceExtension.monitoring.secret.name`             | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. |
+| `inferenceExtension.monitoring.prometheus.enabled`      | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`.                      |
+| `inferenceExtension.monitoring.gke.enabled`             | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. |
+| `inferenceExtension.pluginsCustomConfig`                | Custom config that is passed to EPP as inline yaml.      |
+| `inferenceExtension.trace.enabled`                      | Enables or disables OpenTelemetry tracing globally for the EndpointPicker. Defaults to `false`.   |
+| `provider.name`                                         | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`.                   |
+| `provider.gke.autopilot`                                | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. |
 
 ### Provider Specific Configuration
 
@@ -214,17 +215,10 @@ These are the options available to you with `provider.name` set to `istio`:
 | `istio.destinationRule.host`            | Custom host value for the destination rule. If not set this will use the default value which is derrived from the epp service name and release namespace to gerenate a valid service address. |
 | `istio.destinationRule.trafficPolicy.connectionPool`            | Configure the connectionPool level settings of the traffic policy |
 
-### Opentelemetry
-
-he following table list the configurable parameters of opentelemetry trace.
-
-
-| **Parameter Name**             | **Description**                                                              |
-|--------------------------------|------------------------------------------------------------------------------|
-| `opentelemetry.enabled`        | Enables or disables OpenTelemetry tracing globally for the EndpointPicker.   |
-| `opentelemetry.autoENVInject.CRInstanceName` | Controls the behavior of opentelemetry-operator auto-instrument.            |
-| `opentelemetry.env`        | A list of environment variables to manually configure the OpenTelemetry SDK. |
+## OpenTelemetry
 
+The EndpointPicker supports OpenTelemetry-based tracing. To enable it, use `--set inferenceExtension.trace.enabled=true`
+and configure the correct OpenTelemetry collector endpoint via the environment variable `OTEL_EXPORTER_OTLP_ENDPOINT` in `inferenceExtension.env`.
 
 ## Notes
 
diff --git a/config/charts/inferencepool/templates/_helpers.tpl b/config/charts/inferencepool/templates/_helpers.tpl
@@ -31,3 +31,22 @@ Selector labels
 {{- define "gateway-api-inference-extension.selectorLabels" -}}
 inferencepool: {{ include "gateway-api-inference-extension.name" . }}
 {{- end -}}
+
+
+{{/*
+Generate the environment variable list for the inference extension container.
+
+Iterates over .Values.inferenceExtension.env and renders each entry as a YAML
+list item carrying its `name` plus either `value` (emitted as a double-quoted
+string) or, when `value` is empty, `valueFrom` (rendered via toYaml).
+An entry with neither a non-empty `value` nor a `valueFrom` renders with its
+`name` only.
+
+Entries whose name starts with "OTEL_" are excluded when
+.Values.inferenceExtension.trace.enabled is false or unset, so tracing
+configuration is only passed to the container when tracing is enabled.
+*/}}
+{{- define "inferenceExtension.envs" -}}
+{{- range .Values.inferenceExtension.env }}
+{{- if and (not $.Values.inferenceExtension.trace.enabled) (hasPrefix "OTEL_" .name) }}
+{{- else }}
+- name: {{ .name }}
+  {{- if .value }}
+  value: "{{ .value }}"
+  {{- else if .valueFrom }}
+  valueFrom: {{ .valueFrom | toYaml | nindent 4 }}
+  {{- end }}
+{{- end }}
+{{- end }}
+{{- end -}}
diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml
@@ -22,10 +22,6 @@ spec:
     metadata:
       labels:
         {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
-      annotations:
-        {{- if and .Values.opentelemetry.enabled }}
-        instrumentation.opentelemetry.io/inject-sdk: {{ .Values.opentelemetry.autoENVInject.CRInstanceName | quote }}
-        {{- end }}
     spec:
       serviceAccountName: {{ include "gateway-api-inference-extension.name" . }}
       # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
@@ -67,7 +63,11 @@ spec:
         - "{{ .value }}"
         {{- end }}
         - "--tracing"
+        {{- if .Values.inferenceExtension.trace.enabled }}
+        - "true"
+        {{- else }}
         - "false"
+        {{- end }}
         ports:
         - name: grpc
           containerPort: 9002
@@ -107,12 +107,7 @@ spec:
           valueFrom:
             fieldRef:
               fieldPath: metadata.namespace
-        {{- if .Values.inferenceExtension.env }}
-        {{- toYaml .Values.inferenceExtension.env | nindent 8 }}
-        {{- end }}
-        {{- if and .Values.opentelemetry.enabled .Values.opentelemetry.env }}
-        {{- toYaml .Values.opentelemetry.env | nindent 8 }}
-        {{- end }}
+        {{- include "inferenceExtension.envs" . | nindent 8 }}
         volumeMounts:
         - name: plugins-config-volume
           mountPath: "/config"
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
@@ -6,7 +6,29 @@ inferenceExtension:
     tag: main
     pullPolicy: Always
   extProcPort: 9002
-  env: []
+  env:
+    # The default OTEL_* environment variables are used to configure the behaviour of the OTel SDK.
+    # If opentelemetry-operator auto-instrumentation is also enabled, its automatic env injection will be skipped by opentelemetry-operator.
+    - name: OTEL_EXPORTER_OTLP_ENDPOINT
+      value: "http://localhost:4317"
+    - name: OTEL_SERVICE_NAME
+      value: "gateway-api-inference-extension"
+    - name: OTEL_RESOURCE_ATTRIBUTES_NODE_NAME
+      valueFrom:
+        fieldRef:
+          apiVersion: v1
+          fieldPath: spec.nodeName
+    - name: OTEL_RESOURCE_ATTRIBUTES_POD_NAME
+      valueFrom:
+        fieldRef:
+          apiVersion: v1
+          fieldPath: metadata.name
+    - name: OTEL_RESOURCE_ATTRIBUTES
+      value: 'k8s.namespace.name=$(NAMESPACE),k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME)'
+    - name: OTEL_TRACES_SAMPLER
+      value: "parentbased_traceidratio"
+    - name: OTEL_TRACES_SAMPLER_ARG
+      value: "0.1"
   pluginsConfigFile: "default-plugins.yaml"
   # Define additional container ports
   extraContainerPorts: []
@@ -53,6 +75,8 @@ inferenceExtension:
     
     gke:
       enabled: false
+  trace:
+    enabled: false
 
 inferencePool:
   targetPorts:
@@ -85,35 +109,4 @@ istio:
     trafficPolicy: {}
       # connectionPool:
       #   http:
-      #     maxRequestsPerConnection: 256000
-
-opentelemetry:
-  enabled: true
-  # With this setting you can send trace to the exist opentelemetry collector based on opentelemetry-operator
-  # See https://github.com/open-telemetry/opentelemetry-operator?tab=readme-ov-file#opentelemetry-auto-instrumentation-injection
-  autoENVInject:
-    # The possible values for the annotation can be
-    # "true" - inject and Instrumentation resource from the namespace.
-    # "my-instrumentation" - name of Instrumentation CR instance in the current namespace.
-    # "my-other-namespace/my-instrumentation" - name and namespace of Instrumentation CR instance in
-    # "false" - do not inject
-    CRInstanceName: "false"
-  # Add the required OTel environment manually
-  # If you also enabled autoENVInject setting, the auto env inject will be skipped by opentelemetry-operator,
-  env:
-    - name: OTEL_EXPORTER_OTLP_ENDPOINT
-      value: "http://localhost:4317"
-    - name: OTEL_SERVICE_NAME
-      value: "gateway-api-inference-extension"
-    - name: OTEL_RESOURCE_ATTRIBUTES_NODE_NAME
-      valueFrom:
-        fieldRef:
-          apiVersion: v1
-          fieldPath: spec.nodeName
-    - name: OTEL_RESOURCE_ATTRIBUTES_POD_NAME
-      valueFrom:
-        fieldRef:
-          apiVersion: v1
-          fieldPath: metadata.name
-    - name: OTEL_RESOURCE_ATTRIBUTES
-      value: 'k8s.namespace.name=$(NAMESPACE),k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME)'
+      #     maxRequestsPerConnection: 256000
diff --git a/pkg/common/traces.go b/pkg/common/traces.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"os"
+	"strconv"
 
 	"github.com/go-logr/logr"
 	"go.opentelemetry.io/otel"
@@ -40,6 +41,27 @@ func InitTracing(ctx context.Context, logger logr.Logger) error {
 		os.Setenv("OTEL_EXPORTER_OTLP_ENDPOINT", collectorAddr)
 	}
 
+	// Go SDK doesn't have an automatic sampler, handle manually
+	samplerType, ok := os.LookupEnv("OTEL_TRACES_SAMPLER")
+	if !ok {
+		samplerType = "parentbased_traceidratio"
+	}
+	samplerARG, ok := os.LookupEnv("OTEL_TRACES_SAMPLER_ARG")
+	if !ok {
+		samplerARG = "0.1"
+	}
+
+	sampler := sdktrace.ParentBased(sdktrace.TraceIDRatioBased(0.1))
+	if samplerType == "parentbased_traceidratio" {
+		fraction, err := strconv.ParseFloat(samplerARG, 64)
+		if err != nil {
+			fraction = 0.1
+		}
+		sampler = sdktrace.ParentBased(sdktrace.TraceIDRatioBased(fraction))
+	} else {
+		loggerWrap.Handle(fmt.Errorf("un supported sampler type: %s, fallback to parentbased_traceidratio with 0.1 Ratio", samplerType))
+	}
+
 	traceExporter, err := otlptracegrpc.New(ctx, otlptracegrpc.WithInsecure())
 	if err != nil {
 		loggerWrap.Handle(fmt.Errorf("%s: %v", "new OTel trace gRPC exporter fail", err))
@@ -49,7 +71,7 @@ func InitTracing(ctx context.Context, logger logr.Logger) error {
 	logger.Info(fmt.Sprintf("OTel trace exporter connect to: %s with service name: %s", collectorAddr, serviceName))
 	opt := []sdktrace.TracerProviderOption{
 		sdktrace.WithBatcher(traceExporter),
-		sdktrace.WithSampler(sdktrace.ParentBased(sdktrace.AlwaysSample())),
+		sdktrace.WithSampler(sampler),
 		sdktrace.WithResource(resource.NewWithAttributes(
 			semconv.SchemaURL,
 			semconv.ServiceVersionKey.String(version.BuildRef),