update README.md

Frapschen · Frapschen · commit 81e1af743660 · 2025-09-30T12:35:40.000+08:00
diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md
@@ -166,31 +166,34 @@ $ helm uninstall pool-1
 
 The following table lists the configurable parameters of the chart.
 
-| **Parameter Name**                                      | **Description**                                                                                                        |
-|---------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
-| `inferencePool.apiVersion`                              | The API version of the InferencePool resource. Defaults to `inference.networking.k8s.io/v1`. This can be changed to `inference.networking.x-k8s.io/v1alpha2` to support older API versions. |
-| `inferencePool.targetPortNumber`                        | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000. |
-| `inferencePool.modelServerType`                         | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm. |
-| `inferencePool.modelServers.matchLabels`                | Label selector to match vllm backends managed by the inference pool.                                                   |
-| `inferenceExtension.replicas`                           | Number of replicas for the endpoint picker extension service. If More than one replica is used, EPP will run in HA active-passive mode. Defaults to `1`.                                         |
-| `inferenceExtension.image.name`                         | Name of the container image used for the endpoint picker.                                                              |
-| `inferenceExtension.image.hub`                          | Registry URL where the endpoint picker image is hosted.                                                                |
-| `inferenceExtension.image.tag`                          | Image tag of the endpoint picker.                                                                                      |
-| `inferenceExtension.image.pullPolicy`                   | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`.      |
-| `inferenceExtension.env`                                | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`.             |
-| `inferenceExtension.extraContainerPorts`                | List of additional container ports to expose. Defaults to `[]`.                                                        |
-| `inferenceExtension.extraServicePorts`                  | List of additional service ports to expose. Defaults to `[]`.                                                          |
-| `inferenceExtension.flags`                              | List of flags which are passed through to endpoint picker. Example flags, enable-pprof, grpc-port etc. Refer [runner.go](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/cmd/epp/runner/runner.go) for complete list.                                                            |
-| `inferenceExtension.affinity`                           | Affinity for the endpoint picker. Defaults to `{}`.                                                                    |
-| `inferenceExtension.tolerations`                        | Tolerations for the endpoint picker. Defaults to `[]`.                                                                 |   |
-| `inferenceExtension.monitoring.interval`                | Metrics scraping interval for monitoring. Defaults to `10s`.                                                           |
-| `inferenceExtension.monitoring.secret.name`             | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. |
-| `inferenceExtension.monitoring.prometheus.enabled`      | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`.                      |
-| `inferenceExtension.monitoring.gke.enabled`             | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. |
-| `inferenceExtension.pluginsCustomConfig`                | Custom config that is passed to EPP as inline yaml.      |
-| `inferenceExtension.trace.enabled`                      | Enables or disables OpenTelemetry tracing globally for the EndpointPicker.   |
-| `provider.name`                                         | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`.                   |
-| `provider.gke.autopilot`                                | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. |
+| **Parameter Name**                                 | **Description**                                                                                                                                                                                                                                                                                                                       |
+|----------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `inferencePool.apiVersion`                         | The API version of the InferencePool resource. Defaults to `inference.networking.k8s.io/v1`. This can be changed to `inference.networking.x-k8s.io/v1alpha2` to support older API versions.                                                                                                                                           |
+| `inferencePool.targetPortNumber`                   | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000.                                                                                                                                                                                                                |
+| `inferencePool.modelServerType`                    | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm.                                                                                                                                                                                                                                |
+| `inferencePool.modelServers.matchLabels`           | Label selector to match vllm backends managed by the inference pool.                                                                                                                                                                                                                                                                  |
+| `inferenceExtension.replicas`                      | Number of replicas for the endpoint picker extension service. If More than one replica is used, EPP will run in HA active-passive mode. Defaults to `1`.                                                                                                                                                                              |
+| `inferenceExtension.image.name`                    | Name of the container image used for the endpoint picker.                                                                                                                                                                                                                                                                             |
+| `inferenceExtension.image.hub`                     | Registry URL where the endpoint picker image is hosted.                                                                                                                                                                                                                                                                               |
+| `inferenceExtension.image.tag`                     | Image tag of the endpoint picker.                                                                                                                                                                                                                                                                                                     |
+| `inferenceExtension.image.pullPolicy`              | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`.                                                                                                                                                                                                                     |
+| `inferenceExtension.env`                           | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`.                                                                                                                                                                                                                            |
+| `inferenceExtension.extraContainerPorts`           | List of additional container ports to expose. Defaults to `[]`.                                                                                                                                                                                                                                                                       |
+| `inferenceExtension.extraServicePorts`             | List of additional service ports to expose. Defaults to `[]`.                                                                                                                                                                                                                                                                         |
+| `inferenceExtension.flags`                         | List of flags which are passed through to endpoint picker. Example flags: enable-pprof, grpc-port, etc. Refer to [runner.go](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/cmd/epp/runner/runner.go) for the complete list.                                                                                                                                                                              |
+| `inferenceExtension.affinity`                      | Affinity for the endpoint picker. Defaults to `{}`.                                                                                                                                                                                                                                                                                   |
+| `inferenceExtension.tolerations`                   | Tolerations for the endpoint picker. Defaults to `[]`.                                                                                                                                                                                                                                                                                |
+| `inferenceExtension.monitoring.interval`           | Metrics scraping interval for monitoring. Defaults to `10s`.                                                                                                                                                                                                                                                                          |
+| `inferenceExtension.monitoring.secret.name`        | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`.                                                                                                                                                                                                        |
+| `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`.                                                                                                                                                                                                                                            |
+| `inferenceExtension.monitoring.gke.enabled`        | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`.                                                                                                                                                                                                                                                      |
+| `inferenceExtension.pluginsCustomConfig`           | Custom config that is passed to EPP as inline yaml.                                                                                                                                                                                                                                                                                   |
+| `inferenceExtension.tracing.enabled`               | Enables or disables OpenTelemetry tracing globally for the EndpointPicker.                                                                                                                                                                                                                                                            |
+| `inferenceExtension.tracing.otelExporterEndpoint`  | OpenTelemetry collector endpoint.                                                                                                                                                                                                                                                                                                     |
+| `inferenceExtension.tracing.sampling.sampler`      | The trace sampler to use. Currently, only `parentbased_traceidratio` is supported. This sampler respects the parent span’s sampling decision when present, and applies the configured ratio for root spans.                                                                                                                           |
+| `inferenceExtension.tracing.sampling.samplerArg`   | Sampler-specific argument. For `parentbased_traceidratio`, this defines the base sampling rate for new traces (root spans), as a float string in the range [0.0, 1.0]. For example, "0.1" enables 10% sampling.                                                                                                                       |
+| `provider.name`                                    | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`.                                                                                                                                                                                                            |
+| `provider.gke.autopilot`                           | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`.                                                                                                                                                                                                          |
 
 ### Provider Specific Configuration
 
@@ -215,10 +218,20 @@ These are the options available to you with `provider.name` set to `istio`:
 | `istio.destinationRule.host`            | Custom host value for the destination rule. If not set, this will use the default value which is derived from the epp service name and release namespace to generate a valid service address. |
 | `istio.destinationRule.trafficPolicy.connectionPool`            | Configure the connectionPool level settings of the traffic policy |
 
-## OpenTelemetry
+#### OpenTelemetry
 
-The EndpointPicker supports OpenTelemetry-based tracing. To enable it, use `--set inferenceExtension.trace.enabled=true`
-and configure the correct OpenTelemetry collector endpoint via the environment variable `OTEL_EXPORTER_OTLP_ENDPOINT` in `inferenceExtension.env`.
+The EndpointPicker supports OpenTelemetry-based tracing. To enable trace collection, use the following configuration:
+```yaml
+inferenceExtension:
+  tracing:
+    enabled: true
+    otelExporterEndpoint: "http://localhost:4317"
+    sampling:
+      sampler: "parentbased_traceidratio"
+      samplerArg: "0.1"
+```
+Make sure that the `otelExporterEndpoint` points to your OpenTelemetry collector endpoint. 
+Currently, only the `parentbased_traceidratio` sampler is supported. You can adjust the base sampling ratio using the `samplerArg` (e.g., 0.1 means 10% of traces will be sampled).
 
 ## Notes
 
diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml
@@ -63,7 +63,7 @@ spec:
         - "{{ .value }}"
         {{- end }}
         - "--tracing"
-        {{- if .Values.inferenceExtension.trace.enabled }}
+        {{- if .Values.inferenceExtension.tracing.enabled }}
         - "true"
         {{- else }}
         - "false"
@@ -107,11 +107,11 @@ spec:
           valueFrom:
             fieldRef:
               fieldPath: metadata.namespace
-        {{- if .Values.inferenceExtension.trace.enabled }}
+        {{- if .Values.inferenceExtension.tracing.enabled }}
         - name: OTEL_SERVICE_NAME
           value: "gateway-api-inference-extension"
         - name: OTEL_EXPORTER_OTLP_ENDPOINT
-          value: {{ .Values.inferenceExtension.trace.otelExporterEndpoint | default "http://localhost:4317" | quote }}
+          value: {{ .Values.inferenceExtension.tracing.otelExporterEndpoint | default "http://localhost:4317" | quote }}
         - name: OTEL_TRACES_EXPORTER
           value: "otlp"
         - name: OTEL_RESOURCE_ATTRIBUTES_NODE_NAME
@@ -127,9 +127,9 @@ spec:
         - name: OTEL_RESOURCE_ATTRIBUTES
           value: 'k8s.namespace.name=$(NAMESPACE),k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME)'
         - name: OTEL_TRACES_SAMPLER
-          value: {{ .Values.inferenceExtension.trace.sampling.sampler | default "parentbased_traceidratio" | quote }}
+          value: {{ .Values.inferenceExtension.tracing.sampling.sampler | default "parentbased_traceidratio" | quote }}
         - name: OTEL_TRACES_SAMPLER_ARG
-          value: {{ .Values.inferenceExtension.trace.sampling.samplerArg | default "0.1" | quote }}
+          value: {{ .Values.inferenceExtension.tracing.sampling.samplerArg | default "0.1" | quote }}
         {{- end }}
         {{- if .Values.inferenceExtension.env }}
         {{- toYaml .Values.inferenceExtension.env | nindent 8 }}
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
@@ -53,7 +53,7 @@ inferenceExtension:
     
     gke:
       enabled: false
-  trace:
+  tracing:
     enabled: false
     otelExporterEndpoint: "http://localhost:4317"
     sampling:
diff --git a/pkg/common/telemetry.go b/pkg/common/telemetry.go
@@ -60,7 +60,7 @@ func InitTracing(ctx context.Context, logger logr.Logger) error {
 	traceExporter, err := initTraceExporter(ctx, logger)
 	if err != nil {
 		loggerWrap.Handle(fmt.Errorf("%s: %v", "init trace exporter fail", err))
-		return nil
+		return err
 	}
 
 	// Go SDK doesn't have an automatic sampler, handle manually

Original file line number	Diff line number	Diff line change
`@@ -60,7 +60,7 @@ func InitTracing(ctx context.Context, logger logr.Logger) error {`
`60`	`60`	`traceExporter, err := initTraceExporter(ctx, logger)`
`61`	`61`	`if err != nil {`
`62`	`62`	`loggerWrap.Handle(fmt.Errorf("%s: %v", "init trace exporter fail", err))`
`63`		`- return nil`
	`63`	`+ return err`
`64`	`64`	`}`
`65`	`65`
`66`	`66`	`// Go SDK doesn't have an automatic sampler, handle manually`