Skip to content

Commit 10dfec1

Browse files
committed
apply review's suggestion
1 parent ee8cbcc commit 10dfec1

File tree

5 files changed

+101
-78
lines changed

5 files changed

+101
-78
lines changed

config/charts/inferencepool/README.md

Lines changed: 28 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -166,30 +166,31 @@ $ helm uninstall pool-1
166166

167167
The following table lists the configurable parameters of the chart.
168168

169-
| **Parameter Name** | **Description** |
170-
|---------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
171-
| `inferencePool.apiVersion` | The API version of the InferencePool resource. Defaults to `inference.networking.k8s.io/v1`. This can be changed to `inference.networking.x-k8s.io/v1alpha2` to support older API versions. |
172-
| `inferencePool.targetPortNumber` | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000. |
173-
| `inferencePool.modelServerType` | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm. |
174-
| `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. |
175-
| `inferenceExtension.replicas` | Number of replicas for the endpoint picker extension service. If More than one replica is used, EPP will run in HA active-passive mode. Defaults to `1`. |
176-
| `inferenceExtension.image.name` | Name of the container image used for the endpoint picker. |
177-
| `inferenceExtension.image.hub` | Registry URL where the endpoint picker image is hosted. |
178-
| `inferenceExtension.image.tag` | Image tag of the endpoint picker. |
179-
| `inferenceExtension.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. |
180-
| `inferenceExtension.env` | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`. |
181-
| `inferenceExtension.extraContainerPorts` | List of additional container ports to expose. Defaults to `[]`. |
182-
| `inferenceExtension.extraServicePorts` | List of additional service ports to expose. Defaults to `[]`. |
183-
| `inferenceExtension.flags` | List of flags which are passed through to endpoint picker. Example flags, enable-pprof, grpc-port etc. Refer [runner.go](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/cmd/epp/runner/runner.go) for complete list. |
184-
| `inferenceExtension.affinity` | Affinity for the endpoint picker. Defaults to `{}`. |
185-
| `inferenceExtension.tolerations` | Tolerations for the endpoint picker. Defaults to `[]`. | |
186-
| `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. |
187-
| `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. |
188-
| `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. |
189-
| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. |
190-
| `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. |
191-
| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`. |
192-
| `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. |
169+
| **Parameter Name** | **Description** |
170+
|---------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
171+
| `inferencePool.apiVersion` | The API version of the InferencePool resource. Defaults to `inference.networking.k8s.io/v1`. This can be changed to `inference.networking.x-k8s.io/v1alpha2` to support older API versions. |
172+
| `inferencePool.targetPortNumber` | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000. |
173+
| `inferencePool.modelServerType` | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm. |
174+
| `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. |
175+
| `inferenceExtension.replicas` | Number of replicas for the endpoint picker extension service. If more than one replica is used, EPP will run in HA active-passive mode. Defaults to `1`. |
176+
| `inferenceExtension.image.name` | Name of the container image used for the endpoint picker. |
177+
| `inferenceExtension.image.hub` | Registry URL where the endpoint picker image is hosted. |
178+
| `inferenceExtension.image.tag` | Image tag of the endpoint picker. |
179+
| `inferenceExtension.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. |
180+
| `inferenceExtension.env` | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`. |
181+
| `inferenceExtension.extraContainerPorts` | List of additional container ports to expose. Defaults to `[]`. |
182+
| `inferenceExtension.extraServicePorts` | List of additional service ports to expose. Defaults to `[]`. |
183+
| `inferenceExtension.flags` | List of flags that are passed through to the endpoint picker, for example `enable-pprof` or `grpc-port`. Refer to [runner.go](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/cmd/epp/runner/runner.go) for the complete list. |
184+
| `inferenceExtension.affinity` | Affinity for the endpoint picker. Defaults to `{}`. |
185+
| `inferenceExtension.tolerations` | Tolerations for the endpoint picker. Defaults to `[]`. |
186+
| `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. |
187+
| `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. |
188+
| `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. |
189+
| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. |
190+
| `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. |
191+
| `inferenceExtension.trace.enabled` | Enables or disables OpenTelemetry tracing globally for the EndpointPicker. Defaults to `false`. |
192+
| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`. |
193+
| `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. |
193194

194195
### Provider Specific Configuration
195196

@@ -214,17 +215,10 @@ These are the options available to you with `provider.name` set to `istio`:
214215
| `istio.destinationRule.host` | Custom host value for the destination rule. If not set, this will use the default value, which is derived from the EPP service name and release namespace to generate a valid service address. |
215216
| `istio.destinationRule.trafficPolicy.connectionPool` | Configure the connectionPool level settings of the traffic policy |
216217

217-
### Opentelemetry
218-
219-
he following table list the configurable parameters of opentelemetry trace.
220-
221-
222-
| **Parameter Name** | **Description** |
223-
|--------------------------------|------------------------------------------------------------------------------|
224-
| `opentelemetry.enabled` | Enables or disables OpenTelemetry tracing globally for the EndpointPicker. |
225-
| `opentelemetry.autoENVInject.CRInstanceName` | Controls the behavior of opentelemetry-operator auto-instrument. |
226-
| `opentelemetry.env` | A list of environment variables to manually configure the OpenTelemetry SDK. |
218+
## OpenTelemetry
227219

220+
The EndpointPicker supports OpenTelemetry-based tracing. To enable it, use `--set inferenceExtension.trace.enabled=true`
221+
and configure the correct OpenTelemetry collector endpoint via the environment variable `OTEL_EXPORTER_OTLP_ENDPOINT` in `inferenceExtension.env`.
228222

229223
## Notes
230224

config/charts/inferencepool/templates/_helpers.tpl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,22 @@ Selector labels
3131
{{- define "gateway-api-inference-extension.selectorLabels" -}}
3232
inferencepool: {{ include "gateway-api-inference-extension.name" . }}
3333
{{- end -}}
34+
35+
36+
{{/*
37+
Generate environment variable list for inference extension
38+
Exclude OTEL_ prefixed environment variables when tracing is not enabled
39+
*/}}
40+
{{- define "inferenceExtension.envs" -}}
41+
{{- range .Values.inferenceExtension.env }}
42+
{{- if and (not $.Values.inferenceExtension.trace.enabled) (hasPrefix "OTEL_" .name) }}
43+
{{- else }}
44+
- name: {{ .name }}
45+
{{- if .value }}
46+
value: "{{ .value }}"
47+
{{- else if .valueFrom }}
48+
valueFrom: {{ .valueFrom | toYaml | nindent 4 }}
49+
{{- end }}
50+
{{- end }}
51+
{{- end }}
52+
{{- end -}}

config/charts/inferencepool/templates/epp-deployment.yaml

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,6 @@ spec:
2222
metadata:
2323
labels:
2424
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
25-
annotations:
26-
{{- if and .Values.opentelemetry.enabled }}
27-
instrumentation.opentelemetry.io/inject-sdk: {{ .Values.opentelemetry.autoENVInject.CRInstanceName | quote }}
28-
{{- end }}
2925
spec:
3026
serviceAccountName: {{ include "gateway-api-inference-extension.name" . }}
3127
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
@@ -67,7 +63,11 @@ spec:
6763
- "{{ .value }}"
6864
{{- end }}
6965
- "--tracing"
66+
{{- if .Values.inferenceExtension.trace.enabled }}
67+
- "true"
68+
{{- else }}
7069
- "false"
70+
{{- end }}
7171
ports:
7272
- name: grpc
7373
containerPort: 9002
@@ -107,12 +107,7 @@ spec:
107107
valueFrom:
108108
fieldRef:
109109
fieldPath: metadata.namespace
110-
{{- if .Values.inferenceExtension.env }}
111-
{{- toYaml .Values.inferenceExtension.env | nindent 8 }}
112-
{{- end }}
113-
{{- if and .Values.opentelemetry.enabled .Values.opentelemetry.env }}
114-
{{- toYaml .Values.opentelemetry.env | nindent 8 }}
115-
{{- end }}
110+
{{- include "inferenceExtension.envs" . | nindent 8 }}
116111
volumeMounts:
117112
- name: plugins-config-volume
118113
mountPath: "/config"

config/charts/inferencepool/values.yaml

Lines changed: 26 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,29 @@ inferenceExtension:
66
tag: main
77
pullPolicy: Always
88
extProcPort: 9002
9-
env: []
9+
env:
10+
# The default OTEL_* environment variables below configure the behaviour of the OTel SDK.
11+
# If opentelemetry-operator auto-instrumentation is also enabled for this pod, the operator will skip its automatic env injection.
12+
- name: OTEL_EXPORTER_OTLP_ENDPOINT
13+
value: "http://localhost:4317"
14+
- name: OTEL_SERVICE_NAME
15+
value: "gateway-api-inference-extension"
16+
- name: OTEL_RESOURCE_ATTRIBUTES_NODE_NAME
17+
valueFrom:
18+
fieldRef:
19+
apiVersion: v1
20+
fieldPath: spec.nodeName
21+
- name: OTEL_RESOURCE_ATTRIBUTES_POD_NAME
22+
valueFrom:
23+
fieldRef:
24+
apiVersion: v1
25+
fieldPath: metadata.name
26+
- name: OTEL_RESOURCE_ATTRIBUTES
27+
value: 'k8s.namespace.name=$(NAMESPACE),k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME)'
28+
- name: OTEL_TRACES_SAMPLER
29+
value: "parentbased_traceidratio"
30+
- name: OTEL_TRACES_SAMPLER_ARG
31+
value: "0.1"
1032
pluginsConfigFile: "default-plugins.yaml"
1133
# Define additional container ports
1234
extraContainerPorts: []
@@ -53,6 +75,8 @@ inferenceExtension:
5375

5476
gke:
5577
enabled: false
78+
trace:
79+
enabled: false
5680

5781
inferencePool:
5882
targetPorts:
@@ -85,35 +109,4 @@ istio:
85109
trafficPolicy: {}
86110
# connectionPool:
87111
# http:
88-
# maxRequestsPerConnection: 256000
89-
90-
opentelemetry:
91-
enabled: true
92-
# With this setting you can send trace to the exist opentelemetry collector based on opentelemetry-operator
93-
# See https://github.com/open-telemetry/opentelemetry-operator?tab=readme-ov-file#opentelemetry-auto-instrumentation-injection
94-
autoENVInject:
95-
# The possible values for the annotation can be
96-
# "true" - inject and Instrumentation resource from the namespace.
97-
# "my-instrumentation" - name of Instrumentation CR instance in the current namespace.
98-
# "my-other-namespace/my-instrumentation" - name and namespace of Instrumentation CR instance in
99-
# "false" - do not inject
100-
CRInstanceName: "false"
101-
# Add the required OTel environment manually
102-
# If you also enabled autoENVInject setting, the auto env inject will be skipped by opentelemetry-operator,
103-
env:
104-
- name: OTEL_EXPORTER_OTLP_ENDPOINT
105-
value: "http://localhost:4317"
106-
- name: OTEL_SERVICE_NAME
107-
value: "gateway-api-inference-extension"
108-
- name: OTEL_RESOURCE_ATTRIBUTES_NODE_NAME
109-
valueFrom:
110-
fieldRef:
111-
apiVersion: v1
112-
fieldPath: spec.nodeName
113-
- name: OTEL_RESOURCE_ATTRIBUTES_POD_NAME
114-
valueFrom:
115-
fieldRef:
116-
apiVersion: v1
117-
fieldPath: metadata.name
118-
- name: OTEL_RESOURCE_ATTRIBUTES
119-
value: 'k8s.namespace.name=$(NAMESPACE),k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME)'
112+
# maxRequestsPerConnection: 256000

pkg/common/traces.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"os"
7+
"strconv"
78

89
"github.com/go-logr/logr"
910
"go.opentelemetry.io/otel"
@@ -40,6 +41,27 @@ func InitTracing(ctx context.Context, logger logr.Logger) error {
4041
os.Setenv("OTEL_EXPORTER_OTLP_ENDPOINT", collectorAddr)
4142
}
4243

44+
// Go SDK doesn't have an automatic sampler, handle manually
45+
samplerType, ok := os.LookupEnv("OTEL_TRACES_SAMPLER")
46+
if !ok {
47+
samplerType = "parentbased_traceidratio"
48+
}
49+
samplerARG, ok := os.LookupEnv("OTEL_TRACES_SAMPLER_ARG")
50+
if !ok {
51+
samplerARG = "0.1"
52+
}
53+
54+
sampler := sdktrace.ParentBased(sdktrace.TraceIDRatioBased(0.1))
55+
if samplerType == "parentbased_traceidratio" {
56+
fraction, err := strconv.ParseFloat(samplerARG, 64)
57+
if err != nil {
58+
fraction = 0.1
59+
}
60+
sampler = sdktrace.ParentBased(sdktrace.TraceIDRatioBased(fraction))
61+
} else {
62+
loggerWrap.Handle(fmt.Errorf("un supported sampler type: %s, fallback to parentbased_traceidratio with 0.1 Ratio", samplerType))
63+
}
64+
4365
traceExporter, err := otlptracegrpc.New(ctx, otlptracegrpc.WithInsecure())
4466
if err != nil {
4567
loggerWrap.Handle(fmt.Errorf("%s: %v", "new OTel trace gRPC exporter fail", err))
@@ -49,7 +71,7 @@ func InitTracing(ctx context.Context, logger logr.Logger) error {
4971
logger.Info(fmt.Sprintf("OTel trace exporter connect to: %s with service name: %s", collectorAddr, serviceName))
5072
opt := []sdktrace.TracerProviderOption{
5173
sdktrace.WithBatcher(traceExporter),
52-
sdktrace.WithSampler(sdktrace.ParentBased(sdktrace.AlwaysSample())),
74+
sdktrace.WithSampler(sampler),
5375
sdktrace.WithResource(resource.NewWithAttributes(
5476
semconv.SchemaURL,
5577
semconv.ServiceVersionKey.String(version.BuildRef),

0 commit comments

Comments
 (0)