feat(epp): use kebab-cased flags for epp (kubernetes-sigs#1177)

Xunzhuo · kfswain · commit aa214d68cb9e · 2025-07-31T21:31:12.000Z
* feat(epp): use kebab-cased flags for epp

Signed-off-by: bitliu <bitliu@tencent.com>

* resolve feedback

Signed-off-by: bitliu <bitliu@tencent.com>

---------

Signed-off-by: bitliu <bitliu@tencent.com>
diff --git a/cmd/bbr/main.go b/cmd/bbr/main.go
@@ -40,15 +40,15 @@ import (
 
 var (
 	grpcPort = flag.Int(
-		"grpcPort",
+		"grpc-port",
 		9004,
 		"The gRPC port used for communicating with Envoy proxy")
 	grpcHealthPort = flag.Int(
-		"grpcHealthPort",
+		"grpc-health-port",
 		9005,
 		"The port used for gRPC liveness and readiness probes")
 	metricsPort = flag.Int(
-		"metricsPort", 9090, "The metrics port")
+		"metrics-port", 9090, "The metrics port")
 	streaming = flag.Bool(
 		"streaming", false, "Enables streaming support for Envoy full-duplex streaming mode")
 	logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go
@@ -54,89 +54,89 @@ import (
 
 var (
 	grpcPort = flag.Int(
-		"grpcPort",
+		"grpc-port",
 		runserver.DefaultGrpcPort,
 		"The gRPC port used for communicating with Envoy proxy")
 	grpcHealthPort = flag.Int(
-		"grpcHealthPort",
+		"grpc-health-port",
 		runserver.DefaultGrpcHealthPort,
 		"The port used for gRPC liveness and readiness probes")
 	metricsPort = flag.Int(
-		"metricsPort",
+		"metrics-port",
 		runserver.DefaultMetricsPort,
 		"The metrics port")
 	enablePprof = flag.Bool(
-		"enablePprof",
+		"enable-pprof",
 		runserver.DefaultEnablePprof,
 		"Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.")
 	destinationEndpointHintKey = flag.String(
-		"destinationEndpointHintKey",
+		"destination-endpoint-hint-key",
 		runserver.DefaultDestinationEndpointHintKey,
 		"Header and response metadata key used by Envoy to route to the appropriate pod. This must match Envoy configuration.")
 	destinationEndpointHintMetadataNamespace = flag.String(
-		"DestinationEndpointHintMetadataNamespace",
+		"destination-endpoint-hint-metadata-namespace",
 		runserver.DefaultDestinationEndpointHintMetadataNamespace,
 		"The key for the outer namespace struct in the metadata field of the extproc response that is used to wrap the"+
 			"target endpoint. If not set, then an outer namespace struct should not be created.")
 	poolName = flag.String(
-		"poolName",
+		"pool-name",
 		runserver.DefaultPoolName,
 		"Name of the InferencePool this Endpoint Picker is associated with.")
 	poolNamespace = flag.String(
-		"poolNamespace",
+		"pool-namespace",
 		runserver.DefaultPoolNamespace,
 		"Namespace of the InferencePool this Endpoint Picker is associated with.")
 	refreshMetricsInterval = flag.Duration(
-		"refreshMetricsInterval",
+		"refresh-metrics-interval",
 		runserver.DefaultRefreshMetricsInterval,
 		"interval to refresh metrics")
 	refreshPrometheusMetricsInterval = flag.Duration(
-		"refreshPrometheusMetricsInterval",
+		"refresh-prometheus-metrics-interval",
 		runserver.DefaultRefreshPrometheusMetricsInterval,
 		"interval to flush prometheus metrics")
 	logVerbosity = flag.Int(
 		"v",
 		logging.DEFAULT,
 		"number for the log level verbosity")
 	secureServing = flag.Bool(
-		"secureServing",
+		"secure-serving",
 		runserver.DefaultSecureServing,
 		"Enables secure serving. Defaults to true.")
 	healthChecking = flag.Bool(
-		"healthChecking",
+		"health-checking",
 		runserver.DefaultHealthChecking,
 		"Enables health checking")
 	certPath = flag.String(
-		"certPath",
+		"cert-path",
 		runserver.DefaultCertPath,
 		"The path to the certificate for secure serving. The certificate and private key files "+
 			"are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, "+
 			"then a self-signed certificate is used.")
 	// metric flags
 	totalQueuedRequestsMetric = flag.String(
-		"totalQueuedRequestsMetric",
+		"total-queued-requests-metric",
 		runserver.DefaultTotalQueuedRequestsMetric,
 		"Prometheus metric for the number of queued requests.")
 	kvCacheUsagePercentageMetric = flag.String(
-		"kvCacheUsagePercentageMetric",
+		"kv-cache-usage-percentage-metric",
 		runserver.DefaultKvCacheUsagePercentageMetric,
 		"Prometheus metric for the fraction of KV-cache blocks currently in use (from 0 to 1).")
 	// LoRA metrics
 	loraInfoMetric = flag.String(
-		"loraInfoMetric",
+		"lora-info-metric",
 		runserver.DefaultLoraInfoMetric,
 		"Prometheus metric for the LoRA info metrics (must be in vLLM label format).")
 	// configuration flags
 	configFile = flag.String(
-		"configFile",
+		"config-file",
 		runserver.DefaultConfigFile,
 		"The path to the configuration file")
 	configText = flag.String(
-		"configText",
+		"config-text",
 		runserver.DefaultConfigText,
 		"The configuration specified as text, in lieu of a file")
 
-	modelServerMetricsPort = flag.Int("modelServerMetricsPort", 0, "Port to scrape metrics from pods. "+
+	modelServerMetricsPort = flag.Int("model-server-metrics-port", 0, "Port to scrape metrics from pods. "+
 		"Default value will be set to InferencePool.Spec.TargetPortNumber if not set.")
 	modelServerMetricsPath                    = flag.String("modelServerMetricsPath", "/metrics", "Path to scrape metrics from pods")
 	modelServerMetricsScheme                  = flag.String("modelServerMetricsScheme", "http", "Scheme to scrape metrics from pods")
@@ -181,8 +181,8 @@ func bindEnvToFlags() {
 		"POOL_NAME":                                       "poolName",
 		"POOL_NAMESPACE":                                  "poolNamespace",
 		// durations & bools work too; flag.Set expects the *string* form
-		"REFRESH_METRICS_INTERVAL": "refreshMetricsInterval",
-		"SECURE_SERVING":           "secureServing",
+		"REFRESH_METRICS_INTERVAL": "refresh-metrics-interval",
+		"SECURE_SERVING":           "secure-serving",
 	} {
 		if v := os.Getenv(env); v != "" {
 			// ignore error; Parse() will catch invalid values later
diff --git a/config/charts/body-based-routing/templates/bbr.yaml b/config/charts/body-based-routing/templates/bbr.yaml
@@ -18,8 +18,8 @@ spec:
         image: {{ .Values.bbr.image.hub }}/{{ .Values.bbr.image.name }}:{{ .Values.bbr.image.tag }}
         imagePullPolicy: {{ .Values.bbr.image.pullPolicy | default "Always" }}
         args:
-        - "-streaming"
-        - "-v"
+        - "--streaming"
+        - "--v"
         - "3"
         ports:
         - containerPort: {{ .Values.bbr.port }}
diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml
@@ -23,31 +23,31 @@ spec:
         image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }}
         imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }}
         args:
-        - -poolName
+        - --pool-name
         - {{ .Release.Name }}
-        - -poolNamespace
+        - --pool-namespace
         - {{ .Release.Namespace }}
         - --v
         - "{{ .Values.inferenceExtension.logVerbosity | default "3" }}"
         - --grpcPort
         - "9002"
-        - -grpcHealthPort
+        - --grpc-health-port
         - "9003"
-        - -metricsPort
+        - --metrics-port
         - "9090"
-        - -configFile
+        - --config-file
         - "config/{{ .Values.inferenceExtension.pluginsConfigFile }}"
         # https://pkg.go.dev/flag#hdr-Command_line_flag_syntax; space is only for non-bool flags
         - "--enablePprof={{ .Values.inferenceExtension.enablePprof }}"
         - "--modelServerMetricsPath={{ .Values.inferenceExtension.modelServerMetricsPath }}"
         - "--modelServerMetricsScheme={{ .Values.inferenceExtension.modelServerMetricsScheme }}"
         - "--modelServerMetricsHttpsInsecureSkipVerify={{ .Values.inferenceExtension.modelServerMetricsHttpsInsecureSkipVerify }}"
         {{- if eq (.Values.inferencePool.modelServerType | default "vllm") "triton-tensorrt-llm" }}
-        - -totalQueuedRequestsMetric
+        - --total-queued-requests-metric
         - "nv_trt_llm_request_metrics{request_type=waiting}"
-        - -kvCacheUsagePercentageMetric
+        - --kv-cache-usage-percentage-metric
         - "nv_trt_llm_kv_cache_block_metrics{kv_cache_block_type=fraction}"
-        - -loraInfoMetric
+        - --lora-info-metric
         - "" # Set an empty metric to disable LoRA metric scraping as they are not supported by Triton yet.
         {{- end }}
         ports:
diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml
@@ -53,19 +53,19 @@ spec:
         image: registry.k8s.io/gateway-api-inference-extension/epp:v0.5.1
         imagePullPolicy: IfNotPresent
         args:
-        - -poolName
+        - --pool-name
         - "vllm-llama3-8b-instruct"
-        - "-poolNamespace"
+        - "--pool-namespace"
         - "default"
-        - -v
+        - --v
         - "4"
         - --zap-encoder
         - "json"
-        - -grpcPort
+        - --grpc-port
         - "9002"
-        - -grpcHealthPort
+        - --grpc-health-port
         - "9003"
-        - "-configFile"
+        - "--config-file"
         - "/config/default-plugins.yaml"
         ports:
         - containerPort: 9002
diff --git a/conformance/resources/manifests/manifests.yaml b/conformance/resources/manifests/manifests.yaml
@@ -199,19 +199,19 @@ spec:
         image: registry.k8s.io/gateway-api-inference-extension/epp:v0.5.1
         imagePullPolicy: IfNotPresent
         args:
-        - -poolName
+        - --pool-name
         - "primary-inference-pool"
-        - -poolNamespace
+        - --pool-namespace
         - "gateway-conformance-app-backend"
-        - -v
+        - --v
         - "4"
         - --zap-encoder
         - "json"
-        - -grpcPort
+        - --grpc-port
         - "9002"
-        - -grpcHealthPort
+        - --grpc-health-port
         - "9003"
-        - "-configFile"
+        - "--config-file"
         - "/config/conformance-plugins.yaml"
         ports:
         - containerPort: 9002
@@ -293,19 +293,19 @@ spec:
         image: registry.k8s.io/gateway-api-inference-extension/epp:v0.5.1
         imagePullPolicy: IfNotPresent
         args:
-        - -poolName
+        - --pool-name
         - "secondary-inference-pool"
-        - -poolNamespace
+        - --pool-namespace
         - "gateway-conformance-app-backend"
-        - -v
+        - --v
         - "4"
         - --zap-encoder
         - "json"
-        - -grpcPort
+        - --grpc-port
         - "9002"
-        - -grpcHealthPort
+        - --grpc-health-port
         - "9003"
-        - "-configFile"
+        - "--config-file"
         - "/config/conformance-plugins.yaml"
         ports:
         - containerPort: 9002
diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml
@@ -50,19 +50,19 @@ spec:
         image: $E2E_IMAGE
         imagePullPolicy: IfNotPresent
         args:
-        - -poolName
+        - --pool-name
         - "vllm-llama3-8b-instruct"
-        - -poolNamespace
+        - --pool-namespace
         - "$E2E_NS"
-        - -v
+        - --v
         - "4"
         - --zap-encoder
         - "json"
-        - -grpcPort
+        - --grpc-port
         - "9002"
-        - -grpcHealthPort
+        - --grpc-health-port
         - "9003"
-        - "-configFile"
+        - "--config-file"
         - "/config/default-plugins.yaml"
         ports:
         - containerPort: 9002