|
77 | 77 | spec: |
78 | 78 | containers: |
79 | 79 | - name: echoserver |
80 | | - image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd |
| 80 | + image: gcr.io/k8s-staging-gateway-api/echo-basic:v20251106-v1.3.0-263-g47c3435c |
81 | 81 | ports: |
82 | 82 | - containerPort: 3000 |
83 | 83 | readinessProbe: |
@@ -121,7 +121,7 @@ spec: |
121 | 121 | spec: |
122 | 122 | containers: |
123 | 123 | - name: echoserver |
124 | | - image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd |
| 124 | + image: gcr.io/k8s-staging-gateway-api/echo-basic:v20251106-v1.3.0-263-g47c3435c |
125 | 125 | ports: |
126 | 126 | - containerPort: 3000 |
127 | 127 | readinessProbe: |
@@ -200,7 +200,7 @@ spec: |
200 | 200 | terminationGracePeriodSeconds: 130 |
201 | 201 | containers: |
202 | 202 | - name: epp |
203 | | - image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0 |
| 203 | + image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v20251105-cbb8928 |
204 | 204 | imagePullPolicy: Always |
205 | 205 | args: |
206 | 206 | - --pool-name |
@@ -298,7 +298,7 @@ spec: |
298 | 298 | terminationGracePeriodSeconds: 130 |
299 | 299 | containers: |
300 | 300 | - name: epp |
301 | | - image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0 |
| 301 | + image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v20251105-cbb8928 |
302 | 302 | imagePullPolicy: Always |
303 | 303 | args: |
304 | 304 | - --pool-name |
@@ -340,6 +340,215 @@ spec: |
340 | 340 | configMap: |
341 | 341 | name: plugins-config |
342 | 342 | --- |
| 343 | +# --- Data Parallelism (DP) backend Deployment: 3 pods, each listening on three ports to simulate ranks --- |
| 344 | +apiVersion: apps/v1 |
| 345 | +kind: Deployment |
| 346 | +metadata: |
| 347 | + name: dp-inference-model-server-deployment |
| 348 | + namespace: inference-conformance-app-backend |
| 349 | + labels: |
| 350 | + app: dp-inference-model-server |
| 351 | +spec: |
| 352 | + replicas: 3 |
| 353 | + selector: |
| 354 | + matchLabels: |
| 355 | + app: dp-inference-model-server |
| 356 | + template: |
| 357 | + metadata: |
| 358 | + labels: |
| 359 | + app: dp-inference-model-server |
| 360 | + spec: |
| 361 | + containers: |
| 362 | + - name: echoserver-3000 |
| 363 | + image: gcr.io/k8s-staging-gateway-api/echo-basic:v20251106-v1.3.0-263-g47c3435c |
| 364 | + ports: |
| 365 | + - containerPort: 3000 |
| 366 | + readinessProbe: |
| 367 | + httpGet: |
| 368 | + path: / |
| 369 | + port: 3000 |
| 370 | + initialDelaySeconds: 3 |
| 371 | + periodSeconds: 5 |
| 372 | + failureThreshold: 2 |
| 373 | + env: |
| 374 | + - name: HTTP_PORT # Default port for HTTP echo server |
| 375 | + value: "3000" |
| 376 | + - name: H2C_PORT # Default port for H2C echo server |
| 377 | + value: "3001" |
| 378 | + - name: INCLUDE_HTTP_PORT_HEADER |
| 379 | + value: "true" |
| 380 | + - name: POD_NAME |
| 381 | + valueFrom: |
| 382 | + fieldRef: |
| 383 | + fieldPath: metadata.name |
| 384 | + - name: NAMESPACE |
| 385 | + valueFrom: |
| 386 | + fieldRef: |
| 387 | + fieldPath: metadata.namespace |
| 388 | + - name: POD_IP |
| 389 | + valueFrom: |
| 390 | + fieldRef: |
| 391 | + fieldPath: status.podIP |
| 392 | + - name: echoserver-3002 |
| 393 | + image: gcr.io/k8s-staging-gateway-api/echo-basic:v20251106-v1.3.0-263-g47c3435c |
| 394 | + ports: |
| 395 | + - containerPort: 3002 |
| 396 | + readinessProbe: |
| 397 | + httpGet: |
| 398 | + path: / |
| 399 | + port: 3002 |
| 400 | + initialDelaySeconds: 3 |
| 401 | + periodSeconds: 5 |
| 402 | + failureThreshold: 2 |
| 403 | + env: |
| 404 | + - name: HTTP_PORT |
| 405 | + value: "3002" |
| 406 | + - name: H2C_PORT |
| 407 | + value: "3003" |
| 408 | + - name: INCLUDE_HTTP_PORT_HEADER |
| 409 | + value: "true" |
| 410 | + - name: POD_NAME |
| 411 | + valueFrom: |
| 412 | + fieldRef: |
| 413 | + fieldPath: metadata.name |
| 414 | + - name: NAMESPACE |
| 415 | + valueFrom: |
| 416 | + fieldRef: |
| 417 | + fieldPath: metadata.namespace |
| 418 | + - name: POD_IP |
| 419 | + valueFrom: |
| 420 | + fieldRef: |
| 421 | + fieldPath: status.podIP |
| 422 | + - name: echoserver-3004 |
| 423 | + image: gcr.io/k8s-staging-gateway-api/echo-basic:v20251106-v1.3.0-263-g47c3435c |
| 424 | + ports: |
| 425 | + - containerPort: 3004 |
| 426 | + readinessProbe: |
| 427 | + httpGet: |
| 428 | + path: / |
| 429 | + port: 3004 |
| 430 | + initialDelaySeconds: 3 |
| 431 | + periodSeconds: 5 |
| 432 | + failureThreshold: 2 |
| 433 | + env: |
| 434 | + - name: HTTP_PORT |
| 435 | + value: "3004" |
| 436 | + - name: H2C_PORT |
| 437 | + value: "3005" |
| 438 | + - name: INCLUDE_HTTP_PORT_HEADER |
| 439 | + value: "true" |
| 440 | + - name: POD_NAME |
| 441 | + valueFrom: |
| 442 | + fieldRef: |
| 443 | + fieldPath: metadata.name |
| 444 | + - name: NAMESPACE |
| 445 | + valueFrom: |
| 446 | + fieldRef: |
| 447 | + fieldPath: metadata.namespace |
| 448 | + - name: POD_IP |
| 449 | + valueFrom: |
| 450 | + fieldRef: |
| 451 | + fieldPath: status.podIP |
| 452 | +--- |
| 453 | +# --- Data Parallelism (DP) InferencePool Definition --- |
| 454 | +apiVersion: inference.networking.k8s.io/v1 |
| 455 | +kind: InferencePool |
| 456 | +metadata: |
| 457 | + name: dp-inference-pool |
| 458 | + namespace: inference-conformance-app-backend |
| 459 | +spec: |
| 460 | + selector: |
| 461 | + matchLabels: |
| 462 | + app: dp-inference-model-server |
| 463 | + targetPorts: |
| 464 | + - number: 3000 |
| 465 | + - number: 3002 |
| 466 | + - number: 3004 |
| 467 | + endpointPickerRef: |
| 468 | + name: dp-endpoint-picker-svc |
| 469 | + port: |
| 470 | + number: 9002 |
| 471 | +--- |
| 472 | +# --- Data Parallelism (DP) Conformance EPP service Definition --- |
| 473 | +apiVersion: v1 |
| 474 | +kind: Service |
| 475 | +metadata: |
| 476 | + name: dp-endpoint-picker-svc |
| 477 | + namespace: inference-conformance-app-backend |
| 478 | +spec: |
| 479 | + selector: |
| 480 | + app: dp-app-backend-epp |
| 481 | + ports: |
| 482 | + - protocol: TCP |
| 483 | + port: 9002 |
| 484 | + targetPort: 9002 |
| 485 | + appProtocol: http2 |
| 486 | + type: ClusterIP |
| 487 | +--- |
| 488 | +# --- Data Parallelism (DP) Conformance EPP Deployment --- |
| 489 | +apiVersion: apps/v1 |
| 490 | +kind: Deployment |
| 491 | +metadata: |
| 492 | + name: dp-app-endpoint-picker |
| 493 | + namespace: inference-conformance-app-backend |
| 494 | + labels: |
| 495 | + app: dp-app-backend-epp |
| 496 | +spec: |
| 497 | + replicas: 1 |
| 498 | + selector: |
| 499 | + matchLabels: |
| 500 | + app: dp-app-backend-epp |
| 501 | + template: |
| 502 | + metadata: |
| 503 | + labels: |
| 504 | + app: dp-app-backend-epp |
| 505 | + spec: |
| 506 | + # Conservatively, this timeout should mirror the longest grace period of the pods within the pool |
| 507 | + terminationGracePeriodSeconds: 130 |
| 508 | + containers: |
| 509 | + - name: epp |
| 510 | + image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v20251105-cbb8928 |
| 511 | + imagePullPolicy: Always |
| 512 | + args: |
| 513 | + - --pool-name |
| 514 | + - "dp-inference-pool" |
| 515 | + - --pool-namespace |
| 516 | + - "inference-conformance-app-backend" |
| 517 | + - --v |
| 518 | + - "4" |
| 519 | + - --zap-encoder |
| 520 | + - "json" |
| 521 | + - --grpc-port |
| 522 | + - "9002" |
| 523 | + - --grpc-health-port |
| 524 | + - "9003" |
| 525 | + - "--config-file" |
| 526 | + - "/config/conformance-plugins.yaml" |
| 527 | + ports: |
| 528 | + - containerPort: 9002 |
| 529 | + - containerPort: 9003 |
| 530 | + - name: metrics |
| 531 | + containerPort: 9090 |
| 532 | + livenessProbe: |
| 533 | + grpc: |
| 534 | + port: 9003 |
| 535 | + service: inference-extension |
| 536 | + initialDelaySeconds: 5 |
| 537 | + periodSeconds: 10 |
| 538 | + readinessProbe: |
| 539 | + grpc: |
| 540 | + port: 9003 |
| 541 | + service: inference-extension |
| 542 | + initialDelaySeconds: 5 |
| 543 | + periodSeconds: 10 |
| 544 | + volumeMounts: |
| 545 | + - name: plugins-config-volume |
| 546 | + mountPath: "/config" |
| 547 | + volumes: |
| 548 | + - name: plugins-config-volume |
| 549 | + configMap: |
| 550 | + name: plugins-config |
| 551 | +--- |
343 | 552 | apiVersion: v1 |
344 | 553 | kind: ConfigMap |
345 | 554 | metadata: |
|
0 commit comments