|
| 1 | +{{- /* Resolve the benchmark target IP with lookup; this needs a live cluster, so client-side rendering (helm template, client dry-run) finds no object and fails below. Any mode other than "gateway" or "service" is rejected. */ -}} |
| 2 | +{{- $targetIP := "" -}} |
| 3 | +{{- if eq .Values.moderlServingEndpoint.mode "gateway" -}} |
| 4 | +  {{- $gw := lookup "gateway.networking.k8s.io/v1" "Gateway" .Values.moderlServingEndpoint.namespace .Values.moderlServingEndpoint.name -}} |
| 5 | +  {{- if not $gw -}} |
| 6 | +    {{- fail (printf "Gateway %s not found in namespace %s. Please create it before installing this chart." .Values.moderlServingEndpoint.name .Values.moderlServingEndpoint.namespace) -}} |
| 7 | +  {{- else if not (dig "status" "addresses" (list) $gw) -}} |
| 8 | +    {{- fail (printf "Gateway %s found, but .status.addresses is not populated yet. Please wait until an IP is assigned." .Values.moderlServingEndpoint.name) -}} |
| 9 | +  {{- end -}} |
| 10 | +  {{- $targetIP = (index $gw.status.addresses 0).value | quote -}} |
| 11 | +{{- else if eq .Values.moderlServingEndpoint.mode "service" -}} |
| 12 | +  {{- $svc := lookup "v1" "Service" .Values.moderlServingEndpoint.namespace .Values.moderlServingEndpoint.name -}} |
| 13 | +  {{- if not $svc -}} |
| 14 | +    {{- fail (printf "Service %s not found in namespace %s. Please create it before installing this chart." .Values.moderlServingEndpoint.name .Values.moderlServingEndpoint.namespace) -}} |
| 15 | +  {{- else if not (dig "status" "loadBalancer" "ingress" (list) $svc) -}} |
| 16 | +    {{- fail (printf "Service %s found, but .status.loadBalancer is not populated yet. Please wait until an IP is assigned." .Values.moderlServingEndpoint.name) -}} |
| 17 | +  {{- end -}} |
| 18 | +  {{- $targetIP = (index $svc.status.loadBalancer.ingress 0).ip | quote -}} |
| 19 | +{{- else -}} |
| 20 | +  {{- fail (printf "Unsupported moderlServingEndpoint.mode '%v'; expected 'gateway' or 'service'." .Values.moderlServingEndpoint.mode) -}} |
| 21 | +{{- end }} |
| 22 | + |
| 23 | +apiVersion: apps/v1 |
| 24 | +kind: Deployment  # Single-replica workload that runs latency_throughput_curve.sh; the resolved target IP is passed in the IP env var. |
| 25 | +metadata: |
| 26 | +  labels: |
| 27 | +    app: {{ .Release.Name }} |
| 28 | +  name: {{ .Release.Name }} |
| 29 | +spec: |
| 30 | +  replicas: 1 |
| 31 | +  selector: |
| 32 | +    matchLabels: |
| 33 | +      app: {{ .Release.Name }} |
| 34 | +  template: |
| 35 | +    metadata: |
| 36 | +      labels: |
| 37 | +        app: {{ .Release.Name }} |
| 38 | +    spec: |
| 39 | +      containers: |
| 40 | +      # The following image was built from this source https://github.com/AI-Hypercomputer/inference-benchmark/tree/07628c9fe01b748f5a4cc9e5c2ee4234aaf47699 |
| 41 | +      - image: 'us-docker.pkg.dev/cloud-tpu-images/inference/inference-benchmark@sha256:1c100b0cc949c7df7a2db814ae349c790f034b4b373aaad145e77e815e838438' |
| 42 | +        imagePullPolicy: Always |
| 43 | +        name: {{ .Release.Name }} |
| 44 | +        command: |
| 45 | +        - bash |
| 46 | +        - -c |
| 47 | +        - ./latency_throughput_curve.sh |
| 48 | +        env: |
| 49 | +        - name: IP |
| 50 | +          value: {{ $targetIP }}  # already quoted where $targetIP is assigned |
| 51 | +        - name: REQUEST_RATES |
| 52 | +          value: {{ index .Values "request-rates" | quote }}  # the key contains a hyphen, so it must be read with index rather than a dotted field chain |
| 53 | +        - name: BENCHMARK_TIME_SECONDS |
| 54 | +          value: {{ .Values.timeSeconds | quote }} |
| 55 | +        - name: MAX_NUM_PROMPTS |
| 56 | +          value: {{ .Values.maxNumPrompts | quote }}  # env values must be strings, so numeric settings are quoted |
| 57 | +        - name: TOKENIZER |
| 58 | +          value: {{ .Values.tokenizer | quote }} |
| 59 | +        - name: MODELS |
| 60 | +          value: {{ .Values.models | quote }} |
| 61 | +        - name: BACKEND |
| 62 | +          value: {{ .Values.backend | quote }} |
| 63 | +        - name: PORT |
| 64 | +          value: {{ .Values.port | quote }} |
| 65 | +        - name: INPUT_LENGTH |
| 66 | +          value: {{ .Values.inputLength | quote }} |
| 67 | +        - name: OUTPUT_LENGTH |
| 68 | +          value: {{ .Values.outputLength | quote }} |
| 69 | +        - name: FILE_PREFIX |
| 70 | +          value: {{ .Values.filePrefix | quote }} |
| 71 | +        - name: PROMPT_DATASET_FILE |
| 72 | +          value: ShareGPT_V3_unfiltered_cleaned_split.json |
| 73 | +        - name: TRAFFIC_SPLIT |
| 74 | +          value: {{ .Values.trafficSplit | quote }} |
| 75 | +        - name: SCRAPE_SERVER_METRICS |
| 76 | +          value: {{ .Values.scrapeServerMetrics | quote }} |
| 77 | +        - name: SAVE_AGGREGATION_RESULT |
| 78 | +          value: {{ .Values.saveAggregatedResult | quote }} |
| 79 | +        - name: STREAM_REQUEST |
| 80 | +          value: {{ .Values.streamRequest | quote }} |
| 81 | +        - name: HF_TOKEN |
| 82 | +          valueFrom: |
| 83 | +            secretKeyRef: |
| 84 | +              key: token |
| 85 | +              name: hf-token |
| 86 | +        resources: |
| 87 | +          limits: |
| 88 | +            cpu: "2" |
| 89 | +            memory: 20Gi |
| 90 | +          requests: |
| 91 | +            cpu: "2" |
| 92 | +            memory: 20Gi |
0 commit comments