@@ -6,54 +6,10 @@ permissions:
66 packages : write
77
88on :
9- # Direct trigger on PRs (temporary until gate workflow is on main)
109 pull_request :
1110 branches :
1211 - main
1312 - dev
14- # Called by the gate workflow after ok-to-test approval
15- workflow_call :
16- inputs :
17- ref :
18- description : ' Git ref to checkout (SHA for PRs)'
19- required : false
20- type : string
21- model_id :
22- description : ' Model ID'
23- required : false
24- type : string
25- default : ' unsloth/Meta-Llama-3.1-8B'
26- accelerator_type :
27- description : ' Accelerator type (H100, A100, L40S)'
28- required : false
29- type : string
30- default : ' H100'
31- request_rate :
32- description : ' Request rate (req/s)'
33- required : false
34- type : string
35- default : ' 20'
36- num_prompts :
37- description : ' Number of prompts'
38- required : false
39- type : string
40- default : ' 3000'
41- skip_cleanup :
42- description : ' Skip cleanup after tests'
43- required : false
44- type : string
45- default : ' true'
46- max_num_seqs :
47- description : ' vLLM max batch size (lower = easier to saturate)'
48- required : false
49- type : string
50- default : ' 1'
51- hpa_stabilization_seconds :
52- description : ' HPA stabilization window in seconds'
53- required : false
54- type : string
55- default : ' 30'
56- # Manual trigger for maintainers
5713 workflow_dispatch :
5814 inputs :
5915 model_id :
9450 steps :
9551 - name : Checkout source
9652 uses : actions/checkout@v4
97- with :
98- ref : ${{ inputs.ref || github.sha }}
9953
10054 - name : Log in to GHCR
10155 uses : docker/login-action@v3
10963 env :
11064 REGISTRY : ghcr.io
11165 IMAGE_NAME : ${{ github.repository }}
112- # Use inputs.ref when available, otherwise fall back to GITHUB_SHA
113- # Note: inputs.ref could be a branch, tag, or SHA - we use the first 8 characters for tagging
114- GIT_REF : ${{ inputs.ref || github.sha }}
66+ GIT_REF : ${{ github.sha }}
11567 run : |
11668 # Build image with git ref tag for this PR
11769 # Use first 8 chars of the git ref
@@ -132,14 +84,13 @@ jobs:
13284 runs-on : [self-hosted, openshift]
13385 needs : build-image
13486 env :
135- # Handle inputs from both workflow_call (inputs.*) and workflow_dispatch (github.event.inputs.*)
136- MODEL_ID : ${{ inputs.model_id || github.event.inputs.model_id || 'unsloth/Meta-Llama-3.1-8B' }}
137- ACCELERATOR_TYPE : ${{ inputs.accelerator_type || github.event.inputs.accelerator_type || 'H100' }}
138- REQUEST_RATE : ${{ inputs.request_rate || github.event.inputs.request_rate || '20' }}
139- NUM_PROMPTS : ${{ inputs.num_prompts || github.event.inputs.num_prompts || '3000' }}
140- MAX_NUM_SEQS : ${{ inputs.max_num_seqs || github.event.inputs.max_num_seqs || '1' }}
141- HPA_STABILIZATION_SECONDS : ${{ inputs.hpa_stabilization_seconds || github.event.inputs.hpa_stabilization_seconds || '30' }}
142- SKIP_CLEANUP : ${{ inputs.skip_cleanup || github.event.inputs.skip_cleanup || 'true' }}
87+ MODEL_ID : ${{ github.event.inputs.model_id || 'unsloth/Meta-Llama-3.1-8B' }}
88+ ACCELERATOR_TYPE : ${{ github.event.inputs.accelerator_type || 'H100' }}
89+ REQUEST_RATE : ${{ github.event.inputs.request_rate || '20' }}
90+ NUM_PROMPTS : ${{ github.event.inputs.num_prompts || '3000' }}
91+ MAX_NUM_SEQS : ${{ github.event.inputs.max_num_seqs || '1' }}
92+ HPA_STABILIZATION_SECONDS : ${{ github.event.inputs.hpa_stabilization_seconds || '30' }}
93+ SKIP_CLEANUP : ${{ github.event.inputs.skip_cleanup || 'true' }}
14394 # Unique release names per run to avoid conflicts with other concurrent runs
14495 WVA_RELEASE_NAME : wva-e2e-${{ github.run_id }}
14596 LLMD_RELEASE_SUFFIX : e2e-${{ github.run_id }}
14899 steps :
149100 - name : Checkout source
150101 uses : actions/checkout@v4
151- with :
152- ref : ${{ inputs.ref || github.sha }}
153102
154103 - name : Extract Go version from go.mod
155104 run : sed -En 's/^go (.*)$/GO_VERSION=\1/p' go.mod >> $GITHUB_ENV
0 commit comments