|
42 | 42 | RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME: could not find a way to avoid duplicating this value
|
43 | 43 | N_USERS: 8
|
44 | 44 | DURATION: 10m
|
| 45 | + |
| 46 | + strategy: |
| 47 | + matrix: |
| 48 | + model: [phi-2] |
| 49 | + ftype: [q4_0, q8_0, f16] |
| 50 | + include: |
| 51 | + - model: phi-2 |
| 52 | + ftype: q4_0 |
| 53 | + pr_comment_enabled: "true" |
| 54 | + |
45 | 55 | if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.head_ref == 'master' || github.ref_name == 'master' || github.event.push.ref == 'refs/heads/master' }}
|
46 | 56 | steps:
|
47 | 57 | - name: Clone
|
@@ -116,7 +126,7 @@ jobs:
|
116 | 126 | --scenario script.js \
|
117 | 127 | --duration ${{ github.event.inputs.duration || env.DURATION }} \
|
118 | 128 | --hf-repo ggml-org/models \
|
119 |
| - --hf-file phi-2/ggml-model-q4_0.gguf \ |
| 129 | + --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \ |
120 | 130 | --model-path-prefix /models \
|
121 | 131 | --parallel ${{ env.N_USERS }} \
|
122 | 132 | -ngl 33 \
|
@@ -146,7 +156,7 @@ jobs:
|
146 | 156 | with:
|
147 | 157 | authToken: ${{secrets.GITHUB_TOKEN}}
|
148 | 158 | sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
|
149 |
| - context: bench-server-baseline |
| 159 | + context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} |
150 | 160 | description: |
|
151 | 161 | ${{ env.BENCH_RESULTS }}
|
152 | 162 | state: 'success'
|
@@ -203,11 +213,12 @@ jobs:
|
203 | 213 | - name: Comment PR
|
204 | 214 | uses: mshick/add-pr-comment@v2
|
205 | 215 | id: comment_pr
|
206 |
| - if: ${{ github.event.pull_request != '' }} |
| 216 | + if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }} |
207 | 217 | with:
|
208 |
| - message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }} |
| 218 | + message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} |
209 | 219 | message: |
|
210 |
| - 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀 |
| 220 | + <!--- Server benchmark completed (maybe not be related with the code changes) --> |
| 221 | + 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for ${{ matrix.model }} ${{ matrix.ftype }}: **${{ env.BENCH_ITERATIONS}} iterations** 🚀 |
211 | 222 |
|
212 | 223 | - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
|
213 | 224 | - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(90)=${{ env.HTTP_REQ_DURATION_P_90_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
|
|
0 commit comments