Skip to content

Commit 8991bf8

Browse files
committed
init
1 parent 455639b commit 8991bf8

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

.github/workflows/cuda.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -178,10 +178,10 @@ jobs:
178178
- name: "non-quantized"
179179
artifact: "voxtral-cuda-export"
180180
extra_args: ""
181-
# TODO: enable gemma3 quantization
182-
# - name: "quantized-int4-tile-packed"
183-
# artifact: "voxtral-cuda-quantized-int4-tile-packed"
184-
# extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
181+
- name: "quantized-int4-tile-packed"
182+
artifact: "voxtral-cuda-quantized-int4-tile-packed"
183+
extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
184+
# TODO: enable int4-weight-only on voxtral.
185185
# - name: "quantized-int4-weight-only"
186186
# artifact: "voxtral-cuda-quantized-int4-weight-only"
187187
# # TODO: adding "--qlinear 4w" produces invalid results. Need further investigation.
@@ -435,9 +435,9 @@ jobs:
435435
format:
436436
- name: "non-quantized"
437437
artifact: "gemma3-cuda-export"
438-
# TODO: enable quantized gemma3.
439-
# - name: "quantized-int4-tile-packed"
440-
# artifact: "gemma3-cuda-quantized-int4-tile-packed"
438+
- name: "quantized-int4-tile-packed"
439+
artifact: "gemma3-cuda-quantized-int4-tile-packed"
440+
# TODO: enable int4-weight-only on gemma3.
441441
# - name: "quantized-int4-weight-only"
442442
# artifact: "gemma3-cuda-quantized-int4-weight-only"
443443
with:

0 commit comments

Comments
 (0)