File tree Expand file tree Collapse file tree 1 file changed +7
-7
lines changed
Expand file tree Collapse file tree 1 file changed +7
-7
lines changed Original file line number Diff line number Diff line change @@ -178,10 +178,10 @@ jobs:
178178 - name : " non-quantized"
179179 artifact : " voxtral-cuda-export"
180180 extra_args : " "
181- # TODO: enable gemma3 quantization
182- # - name : "quantized-int4-tile-packed"
183- # artifact : "voxtral-cuda-quantized-int4-tile-packed "
184- # extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
181+ - name: "quantized-int4-tile-packed"
182+ artifact: "voxtral-cuda-quantized-int4-tile-packed"
183+ extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
184+ # TODO: enable int4-weight-only on voxtral.
185185 # - name: "quantized-int4-weight-only"
186186 # artifact: "voxtral-cuda-quantized-int4-weight-only"
187187 # # TODO: adding "--qlinear 4w" produces invalid results. Need further investigation.
@@ -435,9 +435,9 @@ jobs:
435435 format :
436436 - name : " non-quantized"
437437 artifact : " gemma3-cuda-export"
438- # TODO: enable quantized gemma3.
439- # - name : "quantized-int4-tile-packed"
440- # artifact: "gemma3-cuda-quantized- int4-tile-packed"
438+ - name: "quantized-int4-tile-packed"
439+ artifact: "gemma3-cuda-quantized-int4-tile-packed"
440+ # TODO: enable int4-weight-only on gemma3.
441441 # - name: "quantized-int4-weight-only"
442442 # artifact: "gemma3-cuda-quantized-int4-weight-only"
443443 with :
You can’t perform that action at this time.
0 commit comments