diff --git a/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py b/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py
index 2cca588157..d617cff124 100644
--- a/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py
+++ b/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py
@@ -33,6 +33,7 @@
 bf16 = False  # using full precision for training
 lr_scheduler_type = "cosine"
 warmup_ratio = 0.1
+preprocessing_num_workers = 8
 
 # this will run the recipe stage by stage:
 # oneshot sparsification -> finetuning -> oneshot quantization
@@ -52,6 +53,7 @@
     learning_rate=learning_rate,
     lr_scheduler_type=lr_scheduler_type,
     warmup_ratio=warmup_ratio,
+    preprocessing_num_workers=preprocessing_num_workers,
 )
 logger.info(
     "llmcompressor does not currently support running compressed models in the marlin24 format."  # noqa
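
The diff introduces a `preprocessing_num_workers` setting and forwards it into the example's stage-by-stage run. As a rough illustration of what this kind of parameter typically controls, the sketch below shows it being passed to Hugging Face `datasets.Dataset.map` as `num_proc` so tokenization runs across multiple worker processes; this is not code from the PR, and the tokenizer and dataset identifiers are placeholders chosen only so the snippet is self-contained.

```python
# Illustrative sketch, not taken from the PR: how a preprocessing_num_workers
# value is commonly used to parallelize dataset tokenization.
from datasets import load_dataset
from transformers import AutoTokenizer

preprocessing_num_workers = 8  # matches the value added in the diff
max_seq_length = 512

# Placeholder tokenizer and dataset, used only to keep the sketch runnable.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
raw_dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")

def tokenize(batch):
    # Truncate each example to the maximum sequence length.
    return tokenizer(batch["text"], truncation=True, max_length=max_seq_length)

tokenized = raw_dataset.map(
    tokenize,
    batched=True,
    num_proc=preprocessing_num_workers,  # spread preprocessing over worker processes
    remove_columns=raw_dataset.column_names,
)
```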