Merged
Binary file added assets/wan_i2v/calib/astronaut.jpg
7 changes: 7 additions & 0 deletions assets/wan_i2v/calib/samples.json
@@ -0,0 +1,7 @@
[
    {
        "image": "astronaut.jpg",
        "prompt": "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot.",
        "negative_prompt": "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
    }
]
Binary file added assets/wan_i2v/eval/astronaut.jpg
7 changes: 7 additions & 0 deletions assets/wan_i2v/eval/samples.json
@@ -0,0 +1,7 @@
[
    {
        "image": "astronaut.jpg",
        "prompt": "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot.",
        "negative_prompt": "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
    }
]
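For orientation, here is a minimal sketch of how a sample list like this could be consumed. The JSON schema (`image`, `prompt`, `negative_prompt`) comes from the files above; the loader itself is hypothetical, not LLMC's actual dataset code.

```python
# Illustrative loader for a calib/eval sample list like the one above.
# The field names come from samples.json; this helper is a stand-in,
# not LLMC's actual dataset code.
import json
from pathlib import Path


def load_samples(sample_dir):
    """Read samples.json and resolve each image path against sample_dir."""
    entries = json.loads((Path(sample_dir) / 'samples.json').read_text())
    return [
        {
            'image': Path(sample_dir) / e['image'],
            'prompt': e['prompt'],
            'negative_prompt': e.get('negative_prompt', ''),
        }
        for e in entries
    ]
```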
Empty file modified assets/wan_t2v/calib/samples.json
100644 → 100755
Empty file.
Empty file modified assets/wan_t2v/eval/samples.json
100644 → 100755
Empty file.
49 changes: 49 additions & 0 deletions configs/quantization/video_gen/wan_i2v/awq_w_a.yaml
@@ -0,0 +1,49 @@
base:
    seed: &seed 42
model:
    type: WanI2V
    path: /path/to/model
    torch_dtype: auto
calib:
    name: i2v
    download: False
    path: ../assets/wan_i2v/calib/
    sample_steps: 40
    bs: 1
    target_height: 480
    target_width: 832
    num_frames: 81
    guidance_scale: 5.0
    seed: *seed
eval:
    eval_pos: [fake_quant]
    type: video_gen
    name: i2v
    download: False
    path: ../assets/wan_i2v/eval/
    bs: 1
    target_height: 480
    target_width: 832
    num_frames: 81
    guidance_scale: 5.0
    output_video_path: ./output_videos_awq/
quant:
    video_gen:
        method: Awq
        weight:
            bit: 8
            symmetric: True
            granularity: per_channel
            group_size: -1
        act:
            bit: 8
            symmetric: True
            granularity: per_token
        special:
            trans: True
            trans_version: v2
            weight_clip: False
            clip_sym: True
save:
    save_lightx2v: True
    save_path: /path/to/x2v/
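The `weight` section above requests 8-bit symmetric per-channel quantization (`group_size: -1`, i.e. one scale per output channel). As a rough sketch of what that setting means, here is an illustrative plain-Python re-implementation; it is not LLMC's actual quantizer.

```python
# Symmetric per-channel INT8 quantization, as configured by
# `bit: 8, symmetric: True, granularity: per_channel` above.
# Illustrative only -- LLMC's real quantizer operates on tensors.

def quantize_per_channel_sym(weight, bit=8):
    """Quantize each output channel (row) with its own symmetric scale."""
    qmax = 2 ** (bit - 1) - 1          # 127 for INT8
    quantized, scales = [], []
    for row in weight:
        absmax = max(abs(v) for v in row)
        scale = absmax / qmax if absmax > 0 else 1.0
        scales.append(scale)
        quantized.append([max(-qmax, min(qmax, round(v / scale))) for v in row])
    return quantized, scales


def dequantize(quantized, scales):
    """Recover approximate weights from integer codes and scales."""
    return [[q * s for q in row] for row, s in zip(quantized, scales)]


w = [[0.5, -1.0, 0.25], [2.0, 0.0, -2.0]]
q, s = quantize_per_channel_sym(w)
w_hat = dequantize(q, s)   # close to w; channel extremes are exact
```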
32 changes: 32 additions & 0 deletions configs/quantization/video_gen/wan_i2v/rtn_w_a.yaml
@@ -0,0 +1,32 @@
base:
    seed: &seed 42
model:
    type: WanI2V
    path: /path/to/model
    torch_dtype: auto
eval:
    eval_pos: [fake_quant]
    type: video_gen
    name: i2v
    download: False
    path: ../assets/wan_i2v/eval/
    bs: 1
    target_height: 480
    target_width: 832
    num_frames: 81
    guidance_scale: 5.0
    output_video_path: ./output_videos_rtn/
quant:
    video_gen:
        method: RTN
        weight:
            bit: 8
            symmetric: True
            granularity: per_channel
        act:
            bit: 8
            symmetric: True
            granularity: per_token
save:
    save_lightx2v: True
    save_path: /path/to/x2v/
33 changes: 33 additions & 0 deletions configs/quantization/video_gen/wan_i2v/rtn_w_a_lora.yaml
@@ -0,0 +1,33 @@
base:
    seed: &seed 42
model:
    type: WanI2V
    path: /path/to/model
    lora_path: /path/to/lora_weights
    torch_dtype: auto
eval:
    eval_pos: [fake_quant]
    type: video_gen
    name: i2v
    download: False
    path: ../assets/wan_i2v/eval/
    bs: 1
    target_height: 480
    target_width: 832
    num_frames: 81
    guidance_scale: 5.0
    output_video_path: ./output_videos_rtn_lora/
quant:
    video_gen:
        method: RTN
        weight:
            bit: 8
            symmetric: True
            granularity: per_channel
        act:
            bit: 8
            symmetric: True
            granularity: per_token
save:
    save_lightx2v: True
    save_path: /path/to/x2v/
45 changes: 45 additions & 0 deletions configs/quantization/video_gen/wan_i2v/smoothquant_w_a.yaml
@@ -0,0 +1,45 @@
base:
    seed: &seed 42
model:
    type: WanI2V
    path: /path/to/model
    torch_dtype: auto
calib:
    name: i2v
    download: False
    path: ../assets/wan_i2v/calib/
    sample_steps: 40
    bs: 1
    target_height: 480
    target_width: 832
    num_frames: 81
    guidance_scale: 5.0
    seed: *seed
eval:
    eval_pos: [fake_quant]
    type: video_gen
    name: i2v
    download: False
    path: ../assets/wan_i2v/eval/
    bs: 1
    target_height: 480
    target_width: 832
    num_frames: 81
    guidance_scale: 5.0
    output_video_path: ./output_videos_sq/
quant:
    video_gen:
        method: SmoothQuant
        weight:
            bit: 8
            symmetric: True
            granularity: per_channel
        act:
            bit: 8
            symmetric: True
            granularity: per_token
        special:
            alpha: 0.75
save:
    save_lightx2v: True
    save_path: /path/to/x2v/
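The `alpha: 0.75` under `special` is SmoothQuant's migration strength. A hedged sketch of the per-channel scale formula it controls (the helper is illustrative; LLMC's actual implementation differs in detail):

```python
# SmoothQuant moves quantization difficulty from activations to weights
# via per-channel scales s_j = max|X_j|**alpha / max|W_j|**(1 - alpha).
# Illustrative re-implementation only.

def smoothquant_scales(act_absmax, weight_absmax, alpha=0.75):
    """Per-channel smoothing scales from activation/weight magnitudes."""
    return [a ** alpha / w ** (1 - alpha)
            for a, w in zip(act_absmax, weight_absmax)]


act_absmax = [8.0, 2.0]      # per-channel activation outlier magnitudes
weight_absmax = [0.5, 0.5]   # per-channel weight magnitudes
s = smoothquant_scales(act_absmax, weight_absmax)
# X is divided by s and W multiplied by s, so (X / s) @ (s * W) == X @ W,
# but the activation outliers shrink before quantization.
```

A larger alpha shifts more of the outlier magnitude into the weights, which suits models whose activations are much harder to quantize than their weights.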
9 changes: 4 additions & 5 deletions configs/quantization/video_gen/wan_t2v/awq_w_a.yaml
100644 → 100755
@@ -5,7 +5,7 @@ model:
     path: /path/to/wan_t2v
     torch_dtype: auto
 calib:
-    name: custom_t2v
+    name: t2v
     download: False
     path: ../assets/wan_t2v/calib/
     sample_steps: 20
@@ -18,7 +18,7 @@ calib:
 eval:
     eval_pos: [transformed, fake_quant]
     type: video_gen
-    name: custom_t2v
+    name: t2v
     download: False
     path: ../assets/wan_t2v/calib/
     bs: 1
@@ -45,6 +45,5 @@ quant:
             weight_clip: True
             clip_sym: True
 save:
-    save_trans: False
-    save_fake: False
-    save_path: /path/to/save/
+    save_lightx2v: True
+    save_path: /path/to/x2v/
11 changes: 5 additions & 6 deletions configs/quantization/video_gen/wan_t2v/rtn_w_a.yaml
100644 → 100755
@@ -7,15 +7,15 @@ model:
 eval:
     eval_pos: [transformed, fake_quant]
     type: video_gen
-    name: custom_t2v
+    name: t2v
     download: False
-    path: /mtc/gushiqiao/llmc_video_new/llmc/assets/wan_t2v/
+    path: ../assets/wan_t2v/eval/
     bs: 1
     target_height: 480
     target_width: 832
     num_frames: 81
     guidance_scale: 5.0
-    output_video_path: ./output_videos_sq/
+    output_video_path: ./output_videos_rtn/
 quant:
     video_gen:
         method: RTN
@@ -28,6 +28,5 @@ quant:
             symmetric: True
             granularity: per_token
 save:
-    save_trans: False
-    save_fake: False
-    save_path: /path/to/save/
+    save_lightx2v: True
+    save_path: /path/to/x2v/
9 changes: 4 additions & 5 deletions configs/quantization/video_gen/wan_t2v/smoothquant_w_a.yaml
100644 → 100755
@@ -5,7 +5,7 @@ model:
     path: /path/to/wan_t2v
     torch_dtype: auto
 calib:
-    name: custom_t2v
+    name: t2v
     download: False
     path: ../assets/wan_t2v/calib/
     sample_steps: 20
@@ -18,7 +18,7 @@ calib:
 eval:
     eval_pos: [transformed, fake_quant]
     type: video_gen
-    name: custom_t2v
+    name: t2v
     download: False
     path: ../assets/wan_t2v/calib/
     bs: 1
@@ -41,6 +41,5 @@ quant:
         special:
             alpha: 0.7
 save:
-    save_trans: False
-    save_fake: False
-    save_path: /path/to/save/
+    save_lightx2v: True
+    save_path: /path/to/x2v/
63 changes: 36 additions & 27 deletions llmc/__main__.py
@@ -121,39 +121,43 @@ def main(config):

         if config.save.get('save_vllm', False):
             deploy_all_modality(blockwise_opts, 'vllm_quant')
-        if config.save.get('save_lightllm', False):
+        elif config.save.get('save_lightllm', False):
             deploy_all_modality(blockwise_opts, 'lightllm_quant')
-        if config.save.get('save_sgl', False):
+        elif config.save.get('save_sgl', False):
             deploy_all_modality(blockwise_opts, 'sgl_quant')

         blockwise_opt.save_model(save_quant_path)
         update_vllm_quant_config(blockwise_opt.model, config, save_quant_path)

-    if 'save' in config and config.save.get('save_autoawq', False):
-        for modality_config in modality_configs:
-            assert (
-                modality_config.weight.bit in [4] and 'act' not in modality_config
-            ), 'AutoAWQ supports only 4-bit weight-only quantization.'
-            assert (
-                not modality_config.weight.symmetric
-            ), 'Only asymmetric quant is supported.'
-
-        deploy_all_modality(blockwise_opts, 'autoawq_quant')
-        blockwise_opt.save_model(save_quant_path)
-        update_autoawq_quant_config(config, save_quant_path)
-
-    if 'save' in config and config.save.get('save_mlcllm', False):
-        for modality_config in modality_configs:
-            assert (
-                modality_config.weight.bit in [4] and 'act' not in modality_config
-            ), 'MlcLLM supports only 4-bit weight-only quantization.'
-            assert (
-                not modality_config.weight.symmetric
-            ), 'Only asymmetric quant is supported.'
-
-        deploy_all_modality(blockwise_opts, 'mlcllm_quant')
-        blockwise_opt.save_model(save_quant_path)
-        update_autoawq_quant_config(config, save_quant_path)
+    elif config.save.get('save_autoawq', False):
+        for modality_config in modality_configs:
+            assert (
+                modality_config.weight.bit in [4] and 'act' not in modality_config
+            ), 'AutoAWQ supports only 4-bit weight-only quantization.'
+            assert (
+                not modality_config.weight.symmetric
+            ), 'Only asymmetric quant is supported.'
+
+        deploy_all_modality(blockwise_opts, 'autoawq_quant')
+        blockwise_opt.save_model(save_quant_path)
+        update_autoawq_quant_config(config, save_quant_path)
+
+    elif config.save.get('save_mlcllm', False):
+        for modality_config in modality_configs:
+            assert (
+                modality_config.weight.bit in [4] and 'act' not in modality_config
+            ), 'MlcLLM supports only 4-bit weight-only quantization.'
+            assert (
+                not modality_config.weight.symmetric
+            ), 'Only asymmetric quant is supported.'
+
+        deploy_all_modality(blockwise_opts, 'mlcllm_quant')
+        blockwise_opt.save_model(save_quant_path)
+        update_autoawq_quant_config(config, save_quant_path)
+
+    elif config.save.get('save_lightx2v', False):
+        deploy_all_modality(blockwise_opts, 'lightx2v_quant')
+        blockwise_opt.save_model(save_quant_path)

     if 'opencompass' in config:
         assert config.save.get('save_trans', False)
@@ -240,6 +244,11 @@ def main(config):
             config.save.save_path, 'mlcllm_quant_model'
         )
         mkdirs(save_quant_path)
+    if config.save.get('save_lightx2v', False):
+        save_quant_path = os.path.join(
+            config.save.save_path, 'lightx2v_quant_model'
+        )
+        mkdirs(save_quant_path)
     if config.save.get('save_fake', False):
         save_fake_path = os.path.join(config.save.save_path, 'fake_quant_model')
         mkdirs(save_fake_path)
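The `__main__.py` change turns independent `if`s into an `if`/`elif` chain, making the deploy backends mutually exclusive: only the first enabled backend runs. A standalone sketch of that selection logic follows; the config keys mirror the YAML, but the helper function itself is hypothetical, not part of llmc.

```python
# First-match-wins backend selection, mirroring the if/elif chain in the
# diff. The key list comes from the diff; pick_backend is illustrative.

def pick_backend(save_cfg):
    """Return the single deploy backend enabled by a save config dict."""
    ordered = [
        ('save_vllm', 'vllm_quant'),
        ('save_lightllm', 'lightllm_quant'),
        ('save_sgl', 'sgl_quant'),
        ('save_autoawq', 'autoawq_quant'),
        ('save_mlcllm', 'mlcllm_quant'),
        ('save_lightx2v', 'lightx2v_quant'),
    ]
    for key, backend in ordered:
        if save_cfg.get(key, False):
            return backend   # elif semantics: later flags are ignored
    return None
```

With plain `if`s, setting two flags would deploy (and overwrite) the same `save_quant_path` twice; the chain guarantees exactly one backend wins.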
7 changes: 5 additions & 2 deletions llmc/compression/quantization/auto_clip.py
@@ -10,10 +10,13 @@

 if is_fp8_supported_gpu():
     from .kernel import weight_cast_to_bf16, weight_cast_to_fp8
-    logger.info('import kernel successful.')
+    logger.info('Successfully imported Triton kernel.')
 else:
     from .quant import weight_cast_to_bf16, weight_cast_to_fp8
-    logger.info('import quant successful.')
+    logger.info(
+        'Triton kernel not available: non-Hopper GPU detected.\n'
+        'Using LLMC Quantizer implementation instead.'
+    )


 class AutoClipper:
5 changes: 3 additions & 2 deletions llmc/compression/quantization/awq.py
@@ -13,10 +13,11 @@

 if is_fp8_supported_gpu():
     from .kernel import weight_cast_to_bf16, weight_cast_to_fp8
-    logger.info('import kernel successful.')
+    logger.info('Successfully imported Triton kernel.')
 else:
     from .quant import weight_cast_to_bf16, weight_cast_to_fp8
-    logger.info('import quant successful.')
+    logger.info('Triton kernel not available (non-Hopper GPU detected). \
+Falling back to LLMC Quantizer implementation.')

 from .module_utils import (_LLMC_LINEAR_TYPES_, _LLMC_LN_TYPES_,
                            _TRANSFORMERS_LINEAR_TYPES_,
7 changes: 5 additions & 2 deletions llmc/compression/quantization/base_blockwise_quantization.py
@@ -21,10 +21,13 @@

 if is_fp8_supported_gpu():
     from .kernel import weight_cast_to_bf16, weight_cast_to_fp8
-    logger.info('import kernel successful.')
+    logger.info('Successfully imported Triton kernel.')
 else:
     from .quant import weight_cast_to_bf16, weight_cast_to_fp8
-    logger.info('import quant successful.')
+    logger.info(
+        'Triton kernel not available: non-Hopper GPU detected.\n'
+        'Using LLMC Quantizer implementation instead.'
+    )

 from .hadamard_utils import apply_exact_had_to_linear, get_hadK
 from .module_utils import (_LLMC_LINEAR_TYPES_, _LLMC_LN_TYPES_,
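The three edited modules share one pattern: probe GPU capability once at import time, then bind either the Triton FP8 kernels or the pure quantizer fallback, logging which path was taken. A self-contained sketch of that pattern; the probe flag and returned module names are stand-ins for `is_fp8_supported_gpu()` and the `.kernel`/`.quant` imports.

```python
# Capability-gated implementation selection, as in the modules above.
# `fp8_supported` stands in for is_fp8_supported_gpu(); the returned
# names stand in for the conditional `from .kernel`/`from .quant` imports.
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def select_weight_cast(fp8_supported):
    """Choose the weight-cast implementation and log the decision."""
    if fp8_supported:
        logger.info('Successfully imported Triton kernel.')
        return 'kernel'   # would be: from .kernel import weight_cast_to_fp8
    logger.info('Triton kernel not available: non-Hopper GPU detected. '
                'Using LLMC Quantizer implementation instead.')
    return 'quant'        # would be: from .quant import weight_cast_to_fp8
```

Because the same two functions (`weight_cast_to_bf16`, `weight_cast_to_fp8`) are imported under either branch, downstream code is oblivious to which implementation it got.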