diff --git a/examples/pytorch/diffusion_model/diffusers/framepack/README.md b/examples/pytorch/diffusion_model/diffusers/framepack/README.md
new file mode 100644
index 00000000000..042b8e109bc
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/framepack/README.md
@@ -0,0 +1,57 @@
+# Step-by-Step
+
+This example quantizes the FramePack image-to-video model and validates its accuracy with VBench.
+
+# Prerequisite
+
+## 1. Environment
+
+```shell
+# install zip according to your system
+sudo apt update && sudo apt install zip
+
+pip install -r requirements.txt
+pip install --upgrade neural-compressor-pt
+pip install --upgrade auto-round
+git clone --depth 1 https://github.com/lllyasviel/FramePack.git
+cp -r FramePack/diffusers_helper/ .
+
+# several models will be downloaded automatically into HF_HOME
+export HF_HOME=/path/to/save/model
+```
+
+## 2. Prepare Dataset
+
+```shell
+git clone --depth 1 https://github.com/Vchitect/VBench.git
+cd VBench
+sh vbench2_beta_i2v/download_data.sh
+```
+
+# Run
+
+## BF16
+
+```bash
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
+bash run_benchmark.sh \
+    --topology=BF16 \
+    --dataset_location=/path/to/VBench \
+    --output_video_path=bf16_video \
+    --dimension_list="subject_consistency i2v_background" \
+    --result_path=bf16_result
+```
+
+## MXFP8 or FP8
+
+```bash
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
+bash run_benchmark.sh \
+    --topology=MXFP8 \
+    --dataset_location=/path/to/VBench \
+    --output_video_path=mxfp8_video \
+    --dimension_list="subject_consistency i2v_background" \
+    --result_path=mxfp8_result
+```
+
+- CUDA_VISIBLE_DEVICES: the dimensions in `--dimension_list` are distributed across the visible GPUs to speed up the evaluation
+- topology: pass `--topology=FP8` instead of `--topology=MXFP8` to quantize with the FP8 scheme
diff --git a/examples/pytorch/diffusion_model/diffusers/framepack/main.py b/examples/pytorch/diffusion_model/diffusers/framepack/main.py
new file mode 100644
index 00000000000..51584f2133e
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/framepack/main.py
@@ -0,0 +1,347 @@
+# Copyright (c) 2025 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import json
+import os
+import sys
+
+import einops
+import numpy as np
+import torch
+import torchvision
+
+from auto_round import AutoRound
+from diffusers import AutoencoderKLHunyuanVideo
+from neural_compressor.torch.quantization import (
+    AutoRoundConfig,
+    convert,
+    prepare,
+)
+from PIL import Image
+from transformers import LlamaModel, CLIPTextModel, LlamaTokenizerFast, CLIPTokenizer
+from transformers import SiglipImageProcessor, SiglipVisionModel
+
+from diffusers_helper.hunyuan import encode_prompt_conds, vae_decode, vae_encode, vae_decode_fake
+from diffusers_helper.utils import save_bcthw_as_mp4, crop_or_pad_yield_mask, soft_append_bcthw, resize_and_center_crop, state_dict_weighted_merge, state_dict_offset_merge, generate_timestamp
+from diffusers_helper.models.hunyuan_video_packed import HunyuanVideoTransformer3DModelPacked
+from diffusers_helper.pipelines.k_diffusion_hunyuan import sample_hunyuan
+from diffusers_helper.memory import cpu, gpu, get_cuda_free_memory_gb, move_model_to_device_with_memory_preservation, offload_model_from_device_for_memory_preservation, fake_diffusers_current_device, DynamicSwapInstaller, unload_complete_models, load_model_as_complete
+from diffusers_helper.clip_vision import hf_clip_vision_encode
+from diffusers_helper.bucket_tools import find_nearest_bucket
+
+
+parser = argparse.ArgumentParser(
+    description="FramePack quantization.", formatter_class=argparse.ArgumentDefaultsHelpFormatter
+)
+parser.add_argument("--scheme", default="MXFP8", type=str, help="quantization scheme.")
+parser.add_argument("--quantize", action="store_true")
+parser.add_argument("--inference", action="store_true")
+parser.add_argument("--output_dir", "--quantized_model_path", default="./tmp_autoround", type=str, help="the directory to save the quantized model")
+parser.add_argument("--dataset_location", type=str, help="path of the cloned VBench repository, which contains the images and prompts for evaluation")
+parser.add_argument("--output_video_path", default="./tmp_video", type=str, help="the directory to save generated videos")
+parser.add_argument("--limit", default=-1, type=int, help="limit the number of prompts for evaluation")
+parser.add_argument("--seed", default=31337, type=int, help="random seed")
+parser.add_argument("--total_second_length", default=5, type=int, help="length of the generated video in seconds")
+parser.add_argument("--latent_window_size", default=9, type=int)
+parser.add_argument("--steps", default=25, type=int, help="number of inference steps")
+parser.add_argument("--cfg", default=1.0, type=float, help="real guidance scale")
+parser.add_argument("--gs", default=10.0, type=float, help="distilled guidance scale")
+parser.add_argument("--rs", default=0.0, type=float, help="guidance rescale")
+parser.add_argument("--gpu_memory_preservation", default=6, type=int)
+parser.add_argument("--use_teacache", action="store_true", help="faster speed, but often makes hands and fingers slightly worse")
+parser.add_argument("--mp4_crf", default=16, type=int, help="MP4 compression. Lower means better quality. 0 is uncompressed. 
Change to 16 if you get black outputs.") +parser.add_argument( + "--dimension_list", + nargs="+", + choices=["subject_consistency", "background_consistency", "motion_smoothness", "dynamic_degree", "aesthetic_quality", "imaging_quality", "i2v_subject", "i2v_background", "camera_motion"], + help="list of evaluation dimensions, usage: --dimension_list ", +) +parser.add_argument("--ratio", default="16-9", type=str, help="aspect ratio of image") + +args = parser.parse_args() +free_mem_gb = get_cuda_free_memory_gb(gpu) +high_vram = free_mem_gb > 60 + +@torch.no_grad() +def worker(input_image, prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf): + input_image = Image.open(input_image).convert("RGB") + input_image = np.array(input_image) + total_latent_sections = (total_second_length * 30) / (latent_window_size * 4) + total_latent_sections = int(max(round(total_latent_sections), 1)) + + # Clean GPU + if not high_vram: + unload_complete_models( + text_encoder, text_encoder_2, image_encoder, vae, transformer + ) + + # Text encoding + + if not high_vram: + fake_diffusers_current_device(text_encoder, gpu) # since we only encode one text - that is one model move and one encode, offload is same time consumption since it is also one load and one encode. + load_model_as_complete(text_encoder_2, target_device=gpu) + + llama_vec, clip_l_pooler = encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2) + + llama_vec_n, clip_l_pooler_n = torch.zeros_like(llama_vec), torch.zeros_like(clip_l_pooler) + + llama_vec, llama_attention_mask = crop_or_pad_yield_mask(llama_vec, length=512) + llama_vec_n, llama_attention_mask_n = crop_or_pad_yield_mask(llama_vec_n, length=512) + # Processing input image + + H, W, C = input_image.shape + height, width = find_nearest_bucket(H, W, resolution=640) + input_image_np = resize_and_center_crop(input_image, target_width=width, target_height=height) + + input_image_pt = torch.from_numpy(input_image_np).float() / 127.5 - 1 + input_image_pt = input_image_pt.permute(2, 0, 1)[None, :, None] + + # VAE encoding + + if not high_vram: + load_model_as_complete(vae, target_device=gpu) + + start_latent = vae_encode(input_image_pt, vae) + + # CLIP Vision + + if not high_vram: + load_model_as_complete(image_encoder, target_device=gpu) + + image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder) + image_encoder_last_hidden_state = image_encoder_output.last_hidden_state + + # Dtype + + llama_vec = llama_vec.to(transformer.dtype) + llama_vec_n = llama_vec_n.to(transformer.dtype) + clip_l_pooler = clip_l_pooler.to(transformer.dtype) + clip_l_pooler_n = clip_l_pooler_n.to(transformer.dtype) + image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype) + + # Sampling + + rnd = torch.Generator("cpu").manual_seed(seed) + num_frames = latent_window_size * 4 - 3 + + history_latents = torch.zeros(size=(1, 16, 1 + 2 + 16, height // 8, width // 8), dtype=torch.float32).cpu() + history_pixels = None + total_generated_latent_frames = 0 + + latent_paddings = reversed(range(total_latent_sections)) + + if total_latent_sections > 4: + # In theory the latent_paddings should follow the above sequence, but it seems that duplicating some + # items looks better than expanding it when total_latent_sections > 4 + # One can try to remove below trick and just + # use `latent_paddings = list(reversed(range(total_latent_sections)))` to compare + latent_paddings = 
[3] + [2] * (total_latent_sections - 3) + [1, 0] + + for latent_padding in latent_paddings: + is_last_section = latent_padding == 0 + latent_padding_size = latent_padding * latent_window_size + + print(f"latent_padding_size = {latent_padding_size}, is_last_section = {is_last_section}") + + indices = torch.arange(0, sum([1, latent_padding_size, latent_window_size, 1, 2, 16])).unsqueeze(0) + clean_latent_indices_pre, blank_indices, latent_indices, clean_latent_indices_post, clean_latent_2x_indices, clean_latent_4x_indices = indices.split([1, latent_padding_size, latent_window_size, 1, 2, 16], dim=1) + clean_latent_indices = torch.cat([clean_latent_indices_pre, clean_latent_indices_post], dim=1) + + clean_latents_pre = start_latent.to(history_latents) + clean_latents_post, clean_latents_2x, clean_latents_4x = history_latents[:, :, :1 + 2 + 16, :, :].split([1, 2, 16], dim=2) + clean_latents = torch.cat([clean_latents_pre, clean_latents_post], dim=2) + + if not high_vram: + unload_complete_models() + move_model_to_device_with_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=gpu_memory_preservation) + + + if use_teacache: + transformer.initialize_teacache(enable_teacache=True, num_steps=steps) + else: + transformer.initialize_teacache(enable_teacache=False) + + def callback(d): + preview = d["denoised"] + preview = vae_decode_fake(preview) + + preview = (preview * 255.0).detach().cpu().numpy().clip(0, 255).astype(np.uint8) + preview = einops.rearrange(preview, "b c t h w -> (b h) (t w) c") + + current_step = d["i"] + 1 + hint = f"Sampling {current_step}/{steps}" + desc = f"Total generated frames: {int(max(0, total_generated_latent_frames * 4 - 3))}, Video length: {max(0, (total_generated_latent_frames * 4 - 3) / 30) :.2f} seconds (FPS-30). The video is being extended now ..." 
+ print(hint, desc) + return + + generated_latents = sample_hunyuan( + transformer=transformer, + sampler="unipc", + width=width, + height=height, + frames=num_frames, + real_guidance_scale=cfg, + distilled_guidance_scale=gs, + guidance_rescale=rs, + # shift=3.0, + num_inference_steps=steps, + generator=rnd, + prompt_embeds=llama_vec, + prompt_embeds_mask=llama_attention_mask, + prompt_poolers=clip_l_pooler, + negative_prompt_embeds=llama_vec_n, + negative_prompt_embeds_mask=llama_attention_mask_n, + negative_prompt_poolers=clip_l_pooler_n, + device=gpu, + dtype=torch.bfloat16, + image_embeddings=image_encoder_last_hidden_state, + latent_indices=latent_indices, + clean_latents=clean_latents, + clean_latent_indices=clean_latent_indices, + clean_latents_2x=clean_latents_2x, + clean_latent_2x_indices=clean_latent_2x_indices, + clean_latents_4x=clean_latents_4x, + clean_latent_4x_indices=clean_latent_4x_indices, + callback=callback, + ) + if is_last_section: + generated_latents = torch.cat([start_latent.to(generated_latents), generated_latents], dim=2) + + total_generated_latent_frames += int(generated_latents.shape[2]) + history_latents = torch.cat([generated_latents.to(history_latents), history_latents], dim=2) + + if not high_vram: + offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8) + load_model_as_complete(vae, target_device=gpu) + + real_history_latents = history_latents[:, :, :total_generated_latent_frames, :, :] + + if history_pixels is None: + history_pixels = vae_decode(real_history_latents, vae).cpu() + else: + section_latent_frames = (latent_window_size * 2 + 1) if is_last_section else (latent_window_size * 2) + overlapped_frames = latent_window_size * 4 - 3 + + current_pixels = vae_decode(real_history_latents[:, :, :section_latent_frames], vae).cpu() + history_pixels = soft_append_bcthw(current_pixels, history_pixels, overlapped_frames) + + if not high_vram: + unload_complete_models() + + print(f"Decoded. 
Current latent shape {real_history_latents.shape}; pixel shape {history_pixels.shape}") + + if is_last_section: + break + return history_pixels + +if __name__ == "__main__": + transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained("lllyasviel/FramePackI2V_HY", torch_dtype=torch.bfloat16).cpu() + transformer.to(dtype=torch.bfloat16) + transformer.requires_grad_(False) + transformer.eval() + + if args.quantize: + setattr(transformer, "name_or_path", "lllyasviel/FramePackI2V_HY") + + qconfig = AutoRoundConfig( + scheme=args.scheme, + iters=0, + export_format="fake", + output_dir=args.output_dir, + ) + transformer = prepare(transformer, qconfig) + transformer = convert(transformer, qconfig) + + if args.inference: + text_encoder = LlamaModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="text_encoder", torch_dtype=torch.float16).cpu() + text_encoder_2 = CLIPTextModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="text_encoder_2", torch_dtype=torch.float16).cpu() + tokenizer = LlamaTokenizerFast.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="tokenizer") + tokenizer_2 = CLIPTokenizer.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="tokenizer_2") + vae = AutoencoderKLHunyuanVideo.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="vae", torch_dtype=torch.float16).cpu() + + feature_extractor = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder="feature_extractor") + image_encoder = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder="image_encoder", torch_dtype=torch.float16).cpu() + + vae.eval() + text_encoder.eval() + text_encoder_2.eval() + image_encoder.eval() + + if not high_vram: + vae.enable_slicing() + vae.enable_tiling() + + transformer.high_quality_fp32_output_for_inference = True + print("transformer.high_quality_fp32_output_for_inference = True") + + vae.to(dtype=torch.float16) + image_encoder.to(dtype=torch.float16) + text_encoder.to(dtype=torch.float16) + text_encoder_2.to(dtype=torch.float16) + + vae.requires_grad_(False) + text_encoder.requires_grad_(False) + text_encoder_2.requires_grad_(False) + image_encoder.requires_grad_(False) + + if not high_vram: + # DynamicSwapInstaller is same as huggingface"s enable_sequential_offload but 3x faster + DynamicSwapInstaller.install_model(text_encoder, device=gpu) + DynamicSwapInstaller.install_model(transformer, device=gpu) + else: + text_encoder.to(gpu) + text_encoder_2.to(gpu) + image_encoder.to(gpu) + vae.to(gpu) + transformer.to(gpu) + + if not os.path.exists(args.output_video_path): + os.makedirs(args.output_video_path) + + idx = 0 + for dimension in args.dimension_list: + # prepare inputs + + image_folder = os.path.join(args.dataset_location, f"vbench2_beta_i2v/data/crop/{args.ratio}") + info_list = json.load(open(os.path.join(args.dataset_location, "vbench2_beta_i2v/vbench2_i2v_full_info.json"), "r")) + inputs = [(os.path.join(image_folder, info["image_name"]), info["prompt_en"]) for info in info_list if dimension in info["dimension"]] + for image_path, prompt in inputs: + idx += 1 + if args.limit > 0 and idx >= args.limit: + break + + # only sample 1 video for each prompt to evaluate quickly + cur_save_path = f"{args.output_video_path}/{prompt}-0.mp4" + + if os.path.exists(cur_save_path): + continue + # perform sampling + x = worker(image_path, prompt, args.seed, args.total_second_length, args.latent_window_size, args.steps, args.cfg, args.gs, args.rs, args.gpu_memory_preservation, 
args.use_teacache, args.mp4_crf) + b, c, t, h, w = x.shape + + per_row = b + for p in [6, 5, 4, 3, 2]: + if b % p == 0: + per_row = p + break + + x = torch.clamp(x.float(), -1., 1.) * 127.5 + 127.5 + x = x.detach().cpu().to(torch.uint8) + video = einops.rearrange(x, "(m n) c t h w -> t (m h) (n w) c", n=per_row) + torchvision.io.write_video(cur_save_path, video, fps=30, video_codec="h264", options={"crf": "10"}) diff --git a/examples/pytorch/diffusion_model/diffusers/framepack/requirements.txt b/examples/pytorch/diffusion_model/diffusers/framepack/requirements.txt new file mode 100644 index 00000000000..06e80a0af7f --- /dev/null +++ b/examples/pytorch/diffusion_model/diffusers/framepack/requirements.txt @@ -0,0 +1,35 @@ +Pillow +matplotlib +timm>=0.9,<=1.0.12 +wheel +cython +tensorboard +scipy +scikit-learn +scikit-image +openai-clip +decord +requests +pyyaml +easydict +pyiqa +lvis +fairscale>=0.4.4 +fvcore +easydict +urllib3 +boto3 +omegaconf +transformers +pycocoevalcap +detectron2@git+https://github.com/facebookresearch/detectron2.git +accelerate +diffusers +sentencepiece==0.2.0 +av==12.1.0 +torchsde==0.2.6 +einops +safetensors +opencv-python-headless +dreamsim +numpy<2.0.0 diff --git a/examples/pytorch/diffusion_model/diffusers/framepack/run_benchmark.sh b/examples/pytorch/diffusion_model/diffusers/framepack/run_benchmark.sh new file mode 100644 index 00000000000..71dc61b38d9 --- /dev/null +++ b/examples/pytorch/diffusion_model/diffusers/framepack/run_benchmark.sh @@ -0,0 +1,130 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --topology=*) + topology=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --ratio=*) + ratio=$(echo $var |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --limit=*) + limit=$(echo $var |cut -f2 -d=) + ;; + --output_video_path=*) + output_video_path=$(echo $var |cut -f2 -d=) + ;; + --result_path=*) + result_path=$(echo $var |cut -f2 -d=) + ;; + --dimension_list=*) + dimension_list=$(echo $var |cut -f2 -d=) + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done + +} + + +# run_benchmark +function run_benchmark { + limit=${limit:=-1} + ratio=${ratio:="16-9"} + output_video_path=${output_video_path:="./tmp_videos"} + result_path=${result_path:="./eval_result"} + + if [[ ! "${result_path}" = /* ]]; then + result_path=$(realpath -s "$(pwd)/$result_path") + fi + + if [[ ! 
"${output_video_path}" = /* ]]; then + output_video_path=$(realpath -s "$(pwd)/$output_video_path") + fi + + if [ "${topology}" = "FP8" ]; then + extra_cmd="--scheme FP8 --quantize --inference" + elif [ "${topology}" = "MXFP8" ]; then + extra_cmd="--scheme MXFP8 --quantize --inference" + elif [ "${topology}" = "BF16" ]; then + extra_cmd="--inference" + fi + + if [ -n "$CUDA_VISIBLE_DEVICES" ]; then + gpu_list="${CUDA_VISIBLE_DEVICES:-}" + IFS=',' read -ra gpu_ids <<< "$gpu_list" + visible_gpus=${#gpu_ids[@]} + echo "visible_gpus: ${visible_gpus}" + + IFS=' ' read -ra dimensions <<< "$dimension_list" + dimension_num=${#dimensions[@]} + if [ "${visible_gpus}" -gt "${dimension_num}" ]; then + count=${dimension_num} + step=1 + else + count=${visible_gpus} + step=$((dimension_num/visible_gpus)) + left=${dimensions[@]:step*count-1:dimension_num} + dimensions=("${dimensions[@]:0:step*count-1}" "$left") + fi + + for ((i=0; i&1) + result_file=$(echo "$output" | grep -i "Evaluation results saved to " | awk '{print $NF}') + + echo "Evaluation results saved to ${result_file}" + zip -r "${result_path}.zip" ${result_path} + python scripts/cal_i2v_final_score.py --zip_file "${result_path}.zip" --model_name "framepack" + +} + +main "$@" + diff --git a/neural_compressor/torch/algorithms/weight_only/autoround.py b/neural_compressor/torch/algorithms/weight_only/autoround.py index 5fa3b253cfa..2342f9f5b84 100644 --- a/neural_compressor/torch/algorithms/weight_only/autoround.py +++ b/neural_compressor/torch/algorithms/weight_only/autoround.py @@ -268,7 +268,6 @@ def convert(self, model: torch.nn.Module, *args, **kwargs): if tokenizer is not None: delattr(model.orig_model, "tokenizer") elif pipe is None: - tokenizer = "Placeholder" self.dataset = CapturedDataloader(model.args_list, model.kwargs_list) model = model.orig_model if pipe is not None: