From af6ec27d52977513d0d3c32c80cc4fd44fa6a565 Mon Sep 17 00:00:00 2001 From: Rohan Joshi Date: Mon, 4 Aug 2025 14:19:02 -0700 Subject: [PATCH] Run Qwen script with Buck Summary: Added targets to enable running the script llama.py for lowering Qwen with Buck. Removed use of a relative path to load the Qwen config. Currently disabling SpinQuant R2 since it doesn't work with Qwen (but R1 does) -- will set use_r2 based on model later. Reviewed By: cccclai Differential Revision: D79386256 --- examples/qualcomm/oss_scripts/llama/TARGETS | 1 + examples/qualcomm/oss_scripts/llama/llama.py | 27 +++++++------------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/examples/qualcomm/oss_scripts/llama/TARGETS b/examples/qualcomm/oss_scripts/llama/TARGETS index 264854d9bfc..09a2948f3a0 100644 --- a/examples/qualcomm/oss_scripts/llama/TARGETS +++ b/examples/qualcomm/oss_scripts/llama/TARGETS @@ -26,6 +26,7 @@ python_library( "//executorch/devtools/backend_debug:delegation_info", "//executorch/devtools:lib", "//executorch/examples/models:models", + "//executorch/examples/models/llama:hf_download", "//executorch/examples/qualcomm/oss_scripts/llama:static_llama", "//executorch/examples/qualcomm:utils", "//executorch/extension/export_util:export_util", diff --git a/examples/qualcomm/oss_scripts/llama/llama.py b/examples/qualcomm/oss_scripts/llama/llama.py index 92fb12c799f..b37dc75dc39 100755 --- a/examples/qualcomm/oss_scripts/llama/llama.py +++ b/examples/qualcomm/oss_scripts/llama/llama.py @@ -350,24 +350,15 @@ def compile(args, pte_filename, tokenizer): start_ts = time.time() kv_config, prefill_config = None, None - params_path = "" if args.params: - params_path = args.params - else: - if args.decoder_model == "qwen2_5": - cur_dir = os.path.dirname(__file__) - params_path = os.path.join( - cur_dir, - "..", - "..", - "..", - "models", - "qwen2_5", - "config", - "0_5b_config.json", - ) - with open(params_path) as f: - kv_config = ModelArgs(**json.load(f)) + with 
open(args.params) as f: + kv_config = ModelArgs(**json.load(f)) + elif args.decoder_model == "qwen2_5": + from importlib.resources import files + + data_dir = files("executorch").joinpath("examples/models/qwen2_5/config") + config_file = data_dir.joinpath("0_5b_config.json") + kv_config = ModelArgs(**json.loads(config_file.read_text())) # TODO: support batch inputs if necessary kv_config.max_batch_size = 1 @@ -505,7 +496,7 @@ def permute(w, heads): apply_spinquant( model, use_r1=True, - use_r2=True, + use_r2=False, use_r4=False, pretrained_rotation_path=None, qkv_split=True,