Run Qwen script with Buck

rohansjoshi · facebook-github-bot · commit af6ec27d5297 · 2025-08-04T14:19:02.000-07:00
Summary:
Added the Buck targets needed to run the llama.py script for lowering Qwen with Buck.
Removed the use of a `__file__`-relative path to load the Qwen config; the config is now located via `importlib.resources` instead.

SpinQuant R2 is disabled for now because it does not work with Qwen (R1 does); `use_r2` will be set based on the model in a follow-up.

Reviewed By: cccclai

Differential Revision: D79386256
diff --git a/examples/qualcomm/oss_scripts/llama/TARGETS b/examples/qualcomm/oss_scripts/llama/TARGETS
@@ -26,6 +26,7 @@ python_library(
         "//executorch/devtools/backend_debug:delegation_info",
         "//executorch/devtools:lib",
         "//executorch/examples/models:models",
+        "//executorch/examples/models/llama:hf_download",
         "//executorch/examples/qualcomm/oss_scripts/llama:static_llama",
         "//executorch/examples/qualcomm:utils",
         "//executorch/extension/export_util:export_util",
diff --git a/examples/qualcomm/oss_scripts/llama/llama.py b/examples/qualcomm/oss_scripts/llama/llama.py
@@ -350,24 +350,15 @@ def compile(args, pte_filename, tokenizer):
     start_ts = time.time()
 
     kv_config, prefill_config = None, None
-    params_path = ""
     if args.params:
-        params_path = args.params
-    else:
-        if args.decoder_model == "qwen2_5":
-            cur_dir = os.path.dirname(__file__)
-            params_path = os.path.join(
-                cur_dir,
-                "..",
-                "..",
-                "..",
-                "models",
-                "qwen2_5",
-                "config",
-                "0_5b_config.json",
-            )
-    with open(params_path) as f:
-        kv_config = ModelArgs(**json.load(f))
+        with open(args.params) as f:
+            kv_config = ModelArgs(**json.load(f))
+    elif args.decoder_model == "qwen2_5":
+        from importlib.resources import files
+
+        data_dir = files("executorch").joinpath("examples/models/qwen2_5/config")
+        config_file = data_dir.joinpath("0_5b_config.json")
+        kv_config = ModelArgs(**json.loads(config_file.read_text()))
 
     # TODO: support batch inputs if necessary
     kv_config.max_batch_size = 1
@@ -505,7 +496,7 @@ def permute(w, heads):
             apply_spinquant(
                 model,
                 use_r1=True,
-                use_r2=True,
+                use_r2=False,
                 use_r4=False,
                 pretrained_rotation_path=None,
                 qkv_split=True,