From af6ec27d52977513d0d3c32c80cc4fd44fa6a565 Mon Sep 17 00:00:00 2001
From: Rohan Joshi <rohansjoshi@meta.com>
Date: Mon, 4 Aug 2025 14:19:02 -0700
Subject: [PATCH] Run Qwen script with Buck

Summary:
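Added the `hf_download` dependency to the Buck `python_library` target so that the llama.py script can be run with Buck to lower Qwen.
Replaced the `__file__`-relative path used to load the default Qwen config with a package-resource lookup via `importlib.resources.files`.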

SpinQuant R2 is disabled for now because it does not work with Qwen (R1 does); a follow-up will set `use_r2` based on the model.

Reviewed By: cccclai

Differential Revision: D79386256
---
 examples/qualcomm/oss_scripts/llama/TARGETS  |  1 +
 examples/qualcomm/oss_scripts/llama/llama.py | 27 +++++++-------------
 2 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/examples/qualcomm/oss_scripts/llama/TARGETS b/examples/qualcomm/oss_scripts/llama/TARGETS
index 264854d9bfc..09a2948f3a0 100644
--- a/examples/qualcomm/oss_scripts/llama/TARGETS
+++ b/examples/qualcomm/oss_scripts/llama/TARGETS
@@ -26,6 +26,7 @@ python_library(
         "//executorch/devtools/backend_debug:delegation_info",
         "//executorch/devtools:lib",
         "//executorch/examples/models:models",
+        "//executorch/examples/models/llama:hf_download",
         "//executorch/examples/qualcomm/oss_scripts/llama:static_llama",
         "//executorch/examples/qualcomm:utils",
         "//executorch/extension/export_util:export_util",
diff --git a/examples/qualcomm/oss_scripts/llama/llama.py b/examples/qualcomm/oss_scripts/llama/llama.py
index 92fb12c799f..b37dc75dc39 100755
--- a/examples/qualcomm/oss_scripts/llama/llama.py
+++ b/examples/qualcomm/oss_scripts/llama/llama.py
@@ -350,24 +350,15 @@ def compile(args, pte_filename, tokenizer):
     start_ts = time.time()
 
     kv_config, prefill_config = None, None
-    params_path = ""
     if args.params:
-        params_path = args.params
-    else:
-        if args.decoder_model == "qwen2_5":
-            cur_dir = os.path.dirname(__file__)
-            params_path = os.path.join(
-                cur_dir,
-                "..",
-                "..",
-                "..",
-                "models",
-                "qwen2_5",
-                "config",
-                "0_5b_config.json",
-            )
-    with open(params_path) as f:
-        kv_config = ModelArgs(**json.load(f))
+        with open(args.params) as f:
+            kv_config = ModelArgs(**json.load(f))
+    elif args.decoder_model == "qwen2_5":
+        from importlib.resources import files
+
+        data_dir = files("executorch").joinpath("examples/models/qwen2_5/config")
+        config_file = data_dir.joinpath("0_5b_config.json")
+        kv_config = ModelArgs(**json.loads(config_file.read_text()))
 
     # TODO: support batch inputs if necessary
     kv_config.max_batch_size = 1
@@ -505,7 +496,7 @@ def permute(w, heads):
             apply_spinquant(
                 model,
                 use_r1=True,
-                use_r2=True,
+                use_r2=False,
                 use_r4=False,
                 pretrained_rotation_path=None,
                 qkv_split=True,