diff --git a/examples/qualcomm/oss_scripts/llama/TARGETS b/examples/qualcomm/oss_scripts/llama/TARGETS
index 264854d9bfc..09a2948f3a0 100644
--- a/examples/qualcomm/oss_scripts/llama/TARGETS
+++ b/examples/qualcomm/oss_scripts/llama/TARGETS
@@ -26,6 +26,7 @@ python_library(
         "//executorch/devtools/backend_debug:delegation_info",
         "//executorch/devtools:lib",
         "//executorch/examples/models:models",
+        "//executorch/examples/models/llama:hf_download",
         "//executorch/examples/qualcomm/oss_scripts/llama:static_llama",
         "//executorch/examples/qualcomm:utils",
         "//executorch/extension/export_util:export_util",
diff --git a/examples/qualcomm/oss_scripts/llama/llama.py b/examples/qualcomm/oss_scripts/llama/llama.py
index 92fb12c799f..b37dc75dc39 100755
--- a/examples/qualcomm/oss_scripts/llama/llama.py
+++ b/examples/qualcomm/oss_scripts/llama/llama.py
@@ -350,24 +350,15 @@ def compile(args, pte_filename, tokenizer):
     start_ts = time.time()
 
     kv_config, prefill_config = None, None
-    params_path = ""
     if args.params:
-        params_path = args.params
-    else:
-        if args.decoder_model == "qwen2_5":
-            cur_dir = os.path.dirname(__file__)
-            params_path = os.path.join(
-                cur_dir,
-                "..",
-                "..",
-                "..",
-                "models",
-                "qwen2_5",
-                "config",
-                "0_5b_config.json",
-            )
-    with open(params_path) as f:
-        kv_config = ModelArgs(**json.load(f))
+        with open(args.params) as f:
+            kv_config = ModelArgs(**json.load(f))
+    elif args.decoder_model == "qwen2_5":
+        from importlib.resources import files
+
+        data_dir = files("executorch").joinpath("examples/models/qwen2_5/config")
+        config_file = data_dir.joinpath("0_5b_config.json")
+        kv_config = ModelArgs(**json.loads(config_file.read_text()))
 
     # TODO: support batch inputs if necessary
     kv_config.max_batch_size = 1
@@ -505,7 +496,7 @@ def permute(w, heads):
             apply_spinquant(
                 model,
                 use_r1=True,
-                use_r2=True,
+                use_r2=False,
                 use_r4=False,
                 pretrained_rotation_path=None,
                 qkv_split=True,