
Commit 2ced59e

Debug
1 parent 68ead1b commit 2ced59e

2 files changed (+7, -1 lines changed)

examples/models/llama/attention.py

Lines changed: 2 additions & 0 deletions
@@ -221,6 +221,8 @@ def forward(
         bsz, seqlen, _ = x.shape

         # QKV
+        # breakpoint()
+        print("Calculating q_proj")
         q, k, v = self.wq(x), self.wk(x), self.wv(x)
         # We need view_copy elimination
         q = q.view(bsz, seqlen, self.n_local_heads, self.head_dim)
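
For context, the surrounding forward() code projects the hidden state into query, key, and value tensors and reshapes the query into a per-head view. Below is a minimal standalone sketch of that pattern; dim, n_local_heads, and head_dim are illustrative values, not the model's actual configuration.

# Sketch of the QKV projection and per-head reshape shown in the diff context.
# All dimensions here are made-up for illustration.
import torch
import torch.nn as nn

bsz, seqlen, dim = 1, 3, 64
n_local_heads, head_dim = 4, 16  # n_local_heads * head_dim == dim

wq = nn.Linear(dim, n_local_heads * head_dim, bias=False)
wk = nn.Linear(dim, n_local_heads * head_dim, bias=False)
wv = nn.Linear(dim, n_local_heads * head_dim, bias=False)

x = torch.randn(bsz, seqlen, dim)
q, k, v = wq(x), wk(x), wv(x)
q = q.view(bsz, seqlen, n_local_heads, head_dim)  # same reshape as in the diff
print(q.shape)  # torch.Size([1, 3, 4, 16])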

examples/models/llama/export_llama_lib.py

Lines changed: 5 additions & 1 deletion
@@ -620,7 +620,11 @@ def _prepare_for_llama_export(args) -> LLMEdgeManager:
     )

     quantized = torch.load("/home/jackzhxng/torchrepos/executorch/fake_quantized_weights.pt")
-    breakpoint()
+    # Test run the model to trace.
+    edge_manager.model(
+        torch.tensor([[2, 3, 4]], dtype=torch.long),
+        {"input_pos": torch.tensor([0], dtype=torch.long)},
+    )
     # torch.testing.assert_close()

     # We want to do compute the actual ops in the precision of the dtype_override.
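
The diff loads fake-quantized weights and leaves torch.testing.assert_close() commented out, which suggests the debugging goal is to compare the eager model's output on the small test prompt against a saved reference. Below is a hedged sketch of what such a check might look like; the helper name, the reference file, and the tolerances are assumptions, and only the edge_manager.model call is taken from the diff above.

# Hedged sketch of the comparison the commented-out assert_close() seems to be
# aiming for. The reference path and tolerances are assumptions, not part of
# this commit.
import torch

def check_against_reference(model, reference_path, rtol=1e-3, atol=1e-3):
    tokens = torch.tensor([[2, 3, 4]], dtype=torch.long)
    input_pos = torch.tensor([0], dtype=torch.long)
    actual = model(tokens, {"input_pos": input_pos})  # same call as in the diff
    expected = torch.load(reference_path)  # hypothetical saved reference output
    torch.testing.assert_close(actual, expected, rtol=rtol, atol=atol)

# Usage (hypothetical): check_against_reference(edge_manager.model, "reference_logits.pt")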

0 commit comments
