
Commit 6bae1ae

navsud authored and facebook-github-bot committed
Enable QAT for static llama definition (pytorch#13285)
Summary: The model needed small modifications to be able to run QAT on GPUs.

Reviewed By: YIWENX14

Differential Revision: D79841467
1 parent 41fdf13 commit 6bae1ae

File tree

1 file changed: +2 −1 lines


examples/models/llama/rope.py

Lines changed: 2 additions & 1 deletion
@@ -47,9 +47,10 @@ def precompute_freqs_cis(
     use_scaled: bool = False,
     scale_factor: Optional[int] = None,
     high_freq_factor: int = 4,
+    device: torch.device = torch.device("cpu"),
 ):
     freqs = 1.0 / (
-        theta ** (torch.arange(0, dim, 2, device="cpu")[: (dim // 2)].float() / dim)
+        theta ** (torch.arange(0, dim, 2, device=device)[: (dim // 2)].float() / dim)
     )
     t = torch.arange(end, device=freqs.device)  # pyre-ignore
     if use_scaled:
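For context, a minimal sketch of how the new device argument might be exercised when building the RoPE tables on an accelerator for QAT. The dim, end, and theta values below are illustrative assumptions, not values from the commit, and the import assumes the repo root is on the Python path:

# Sketch only: precompute the RoPE frequency table directly on the GPU so
# QAT training does not mix CPU-resident and GPU-resident tensors.
import torch
from examples.models.llama.rope import precompute_freqs_cis

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

rope_tables = precompute_freqs_cis(  # return structure is not shown in the diff
    dim=128,        # head dimension (assumed)
    end=2048,       # maximum sequence length (assumed)
    theta=10000.0,  # RoPE base (assumed)
    device=device,  # new keyword added by this commit; defaults to CPU
)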
