
Commit b04778f

navsud authored and facebook-github-bot committed
Update rope to support QAT on GPU (#14619)
Summary: Pull Request resolved: #14619

As part of enabling QAT for the HTP model, we need to run QAT on the same model that we use during export. Currently, RoPE parameter creation is explicitly hardcoded to "cpu". This change creates the RoPE params on "cuda" when running on a GPU machine.

Differential Revision: D82239525
1 parent d4f208d commit b04778f

File tree

1 file changed: +1 −1 lines changed


examples/models/llama/rope.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -47,8 +47,8 @@ def precompute_freqs_cis(
     use_scaled: bool = False,
     scale_factor: Optional[int] = None,
     high_freq_factor: int = 4,
-    device: Union[str, torch.device] = "cpu",
 ):
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     freqs = 1.0 / (
         theta ** (torch.arange(0, dim, 2, device=device)[: (dim // 2)].float() / dim)
     )
```
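The effect of the change can be sketched with a minimal, self-contained version of the frequency precompute. Note this is an illustrative reduction, not the full `precompute_freqs_cis` from `rope.py` (the real signature carries the extra scaling parameters shown in the diff); the point is only that the device is now chosen at call time instead of being pinned to "cpu":

```python
import torch

def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0):
    # Pick the device at runtime, mirroring the commit: "cuda" when a GPU
    # is available, otherwise fall back to "cpu".
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Per-dimension rotation frequencies, created directly on the chosen device.
    freqs = 1.0 / (
        theta ** (torch.arange(0, dim, 2, device=device)[: (dim // 2)].float() / dim)
    )
    t = torch.arange(end, device=device)        # position indices 0..end-1
    freqs = torch.outer(t, freqs).float()       # shape (end, dim // 2)
    # Complex rotations e^{i * theta} for each (position, dim-pair).
    return torch.polar(torch.ones_like(freqs), freqs)

freqs_cis = precompute_freqs_cis(dim=64, end=128)
print(freqs_cis.shape)  # torch.Size([128, 32])
```

Because all tensors in the function inherit `device`, downstream QAT observers and fake-quant modules can run on the GPU without an explicit `.to("cuda")` copy of the rope params.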
