
Commit 37ee087

navsud authored and facebook-github-bot committed
Update rope to support QAT on GPU (#14619)
Summary: As part of enabling QAT for the HTP model, we need to run QAT on the same model that we use during export. Currently, Rope is explicitly hardcoded to "cpu"; this change creates the rope params on "cuda" when the model is run on a GPU machine.

Differential Revision: D82239525
1 parent d4f208d

File tree: 1 file changed (+2, -2)


examples/models/llama/rope.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -9,7 +9,7 @@
 
 import math
 from functools import partial
-from typing import Optional, Tuple, Union
+from typing import Optional, Tuple
 
 import torch
 from executorch.examples.models.llama.model_args import ModelArgs
@@ -47,8 +47,8 @@ def precompute_freqs_cis(
     use_scaled: bool = False,
     scale_factor: Optional[int] = None,
     high_freq_factor: int = 4,
-    device: Union[str, torch.device] = "cpu",
 ):
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     freqs = 1.0 / (
         theta ** (torch.arange(0, dim, 2, device=device)[: (dim // 2)].float() / dim)
     )
```
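To make the effect of the change concrete, here is a minimal, self-contained sketch of the pattern after the patch. The signature is simplified for illustration: the real `precompute_freqs_cis` also takes `use_scaled`, `scale_factor`, and `high_freq_factor`, and the rest of the body shown is an assumption about the standard rope precompute, not a copy of the file. The key point is that the device is now resolved at call time from `torch.cuda.is_available()` rather than passed in through a hardcoded `device="cpu"` parameter:

```python
import torch


def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0):
    # Resolve the device at call time instead of accepting a hardcoded
    # device="cpu" parameter, so the same export path creates the rope
    # params on "cuda" when run on a GPU machine (e.g. for QAT).
    device = "cuda" if torch.cuda.is_available() else "cpu"
    freqs = 1.0 / (
        theta ** (torch.arange(0, dim, 2, device=device)[: (dim // 2)].float() / dim)
    )
    t = torch.arange(end, device=freqs.device)  # positions 0 .. end-1
    freqs = torch.outer(t, freqs).float()       # (end, dim // 2) angle table
    return torch.cos(freqs), torch.sin(freqs)   # cos/sin rope tables


# Hypothetical usage: the tensors land on GPU automatically when available.
freqs_cos, freqs_sin = precompute_freqs_cis(dim=128, end=2048)
print(freqs_cos.device)  # cuda:0 on a GPU machine, otherwise cpu
```

One consequence of removing the `device` parameter is that callers can no longer pin the rope params to a specific device; placement is decided solely by CUDA availability at call time.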
