
Commit 4f3d12e

Add no_rope_layer_interval to config
1 parent d38eb08 commit 4f3d12e

File tree

4 files changed: +6 −9 lines

examples/models/llama/model_args.py

Lines changed: 3 additions & 0 deletions
@@ -78,6 +78,9 @@ class ModelArgs:
     use_qk_norm: bool = False  # apply normalization to q and k in the attention
     qk_norm_before_rope: bool = False  # when to apply qk norm
     use_hf_rope: bool = False  # Use HuggingFace's RoPE implementation
+    no_rope_layer_interval: Optional[int] = (
+        None  # Interval at which to skip RoPE. From Rope to Nope and Back Again: A New Hybrid Attention Strategy (https://huggingface.co/papers/2501.18795).
+    )
     partial_rotary_factor: float = 1.0
     rope_theta: Optional[float] = (
         None  # The official name to override self.rope_freq_base.

examples/models/smollm3/3b_config.json

Lines changed: 1 addition & 0 deletions
@@ -10,5 +10,6 @@
   "use_scaled_rope": false,
   "vocab_size": 128256,
   "use_hf_rope": false,
+  "no_rope_layer_interval": 4,
   "attention_qkv_bias": false
 }

examples/qualcomm/oss_scripts/llama/llama.py

Lines changed: 0 additions & 7 deletions
@@ -445,13 +445,6 @@ def compile(
     else:
         kv_config.enable_masked_softmax = True
 
-    if args.decoder_model == "smollm3-3b":
-        from transformers import AutoConfig
-
-        kv_config.apply_rope_layers = AutoConfig.from_pretrained(
-            decoder_model_config.repo_id
-        ).no_rope_layers
-
     prefill_config = copy.copy(kv_config)
     prefill_config.use_kv_cache = (
         False if args.max_seq_len == args.prefill_ar_len else True
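
The block removed above pulled the per-layer no_rope_layers mask from the Hugging Face SmolLM3 config inside compile(); with no_rope_layer_interval available in ModelArgs, the same mask can be derived locally and the transformers dependency dropped. A minimal sketch of that derivation (assuming the HF list uses 1 for "apply RoPE" and 0 for "skip", which is what the old apply_rope_layers[layer_idx] truthiness check implied; rope_mask is a hypothetical helper, not part of this commit):

def rope_mask(n_layers: int, interval: int | None) -> list[int]:
    # 1 = apply RoPE in this layer, 0 = skip it (NoPE layer).
    if not interval:
        return [1] * n_layers  # no interval configured: RoPE in every layer
    return [int((i + 1) % interval != 0) for i in range(n_layers)]

# With the SmolLM3-3B value of 4, every fourth layer becomes a NoPE layer.
print(rope_mask(8, 4))  # [1, 1, 1, 0, 1, 1, 1, 0]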

examples/qualcomm/oss_scripts/llama/model/static_llama.py

Lines changed: 2 additions & 2 deletions
@@ -75,9 +75,9 @@ def __init__(self, layer_idx: int, config: ModelArgs, output_new_cache_only=False
         self.enable_masked_softmax = getattr(config, "enable_masked_softmax", False)
         self.use_qk_norm = config.use_qk_norm
         self.qk_norm_before_rope = config.qk_norm_before_rope
-        apply_rope_layers = getattr(config, "apply_rope_layers", None)
         self.use_rope = (
-            apply_rope_layers[layer_idx] if apply_rope_layers is not None else True
+            config.no_rope_layer_interval
+            and (layer_idx + 1) % config.no_rope_layer_interval
         )
 
         if self.use_qk_norm:
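
Note that the rewritten expression stores the raw result of the and/modulo chain, so self.use_rope is an int when an interval is configured and None when no_rope_layer_interval is left at its default; None is falsy, so models that previously relied on the removed `else True` fallback need the interval set (or a guard) to keep RoPE on every layer. A bool-normalized sketch with that default made explicit (illustrative only, not the committed code):

def layer_uses_rope(layer_idx: int, no_rope_layer_interval: int | None) -> bool:
    # Explicit fallback: with no interval configured, keep RoPE in every layer,
    # matching the removed `else True` branch.
    if not no_rope_layer_interval:
        return True
    # 1-indexed layer positions that are multiples of the interval skip RoPE.
    return (layer_idx + 1) % no_rope_layer_interval != 0

# interval = 4: the 4th, 8th, ... layers (indices 3, 7, ...) skip RoPE.
assert [layer_uses_rope(i, 4) for i in range(8)] == [True, True, True, False] * 2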
