Commit 8e53f94

protobird-git authored and copybara-github committed
Apply mask_as_input and transpose_kv_cache flags to OpenELM and AMD Llama
- It's to make them compatible with other models
- OpenELM is working on CPU regardless of this CL or flags
- OpenELM is NOT working on GPU regardless of this CL or flags
- AMD Llama is NOT working regardless of this CL or flags

PiperOrigin-RevId: 757836926
1 parent 869f6ad commit 8e53f94

4 files changed: +6, -6 lines

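The converter-side mechanics of the change: the two convert_to_tflite.py scripts below stop constructing a bare ExportConfig() and instead call export_config.get_from_flags(), so the mask_as_input and transpose_kv_cache conversion flags actually reach the export. A minimal sketch of what get_from_flags() plausibly does; the two flag names come from the commit title, but the flag definitions and ExportConfig fields below are illustrative assumptions, not code from this repo:

import dataclasses

from absl import flags

# Assumed flag registrations; in the repo these live behind
# converter.define_conversion_flags().
flags.DEFINE_bool(
    "mask_as_input", False,
    "Pass the attention mask to the model as an explicit input.")
flags.DEFINE_bool(
    "transpose_kv_cache", False,
    "Use a transposed KV-cache layout in the exported model.")


@dataclasses.dataclass
class ExportConfig:
  # Illustrative fields; the real ExportConfig in
  # ai_edge_torch.generative.utilities.export_config may differ.
  mask_as_input: bool = False
  transpose_kv_cache: bool = False


def get_from_flags() -> ExportConfig:
  # Mirror the registered conversion flags into the export config.
  return ExportConfig(
      mask_as_input=flags.FLAGS.mask_as_input,
      transpose_kv_cache=flags.FLAGS.transpose_kv_cache,
  )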
ai_edge_torch/generative/examples/amd_llama_135m/amd_llama_135m.py
Lines changed: 3 additions & 1 deletion

@@ -49,7 +49,9 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       activation=cfg.ActivationConfig(cfg.ActivationType.SILU),
       intermediate_size=2048,
   )
-  norm_config = cfg.NormalizationConfig(type=cfg.NormalizationType.RMS_NORM)
+  norm_config = cfg.NormalizationConfig(
+      type=cfg.NormalizationType.RMS_NORM, enable_hlfb=True
+  )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
       ff_config=ff_config,

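The functional change in this file is enable_hlfb=True on the norm config. HLFB (high-level function boundary) marking presumably lets the converter lower the whole RMS norm as a single fused composite op rather than a chain of elementwise ops, matching how the other example models are configured. For reference, the textbook RMSNorm that NormalizationType.RMS_NORM denotes, as a standalone sketch (not this repo's implementation):

import torch

def rms_norm(x: torch.Tensor, weight: torch.Tensor, epsilon: float = 1e-6) -> torch.Tensor:
  # Normalize by the root mean square over the feature dimension,
  # then apply a learned per-feature scale.
  rms = torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + epsilon)
  return x * rms * weight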
ai_edge_torch/generative/examples/amd_llama_135m/convert_to_tflite.py
Lines changed: 1 addition & 2 deletions

@@ -21,7 +21,6 @@
 from ai_edge_torch.generative.utilities import export_config
 
 flags = converter.define_conversion_flags("amd-llama-135m")
-ExportConfig = export_config.ExportConfig
 
 
 def main(_):
@@ -35,7 +34,7 @@ def main(_):
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
       quantize=flags.FLAGS.quantize,
       lora_ranks=flags.FLAGS.lora_ranks,
-      export_config=ExportConfig(),
+      export_config=export_config.get_from_flags(),
   )

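For context, main() in these example converters follows roughly the shape below, reassembled from the diff context; build_model and every flag not visible in the diff above are assumed names, not confirmed by this commit. The OpenELM script in the next file changes in exactly the same way:

def main(_):
  # checkpoint_path and kv_cache_max_len are assumed flag names.
  pytorch_model = amd_llama_135m.build_model(
      flags.FLAGS.checkpoint_path,
      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
  )
  converter.convert_to_tflite(
      pytorch_model,
      output_path=flags.FLAGS.output_path,  # assumed
      prefill_seq_len=flags.FLAGS.prefill_seq_lens,
      quantize=flags.FLAGS.quantize,
      lora_ranks=flags.FLAGS.lora_ranks,
      export_config=export_config.get_from_flags(),  # was ExportConfig()
  )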
ai_edge_torch/generative/examples/openelm/convert_to_tflite.py
Lines changed: 1 addition & 2 deletions

@@ -21,7 +21,6 @@
 from ai_edge_torch.generative.utilities import export_config
 
 flags = converter.define_conversion_flags("openelm")
-ExportConfig = export_config.ExportConfig
 
 
 def main(_):
@@ -35,7 +34,7 @@ def main(_):
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
       quantize=flags.FLAGS.quantize,
       lora_ranks=flags.FLAGS.lora_ranks,
-      export_config=ExportConfig(),
+      export_config=export_config.get_from_flags(),
   )

ai_edge_torch/generative/examples/openelm/openelm.py
Lines changed: 1 addition & 1 deletion

@@ -51,7 +51,7 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
     The model config for an OpenELM model.
   """
   norm_config = cfg.NormalizationConfig(
-      type=cfg.NormalizationType.RMS_NORM, epsilon=1e-6
+      type=cfg.NormalizationType.RMS_NORM, epsilon=1e-6, enable_hlfb=True
   )
   num_heads = [12] * 4 + [16] * 14 + [20] * 12 + [24] * 6
   num_query_groups = [3] * 4 + [4] * 14 + [5] * 12 + [6] * 6

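The surrounding head lists (unchanged here) encode OpenELM's layer-wise scaling, one entry per transformer block. A quick standalone sanity check of their shape, plain Python with no repo dependencies:

num_heads = [12] * 4 + [16] * 14 + [20] * 12 + [24] * 6
num_query_groups = [3] * 4 + [4] * 14 + [5] * 12 + [6] * 6

# One entry per block: 4 + 14 + 12 + 6 = 36 blocks.
assert len(num_heads) == len(num_query_groups) == 36
# Grouped-query attention: every block keeps 4 query heads per KV group.
assert all(h % g == 0 and h // g == 4 for h, g in zip(num_heads, num_query_groups))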