50 | 50 |     lm_head=None,
51 | 51 | )
52 | 52 |
| 53 | +ALT_TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
| 54 | +    ff_up_proj="model.layers.{}.mlp.up_proj",
| 55 | +    ff_down_proj="model.layers.{}.mlp.down_proj",
| 56 | +    ff_gate_proj="model.layers.{}.mlp.gate_proj",
| 57 | +    attn_query_proj="model.layers.{}.self_attn.q_proj",
| 58 | +    attn_key_proj="model.layers.{}.self_attn.k_proj",
| 59 | +    attn_value_proj="model.layers.{}.self_attn.v_proj",
| 60 | +    attn_output_proj="model.layers.{}.self_attn.o_proj",
| 61 | +    pre_attn_norm="model.layers.{}.input_layernorm",
| 62 | +    post_attn_norm="model.layers.{}.post_attention_layernorm",
| 63 | +    pre_ff_norm="model.layers.{}.pre_feedforward_layernorm",
| 64 | +    post_ff_norm="model.layers.{}.post_feedforward_layernorm",
| 65 | +    embedding="model.embed_tokens",
| 66 | +    final_norm="model.norm",
| 67 | +)
53 | 68 |
54 | 69 | class Gemma2Block(attention.TransformerBlock):
55 | 70 |
@@ -289,9 +304,18 @@ def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
289 | 304 |
290 | 305 |
291 | 306 | def build_2b_model(checkpoint_path: str, **kwargs) -> nn.Module:
292 | | -  return model_builder.build_decoder_only_model(
293 | | -      checkpoint_path=checkpoint_path,
294 | | -      config=get_model_config_2b(**kwargs),
295 | | -      tensor_names=TENSOR_NAMES,
296 | | -      model_class=Gemma2,
297 | | -  )
| 307 | +  try:
| 308 | +    return model_builder.build_decoder_only_model(
| 309 | +        checkpoint_path=checkpoint_path,
| 310 | +        config=get_model_config_2b(**kwargs),
| 311 | +        tensor_names=TENSOR_NAMES,
| 312 | +        model_class=Gemma2,
| 313 | +    )
| 314 | +  except KeyError:
| 315 | +    # Fall back to the alternative tensor naming scheme (split q/k/v projections).
| 316 | +    return model_builder.build_decoder_only_model(
| 317 | +        checkpoint_path=checkpoint_path,
| 318 | +        config=get_model_config_2b(**kwargs),
| 319 | +        tensor_names=ALT_TENSOR_NAMES,
| 320 | +        model_class=Gemma2,
| 321 | +    )
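A minimal usage sketch of the new fallback path (not part of the diff; the import path, checkpoint location, and kv_cache_max_len keyword below are illustrative assumptions):

    # Hypothetical usage: build_2b_model() first tries TENSOR_NAMES and, on a
    # KeyError, retries with ALT_TENSOR_NAMES, so a checkpoint using either
    # naming scheme should load.
    from ai_edge_torch.generative.examples.gemma import gemma2  # assumed module path

    model = gemma2.build_2b_model("/path/to/gemma2-2b", kv_cache_max_len=1024)
    model.eval()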