
Commit 42eb2ef

ai-edge-bot authored and copybara-github committed
Add DeepSeek-R1-Distill-Qwen as an example
- It's based on Qwen, with some slight changes including separate lm_head weights.

PiperOrigin-RevId: 719455977
1 parent 25db678 commit 42eb2ef
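
The "separate lm_head weights" detail is the main divergence from the existing Qwen example: DeepSeek-R1-Distill-Qwen keeps a standalone output-projection tensor instead of tying it to the token embedding. A minimal sketch of how the new deepseek.py below encodes this (both names are taken from this commit; everything else is elided):

```python
# Sketch: how the "separate lm_head weights" change surfaces in deepseek.py.
from ai_edge_torch.generative.utilities import model_builder

# Load the checkpoint's standalone lm_head weights rather than reusing the
# token-embedding weights:
TENSOR_NAMES = model_builder.TENSOR_NAMES_WITH_SEPARATE_LM_HEAD

# ...paired with the corresponding cfg.ModelConfig field in get_model_config():
#   lm_head_share_weight_with_embedding=False
```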

File tree

6 files changed: +271 -2 lines changed

ai_edge_torch/generative/examples/README.md
ai_edge_torch/generative/examples/deepseek/__init__.py
ai_edge_torch/generative/examples/deepseek/convert_to_tflite.py
ai_edge_torch/generative/examples/deepseek/deepseek.py
ai_edge_torch/generative/examples/deepseek/verify.py
ai_edge_torch/generative/test/test_model_conversion_large.py

ai_edge_torch/generative/examples/README.md

Lines changed: 4 additions & 0 deletions
```diff
@@ -52,6 +52,10 @@ same architecture as SmolLM but it has been trained on improved training data.
 Alibaba's [Qwen 2.5](https://huggingface.co/collections/Qwen/qwen25-66e81a666513e518adb90d9e)
 0.5B, 1B, 3B models are also provided as examples.
 
+## DeepSeek
+[DeepSeek-R1 distilled](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B)
+model based on Qwen is also provided as an example.
+
 ## AMD-Llama-135m
 
 [AMD-Llama-135m](https://huggingface.co/amd/AMD-Llama-135m) is a 135M parameter model based on the Llama2 architecture and uses the same tokenizer as Llama2. It was trained on AMD Instinct MI250 accelerators.
```
ai_edge_torch/generative/examples/deepseek/__init__.py

Lines changed: 14 additions & 0 deletions

```python
# Copyright 2025 The AI Edge Torch Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
```
ai_edge_torch/generative/examples/deepseek/convert_to_tflite.py

Lines changed: 80 additions & 0 deletions

```python
# Copyright 2025 The AI Edge Torch Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Example of converting DeepSeek R1 distilled models to tflite model."""

import os
import pathlib

from absl import app
from absl import flags
from ai_edge_torch.generative.examples.deepseek import deepseek
from ai_edge_torch.generative.utilities import converter
from ai_edge_torch.generative.utilities.model_builder import ExportConfig

_CHECKPOINT_PATH = flags.DEFINE_string(
    'checkpoint_path',
    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/deepseek'),
    'The path to the model checkpoint, or directory holding the checkpoint.',
)
_OUTPUT_PATH = flags.DEFINE_string(
    'output_path',
    '/tmp/',
    'The path to export the tflite model.',
)
_OUTPUT_NAME_PREFIX = flags.DEFINE_string(
    'output_name_prefix',
    'deepseek',
    'The prefix of the output tflite model name.',
)
_PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
    'prefill_seq_lens',
    (8, 64, 128, 256, 512, 1024),
    'List of the maximum sizes of prefill input tensors.',
)
_KV_CACHE_MAX_LEN = flags.DEFINE_integer(
    'kv_cache_max_len',
    1280,
    'The maximum size of KV cache buffer, including both prefill and decode.',
)
_QUANTIZE = flags.DEFINE_bool(
    'quantize',
    True,
    'Whether the model should be quantized.',
)
_LORA_RANKS = flags.DEFINE_multi_integer(
    'lora_ranks',
    None,
    'If set, the model will be converted with the provided list of LoRA ranks.',
)


def main(_):
  pytorch_model = deepseek.build_model(
      _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
  )
  converter.convert_to_tflite(
      pytorch_model,
      output_path=_OUTPUT_PATH.value,
      output_name_prefix=_OUTPUT_NAME_PREFIX.value,
      prefill_seq_len=_PREFILL_SEQ_LENS.value,
      quantize=_QUANTIZE.value,
      lora_ranks=_LORA_RANKS.value,
      export_config=ExportConfig(),
  )


if __name__ == '__main__':
  app.run(main)
```
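
With the defaults above, running the script with no flags looks for a checkpoint under `~/Downloads/llm_data/deepseek` and writes quantized TFLite output to `/tmp/`, with one prefill signature per entry in `prefill_seq_lens`. A hypothetical invocation overriding the checkpoint location would be `python convert_to_tflite.py --checkpoint_path=/path/to/DeepSeek-R1-Distill-Qwen-1.5B` (the path here is illustrative).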
ai_edge_torch/generative/examples/deepseek/deepseek.py

Lines changed: 92 additions & 0 deletions

```python
# Copyright 2025 The AI Edge Torch Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Example of building DeepSeek R1 distilled models."""

import ai_edge_torch.generative.layers.model_config as cfg
from ai_edge_torch.generative.utilities import model_builder
from torch import nn

TENSOR_NAMES = model_builder.TENSOR_NAMES_WITH_SEPARATE_LM_HEAD


class DeepSeekDistillQwen(model_builder.DecoderOnlyModel):
  """A DeepSeek distilled model based on Qwen."""
  pass


def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
  """Returns the model config for a DeepSeek-R1-Distill-Qwen 1.5B model.

  Args:
    kv_cache_max_len (int): The maximum sequence length of the KV cache.
      Default is 1024.

  Returns:
    The model config for a DeepSeek-R1-Distill-Qwen model.
  """
  attn_config = cfg.AttentionConfig(
      num_heads=12,
      head_dim=128,
      num_query_groups=2,
      rotary_base=10000,
      rotary_percentage=1.0,
      qkv_use_bias=True,
  )
  ff_config = cfg.FeedForwardConfig(
      type=cfg.FeedForwardType.GATED,
      activation=cfg.ActivationConfig(cfg.ActivationType.SILU),
      intermediate_size=8960,
  )
  norm_config = cfg.NormalizationConfig(
      type=cfg.NormalizationType.RMS_NORM,
      epsilon=1e-06,
  )
  block_config = cfg.TransformerBlockConfig(
      attn_config=attn_config,
      ff_config=ff_config,
      pre_attention_norm_config=norm_config,
      post_attention_norm_config=norm_config,
  )
  config = cfg.ModelConfig(
      vocab_size=151936,
      num_layers=28,
      max_seq_len=4096,
      embedding_dim=1536,
      kv_cache_max_len=kv_cache_max_len,
      block_configs=block_config,
      final_norm_config=norm_config,
      lm_head_share_weight_with_embedding=False,
      enable_hlfb=True,
  )
  return config


def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
  config = get_model_config(**kwargs)
  config.vocab_size = 128
  config.num_layers = 2
  # DeepSeek-R1-Distill-Qwen has only one block config.
  config.block_config(0).ff_config.intermediate_size = 64
  return config


def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
  return model_builder.build_decoder_only_model(
      checkpoint_path=checkpoint_path,
      config=get_model_config(**kwargs),
      tensor_names=TENSOR_NAMES,
      model_class=DeepSeekDistillQwen,
  )
```
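
Since the real checkpoint is a 1.5B-parameter download, the fake config above is handy for a quick structural smoke test. A minimal sketch, mirroring what the conversion test added below does (no checkpoint needed; the printed values follow from get_fake_model_config):

```python
# Build the tiny fake-config variant of the model, as the conversion test does.
from ai_edge_torch.generative.examples.deepseek import deepseek

config = deepseek.get_fake_model_config()
model = deepseek.DeepSeekDistillQwen(config).eval()
print(config.vocab_size, config.num_layers)  # 128 2
```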
ai_edge_torch/generative/examples/deepseek/verify.py

Lines changed: 70 additions & 0 deletions

```python
# Copyright 2025 The AI Edge Torch Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Verifies the reauthored DeepSeek R1 distilled 1.5B model."""

import logging
import pathlib

from absl import app
from absl import flags
from ai_edge_torch.generative.examples.deepseek import deepseek
from ai_edge_torch.generative.utilities import transformers_verifier
from ai_edge_torch.generative.utilities import verifier
import transformers


_PROMPTS = flags.DEFINE_multi_string(
    "prompts",
    "What is the meaning of life?",
    "The input prompts to generate answers.",
)
_MAX_NEW_TOKENS = flags.DEFINE_integer(
    "max_new_tokens",
    30,
    "The maximum size of the generated tokens.",
)


def main(_):
  checkpoint = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
  logging.info("Loading the original model from: %s", checkpoint)
  original_model = transformers.AutoModelForCausalLM.from_pretrained(checkpoint)

  # Locate the cached dir.
  cached_config_file = transformers.utils.cached_file(
      checkpoint, transformers.utils.CONFIG_NAME
  )
  reauthored_checkpoint = pathlib.Path(cached_config_file).parent
  logging.info("Building the reauthored model from: %s", reauthored_checkpoint)
  reauthored_model = deepseek.build_model(reauthored_checkpoint)

  logging.info("Loading the tokenizer from: %s", checkpoint)
  tokenizer = transformers.AutoTokenizer.from_pretrained(checkpoint)

  verifier.verify_reauthored_model(
      original_model=transformers_verifier.TransformersModelWrapper(
          original_model
      ),
      reauthored_model=verifier.ReauthoredModelWrapper(reauthored_model),
      tokenizer=verifier.TokenizerWrapper(tokenizer),
      generate_prompts=_PROMPTS.value,
      max_new_tokens=_MAX_NEW_TOKENS.value,
      atol=1e-04,
  )


if __name__ == "__main__":
  app.run(main)
```
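
Note that this script downloads deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B from Hugging Face, builds the reauthored model from the same cached checkpoint directory, and compares the two models' outputs on each prompt within atol=1e-04; a typical (hypothetical) invocation would be `python verify.py --max_new_tokens=30`.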

ai_edge_torch/generative/test/test_model_conversion_large.py

Lines changed: 11 additions & 2 deletions
```diff
@@ -17,6 +17,7 @@
 
 import ai_edge_torch
 from ai_edge_torch.generative.examples.amd_llama_135m import amd_llama_135m
+from ai_edge_torch.generative.examples.deepseek import deepseek
 from ai_edge_torch.generative.examples.gemma import gemma1
 from ai_edge_torch.generative.examples.gemma import gemma2
 from ai_edge_torch.generative.examples.llama import llama
@@ -150,16 +151,15 @@ def test_smollm(self):
       ai_edge_torch.config.in_oss,
       reason="tests with custom ops are not supported in oss",
   )
-
   def test_smollm2(self):
     config = smollm.get_fake_model_config_v2()
     pytorch_model = smollm.SmolLM2(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5)
+
   @googletest.skipIf(
       ai_edge_torch.config.in_oss,
       reason="tests with custom ops are not supported in oss",
   )
-
   def test_openelm(self):
     config = openelm.get_fake_model_config()
     pytorch_model = openelm.OpenELM(config).eval()
@@ -174,6 +174,15 @@ def test_qwen(self):
     pytorch_model = qwen.Qwen(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-3, rtol=1e-5)
 
+  @googletest.skipIf(
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
+  )
+  def test_deepseek(self):
+    config = deepseek.get_fake_model_config()
+    pytorch_model = deepseek.DeepSeekDistillQwen(config).eval()
+    self._test_model(config, pytorch_model, "prefill", atol=1e-5, rtol=1e-5)
+
   @googletest.skipIf(
       ai_edge_torch.config.in_oss,
       reason="tests with custom ops are not supported in oss",
```