From aa5479e35267b4920e011d59406a5de398b98195 Mon Sep 17 00:00:00 2001
From: name
Date: Wed, 1 Oct 2025 12:44:35 -0700
Subject: [PATCH 1/3] Add support for Qwen/Qwen3-0.6B-Base model
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds support for the base (non-instruct) version of Qwen3-0.6B.
The base model (Qwen/Qwen3-0.6B-Base) and instruct model (Qwen/Qwen3-0.6B)
share the same architecture but have different weights. The base model is
suitable for fine-tuning, while the instruct model is optimized for
instruction-following and chat.

Changes:
- Added "Qwen/Qwen3-0.6B-Base" to OFFICIAL_MODEL_NAMES
- Added alias "qwen3-0.6b-base" to MODEL_ALIASES

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 transformer_lens/loading_from_pretrained.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/transformer_lens/loading_from_pretrained.py b/transformer_lens/loading_from_pretrained.py
index 8bfb6315d..3bc901dba 100644
--- a/transformer_lens/loading_from_pretrained.py
+++ b/transformer_lens/loading_from_pretrained.py
@@ -236,6 +236,7 @@
     "Qwen/Qwen2.5-72B-Instruct",
     "Qwen/QwQ-32B-Preview",
     "Qwen/Qwen3-0.6B",
+    "Qwen/Qwen3-0.6B-Base",
     "Qwen/Qwen3-1.7B",
     "Qwen/Qwen3-4B",
     "Qwen/Qwen3-8B",
@@ -692,6 +693,7 @@
     "Qwen/Qwen2.5-72B-Instruct": ["qwen2.5-72b-instruct"],
     "Qwen/QwQ-32B-Preview": ["qwen-32b-preview"],
     "Qwen/Qwen3-0.6B": ["qwen3-0.6b"],
+    "Qwen/Qwen3-0.6B-Base": ["qwen3-0.6b-base"],
     "Qwen/Qwen3-1.7B": ["qwen3-1.7b"],
     "Qwen/Qwen3-4B": ["qwen3-4b"],
     "Qwen/Qwen3-8B": ["qwen3-8b"],

From fc7c5b12c607a21741154b56d6b261ce00ecff9d Mon Sep 17 00:00:00 2001
From: name
Date: Wed, 1 Oct 2025 13:08:25 -0700
Subject: [PATCH 2/3] Update Colab_Compatibility notebook to include Qwen3-0.6B-Base
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add Qwen/Qwen3-0.6B-Base to the free_compatible list in the
Colab_Compatibility notebook to ensure all models in OFFICIAL_MODEL_NAMES
are accounted for in the test suite.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 demos/Colab_Compatibility.ipynb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/demos/Colab_Compatibility.ipynb b/demos/Colab_Compatibility.ipynb
index f1a567b5f..0a14cb28e 100644
--- a/demos/Colab_Compatibility.ipynb
+++ b/demos/Colab_Compatibility.ipynb
@@ -289,6 +289,7 @@
     "    \"Qwen/Qwen2.5-1.5B\",\n",
     "    \"Qwen/Qwen2.5-1.5B-Instruct\",\n",
     "    \"Qwen/Qwen3-0.6B\",\n",
+    "    \"Qwen/Qwen3-0.6B-Base\",\n",
     "    \"Qwen/Qwen3-1.7B\",\n",
     "    \"roneneldan/TinyStories-1Layer-21M\",\n",
     "    \"roneneldan/TinyStories-1M\",\n",

From 587609558c1c540e3dff0d1eb39b004c9833c974 Mon Sep 17 00:00:00 2001
From: name
Date: Wed, 1 Oct 2025 13:12:47 -0700
Subject: [PATCH 3/3] Fix notebook output to reflect 217 models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update the model count in Colab_Compatibility notebook output from 216 to
217 to reflect the addition of Qwen3-0.6B-Base.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 demos/Colab_Compatibility.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demos/Colab_Compatibility.ipynb b/demos/Colab_Compatibility.ipynb
index 0a14cb28e..5d8638a26 100644
--- a/demos/Colab_Compatibility.ipynb
+++ b/demos/Colab_Compatibility.ipynb
@@ -65,7 +65,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "TransformerLens currently supports 216 models out of the box.\n"
+      "TransformerLens currently supports 217 models out of the box.\n"
      ]
     }
    ],
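
A minimal usage sketch for the newly registered model, not part of the patches
themselves: it assumes a standard TransformerLens install and only uses the
public HookedTransformer.from_pretrained API; the "qwen3-0.6b-base" alias is
the one added in PATCH 1/3.

    from transformer_lens import HookedTransformer

    # Load the base (non-instruct) checkpoint added by PATCH 1/3. Either the
    # full Hugging Face name or the registered alias should resolve.
    model = HookedTransformer.from_pretrained("Qwen/Qwen3-0.6B-Base")

    # Running a string through the model returns logits over the vocabulary.
    logits = model("TransformerLens now supports Qwen3-0.6B-Base.")
    print(logits.shape)  # [batch, seq_len, d_vocab]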