
Commit 497d5c5 ("update", parent: 4188140)

File tree: 5 files changed (+20, −4 lines)


ch05/16_qwen3.5/README.md

Lines changed: 1 addition & 0 deletions

@@ -6,6 +6,7 @@ This folder contains a from-scratch style implementation of [Qwen/Qwen3.5-0.8B](
 
 Qwen3.5 is based on the Qwen3-Next architecture, which I described in more detail in section [2. (Linear) Attention Hybrids](https://magazine.sebastianraschka.com/i/177848019/2-linear-attention-hybrids) of my [Beyond Standard LLMs](https://magazine.sebastianraschka.com/p/beyond-standard-llms) article
 
+<a href="https://magazine.sebastianraschka.com/p/beyond-standard-llms"><img src="https://sebastianraschka.com/images/LLMs-from-scratch-images/bonus/qwen3.5/02.webp" width="500px"></a>
 
 Note that Qwen3.5 alternates `linear_attention` and `full_attention` layers.
 The notebooks keep the full model flow readable while reusing the linear-attention building blocks from the [qwen3_5_transformers.py](qwen3_5_transformers.py), which contains the linear attention code from Hugging Face under an Apache version 2.0 open source license.
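The alternation of layer types is the key architectural point of the hybrid design. As a toy sketch only (the function name, the 4:1 ratio, and the ordering below are illustrative assumptions, not Qwen3.5's actual schedule), interleaving the two attention variants might look like:

```python
def build_layer_schedule(n_layers, full_attention_every=4):
    """Return a list of layer-type strings: a "full_attention" layer at
    every `full_attention_every`-th position, "linear_attention" elsewhere.

    Hypothetical illustration of a hybrid-attention stack; the actual
    Qwen3.5 ratio and ordering may differ.
    """
    return [
        "full_attention" if (i + 1) % full_attention_every == 0
        else "linear_attention"
        for i in range(n_layers)
    ]
```

A model constructor could then dispatch on each entry of the schedule when instantiating its transformer blocks.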

ch05/16_qwen3.5/qwen3.5-plus-kv-cache.ipynb

Lines changed: 8 additions & 1 deletion

@@ -60,6 +60,14 @@
 "- Qwen3.5 is based on the Qwen3-Next architecture, which I described in more detail in section [2. (Linear) Attention Hybrids](https://magazine.sebastianraschka.com/i/177848019/2-linear-attention-hybrids) of my [Beyond Standard LLMs](https://magazine.sebastianraschka.com/p/beyond-standard-llms) article"
 ]
 },
+{
+"cell_type": "markdown",
+"id": "21d38944-0c98-40a6-a6f8-c745769b4618",
+"metadata": {},
+"source": [
+"<a href=\"https://magazine.sebastianraschka.com/p/beyond-standard-llms\"><img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/bonus/qwen3.5/02.webp\" width=\"500px\"></a>"
+]
+},
 {
 "cell_type": "code",
 "execution_count": 1,

@@ -136,7 +144,6 @@
 "source": [
 "import torch\n",
 "import torch.nn as nn\n",
-"import torch.nn.functional as F\n",
 "\n",
 "\n",
 "class FeedForward(nn.Module):\n",

ch05/16_qwen3.5/qwen3.5.ipynb

Lines changed: 8 additions & 1 deletion

@@ -60,6 +60,14 @@
 "- Qwen3.5 is based on the Qwen3-Next architecture, which I described in more detail in section [2. (Linear) Attention Hybrids](https://magazine.sebastianraschka.com/i/177848019/2-linear-attention-hybrids) of my [Beyond Standard LLMs](https://magazine.sebastianraschka.com/p/beyond-standard-llms) article"
 ]
 },
+{
+"cell_type": "markdown",
+"id": "402a446f-4efe-41f5-acc0-4f8455846aa5",
+"metadata": {},
+"source": [
+"<a href=\"https://magazine.sebastianraschka.com/p/beyond-standard-llms\"><img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/bonus/qwen3.5/02.webp\" width=\"500px\"></a>"
+]
+},
 {
 "cell_type": "code",
 "execution_count": 1,

@@ -136,7 +144,6 @@
 "source": [
 "import torch\n",
 "import torch.nn as nn\n",
-"import torch.nn.functional as F\n",
 "\n",
 "\n",
 "class FeedForward(nn.Module):\n",

ch05/16_qwen3.5/tests/qwen3_5_layer_debugger.py

Lines changed: 1 addition & 1 deletion

@@ -102,7 +102,7 @@ def _hf_config_from_dict(cfg):
     return hf_cfg
 
 
-def load_notebook_defs(nb_name="standalone-qwen3.5.ipynb"):
+def load_notebook_defs(nb_name="qwen3.5.ipynb"):
     nb_dir = Path(__file__).resolve().parents[1]
     if str(nb_dir) not in sys.path:
         sys.path.insert(0, str(nb_dir))

ch05/16_qwen3.5/tests/test_qwen3_5_nb.py

Lines changed: 2 additions & 1 deletion

@@ -44,7 +44,8 @@ def import_notebook_defs():
     nb_dir = Path(__file__).resolve().parents[1]
     if str(nb_dir) not in sys.path:
         sys.path.insert(0, str(nb_dir))
-    mod = import_definitions_from_notebook(nb_dir, "standalone-qwen3.5.ipynb")
+
+    mod = import_definitions_from_notebook(nb_dir, "qwen3.5.ipynb")
     return mod