rasbt
diff --git a/README.md b/README.md
Lines changed: 1 addition & 0 deletions
diff --git a/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb b/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb
Lines changed: 32 additions & 20 deletions
@@ -188,6 +188,7 @@ Several folders contain optional materials as a bonus for interested readers:
     - [Gemma 3 From Scratch](ch05/12_gemma3/)
     - [Olmo 3 From Scratch](ch05/13_olmo3/)
     - [Tiny Aya From Scratch](ch05/15_tiny-aya/)
+    - [Qwen3.5 From Scratch](ch05/16_qwen3.5/)
   - [Chapter 5 with other LLMs as Drop-In Replacement (e.g., Llama 3, Qwen 3)](ch05/14_ch05_with_other_llms/)
 - **Chapter 6: Finetuning for classification**
   - [Additional Experiments Finetuning Different Layers and Using Larger Models](ch06/02_bonus_additional-experiments)
 
@@ -82,9 +82,9 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "huggingface_hub version: 0.35.3\n",
-      "tokenizers version: 0.22.1\n",
-      "torch version: 2.8.0\n"
+      "huggingface_hub version: 1.5.0\n",
+      "tokenizers version: 0.22.2\n",
+      "torch version: 2.8.0+cu128\n"
      ]
     }
    ],
@@ -659,16 +659,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "id": "adf0a6b7-b688-42c9-966e-c223d34db99f",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "tensor([[[-0.2256, -0.0164, -0.7070,  ...,  0.4414,  0.1245,  1.0703],\n",
-       "         [-0.6602,  0.5352, -0.0718,  ..., -0.0737,  0.5391,  0.3086],\n",
-       "         [-0.4785, -0.1562,  0.1045,  ..., -0.2324,  0.2354,  0.6328]]],\n",
+       "tensor([[[-0.2334, -0.0134, -0.7031,  ...,  0.4316,  0.1177,  1.0703],\n",
+       "         [-0.6641,  0.5352, -0.0752,  ..., -0.0698,  0.5430,  0.3203],\n",
+       "         [-0.4785, -0.1748,  0.1074,  ..., -0.2354,  0.2354,  0.6289]]],\n",
        "       dtype=torch.bfloat16, grad_fn=<UnsafeViewBackward0>)"
       ]
      },
@@ -922,16 +922,7 @@
     "id": "699cb1b8-a67d-49fb-80a6-0dad9d81f392",
     "outputId": "55b2f28c-142f-4698-9d23-d27456d3ed6d"
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/sebastian/Developer/LLMs-from-scratch/.venv/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import json\n",
     "import os\n",
@@ -1182,29 +1173,50 @@
       "<think>\n",
       "Okay, the user wants a short introduction to large language models. Let me start by recalling what I know. Large language models are AI systems that can understand and generate human language. They're trained on massive datasets, so they can learn complex patterns and nuances.\n",
       "\n",
-      "I should mention their ability to understand and generate text, not just specific tasks. Maybe include examples like chatbots or content generation. Also, emphasize their adaptability and efficiency. Oh, and maybe touch on their applications in various fields. Let me check if I'm covering all key points without being too technical. Keep it concise, around a sentence or two. Make sure it's clear and easy to understand.\n",
+      "I should mention their ability to understand and generate text, not just specific tasks. Maybe include examples like chatbots or language assistants. Also, emphasize their adaptability and versatility. Oh, and maybe touch on their applications in various fields. Let me check if I'm covering all key points without being too technical. Keep it concise, around a sentence or two. Make sure it's clear and easy to understand.\n",
       "</think>\n",
       "\n",
-      "Large language models (LLMs) are AI systems designed to understand and generate human language, enabling tasks like text generation, translation, and content creation. They are trained on vast datasets, allowing them to learn complex patterns and nuances, making them versatile for a wide range of applications."
+      "Large language models (LLMs) are AI systems designed to understand and generate human language, enabling tasks like text generation, translation, and answering questions. They are trained on vast datasets, allowing them to learn complex patterns and nuances, making them versatile for applications in various domains.\n",
+      "\n",
+      "Generation speed: 48.46 tokens/sec\n",
+      "GPU memory used: 1.50 GB\n"
      ]
     }
    ],
    "source": [
+    "import time\n",
+    "\n",
     "input_token_ids_tensor = torch.tensor(input_token_ids, device=device).unsqueeze(0)\n",
     "\n",
+    "if torch.cuda.is_available():\n",
+    "    torch.cuda.reset_peak_memory_stats()\n",
+    "\n",
+    "start_time = time.perf_counter()\n",
+    "generated_tokens = 0\n",
     "\n",
     "for token in generate_text_basic_stream(\n",
     "    model=model,\n",
     "    token_ids=input_token_ids_tensor,\n",
     "    max_new_tokens=500,\n",
     "    eos_token_id=tokenizer.eos_token_id\n",
     "):\n",
+    "    generated_tokens += 1\n",
     "    token_id = token.squeeze(0).tolist()\n",
     "    print(\n",
     "        tokenizer.decode(token_id),\n",
     "        end=\"\",\n",
     "        flush=True\n",
-    "    )"
+    "    )\n",
+    "\n",
+    "elapsed = time.perf_counter() - start_time\n",
+    "tokens_per_sec = generated_tokens / elapsed if elapsed > 0 else 0.0\n",
+    "print(f\"\\n\\nGeneration speed: {tokens_per_sec:.2f} tokens/sec\")\n",
+    "\n",
+    "if torch.cuda.is_available():\n",
+    "    def calc_gpu_gb(x):\n",
+    "        return f\"{x / 1024 / 1024 / 1024:.2f} GB\"\n",
+    "\n",
+    "    print(f\"GPU memory used: {calc_gpu_gb(torch.cuda.max_memory_allocated())}\")\n"
    ]
   },
   {