Skip to content

Commit 2761922

Browse files
committed
Update descriptions.
1 parent d20c245 commit 2761922

File tree

4 files changed

+46
-29
lines changed

4 files changed

+46
-29
lines changed

examples/dpo_demo_gemma3.ipynb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,8 @@
745745
},
746746
"outputs": [],
747747
"source": [
748+
"# The first couple of training steps might take up to 5 minutes to finish. Please be patient. If you experience long training steps, e.g. \u003e10 minutes per step, please open a bug. It would be really appreciated!\n",
749+
"\n",
748750
"if mesh is None:\n",
749751
" dpo_trainer.train(train_dataset)\n",
750752
"else:\n",

examples/grpo_demo.ipynb

Lines changed: 40 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -242,9 +242,9 @@
242242
"## Data preprocessing\n",
243243
"\n",
244244
"First, let's define some special tokens. We instruct the model to first reason\n",
245-
"between the `<reasoning>` and `</reasoning>` tokens. After\n",
246-
"reasoning, we expect it to provide the answer between the `<answer>` and\n",
247-
"`</answer>` tokens."
245+
"between the `\u003creasoning\u003e` and `\u003c/reasoning\u003e` tokens. After\n",
246+
"reasoning, we expect it to provide the answer between the `\u003canswer\u003e` and\n",
247+
"`\u003c/answer\u003e` tokens."
248248
]
249249
},
250250
{
@@ -254,22 +254,22 @@
254254
"metadata": {},
255255
"outputs": [],
256256
"source": [
257-
"reasoning_start = \"<reasoning>\"\n",
258-
"reasoning_end = \"</reasoning>\"\n",
259-
"solution_start = \"<answer>\"\n",
260-
"solution_end = \"</answer>\"\n",
257+
"reasoning_start = \"\u003creasoning\u003e\"\n",
258+
"reasoning_end = \"\u003c/reasoning\u003e\"\n",
259+
"solution_start = \"\u003canswer\u003e\"\n",
260+
"solution_end = \"\u003c/answer\u003e\"\n",
261261
"\n",
262262
"\n",
263263
"SYSTEM_PROMPT = f\"\"\"You are given a problem. Think about the problem and \\\n",
264264
"provide your reasoning. Place it between {reasoning_start} and \\\n",
265265
"{reasoning_end}. Then, provide the final answer (i.e., just one numerical \\\n",
266266
"value) between {solution_start} and {solution_end}.\"\"\"\n",
267267
"\n",
268-
"TEMPLATE = \"\"\"<start_of_turn>user\n",
268+
"TEMPLATE = \"\"\"\u003cstart_of_turn\u003euser\n",
269269
"{system_prompt}\n",
270270
"\n",
271-
"{question}<end_of_turn>\n",
272-
"<start_of_turn>model\"\"\""
271+
"{question}\u003cend_of_turn\u003e\n",
272+
"\u003cstart_of_turn\u003emodel\"\"\""
273273
]
274274
},
275275
{
@@ -287,7 +287,7 @@
287287
"metadata": {},
288288
"outputs": [],
289289
"source": [
290-
"def extract_hash_answer(text: str) -> str | None:\n",
290+
"def extract_hash_answer(text: str) -\u003e str | None:\n",
291291
" if \"####\" not in text:\n",
292292
" return None\n",
293293
" return text.split(\"####\")[1].strip()\n",
@@ -315,7 +315,7 @@
315315
" return target_dir\n",
316316
"\n",
317317
"\n",
318-
"def get_dataset(data_dir, split=\"train\", source=\"tfds\") -> grain.MapDataset:\n",
318+
"def get_dataset(data_dir, split=\"train\", source=\"tfds\") -\u003e grain.MapDataset:\n",
319319
" # Download data\n",
320320
" if not os.path.exists(data_dir):\n",
321321
" os.makedirs(data_dir)\n",
@@ -508,6 +508,7 @@
508508
"outputs": [],
509509
"source": [
510510
"!rm /tmp/content/intermediate_ckpt/* -rf\n",
511+
"\n",
511512
"!rm /tmp/content/ckpts/* -rf\n",
512513
"\n",
513514
"if model_family == \"gemma2\":\n",
@@ -651,7 +652,7 @@
651652
"- reward if the format of the output approximately matches the instruction given\n",
652653
"in `TEMPLATE`;\n",
653654
"- reward if the answer is correct/partially correct;\n",
654-
"- Sometimes, the text between `<answer>`, `</answer>` might not be one\n",
655+
"- Sometimes, the text between `\u003canswer\u003e`, `\u003c/answer\u003e` might not be one\n",
655656
" number. So, we extract the number, and reward the model if the answer is correct.\n",
656657
"\n",
657658
"The reward functions are inspired from\n",
@@ -779,9 +780,9 @@
779780
" # Ie if the answer is within some range, reward it!\n",
780781
" try:\n",
781782
" ratio = float(guess) / float(true_answer)\n",
782-
" if ratio >= 0.9 and ratio <= 1.1:\n",
783+
" if ratio \u003e= 0.9 and ratio \u003c= 1.1:\n",
783784
" score += 0.5\n",
784-
" elif ratio >= 0.8 and ratio <= 1.2:\n",
785+
" elif ratio \u003e= 0.8 and ratio \u003c= 1.2:\n",
785786
" score += 0.25\n",
786787
" else:\n",
787788
" score -= 1.0 # Penalize wrong answers\n",
@@ -796,7 +797,7 @@
796797
"id": "nIpOVv78Tn1k",
797798
"metadata": {},
798799
"source": [
799-
"Sometimes, the text between `<answer>` and `</answer>` might not be one\n",
800+
"Sometimes, the text between `\u003canswer\u003e` and `\u003c/answer\u003e` might not be one\n",
800801
"number; it can be a sentence. So, we extract the number and compare the answer."
801802
]
802803
},
@@ -873,7 +874,7 @@
873874
"ratio lies between 0.9 and 1.1. \n",
874875
"* **Format Accuracy**: percentage of samples for which the model outputs the\n",
875876
"correct format, i.e., reasoning between the reasoning special tokens, and the\n",
876-
"final answer between the \\`\\<start\\_answer\\>\\`, \\`\\<end\\_answer\\>\\` tokens.\n",
877+
"final answer between the \\`\\\u003cstart\\_answer\\\u003e\\`, \\`\\\u003cend\\_answer\\\u003e\\` tokens.\n",
877878
"\n",
878879
"**Qualitative**\n",
879880
"\n",
@@ -995,7 +996,7 @@
995996
" corr_ctr_per_question += 1\n",
996997
"\n",
997998
" ratio = float(extracted_response.strip()) / float(answer.strip())\n",
998-
" if ratio >= 0.9 and ratio <= 1.1:\n",
999+
" if ratio \u003e= 0.9 and ratio \u003c= 1.1:\n",
9991000
" partially_corr_per_question += 1\n",
10001001
" except:\n",
10011002
" print(\"SKIPPED\")\n",
@@ -1005,28 +1006,28 @@
10051006
" corr_format_per_question += 1\n",
10061007
"\n",
10071008
" if (\n",
1008-
" corr_ctr_per_question > 0\n",
1009-
" and partially_corr_per_question > 0\n",
1010-
" and corr_format_per_question > 0\n",
1009+
" corr_ctr_per_question \u003e 0\n",
1010+
" and partially_corr_per_question \u003e 0\n",
1011+
" and corr_format_per_question \u003e 0\n",
10111012
" ):\n",
10121013
" break\n",
10131014
"\n",
1014-
" if corr_ctr_per_question > 0:\n",
1015+
" if corr_ctr_per_question \u003e 0:\n",
10151016
" corr += 1\n",
10161017
" if corr_lst and make_lst:\n",
10171018
" response_lst.append((question, answer, multiple_call_response))\n",
10181019
" else:\n",
10191020
" if not corr_lst and make_lst:\n",
10201021
" response_lst.append((question, answer, multiple_call_response))\n",
1021-
" if partially_corr_per_question > 0:\n",
1022+
" if partially_corr_per_question \u003e 0:\n",
10221023
" partially_corr += 1\n",
1023-
" if corr_format_per_question > 0:\n",
1024+
" if corr_format_per_question \u003e 0:\n",
10241025
" corr_format += 1\n",
10251026
"\n",
10261027
" total += 1\n",
10271028
" if total % 10 == 0:\n",
10281029
" print(\n",
1029-
" f\"===> {corr=}, {total=}, {corr / total * 100=}, \"\n",
1030+
" f\"===\u003e {corr=}, {total=}, {corr / total * 100=}, \"\n",
10301031
" f\"{partially_corr / total * 100=}, {corr_format / total * 100=}\"\n",
10311032
" )\n",
10321033
"\n",
@@ -1066,7 +1067,7 @@
10661067
"id": "UOAQe06DyVlQ",
10671068
"metadata": {},
10681069
"source": [
1069-
"Now let's see how the original model does on the test set. You can see the percentages of the mode outputs that are fully correct, partially correct and just correct in format. "
1070+
"Now let's see how the original model does on the test set. You can see the percentages of the model outputs that are fully correct, partially correct and just correct in format. The following step might take a couple of minutes to finish."
10701071
]
10711072
},
10721073
{
@@ -1076,6 +1077,8 @@
10761077
"metadata": {},
10771078
"outputs": [],
10781079
"source": [
1080+
"# The evaluation might take up to a couple of minutes to finish. Please be patient.\n",
1081+
"\n",
10791082
"(corr, total, accuracy, partial_accuracy, format_accuracy) = evaluate(\n",
10801083
" test_dataset,\n",
10811084
" sampler,\n",
@@ -1211,11 +1214,11 @@
12111214
"\n",
12121215
"We then create a `GRPOLearner`, the specialized trainer that uses a list of **reward functions** to evaluate and optimize the model's output, completing the RL training setup.\n",
12131216
"\n",
1214-
"Tunix trainers are integrated with [Weights & Biases](https://wandb.ai/) to help you visualize the training progress. You can choose how you want to use it:\n",
1217+
"Tunix trainers are integrated with [Weights \u0026 Biases](https://wandb.ai/) to help you visualize the training progress. You can choose how you want to use it:\n",
12151218
"\n",
12161219
"**Option 1 (Type 1)**: If you're running a quick experiment or just testing things out, choose this. It creates a temporary, private dashboard right in your browser without requiring you to log in or create an account.\n",
12171220
"\n",
1218-
"**Option 2 (Type 2)**: If you have an existing W&B account and want to save your project's history to your personal dashboard, choose this. You'll be prompted to enter your API key or log in."
1221+
"**Option 2 (Type 2)**: If you have an existing W\u0026B account and want to save your project's history to your personal dashboard, choose this. You'll be prompted to enter your API key or log in."
12191222
]
12201223
},
12211224
{
@@ -1246,6 +1249,14 @@
12461249
")"
12471250
]
12481251
},
1252+
{
1253+
"cell_type": "markdown",
1254+
"id": "e8b71ed5",
1255+
"metadata": {},
1256+
"source": [
1257+
"The first couple of training steps might take up to 5 minutes to finish. Please be patient. If you experience long training steps, e.g. \u003e10 minutes per step, please open a bug. It would be really appreciated!"
1258+
]
1259+
},
12491260
{
12501261
"cell_type": "code",
12511262
"execution_count": null,
@@ -1323,6 +1334,7 @@
13231334
"metadata": {},
13241335
"outputs": [],
13251336
"source": [
1337+
"# The evaluation might take up to a couple of minutes to finish. Please be patient.\n",
13261338
"(corr, total, accuracy, partial_accuracy, format_accuracy) = evaluate(\n",
13271339
" test_dataset,\n",
13281340
" sampler,\n",

examples/logit_distillation.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,7 @@
392392
" training_config=config,\n",
393393
").with_gen_model_input_fn(gen_model_input_fn)\n",
394394
"\n",
395-
"# 5. Run training within the mesh context\n",
395+
"# 5. Run training within the mesh context; the first couple of training steps might take up to 5 minutes to finish. Please be patient. If you experience long training steps, e.g. \u003e10 minutes per step, please open a bug. It would be really appreciated!\n",
396396
"print(\"Starting distillation training...\")\n",
397397
"with mesh:\n",
398398
" trainer.train(train_ds, validation_ds)\n",

examples/qlora_demo.ipynb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,7 @@
607607
")\n",
608608
"trainer = trainer.with_gen_model_input_fn(gen_model_input_fn)\n",
609609
"\n",
610+
"# The first couple of training steps might take up to 5 minutes to finish. Please be patient. If you experience long training steps, e.g. \u003e10 minutes per step, please open a bug. It would be really appreciated!\n",
610611
"with jax.profiler.trace(os.path.join(PROFILING_DIR, \"full_training\")):\n",
611612
" with mesh:\n",
612613
" trainer.train(train_ds, validation_ds)"
@@ -641,6 +642,7 @@
641642
" lora_model, optax.adamw(1e-3), training_config\n",
642643
").with_gen_model_input_fn(gen_model_input_fn)\n",
643644
"\n",
645+
"# The first couple of training steps might take up to 5 minutes to finish. Please be patient. If you experience long training steps, e.g. \u003e10 minutes per step, please open a bug. It would be really appreciated!\n",
644646
"with jax.profiler.trace(os.path.join(PROFILING_DIR, \"peft with LoRA\")):\n",
645647
" with mesh:\n",
646648
" lora_trainer.train(train_ds, validation_ds)"
@@ -663,6 +665,7 @@
663665
" qlora_model, optax.adamw(1e-3), training_config\n",
664666
").with_gen_model_input_fn(gen_model_input_fn)\n",
665667
"\n",
668+
"# The first couple of training steps might take up to 5 minutes to finish. Please be patient. If you experience long training steps, e.g. \u003e10 minutes per step, please open a bug. It would be really appreciated!\n",
666669
"with jax.profiler.trace(os.path.join(PROFILING_DIR, \"peft with QLoRA\")):\n",
667670
" with mesh:\n",
668671
" qlora_trainer.train(train_ds, validation_ds)"

0 commit comments

Comments
 (0)