|
91 | 91 | },
|
92 | 92 | {
|
93 | 93 | "cell_type": "code",
|
94 | | - "execution_count": 6, |
| 94 | + "execution_count": 1, |
95 | 95 | "id": "6d25c2b1-a68b-4748-ac29-e8a893ce1762",
|
96 | 96 | "metadata": {},
|
97 | 97 | "outputs": [],
|
|
109 | 109 | },
|
110 | 110 | {
|
111 | 111 | "cell_type": "code",
|
112 | | - "execution_count": 14, |
| 112 | + "execution_count": 2, |
113 | 113 | "id": "0ec71181-770a-4ee6-8760-c62cfab8340f",
|
114 | 114 | "metadata": {},
|
115 | 115 | "outputs": [],
|
|
129 | 129 | },
|
130 | 130 | {
|
131 | 131 | "cell_type": "code",
|
132 | | - "execution_count": 18, |
| 132 | + "execution_count": 3, |
133 | 133 | "id": "5f946576-83ac-45b5-a290-9a2167193e3d",
|
134 | 134 | "metadata": {},
|
135 | 135 | "outputs": [
|
136 | 136 | {
|
137 | 137 | "data": {
|
138 | 138 | "application/vnd.jupyter.widget-view+json": {
|
139 | | - "model_id": "37c5f366ef204794bad4711ae6056d6c", |
| 139 | + "model_id": "9d21656629a64d6187b68dc703cb57c7", |
140 | 140 | "version_major": 2,
|
141 | 141 | "version_minor": 0
|
142 | 142 | },
|
|
151 | 151 | "source": [
|
152 | 152 | "model = AutoModelForCausalLM.from_pretrained(model_name).cuda()\n",
|
153 | 153 | "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
154 | | - "tokenizer.pad_token = tokenizer.eos_token\n", |
155 | | - "tokenizer.padding_side = \"left\" # Setting this as tokenizer with the right padding_side may impact calibration accuracy." |
| 154 | + "tokenizer.pad_token = tokenizer.eos_token" |
156 | 155 | ]
|
157 | 156 | },
|
158 | 157 | {
|
|
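The hunk above removes the explicit tokenizer.padding_side = "left" setting from the setup cell, and the very next hunk shows ModelOpt's dataset utilities warning about right padding during calibration. A minimal sketch of the tokenizer setup that keeps left padding, assuming model_name is the checkpoint name already loaded earlier in the notebook:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name)  # model_name as defined earlier in the notebook
tokenizer.pad_token = tokenizer.eos_token              # causal LMs often ship without a pad token
tokenizer.padding_side = "left"                        # avoids the right-padding calibration warning seen below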
165 | 164 | },
|
166 | 165 | {
|
167 | 166 | "cell_type": "code",
|
168 | | - "execution_count": 19, |
| 167 | + "execution_count": 4, |
169 | 168 | "id": "f3b618e9-fdee-46b2-8d7e-f11f1f7ada8d",
|
170 | 169 | "metadata": {},
|
171 | | - "outputs": [], |
| 170 | + "outputs": [ |
| 171 | + { |
| 172 | + "name": "stderr", |
| 173 | + "output_type": "stream", |
| 174 | + "text": [ |
| 175 | + "/home/fghodsian/.venv/jupyter/lib/python3.12/site-packages/modelopt/torch/utils/dataset_utils.py:157: UserWarning: Tokenizer with the right padding_side may impact calibration accuracy. Recommend set to left\n", |
| 176 | + " warn(\n" |
| 177 | + ] |
| 178 | + } |
| 179 | + ], |
172 | 180 | "source": [
|
173 | 181 | "\n",
|
174 | 182 | "# Calibration dataloader\n",
|
|
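The "# Calibration dataloader" cell body is collapsed in this diff; its stderr output above (the padding warning from modelopt/torch/utils/dataset_utils.py) indicates it builds calibration batches with ModelOpt's dataset utilities. A hedged, plain-PyTorch sketch of an equivalent calibration forward loop over cnn_dailymail follows; the batch size and sample count are assumptions chosen only to match the 64 calibration steps shown in the next hunk:

import torch
from datasets import load_dataset

# Assumed sizes: 64 batches of 8 samples (512 articles); the notebook's real settings
# are not visible in this diff.
calib_texts = load_dataset("cnn_dailymail", "3.0.0", split="train[:512]")["article"]

def forward_loop(model):
    model.eval()
    with torch.no_grad():
        for i in range(0, len(calib_texts), 8):
            batch = tokenizer(
                calib_texts[i : i + 8],
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512,
            ).to(model.device)
            model(**batch)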
201 | 209 | },
|
202 | 210 | {
|
203 | 211 | "cell_type": "code",
|
204 | | - "execution_count": 17, |
| 212 | + "execution_count": 5, |
205 | 213 | "id": "51c0c1bb-2804-45ae-873f-e33388458e04",
|
206 | 214 | "metadata": {},
|
207 | 215 | "outputs": [
|
|
217 | 225 | "name": "stderr",
|
218 | 226 | "output_type": "stream",
|
219 | 227 | "text": [
|
220 | | - "100%|█████████████████████████████████████████████████████████████████████████████| 64/64 [01:14<00:00, 1.16s/it]\n" |
| 228 | + "100%|██████████████████████████████████████████████████████████████████████████████| 64/64 [01:13<00:00, 1.15s/it]\n" |
221 | 229 | ]
|
222 | 230 | }
|
223 | 231 | ],
|
|
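The 64/64 progress bar above is the calibration pass; the quantize call itself sits in collapsed context. As a hedged reference, this is how ModelOpt's post-training quantization entry point is typically invoked before QAT fine-tuning, reusing a calibration forward loop like the one sketched earlier. The FP8 config is only an example; the config actually used by the notebook is not visible in this diff:

import modelopt.torch.quantization as mtq

# Example config only; INT8 SmoothQuant, INT4 AWQ, etc. are other common choices.
model = mtq.quantize(model, mtq.FP8_DEFAULT_CFG, forward_loop)
mtq.print_quant_summary(model)  # optional: inspect which modules received quantizers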
237 | 245 | },
|
238 | 246 | {
|
239 | 247 | "cell_type": "code",
|
240 | | - "execution_count": 13, |
| 248 | + "execution_count": 6, |
241 | 249 | "id": "c1a15f93-ee06-42a5-ab3b-ca3428a62fe7",
|
242 | 250 | "metadata": {},
|
243 | 251 | "outputs": [],
|
|
248 | 256 | },
|
249 | 257 | {
|
250 | 258 | "cell_type": "code",
|
251 | | - "execution_count": 14, |
252 | | - "id": "95411f4c-b1d3-4e82-9afb-2608bd21a9a4", |
| 259 | + "execution_count": 7, |
| 260 | + "id": "e5ff221a-d807-450b-a099-6481cb3b00d0", |
| 261 | + "metadata": {}, |
| 262 | + "outputs": [], |
| 263 | + "source": [ |
| 264 | + "from datasets import load_dataset\n", |
| 265 | + "from transformers import DataCollatorForLanguageModeling\n", |
| 266 | + "\n", |
| 267 | + "# Load training dataset (for demonstration, use cnn_dailymail \"train\" split)\n", |
| 268 | + "train_dataset = load_dataset(\"cnn_dailymail\", '3.0.0', split=\"train[:1000]\") # Smaller subset for example\n", |
| 269 | + "\n", |
| 270 | + "def preprocess_function(examples):\n", |
| 271 | + " # Concatenate the article and highlights for training\n", |
| 272 | + " inputs = [a + \" \" + h for a, h in zip(examples[\"article\"], examples[\"highlights\"])]\n", |
| 273 | + " model_inputs = tokenizer(inputs, padding=\"max_length\", truncation=True, max_length=512)\n", |
| 274 | + " model_inputs[\"labels\"] = model_inputs[\"input_ids\"].copy() # Language modeling: teacher-forced\n", |
| 275 | + " return model_inputs\n", |
| 276 | + "\n", |
| 277 | + "tokenized_train = train_dataset.map(preprocess_function, batched=True, remove_columns=train_dataset.column_names)\n", |
| 278 | + "\n", |
| 279 | + "# Data collator (for causal language modeling)\n", |
| 280 | + "data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)\n" |
| 281 | + ] |
| 282 | + }, |
| 283 | + { |
| 284 | + "cell_type": "code", |
| 285 | + "execution_count": 8, |
| 286 | + "id": "0f78bdcf-e2fc-49bd-b5b7-79de7260068d", |
253 | 287 | "metadata": {},
|
254 | 288 | "outputs": [
|
255 | 289 | {
|
256 | | - "ename": "SyntaxError", |
257 | | - "evalue": "incomplete input (1262456024.py, line 1)", |
258 | | - "output_type": "error", |
259 | | - "traceback": [ |
260 | | - " \u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[14]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[31m \u001b[39m\u001b[31mtrainer = Trainer(model=model, processing_class=tokenizer, args=training_args, **data_module\u001b[39m\n ^\n\u001b[31mSyntaxError\u001b[39m\u001b[31m:\u001b[39m incomplete input\n" |
| 290 | + "name": "stderr", |
| 291 | + "output_type": "stream", |
| 292 | + "text": [ |
| 293 | + "/tmp/ipykernel_2585829/1505564370.py:15: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n", |
| 294 | + " trainer = Trainer(\n" |
261 | 295 | ]
|
262 | 296 | }
|
263 | 297 | ],
|
264 | 298 | "source": [
|
265 | | - "trainer = Trainer(model=model, processing_class=tokenizer, args=training_args, **data_module" |
| 299 | + "from transformers import TrainingArguments, Trainer\n", |
| 300 | + "\n", |
| 301 | + "training_args = TrainingArguments(\n", |
| 302 | + " output_dir=\"./qat_model_output\",\n", |
| 303 | + " per_device_train_batch_size=2,\n", |
| 304 | + " num_train_epochs=2,\n", |
| 305 | + " learning_rate=1e-5, # As recommended for QAT in README\n", |
| 306 | + " logging_steps=50,\n", |
| 307 | + " save_steps=200,\n", |
| 308 | + " save_total_limit=2,\n", |
| 309 | + " report_to=\"none\",\n", |
| 310 | + " fp16=False\n", |
| 311 | + ")\n", |
| 312 | + "\n", |
| 313 | + "trainer = Trainer(\n", |
| 314 | + " model=model,\n", |
| 315 | + " args=training_args,\n", |
| 316 | + " train_dataset=tokenized_train,\n", |
| 317 | + " data_collator=data_collator,\n", |
| 318 | + " tokenizer=tokenizer,\n", |
| 319 | + ")\n" |
| 320 | + ] |
| 321 | + }, |
| 322 | + { |
| 323 | + "cell_type": "code", |
| 324 | + "execution_count": 9, |
| 325 | + "id": "d6ad0ebd-3804-4264-95f6-b6522bbb5e90", |
| 326 | + "metadata": {}, |
| 327 | + "outputs": [ |
| 328 | + { |
| 329 | + "data": { |
| 330 | + "text/html": [ |
| 331 | + "\n", |
| 332 | + " <div>\n", |
| 333 | + " \n", |
| 334 | + " <progress value='126' max='126' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", |
| 335 | + " [126/126 04:30, Epoch 2/2]\n", |
| 336 | + " </div>\n", |
| 337 | + " <table border=\"1\" class=\"dataframe\">\n", |
| 338 | + " <thead>\n", |
| 339 | + " <tr style=\"text-align: left;\">\n", |
| 340 | + " <th>Step</th>\n", |
| 341 | + " <th>Training Loss</th>\n", |
| 342 | + " </tr>\n", |
| 343 | + " </thead>\n", |
| 344 | + " <tbody>\n", |
| 345 | + " <tr>\n", |
| 346 | + " <td>50</td>\n", |
| 347 | + " <td>0.268200</td>\n", |
| 348 | + " </tr>\n", |
| 349 | + " <tr>\n", |
| 350 | + " <td>100</td>\n", |
| 351 | + " <td>0.250100</td>\n", |
| 352 | + " </tr>\n", |
| 353 | + " </tbody>\n", |
| 354 | + "</table><p>" |
| 355 | + ], |
| 356 | + "text/plain": [ |
| 357 | + "<IPython.core.display.HTML object>" |
| 358 | + ] |
| 359 | + }, |
| 360 | + "metadata": {}, |
| 361 | + "output_type": "display_data" |
| 362 | + }, |
| 363 | + { |
| 364 | + "data": { |
| 365 | + "text/plain": [ |
| 366 | + "TrainOutput(global_step=126, training_loss=0.25205686735728433, metrics={'train_runtime': 274.3474, 'train_samples_per_second': 7.29, 'train_steps_per_second': 0.459, 'total_flos': 4.6110257184768e+16, 'train_loss': 0.25205686735728433, 'epoch': 2.0})" |
| 367 | + ] |
| 368 | + }, |
| 369 | + "execution_count": 9, |
| 370 | + "metadata": {}, |
| 371 | + "output_type": "execute_result" |
| 372 | + } |
| 373 | + ], |
| 374 | + "source": [ |
| 375 | + "trainer.train()" |
| 376 | + ] |
| 377 | + }, |
| 378 | + { |
| 379 | + "cell_type": "code", |
| 380 | + "execution_count": 11, |
| 381 | + "id": "b6ca7a04-163e-498f-9a83-f22718fa5141", |
| 382 | + "metadata": {}, |
| 383 | + "outputs": [], |
| 384 | + "source": [ |
| 385 | + "# Save quantizer state for later resume/deploy\n", |
| 386 | + "import modelopt.torch.opt as mto\n", |
| 387 | + "torch.save(mto.modelopt_state(model), \"modelopt_quantizer_states.pt\")\n", |
| 388 | + "\n", |
| 389 | + "# Save the final weights\n", |
| 390 | + "trainer.save_model(\"./qat_model_output\")" |
266 | 391 | ]
|
267 | 392 | },
|
268 | 393 | {
|
269 | 394 | "cell_type": "code",
|
270 | 395 | "execution_count": null,
|
271 |
| - "id": "e5ff221a-d807-450b-a099-6481cb3b00d0", |
| 396 | + "id": "e69417a6-a4e4-4541-a0ee-8035cfe7df76", |
272 | 397 | "metadata": {},
|
273 | 398 | "outputs": [],
|
274 | 399 | "source": []
|
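Two follow-ups on the training cells added in the hunk above. First, the stderr output of the Trainer cell notes that the tokenizer= keyword is deprecated in favor of processing_class= (which the removed one-line Trainer call had already used). A sketch of the same construction without the deprecation warning:

from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    data_collator=data_collator,
    processing_class=tokenizer,  # replaces the deprecated tokenizer= keyword
)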
|
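Second, the new save cell stores the quantizer state via mto.modelopt_state before trainer.save_model. A hedged sketch of how that state could be re-applied in a fresh process before loading the QAT-tuned weights; the exact resume flow may differ, and model_name plus the file paths are taken from the cells above:

import torch
import modelopt.torch.opt as mto
from transformers import AutoModelForCausalLM

# Rebuild the same base architecture, then re-attach the saved quantizer structure.
model = AutoModelForCausalLM.from_pretrained(model_name)
model = mto.restore_from_modelopt_state(model, torch.load("modelopt_quantizer_states.pt"))
# The fine-tuned weights saved by trainer.save_model("./qat_model_output") can then be
# loaded on top with a standard state_dict load.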