|
170 | 170 | "source": [ |
171 | 171 | "# Dataset processing configuration\n", |
172 | 172 | "IMG_COUNT = 512 # Number of images to process\n", |
173 | | - "CHAT_IMAGE_HEIGHT = 512 # Standardized height for model input\n", |
| 173 | + "BASE64_IMAGE_HEIGHT = 512 # Standardized height for model input\n", |
174 | 174 | "\n", |
175 | 175 | "# Load ColPali dataset for visual documents\n", |
176 | 176 | "img_dataset_cfg = {\n", |
|
210 | 210 | " height: Target height for image resizing\n", |
211 | 211 | "\n", |
212 | 212 | " Returns:\n", |
213 | | - " Updated record with chat_image and uuid fields\n", |
| 213 | + " Updated record with base64_image and uuid fields\n", |
214 | 214 | " \"\"\"\n", |
215 | 215 | " # Resize image for consistent processing\n", |
216 | 216 | " image = resize_image(record[\"image\"], height)\n", |
|
240 | 240 | "\n", |
241 | 241 | "img_dataset_iter = iter(\n", |
242 | 242 | " load_dataset(**img_dataset_cfg)\n", |
243 | | - " .map(convert_image_to_chat_format, fn_kwargs={\"height\": CHAT_IMAGE_HEIGHT})\n", |
| 243 | + " .map(convert_image_to_chat_format, fn_kwargs={\"height\": BASE64_IMAGE_HEIGHT})\n", |
244 | 244 | ")\n", |
245 | 245 | "img_dataset = pd.DataFrame([next(img_dataset_iter) for _ in range(IMG_COUNT)])\n", |
246 | 246 | "\n", |
|
296 | 296 | " name=\"summary\",\n", |
297 | 297 | " type=\"llm-code\",\n", |
298 | 298 | " model_alias=model_alias,\n", |
299 | | - " system_prompt=(\"You are a helpful assistant that summarizes images. \"\n", |
300 | | - " \"The image will be provided to you as a base64 encoded format.\"\n", |
301 | | - " \"You must decode the image and then summarize the content of the image.\"),\n", |
302 | 299 | "    prompt=(\"Provide a detailed summary of the content in this image in Markdown format. \"\n", |
303 | 300 | "            \"Start from the top of the image and then describe it from top to bottom. \"\n", |
304 | 301 | "            \"Place a summary at the bottom.\"),\n", |
|
364 | 361 | " type=P.SamplerType.CATEGORY,\n", |
365 | 362 | " params=P.CategorySamplerParams(values=[\"easy\", \"medium\", \"hard\"]),\n", |
366 | 363 | " description=\"The difficulty of the generated question\",\n", |
367 | | - " ))\n", |
368 | | - "\n", |
369 | | - "# Optionally validate that the columns are configured correctly.\n", |
370 | | - "config_builder.validate()\n" |
| 364 | + " ))\n" |
371 | 365 | ] |
372 | 366 | }, |
373 | 367 | { |
|
422 | 416 | "        system_prompt=(\"Generate a short 1-3 word topic for the question: {{ question }} based on the given context: {{ summary }}\"),\n", |
423 | 417 | " output_format=QuestionTopic,\n", |
424 | 418 | " )\n", |
425 | | - ")\n", |
426 | | - "\n", |
427 | | - "config_builder.validate()" |
| 419 | + ")\n" |
428 | 420 | ] |
429 | 421 | }, |
430 | 422 | { |
|
442 | 434 | "Use this step to fine-tune your configuration before full-scale generation.\n" |
443 | 435 | ] |
444 | 436 | }, |
| 437 | + { |
| 438 | + "cell_type": "markdown", |
| 439 | + "metadata": {}, |
| 440 | + "source": [ |
| 441 | + "**Note:** Please ignore the `PROMPT_WITHOUT_REFERENCES` validation warning if it appears. The image context is passed to the LLM via `multi_modal_context`, so the prompt does not need to reference any other column." |
| 442 | + ] |
| 443 | + }, |
445 | 444 | { |
446 | 445 | "cell_type": "code", |
447 | 446 | "execution_count": null, |
|
489 | 488 | ")\n", |
490 | 489 | "\n", |
491 | 490 | "print(\"📄 Original Document Image:\")\n", |
492 | | - "display(resize_image(comparison_dataset.image[index], CHAT_IMAGE_HEIGHT))\n", |
| 491 | + "display(resize_image(comparison_dataset.image[index], BASE64_IMAGE_HEIGHT))\n", |
493 | 492 | "\n", |
494 | 493 | "print(\"\\n📝 Generated Summary:\")\n", |
495 | 494 | "rich.print(Panel(comparison_dataset.summary[index], title=\"Document Summary\", title_align=\"left\"))\n", |
|
568 | 567 | ")\n", |
569 | 568 | "\n", |
570 | 569 | "print(\"📄 Original Document Image:\")\n", |
571 | | - "display(resize_image(comparison_dataset.image[index], CHAT_IMAGE_HEIGHT))\n", |
| 570 | + "display(resize_image(comparison_dataset.image[index], BASE64_IMAGE_HEIGHT))\n", |
572 | 571 | "\n", |
573 | 572 | "print(\"\\n📝 Generated Summary:\")\n", |
574 | 573 | "rich.print(Panel(comparison_dataset.summary[index], title=\"Document Summary\", title_align=\"left\"))\n", |
|