Skip to content

Commit 30987c2

Browse files
committed
added note about prompt_without_references validation warning
1 parent 42080b2 commit 30987c2

File tree

1 file changed

+14
-15
lines changed

1 file changed

+14
-15
lines changed

nemo/NeMo-Data-Designer/advanced/multimodal/visual-question-answering-using-vlm.ipynb

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@
170170
"source": [
171171
"# Dataset processing configuration\n",
172172
"IMG_COUNT = 512 # Number of images to process\n",
173-
"CHAT_IMAGE_HEIGHT = 512 # Standardized height for model input\n",
173+
"BASE64_IMAGE_HEIGHT = 512 # Standardized height for model input\n",
174174
"\n",
175175
"# Load ColPali dataset for visual documents\n",
176176
"img_dataset_cfg = {\n",
@@ -210,7 +210,7 @@
210210
" height: Target height for image resizing\n",
211211
"\n",
212212
" Returns:\n",
213-
" Updated record with chat_image and uuid fields\n",
213+
" Updated record with base64_image and uuid fields\n",
214214
" \"\"\"\n",
215215
" # Resize image for consistent processing\n",
216216
" image = resize_image(record[\"image\"], height)\n",
@@ -240,7 +240,7 @@
240240
"\n",
241241
"img_dataset_iter = iter(\n",
242242
" load_dataset(**img_dataset_cfg)\n",
243-
" .map(convert_image_to_chat_format, fn_kwargs={\"height\": CHAT_IMAGE_HEIGHT})\n",
243+
" .map(convert_image_to_chat_format, fn_kwargs={\"height\": BASE64_IMAGE_HEIGHT})\n",
244244
")\n",
245245
"img_dataset = pd.DataFrame([next(img_dataset_iter) for _ in range(IMG_COUNT)])\n",
246246
"\n",
@@ -296,9 +296,6 @@
296296
" name=\"summary\",\n",
297297
" type=\"llm-code\",\n",
298298
" model_alias=model_alias,\n",
299-
" system_prompt=(\"You are a helpful assistant that summarizes images. \"\n",
300-
" \"The image will be provided to you as a base64 encoded format.\"\n",
301-
" \"You must decode the image and then summarize the content of the image.\"),\n",
302299
" prompt=(\"Provide a detailed summary of the content in this image in Markdown format.\"\n",
303300
" \"Start from the top of the image and then describe it from top to bottom.\"\n",
304301
" \"Place a summary at the bottom.\"),\n",
@@ -364,10 +361,7 @@
364361
" type=P.SamplerType.CATEGORY,\n",
365362
" params=P.CategorySamplerParams(values=[\"easy\", \"medium\", \"hard\"]),\n",
366363
" description=\"The difficulty of the generated question\",\n",
367-
" ))\n",
368-
"\n",
369-
"# Optionally validate that the columns are configured correctly.\n",
370-
"config_builder.validate()\n"
364+
" ))\n"
371365
]
372366
},
373367
{
@@ -422,9 +416,7 @@
422416
" system_prompt=(\"Generate a short 1-3 word topic for the question: {{ question }} based on the given context. {{ summary }}\"),\n",
423417
" output_format=QuestionTopic,\n",
424418
" )\n",
425-
")\n",
426-
"\n",
427-
"config_builder.validate()"
419+
")\n"
428420
]
429421
},
430422
{
@@ -442,6 +434,13 @@
442434
"Use this step to fine-tune your configuration before full-scale generation.\n"
443435
]
444436
},
437+
{
438+
"cell_type": "markdown",
439+
"metadata": {},
440+
"source": [
441+
"**Note** Please ignore the validation warning, `PROMPT_WITHOUT_REFERENCES` that shows up. The image context is being passed to the LLM using the `multi_modal_context` and so the prompt does not need to reference any other column. "
442+
]
443+
},
445444
{
446445
"cell_type": "code",
447446
"execution_count": null,
@@ -489,7 +488,7 @@
489488
")\n",
490489
"\n",
491490
"print(\"📄 Original Document Image:\")\n",
492-
"display(resize_image(comparison_dataset.image[index], CHAT_IMAGE_HEIGHT))\n",
491+
"display(resize_image(comparison_dataset.image[index], BASE64_IMAGE_HEIGHT))\n",
493492
"\n",
494493
"print(\"\\n📝 Generated Summary:\")\n",
495494
"rich.print(Panel(comparison_dataset.summary[index], title=\"Document Summary\", title_align=\"left\"))\n",
@@ -568,7 +567,7 @@
568567
")\n",
569568
"\n",
570569
"print(\"📄 Original Document Image:\")\n",
571-
"display(resize_image(comparison_dataset.image[index], CHAT_IMAGE_HEIGHT))\n",
570+
"display(resize_image(comparison_dataset.image[index], BASE64_IMAGE_HEIGHT))\n",
572571
"\n",
573572
"print(\"\\n📝 Generated Summary:\")\n",
574573
"rich.print(Panel(comparison_dataset.summary[index], title=\"Document Summary\", title_align=\"left\"))\n",

0 commit comments

Comments
 (0)