|
170 | 170 | "source": [ |
171 | 171 | "# Dataset processing configuration\n", |
172 | 172 | "IMG_COUNT = 512 # Number of images to process\n", |
173 | | - "CHAT_IMAGE_HEIGHT = 512 # Standardized height for model input\n", |
| 173 | + "BASE64_IMAGE_HEIGHT = 512 # Standardized height for model input\n", |
174 | 174 | "\n", |
175 | 175 | "# Load ColPali dataset for visual documents\n", |
176 | 176 | "img_dataset_cfg = {\n", |
|
210 | 210 | " height: Target height for image resizing\n", |
211 | 211 | "\n", |
212 | 212 | " Returns:\n", |
213 | | - " Updated record with chat_image and uuid fields\n", |
| 213 | + " Updated record with base64_image and uuid fields\n", |
214 | 214 | " \"\"\"\n", |
215 | 215 | " # Resize image for consistent processing\n", |
216 | 216 | " image = resize_image(record[\"image\"], height)\n", |
|
240 | 240 | "\n", |
241 | 241 | "img_dataset_iter = iter(\n", |
242 | 242 | " load_dataset(**img_dataset_cfg)\n", |
243 | | - " .map(convert_image_to_chat_format, fn_kwargs={\"height\": CHAT_IMAGE_HEIGHT})\n", |
| 243 | + " .map(convert_image_to_chat_format, fn_kwargs={\"height\": BASE64_IMAGE_HEIGHT})\n", |
244 | 244 | ")\n", |
245 | 245 | "img_dataset = pd.DataFrame([next(img_dataset_iter) for _ in range(IMG_COUNT)])\n", |
246 | 246 | "\n", |
|
296 | 296 | " name=\"summary\",\n", |
297 | 297 | " type=\"llm-code\",\n", |
298 | 298 | " model_alias=model_alias,\n", |
299 | | - " system_prompt=(\"You are a helpful assistant that summarizes images. \"\n", |
300 | | - " \"The image will be provided to you as a base64 encoded format.\"\n", |
301 | | - " \"You must decode the image and then summarize the content of the image.\"),\n", |
302 | 299 | "    prompt=(\"Provide a detailed summary of the content in this image in Markdown format. \"\n", |
303 | 300 | "            \"Start from the top of the image and then describe it from top to bottom. \"\n", |
304 | 301 | "            \"Place a summary at the bottom.\"),\n", |
|
364 | 361 | " type=P.SamplerType.CATEGORY,\n", |
365 | 362 | " params=P.CategorySamplerParams(values=[\"easy\", \"medium\", \"hard\"]),\n", |
366 | 363 | " description=\"The difficulty of the generated question\",\n", |
367 | | - " ))\n", |
368 | | - "\n", |
369 | | - "# Optionally validate that the columns are configured correctly.\n", |
370 | | - "config_builder.validate()\n" |
| 364 | + " ))\n" |
371 | 365 | ] |
372 | 366 | }, |
373 | 367 | { |
|
422 | 416 | "        system_prompt=(\"Generate a short 1-3 word topic for the question: {{ question }} based on the given context: {{ summary }}\"),\n", |
423 | 417 | " output_format=QuestionTopic,\n", |
424 | 418 | " )\n", |
425 | | - ")\n", |
426 | | - "\n", |
427 | | - "config_builder.validate()" |
| 419 | + ")\n" |
428 | 420 | ] |
429 | 421 | }, |
430 | 422 | { |
|
442 | 434 | "Use this step to fine-tune your configuration before full-scale generation.\n" |
443 | 435 | ] |
444 | 436 | }, |
| 437 | + { |
| 438 | + "cell_type": "markdown", |
| 439 | + "metadata": {}, |
| 440 | + "source": [ |
| 441 | + "**Note:** Please ignore the `PROMPT_WITHOUT_REFERENCES` validation warning if it appears. The image context is passed to the LLM via `multi_modal_context`, so the prompt does not need to reference any other column." |
| 442 | + ] |
| 443 | + }, |
445 | 444 | { |
446 | 445 | "cell_type": "code", |
447 | 446 | "execution_count": null, |
|
489 | 488 | ")\n", |
490 | 489 | "\n", |
491 | 490 | "print(\"📄 Original Document Image:\")\n", |
492 | | - "display(resize_image(comparison_dataset.image[index], CHAT_IMAGE_HEIGHT))\n", |
| 491 | + "display(resize_image(comparison_dataset.image[index], BASE64_IMAGE_HEIGHT))\n", |
493 | 492 | "\n", |
494 | 493 | "print(\"\\n📝 Generated Summary:\")\n", |
495 | 494 | "rich.print(Panel(comparison_dataset.summary[index], title=\"Document Summary\", title_align=\"left\"))\n", |
|
568 | 567 | ")\n", |
569 | 568 | "\n", |
570 | 569 | "print(\"📄 Original Document Image:\")\n", |
571 | | - "display(resize_image(comparison_dataset.image[index], CHAT_IMAGE_HEIGHT))\n", |
| 570 | + "display(resize_image(comparison_dataset.image[index], BASE64_IMAGE_HEIGHT))\n", |
572 | 571 | "\n", |
573 | 572 | "print(\"\\n📝 Generated Summary:\")\n", |
574 | 573 | "rich.print(Panel(comparison_dataset.summary[index], title=\"Document Summary\", title_align=\"left\"))\n", |
|