diff --git a/examples/multimodal/image_understanding_with_rag.ipynb b/examples/multimodal/image_understanding_with_rag.ipynb
index 97473732f1..70353ecd82 100644
--- a/examples/multimodal/image_understanding_with_rag.ipynb
+++ b/examples/multimodal/image_understanding_with_rag.ipynb
@@ -6,7 +6,7 @@
"source": [
"# Image Understanding with RAG using OpenAI's Vision & Responses APIs\n",
"\n",
- "Welcome! This notebook demonstrates how to build a Retrieval-Augmented Generation (RAG) system using OpenAI’s Vision and Responses APIs. It focuses on multimodal data, combining image and text inputs to analyze customer experiences. The system leverages GPT-4.1 and integrates image understanding with file search to provide context-aware responses.\n",
+ "Welcome! This notebook demonstrates how to build a Retrieval-Augmented Generation (RAG) system using OpenAI’s Vision and Responses APIs. It focuses on multimodal data, combining image and text inputs to analyze customer experiences. The system leverages GPT-5 and integrates image understanding with file search to provide context-aware responses.\n",
"\n",
"Multimodal datasets are increasingly common, particularly in domains like healthcare, where records often contain both visual data (e.g. radiology scans) and accompanying text (e.g. clinical notes). Real-world datasets also tend to be noisy, with incomplete or missing information, making it critical to analyze multiple modalities in tandem.\n",
"\n",
@@ -115,9 +115,17 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Order arrived 10 minutes early, food was hot and packaged securely. Tacos were fresh, well-seasoned, and the salsa tasted homemade. Driver was friendly, followed instructions, and left it at the door. Will definitely order again.\n"
+ ]
+ }
+ ],
"source": [
"def generate_food_delivery_review(sentiment: str = 'positive') -> str:\n",
" \"\"\"\n",
@@ -133,7 +141,8 @@
" prompt += f\" The review should reflect a {sentiment} experience.\"\n",
" \n",
" response = client.responses.create(\n",
- " model=\"gpt-4.1\",\n",
+ " model=\"gpt-5\",\n",
+ " reasoning={\"effort\": \"minimal\"},\n",
" input=[{\"role\": \"user\", \"content\": prompt}]\n",
" )\n",
" return response.output_text\n",
@@ -174,7 +183,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -188,7 +197,8 @@
" \"\"\"Analyze food delivery image and return sentiment analysis.\"\"\"\n",
" base64_image = encode_image(image_path)\n",
" response = client.responses.create(\n",
- " model=\"gpt-4.1\",\n",
+ " model=\"gpt-5\",\n",
+ " reasoning={\"effort\": \"minimal\"},\n",
" input=[{\n",
" \"role\": \"user\",\n",
" \"content\": [\n",
@@ -202,8 +212,6 @@
" },\n",
" ],\n",
" }],\n",
- " max_output_tokens=50,\n",
- " temperature=0.2\n",
" )\n",
" return response.output_text.strip()"
]
@@ -232,9 +240,122 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>id</th>\n",
+ "      <th>month</th>\n",
+ "      <th>text</th>\n",
+ "      <th>image_path</th>\n",
+ "      <th>label</th>\n",
+ "      <th>full_sentiment</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>1</td>\n",
+ "      <td>june</td>\n",
+ "      <td>Absolutely delicious! The sushi was fresh, beautifully packed, and arrived right on time. Will d...</td>\n",
+ "      <td>NaN</td>\n",
+ "      <td>positive</td>\n",
+ "      <td>Absolutely delicious! The sushi was fresh, beautifully packed, and arrived right on time. Will d...</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>2</td>\n",
+ "      <td>july</td>\n",
+ "      <td>Half my order was missing and the burger looked thrown together. Not worth the money.</td>\n",
+ "      <td>NaN</td>\n",
+ "      <td>negative</td>\n",
+ "      <td>Half my order was missing and the burger looked thrown together. Not worth the money.</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>2</th>\n",
+ "      <td>3</td>\n",
+ "      <td>july</td>\n",
+ "      <td>Packaging was leaking sauce everywhere. Presentation was a mess. Tasted like leftovers.</td>\n",
+ "      <td>NaN</td>\n",
+ "      <td>negative</td>\n",
+ "      <td>Packaging was leaking sauce everywhere. Presentation was a mess. Tasted like leftovers.</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>3</th>\n",
+ "      <td>4</td>\n",
+ "      <td>july</td>\n",
+ "      <td>Burger was hot, fries were still crispy, and the milkshake wasn’t melted at all. Fantastic deliv...</td>\n",
+ "      <td>3.png</td>\n",
+ "      <td>positive</td>\n",
+ "      <td>Burger was hot, fries were still crispy, and the milkshake wasn’t melted at all. Fantastic deliv...</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>4</th>\n",
+ "      <td>5</td>\n",
+ "      <td>june</td>\n",
+ "      <td>Received the wrong items. I ordered vegetarian and got meat. Totally unacceptable.</td>\n",
+ "      <td>NaN</td>\n",
+ "      <td>negative</td>\n",
+ "      <td>Received the wrong items. I ordered vegetarian and got meat. Totally unacceptable.</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " id month \\\n",
+ "0 1 june \n",
+ "1 2 july \n",
+ "2 3 july \n",
+ "3 4 july \n",
+ "4 5 june \n",
+ "\n",
+ " text \\\n",
+ "0 Absolutely delicious! The sushi was fresh, beautifully packed, and arrived right on time. Will d... \n",
+ "1 Half my order was missing and the burger looked thrown together. Not worth the money. \n",
+ "2 Packaging was leaking sauce everywhere. Presentation was a mess. Tasted like leftovers. \n",
+ "3 Burger was hot, fries were still crispy, and the milkshake wasn’t melted at all. Fantastic deliv... \n",
+ "4 Received the wrong items. I ordered vegetarian and got meat. Totally unacceptable. \n",
+ "\n",
+ " image_path label \\\n",
+ "0 NaN positive \n",
+ "1 NaN negative \n",
+ "2 NaN negative \n",
+ "3 3.png positive \n",
+ "4 NaN negative \n",
+ "\n",
+ " full_sentiment \n",
+ "0 Absolutely delicious! The sushi was fresh, beautifully packed, and arrived right on time. Will d... \n",
+ "1 Half my order was missing and the burger looked thrown together. Not worth the money. \n",
+ "2 Packaging was leaking sauce everywhere. Presentation was a mess. Tasted like leftovers. \n",
+ "3 Burger was hot, fries were still crispy, and the milkshake wasn’t melted at all. Fantastic deliv... \n",
+ "4 Received the wrong items. I ordered vegetarian and got meat. Totally unacceptable. "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"pd.set_option('display.max_colwidth', 100) # Increase from default (50) to view full sentiment\n",
"display(df.head())"
@@ -287,7 +408,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -339,9 +460,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "🔍 Query: Where there any comments about the 'spaghetti'?\n",
+ "\n",
+ "📝 Response:\n",
+ "----------------------------------------\n",
+ "I couldn’t find any comments that explicitly mention “spaghetti.” The closest related note says “Pasta was overcooked” in context_9_july.txt . If you have a specific date or file in mind, I can check that directly.\n"
+ ]
+ }
+ ],
"source": [
"# Query the vector store for spaghetti reviews in July\n",
"query = \"Where there any comments about the 'spaghetti'?\"\n",
@@ -349,7 +482,7 @@
"\n",
"# Execute the search with filtering\n",
"response = client.responses.create(\n",
- " model=\"gpt-4.1\",\n",
+ " model=\"gpt-5\",\n",
" input=query,\n",
" tools=[{\n",
" \"type\": \"file_search\",\n",
@@ -370,15 +503,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "🔍 Query: Where there any comments about the 'spaghetti'?\n",
+ "\n",
+ "📝 Response:\n",
+ "----------------------------------------\n",
+ "Yes. There’s a positive note describing “a neatly plated spaghetti in tomato sauce with parsley, served alongside arugula, garlic bread, and grated cheese.” \n"
+ ]
+ }
+ ],
"source": [
"query = \"Where there any comments about the 'spaghetti'?\"\n",
"print(f\"🔍 Query: {query}\\n\")\n",
"\n",
"response = client.responses.create(\n",
- " model=\"gpt-4.1\",\n",
+ " model=\"gpt-5\",\n",
" input=query,\n",
" tools=[{\n",
" \"type\": \"file_search\",\n",
@@ -430,7 +575,7 @@
" \"\"\"\n",
" # Get the annotations from the response\n",
" try:\n",
- " annotations = response.output[1].content[0].annotations\n",
+ " annotations = response.output[3].content[0].annotations\n",
" retrieved_files = {result.filename for result in annotations}\n",
" except (AttributeError, IndexError):\n",
" print(\"No search results found in the response.\")\n",
@@ -461,15 +606,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "🔍 Query: Were there any negative reviews for pizza, and if so, was the pizza burnt?\n",
+ "\n",
+ "📝 Response:\n",
+ "----------------------------------------\n",
+ "Yes. One review explicitly describes a “burnt pepperoni pizza with charred crust and grease stains in the box” and is marked as negative sentiment .\n"
+ ]
+ }
+ ],
"source": [
"query = \"Were there any negative reviews for pizza, and if so, was the pizza burnt?\"\n",
"print(f\"🔍 Query: {query}\\n\")\n",
"\n",
"response = client.responses.create(\n",
- " model=\"gpt-4.1\",\n",
+ " model=\"gpt-5\",\n",
" input=query,\n",
" tools=[{\n",
" \"type\": \"file_search\",\n",
@@ -520,16 +677,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
- "def prepare_evaluation_data(df, text_col=\"full_sentiment\", label_col=\"label\"):\n",
- " \"\"\"Prepare data items for evaluation from DataFrame.\"\"\"\n",
- " return [{\"item\": {\"input\": str(row[text_col]), \"ground_truth\": row[label_col]}} \n",
- " for _, row in df.iterrows()]\n",
- "\n",
- "\n",
"def prepare_evaluation_data(\n",
" df: pd.DataFrame,\n",
" text_col: str = \"full_sentiment\",\n",
@@ -564,7 +715,7 @@
" \"\"\"\n",
" eval_config = {\n",
" \"type\": \"completions\",\n",
- " \"model\": \"gpt-4.1\",\n",
+ " \"model\": \"gpt-5\",\n",
" \"input_messages\": {\n",
" \"type\": \"template\",\n",
" \"template\": [\n",
@@ -656,7 +807,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
@@ -675,9 +826,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"# Calculate passed and total for text_only_run\n",
"text_only_data = text_only_run_output_items.to_dict()['data']\n",
@@ -726,9 +888,62 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " Input \n",
+ " Model Output \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " [{'content': 'Classify the sentiment of this food delivery review: The food came looking like this... Categorize the request into one of \"positive\", \"negative\" or \"unclear\". Respond with only one of those words.', 'role': 'user'}] \n",
+ " [{'content': 'negative', 'role': 'assistant'}] \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " [{'content': 'Classify the sentiment of this food delivery review: nan. Categorize the request into one of \"positive\", \"negative\" or \"unclear\". Respond with only one of those words.', 'role': 'user'}] \n",
+ " [{'content': 'unclear', 'role': 'assistant'}] \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " [{'content': 'Classify the sentiment of this food delivery review: nan. Categorize the request into one of \"positive\", \"negative\" or \"unclear\". Respond with only one of those words.', 'role': 'user'}] \n",
+ " [{'content': 'unclear', 'role': 'assistant'}] \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " [{'content': 'Classify the sentiment of this food delivery review: nan. Categorize the request into one of \"positive\", \"negative\" or \"unclear\". Respond with only one of those words.', 'role': 'user'}] \n",
+ " [{'content': 'unclear', 'role': 'assistant'}] \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " [{'content': 'Classify the sentiment of this food delivery review: Wow look at this pizza!. Categorize the request into one of \"positive\", \"negative\" or \"unclear\". Respond with only one of those words.', 'role': 'user'}] \n",
+ " [{'content': 'positive', 'role': 'assistant'}] \n",
+ " \n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"failed_samples = [\n",
" {\n",
@@ -787,7 +1002,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.8"
+ "version": "3.12.9"
}
},
"nbformat": 4,