openai · CorwinCheung · Aug 7, 2025 · Aug 7, 2025 · Aug 7, 2025 · Aug 7, 2025
diff --git a/examples/gpt-5/prompt-optimization-cookbook/prompt-optimization-cookbook.ipynb b/examples/gpt-5/prompt-optimization-cookbook/prompt-optimization-cookbook.ipynb
@@ -15,9 +15,9 @@
    "source": [
     "The GPT-5 Family of models are the smartest models we’ve released to date, representing a step change in the models’ capabilities across the board. GPT-5 is particularly specialized in agentic task performance, coding, and steerability, making it a great fit for everyone from curious users to advanced researchers. \n",
     "\n",
-    "GPT-5 will benefit from all the traditional prompting best practices, and to help you construct the best prompt we are introducing a [Prompting Guide for GPT-5](#) explaining how to make the most of its state-of-the-art capabilities. Alongside that, we are introducing a [GPT-5 Specific Prompt Optimizer](#https://platform.openai.com/chat/edit?optimize=true) in our Playground to help users get started on **improving existing prompts** and **migrating prompts** for GPT-5 and other OpenAI models.\n",
+    "GPT-5 benefits from all the traditional prompting best practices. To help you construct the best prompt, we are introducing a [Prompting Guide for GPT-5](https://cookbook.openai.com/examples/gpt-5/gpt-5_prompting_guide) that explains how to make the most of its state-of-the-art capabilities. Alongside that, we are introducing a [GPT-5 Specific Prompt Optimizer](https://platform.openai.com/chat/edit?optimize=true) in our Playground to help users get started on **improving existing prompts** and **migrating prompts** for GPT-5 and other OpenAI models.\n",
     "\n",
-    "In this cookbook we will go through how you can get spun up quickly to solve your task with GPT-5. We will share results of measurable improvements on common tasks and walk you through how you can use the Prompt Optimizer to do the same.\n"
+    "In this cookbook, we will show how you can get up and running quickly on your task with GPT-5. We will share results showing measurable improvements on common tasks and walk you through how you can use the Prompt Optimizer to achieve the same."
    ]
   },
   {
@@ -548,6 +548,22 @@
      },
      "metadata": {},
      "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "### Prompt Optimization Results - Coding Tasks\n",
+      "\n",
+      "| Metric                      | Baseline | Optimized | Δ (Opt − Base) |\n",
+      "|----------------------------|---------:|----------:|---------------:|\n",
+      "| Avg Time (s)                |    7.906 |     6.977 |        -0.929 |\n",
+      "| Peak Memory (KB)            |   3626.3 |     577.5 |       -3048.8 |\n",
+      "| Exact (%)                   |    100.0 |     100.0 |           0.0 |\n",
+      "| Sorted (%)                  |    100.0 |     100.0 |           0.0 |\n",
+      "| LLM Adherence (1–5)         |     4.40 |      4.90 |         +0.50 |\n",
+      "| Code Quality (1–5)          |     4.73 |      4.90 |         +0.16 |\n"
+     ]
     }
    ],
    "source": [
@@ -573,7 +589,9 @@
     "    judge_optimized=Path(\"results_llm_as_judge_optimized\")/\"judgement_summary.csv\",\n",
     ")\n",
     "\n",
-    "display(Markdown(md))"
+    "display(Markdown(md))\n",
+    "\n",
+    "print(md)"
    ]
   },
   {
@@ -619,14 +637,6 @@
     "We will run FailSafeQA evaluations via the helper script and compare Baseline vs Optimized prompts side by side."
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c5849f77",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": 3,
@@ -834,7 +844,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 11,
    "id": "c20097e6",
    "metadata": {},
    "outputs": [
@@ -845,10 +855,10 @@
        "\n",
        "**Compliance threshold:** ≥ 6\n",
        "\n",
-       "| Metric | Baseline | Optimized | Δ (Opt − Base) |\n",
-       "|---|---:|---:|---:|\n",
-       "| Robustness (avg across datapoints) | 0.320 | 0.540 | +0.220 |\n",
-       "| Context Grounding (avg across datapoints) | 0.800 | 0.950 | +0.150 |\n",
+       "| Metric                                    | Baseline | Optimized | Δ (Opt − Base) |\n",
+       "| ----------------------------------------- | -------- | --------- | -------------- |\n",
+       "| Robustness (avg across datapoints)        | 0.320    | 0.540     | +0.220         |\n",
+       "| Context Grounding (avg across datapoints) | 0.800    | 0.950     | +0.150         |\n",
        "\n",
        "_Source files:_ `results_failsafeqa.csv` · `results_failsafeqa.csv`"
       ],
@@ -858,6 +868,22 @@
      },
      "metadata": {},
      "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## FailSafeQA — Summary\n",
+      "\n",
+      "**Compliance threshold:** ≥ 6\n",
+      "\n",
+      "| Metric                                    | Baseline | Optimized | Δ (Opt − Base) |\n",
+      "| ----------------------------------------- | -------- | --------- | -------------- |\n",
+      "| Robustness (avg across datapoints)        | 0.320    | 0.540     | +0.220         |\n",
+      "| Context Grounding (avg across datapoints) | 0.800    | 0.950     | +0.150         |\n",
+      "\n",
+      "_Source files:_ `results_failsafeqa.csv` · `results_failsafeqa.csv`\n"
+     ]
     }
    ],
    "source": [
@@ -872,20 +898,39 @@
     ") -> str:\n",
     "    d_r = robust_opt - robust_base\n",
     "    d_g = ground_opt - ground_base\n",
+    "\n",
+    "    # Data rows\n",
+    "    rows = [\n",
+    "        [\"Metric\", \"Baseline\", \"Optimized\", \"Δ (Opt − Base)\"],\n",
+    "        [\"Robustness (avg across datapoints)\", f\"{robust_base:.3f}\", f\"{robust_opt:.3f}\", f\"{d_r:+.3f}\"],\n",
+    "        [\"Context Grounding (avg across datapoints)\", f\"{ground_base:.3f}\", f\"{ground_opt:.3f}\", f\"{d_g:+.3f}\"],\n",
+    "    ]\n",
+    "\n",
+    "    # Calculate column widths for alignment\n",
+    "    col_widths = [max(len(str(row[i])) for row in rows) for i in range(len(rows[0]))]\n",
+    "\n",
+    "    # Build table lines with padding\n",
+    "    lines = []\n",
+    "    for i, row in enumerate(rows):\n",
+    "        padded = [str(cell).ljust(col_widths[j]) for j, cell in enumerate(row)]\n",
+    "        lines.append(\"| \" + \" | \".join(padded) + \" |\")\n",
+    "        if i == 0:  # after header\n",
+    "            sep = [\"-\" * col_widths[j] for j in range(len(row))]\n",
+    "            lines.append(\"| \" + \" | \".join(sep) + \" |\")\n",
+    "\n",
+    "    table = \"\\n\".join(lines)\n",
+    "\n",
     "    return f\"\"\"\n",
     "## FailSafeQA — Summary\n",
     "\n",
     "**Compliance threshold:** ≥ {threshold}\n",
     "\n",
-    "| Metric | Baseline | Optimized | Δ (Opt − Base) |\n",
-    "|---|---:|---:|---:|\n",
-    "| Robustness (avg across datapoints) | {robust_base:.3f} | {robust_opt:.3f} | {d_r:+.3f} |\n",
-    "| Context Grounding (avg across datapoints) | {ground_base:.3f} | {ground_opt:.3f} | {d_g:+.3f} |\n",
+    "{table}\n",
     "\n",
     "_Source files:_ `{src_base}` · `{src_opt}`\n",
     "\"\"\".strip()\n",
     "\n",
-    "# Fill in with your reported numbers\n",
+    "# Usage\n",
     "md = build_markdown_summary_from_metrics(\n",
     "    robust_base=0.320, ground_base=0.800,\n",
     "    robust_opt=0.540, ground_opt=0.950,\n",
@@ -894,7 +939,10 @@
     "    src_opt=\"results_failsafeqa.csv\",\n",
     ")\n",
     "\n",
-    "display(Markdown(md))"
+    "# Render the summary as formatted Markdown in the notebook output\n",
+    "display(Markdown(md))\n",
+    "\n",
+    "print(md)"
    ]
   },
   {