rendering the raw markdown

corwin-oai · corwin-oai · commit a3723324bc22 · 2025-08-07T14:25:20.000-07:00
diff --git a/examples/gpt-5/prompt-optimization-cookbook/prompt-optimization-cookbook.ipynb b/examples/gpt-5/prompt-optimization-cookbook/prompt-optimization-cookbook.ipynb
@@ -548,6 +548,22 @@
      },
      "metadata": {},
      "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "### Prompt Optimization Results - Coding Tasks\n",
+      "\n",
+      "| Metric                      | Baseline | Optimized | Δ (Opt − Base) |\n",
+      "|----------------------------|---------:|----------:|---------------:|\n",
+      "| Avg Time (s)                |    7.906 |     6.977 |        -0.929 |\n",
+      "| Peak Memory (KB)            |   3626.3 |     577.5 |       -3048.8 |\n",
+      "| Exact (%)                   |    100.0 |     100.0 |           0.0 |\n",
+      "| Sorted (%)                  |    100.0 |     100.0 |           0.0 |\n",
+      "| LLM Adherence (1–5)         |     4.40 |      4.90 |         +0.50 |\n",
+      "| Code Quality (1–5)          |     4.73 |      4.90 |         +0.16 |\n"
+     ]
     }
    ],
    "source": [
@@ -573,7 +589,9 @@
     "    judge_optimized=Path(\"results_llm_as_judge_optimized\")/\"judgement_summary.csv\",\n",
     ")\n",
     "\n",
-    "display(Markdown(md))"
+    "display(Markdown(md))\n",
+    "\n",
+    "print(md)"
    ]
   },
   {
@@ -619,14 +637,6 @@
     "We will run FailSafeQA evaluations via the helper script and compare Baseline vs Optimized prompts side by side."
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c5849f77",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": 3,
@@ -834,7 +844,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 11,
    "id": "c20097e6",
    "metadata": {},
    "outputs": [
@@ -845,10 +855,10 @@
        "\n",
        "**Compliance threshold:** ≥ 6\n",
        "\n",
-       "| Metric | Baseline | Optimized | Δ (Opt − Base) |\n",
-       "|---|---:|---:|---:|\n",
-       "| Robustness (avg across datapoints) | 0.320 | 0.540 | +0.220 |\n",
-       "| Context Grounding (avg across datapoints) | 0.800 | 0.950 | +0.150 |\n",
+       "| Metric                                    | Baseline | Optimized | Δ (Opt − Base) |\n",
+       "| ----------------------------------------- | -------- | --------- | -------------- |\n",
+       "| Robustness (avg across datapoints)        | 0.320    | 0.540     | +0.220         |\n",
+       "| Context Grounding (avg across datapoints) | 0.800    | 0.950     | +0.150         |\n",
        "\n",
        "_Source files:_ `results_failsafeqa.csv` · `results_failsafeqa.csv`"
       ],
@@ -858,6 +868,22 @@
      },
      "metadata": {},
      "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "## FailSafeQA — Summary\n",
+      "\n",
+      "**Compliance threshold:** ≥ 6\n",
+      "\n",
+      "| Metric                                    | Baseline | Optimized | Δ (Opt − Base) |\n",
+      "| ----------------------------------------- | -------- | --------- | -------------- |\n",
+      "| Robustness (avg across datapoints)        | 0.320    | 0.540     | +0.220         |\n",
+      "| Context Grounding (avg across datapoints) | 0.800    | 0.950     | +0.150         |\n",
+      "\n",
+      "_Source files:_ `results_failsafeqa.csv` · `results_failsafeqa.csv`\n"
+     ]
     }
    ],
    "source": [
@@ -872,20 +898,39 @@
     ") -> str:\n",
     "    d_r = robust_opt - robust_base\n",
     "    d_g = ground_opt - ground_base\n",
+    "\n",
+    "    # Table rows: header first, then one row per metric\n",
+    "    rows = [\n",
+    "        [\"Metric\", \"Baseline\", \"Optimized\", \"Δ (Opt − Base)\"],\n",
+    "        [\"Robustness (avg across datapoints)\", f\"{robust_base:.3f}\", f\"{robust_opt:.3f}\", f\"{d_r:+.3f}\"],\n",
+    "        [\"Context Grounding (avg across datapoints)\", f\"{ground_base:.3f}\", f\"{ground_opt:.3f}\", f\"{d_g:+.3f}\"],\n",
+    "    ]\n",
+    "\n",
+    "    # Calculate column widths for alignment\n",
+    "    col_widths = [max(len(str(row[i])) for row in rows) for i in range(len(rows[0]))]\n",
+    "\n",
+    "    # Build table lines with padding\n",
+    "    lines = []\n",
+    "    for i, row in enumerate(rows):\n",
+    "        padded = [str(cell).ljust(col_widths[j]) for j, cell in enumerate(row)]\n",
+    "        lines.append(\"| \" + \" | \".join(padded) + \" |\")\n",
+    "        if i == 0:  # after header\n",
+    "            sep = [\"-\" * col_widths[j] for j in range(len(row))]\n",
+    "            lines.append(\"| \" + \" | \".join(sep) + \" |\")\n",
+    "\n",
+    "    table = \"\\n\".join(lines)\n",
+    "\n",
     "    return f\"\"\"\n",
     "## FailSafeQA — Summary\n",
     "\n",
     "**Compliance threshold:** ≥ {threshold}\n",
     "\n",
-    "| Metric | Baseline | Optimized | Δ (Opt − Base) |\n",
-    "|---|---:|---:|---:|\n",
-    "| Robustness (avg across datapoints) | {robust_base:.3f} | {robust_opt:.3f} | {d_r:+.3f} |\n",
-    "| Context Grounding (avg across datapoints) | {ground_base:.3f} | {ground_opt:.3f} | {d_g:+.3f} |\n",
+    "{table}\n",
     "\n",
     "_Source files:_ `{src_base}` · `{src_opt}`\n",
     "\"\"\".strip()\n",
     "\n",
-    "# Fill in with your reported numbers\n",
+    "# Usage\n",
     "md = build_markdown_summary_from_metrics(\n",
     "    robust_base=0.320, ground_base=0.800,\n",
     "    robust_opt=0.540, ground_opt=0.950,\n",
@@ -894,7 +939,10 @@
     "    src_opt=\"results_failsafeqa.csv\",\n",
     ")\n",
     "\n",
-    "display(Markdown(md))"
+    "# Render the table in the notebook, then print the raw Markdown source\n",
+    "display(Markdown(md))\n",
+    "\n",
+    "print(md)"
    ]
   },
   {