Skip to content

Commit a372332

Browse files
committed
rendering the raw markdown
1 parent dc6e6a1 commit a372332

File tree

1 file changed

+68
-20
lines changed

1 file changed

+68
-20
lines changed

examples/gpt-5/prompt-optimization-cookbook/prompt-optimization-cookbook.ipynb

Lines changed: 68 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,22 @@
548548
},
549549
"metadata": {},
550550
"output_type": "display_data"
551+
},
552+
{
553+
"name": "stdout",
554+
"output_type": "stream",
555+
"text": [
556+
"### Prompt Optimization Results - Coding Tasks\n",
557+
"\n",
558+
"| Metric | Baseline | Optimized | Δ (Opt − Base) |\n",
559+
"|----------------------------|---------:|----------:|---------------:|\n",
560+
"| Avg Time (s) | 7.906 | 6.977 | -0.929 |\n",
561+
"| Peak Memory (KB) | 3626.3 | 577.5 | -3048.8 |\n",
562+
"| Exact (%) | 100.0 | 100.0 | 0.0 |\n",
563+
"| Sorted (%) | 100.0 | 100.0 | 0.0 |\n",
564+
"| LLM Adherence (1–5) | 4.40 | 4.90 | +0.50 |\n",
565+
"| Code Quality (1–5) | 4.73 | 4.90 | +0.16 |\n"
566+
]
551567
}
552568
],
553569
"source": [
@@ -573,7 +589,9 @@
573589
" judge_optimized=Path(\"results_llm_as_judge_optimized\")/\"judgement_summary.csv\",\n",
574590
")\n",
575591
"\n",
576-
"display(Markdown(md))"
592+
"display(Markdown(md))\n",
593+
"\n",
594+
"print(md)"
577595
]
578596
},
579597
{
@@ -619,14 +637,6 @@
619637
"We will run FailSafeQA evaluations via the helper script and compare Baseline vs Optimized prompts side by side."
620638
]
621639
},
622-
{
623-
"cell_type": "code",
624-
"execution_count": null,
625-
"id": "c5849f77",
626-
"metadata": {},
627-
"outputs": [],
628-
"source": []
629-
},
630640
{
631641
"cell_type": "code",
632642
"execution_count": 3,
@@ -834,7 +844,7 @@
834844
},
835845
{
836846
"cell_type": "code",
837-
"execution_count": 1,
847+
"execution_count": 11,
838848
"id": "c20097e6",
839849
"metadata": {},
840850
"outputs": [
@@ -845,10 +855,10 @@
845855
"\n",
846856
"**Compliance threshold:** ≥ 6\n",
847857
"\n",
848-
"| Metric | Baseline | Optimized | Δ (Opt − Base) |\n",
849-
"|---|---:|---:|---:|\n",
850-
"| Robustness (avg across datapoints) | 0.320 | 0.540 | +0.220 |\n",
851-
"| Context Grounding (avg across datapoints) | 0.800 | 0.950 | +0.150 |\n",
858+
"| Metric | Baseline | Optimized | Δ (Opt − Base) |\n",
859+
"| ----------------------------------------- | -------- | --------- | -------------- |\n",
860+
"| Robustness (avg across datapoints) | 0.320 | 0.540 | +0.220 |\n",
861+
"| Context Grounding (avg across datapoints) | 0.800 | 0.950 | +0.150 |\n",
852862
"\n",
853863
"_Source files:_ `results_failsafeqa.csv` · `results_failsafeqa.csv`"
854864
],
@@ -858,6 +868,22 @@
858868
},
859869
"metadata": {},
860870
"output_type": "display_data"
871+
},
872+
{
873+
"name": "stdout",
874+
"output_type": "stream",
875+
"text": [
876+
"## FailSafeQA — Summary\n",
877+
"\n",
878+
"**Compliance threshold:** ≥ 6\n",
879+
"\n",
880+
"| Metric | Baseline | Optimized | Δ (Opt − Base) |\n",
881+
"| ----------------------------------------- | -------- | --------- | -------------- |\n",
882+
"| Robustness (avg across datapoints) | 0.320 | 0.540 | +0.220 |\n",
883+
"| Context Grounding (avg across datapoints) | 0.800 | 0.950 | +0.150 |\n",
884+
"\n",
885+
"_Source files:_ `results_failsafeqa.csv` · `results_failsafeqa.csv`\n"
886+
]
861887
}
862888
],
863889
"source": [
@@ -872,20 +898,39 @@
872898
") -> str:\n",
873899
" d_r = robust_opt - robust_base\n",
874900
" d_g = ground_opt - ground_base\n",
901+
"\n",
902+
" # Data rows\n",
903+
" rows = [\n",
904+
" [\"Metric\", \"Baseline\", \"Optimized\", \"Δ (Opt − Base)\"],\n",
905+
" [\"Robustness (avg across datapoints)\", f\"{robust_base:.3f}\", f\"{robust_opt:.3f}\", f\"{d_r:+.3f}\"],\n",
906+
" [\"Context Grounding (avg across datapoints)\", f\"{ground_base:.3f}\", f\"{ground_opt:.3f}\", f\"{d_g:+.3f}\"],\n",
907+
" ]\n",
908+
"\n",
909+
" # Calculate column widths for alignment\n",
910+
" col_widths = [max(len(str(row[i])) for row in rows) for i in range(len(rows[0]))]\n",
911+
"\n",
912+
" # Build table lines with padding\n",
913+
" lines = []\n",
914+
" for i, row in enumerate(rows):\n",
915+
" padded = [str(cell).ljust(col_widths[j]) for j, cell in enumerate(row)]\n",
916+
" lines.append(\"| \" + \" | \".join(padded) + \" |\")\n",
917+
" if i == 0: # after header\n",
918+
" sep = [\"-\" * col_widths[j] for j in range(len(row))]\n",
919+
" lines.append(\"| \" + \" | \".join(sep) + \" |\")\n",
920+
"\n",
921+
" table = \"\\n\".join(lines)\n",
922+
"\n",
875923
" return f\"\"\"\n",
876924
"## FailSafeQA — Summary\n",
877925
"\n",
878926
"**Compliance threshold:** ≥ {threshold}\n",
879927
"\n",
880-
"| Metric | Baseline | Optimized | Δ (Opt − Base) |\n",
881-
"|---|---:|---:|---:|\n",
882-
"| Robustness (avg across datapoints) | {robust_base:.3f} | {robust_opt:.3f} | {d_r:+.3f} |\n",
883-
"| Context Grounding (avg across datapoints) | {ground_base:.3f} | {ground_opt:.3f} | {d_g:+.3f} |\n",
928+
"{table}\n",
884929
"\n",
885930
"_Source files:_ `{src_base}` · `{src_opt}`\n",
886931
"\"\"\".strip()\n",
887932
"\n",
888-
"# Fill in with your reported numbers\n",
933+
"# Usage\n",
889934
"md = build_markdown_summary_from_metrics(\n",
890935
" robust_base=0.320, ground_base=0.800,\n",
891936
" robust_opt=0.540, ground_opt=0.950,\n",
@@ -894,7 +939,10 @@
894939
" src_opt=\"results_failsafeqa.csv\",\n",
895940
")\n",
896941
"\n",
897-
"display(Markdown(md))"
942+
"# Render the summary as formatted Markdown in the notebook\n",
943+
"display(Markdown(md))\n",
944+
"\n",
945+
"print(md)"
898946
]
899947
},
900948
{

0 commit comments

Comments
 (0)