diff --git a/authors.yaml b/authors.yaml
index 592296d19d..53095a51aa 100644
--- a/authors.yaml
+++ b/authors.yaml
@@ -2,6 +2,26 @@
# You can optionally customize how your information shows up cookbook.openai.com over here.
# If your information is not present here, it will be pulled from your GitHub profile.
+WJPBProjects:
+ name: "Wulfie Bain"
+ website: "https://www.linkedin.com/in/wulfie-bain/"
+ avatar: "https://avatars.githubusercontent.com/u/76624567?v=4"
+
+anoop-openai:
+ name: "Anoop Kotha"
+ website: "https://x.com/anoopkotha"
+ avatar: "https://pbs.twimg.com/profile_images/1953388055190224896/vHZ5CVPs_400x400.jpg"
+
+erinkav-openai:
+ name: "Erin Kavanaugh"
+ website: "https://www.linkedin.com/in/erinkavanaugh/"
+ avatar: "https://media.licdn.com/dms/image/v2/D5603AQFBeFl_GBWRww/profile-displayphoto-scale_400_400/B56ZiCtx6zHcAg-/0/1754539697289?e=1757548800&v=beta&t=e3znvOjyDV9MBdp2AABxGtvdCx1LzyrBR75cjOSh5h8"
+
+ericzakariasson:
+ name: "Eric Zakariasson"
+ website: "https://x.com/ericzakariasson"
+ avatar: "https://pbs.twimg.com/profile_images/1841151626456801283/HnXqy3TQ_400x400.jpg"
+
jayrodge:
name: "Jay Rodge"
website: "https://www.linkedin.com/in/jayrodge/"
diff --git a/examples/gpt-5/apply_patch.py b/examples/gpt-5/apply_patch.py
new file mode 100644
index 0000000000..a0df143095
--- /dev/null
+++ b/examples/gpt-5/apply_patch.py
@@ -0,0 +1,483 @@
+import os
+from enum import Enum
+from typing import Callable, Optional
+
+from pydantic import BaseModel, Field
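+
+
+# Reference implementation of the apply_patch format. A patch is a plain-text
+# envelope of the form:
+#
+#   *** Begin Patch
+#   *** Update File: path/to/file
+#   @@ optional locator line
+#    context line
+#   -removed line
+#   +added line
+#   *** End Patch
+#
+# with "*** Add File:", "*** Delete File:" and "*** Move to:" variants; see
+# Parser.parse for the full grammar.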
+
+
+class ActionType(str, Enum):
+ ADD = "add"
+ DELETE = "delete"
+ UPDATE = "update"
+
+
+class FileChange(BaseModel):
+ type: ActionType
+ old_content: Optional[str] = None
+ new_content: Optional[str] = None
+ move_path: Optional[str] = None
+
+
+class Commit(BaseModel):
+ changes: dict[str, FileChange] = Field(default_factory=dict)
+
+
+def assemble_changes(orig: dict[str, Optional[str]], dest: dict[str, Optional[str]]) -> Commit:
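+    """Diff two path -> content mappings (None means absent) into a Commit of add/delete/update changes."""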
+ commit = Commit()
+ for path in sorted(set(orig.keys()).union(dest.keys())):
+ old_content = orig.get(path)
+ new_content = dest.get(path)
+ if old_content != new_content:
+ if old_content is not None and new_content is not None:
+ commit.changes[path] = FileChange(
+ type=ActionType.UPDATE,
+ old_content=old_content,
+ new_content=new_content,
+ )
+ elif new_content:
+ commit.changes[path] = FileChange(
+ type=ActionType.ADD,
+ new_content=new_content,
+ )
+ elif old_content:
+ commit.changes[path] = FileChange(
+ type=ActionType.DELETE,
+ old_content=old_content,
+ )
+            else:
+                assert False, "unreachable: contents differ but both sides are empty or missing"
+ return commit
+
+
+class Chunk(BaseModel):
+ orig_index: int = -1 # line index of the first line in the original file
+ del_lines: list[str] = Field(default_factory=list)
+ ins_lines: list[str] = Field(default_factory=list)
+
+
+class PatchAction(BaseModel):
+ type: ActionType
+ new_file: Optional[str] = None
+ chunks: list[Chunk] = Field(default_factory=list)
+ move_path: Optional[str] = None
+
+
+class Patch(BaseModel):
+ actions: dict[str, PatchAction] = Field(default_factory=dict)
+
+
+class Parser(BaseModel):
+ current_files: dict[str, str] = Field(default_factory=dict)
+ lines: list[str] = Field(default_factory=list)
+ index: int = 0
+ patch: Patch = Field(default_factory=Patch)
+ fuzz: int = 0
+
+ def is_done(self, prefixes: Optional[tuple[str, ...]] = None) -> bool:
+ if self.index >= len(self.lines):
+ return True
+ if prefixes and self.lines[self.index].startswith(prefixes):
+ return True
+ return False
+
+    def startswith(self, prefix: str | tuple[str, ...]) -> bool:
+ assert self.index < len(self.lines), f"Index: {self.index} >= {len(self.lines)}"
+ if self.lines[self.index].startswith(prefix):
+ return True
+ return False
+
+ def read_str(self, prefix: str = "", return_everything: bool = False) -> str:
+ assert self.index < len(self.lines), f"Index: {self.index} >= {len(self.lines)}"
+ if self.lines[self.index].startswith(prefix):
+ if return_everything:
+ text = self.lines[self.index]
+ else:
+ text = self.lines[self.index][len(prefix) :]
+ self.index += 1
+ return text
+ return ""
+
+ def parse(self):
+ while not self.is_done(("*** End Patch",)):
+ path = self.read_str("*** Update File: ")
+ if path:
+ if path in self.patch.actions:
+ raise DiffError(f"Update File Error: Duplicate Path: {path}")
+ move_to = self.read_str("*** Move to: ")
+ if path not in self.current_files:
+ raise DiffError(f"Update File Error: Missing File: {path}")
+ text = self.current_files[path]
+ action = self.parse_update_file(text)
+ # TODO: Check move_to is valid
+ action.move_path = move_to
+ self.patch.actions[path] = action
+ continue
+ path = self.read_str("*** Delete File: ")
+ if path:
+ if path in self.patch.actions:
+ raise DiffError(f"Delete File Error: Duplicate Path: {path}")
+ if path not in self.current_files:
+ raise DiffError(f"Delete File Error: Missing File: {path}")
+ self.patch.actions[path] = PatchAction(
+ type=ActionType.DELETE,
+ )
+ continue
+ path = self.read_str("*** Add File: ")
+ if path:
+ if path in self.patch.actions:
+ raise DiffError(f"Add File Error: Duplicate Path: {path}")
+ self.patch.actions[path] = self.parse_add_file()
+ continue
+ raise DiffError(f"Unknown Line: {self.lines[self.index]}")
+ if not self.startswith("*** End Patch"):
+ raise DiffError("Missing End Patch")
+ self.index += 1
+
+ def parse_update_file(self, text: str) -> PatchAction:
+ # self.lines / self.index refers to the patch
+ # lines / index refers to the file being modified
+ # print("parse update file")
+ action = PatchAction(
+ type=ActionType.UPDATE,
+ )
+ lines = text.split("\n")
+ index = 0
+ while not self.is_done(
+ (
+ "*** End Patch",
+ "*** Update File:",
+ "*** Delete File:",
+ "*** Add File:",
+ "*** End of File",
+ )
+ ):
+ def_str = self.read_str("@@ ")
+ section_str = ""
+ if not def_str:
+ if self.lines[self.index] == "@@":
+ section_str = self.lines[self.index]
+ self.index += 1
+ if not (def_str or section_str or index == 0):
+ raise DiffError(f"Invalid Line:\n{self.lines[self.index]}")
+ if def_str.strip():
+ found = False
+ if not [s for s in lines[:index] if s == def_str]:
+ # def str is a skip ahead operator
+ for i, s in enumerate(lines[index:], index):
+ if s == def_str:
+ # print(f"Jump ahead @@: {index} -> {i}: {def_str}")
+ index = i + 1
+ found = True
+ break
+ if not found and not [s for s in lines[:index] if s.strip() == def_str.strip()]:
+ # def str is a skip ahead operator
+ for i, s in enumerate(lines[index:], index):
+ if s.strip() == def_str.strip():
+ # print(f"Jump ahead @@: {index} -> {i}: {def_str}")
+ index = i + 1
+ self.fuzz += 1
+ found = True
+ break
+ next_chunk_context, chunks, end_patch_index, eof = peek_next_section(
+ self.lines, self.index
+ )
+ next_chunk_text = "\n".join(next_chunk_context)
+ new_index, fuzz = find_context(lines, next_chunk_context, index, eof)
+ if new_index == -1:
+ if eof:
+ raise DiffError(f"Invalid EOF Context {index}:\n{next_chunk_text}")
+ else:
+ raise DiffError(f"Invalid Context {index}:\n{next_chunk_text}")
+ self.fuzz += fuzz
+ # print(f"Jump ahead: {index} -> {new_index}")
+ for ch in chunks:
+ ch.orig_index += new_index
+ action.chunks.append(ch)
+ index = new_index + len(next_chunk_context)
+ self.index = end_patch_index
+ continue
+ return action
+
+ def parse_add_file(self) -> PatchAction:
+ lines = []
+ while not self.is_done(
+ ("*** End Patch", "*** Update File:", "*** Delete File:", "*** Add File:")
+ ):
+ s = self.read_str()
+ if not s.startswith("+"):
+ raise DiffError(f"Invalid Add File Line: {s}")
+ s = s[1:]
+ lines.append(s)
+ return PatchAction(
+ type=ActionType.ADD,
+ new_file="\n".join(lines),
+ )
+
+
+def find_context_core(lines: list[str], context: list[str], start: int) -> tuple[int, int]:
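+    """Find ``context`` in ``lines`` at or after ``start``.
+
+    Returns (index, fuzz), where fuzz grows with match looseness
+    (0 = exact, 1 = rstrip match, 100 = strip match); (-1, 0) if not found.
+    """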
+ if not context:
+ print("context is empty")
+ return start, 0
+
+ # Prefer identical
+ for i in range(start, len(lines)):
+ if lines[i : i + len(context)] == context:
+ return i, 0
+ # RStrip is ok
+ for i in range(start, len(lines)):
+ if [s.rstrip() for s in lines[i : i + len(context)]] == [s.rstrip() for s in context]:
+ return i, 1
+ # Fine, Strip is ok too.
+ for i in range(start, len(lines)):
+ if [s.strip() for s in lines[i : i + len(context)]] == [s.strip() for s in context]:
+ return i, 100
+ return -1, 0
+
+
+def find_context(lines: list[str], context: list[str], start: int, eof: bool) -> tuple[int, int]:
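+    """Like ``find_context_core``, but for end-of-file chunks try matching at the
+    end of the file first; falling back to a forward search adds a 10000 fuzz penalty.
+    """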
+ if eof:
+ new_index, fuzz = find_context_core(lines, context, len(lines) - len(context))
+ if new_index != -1:
+ return new_index, fuzz
+ new_index, fuzz = find_context_core(lines, context, start)
+ return new_index, fuzz + 10000
+ return find_context_core(lines, context, start)
+
+
+def peek_next_section(lines: list[str], index: int) -> tuple[list[str], list[Chunk], int, bool]:
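+    """Read one @@ section of an update block.
+
+    Returns (context lines from the original file, chunks with section-relative
+    ``orig_index``, index of the next unread patch line, whether "*** End of File" was seen).
+    """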
+ old: list[str] = []
+ del_lines: list[str] = []
+ ins_lines: list[str] = []
+ chunks: list[Chunk] = []
+ mode = "keep"
+ orig_index = index
+ while index < len(lines):
+ s = lines[index]
+ if s.startswith(
+ (
+ "@@",
+ "*** End Patch",
+ "*** Update File:",
+ "*** Delete File:",
+ "*** Add File:",
+ "*** End of File",
+ )
+ ):
+ break
+ if s == "***":
+ break
+ elif s.startswith("***"):
+ raise DiffError(f"Invalid Line: {s}")
+ index += 1
+ last_mode = mode
+ if s == "":
+ s = " "
+ if s[0] == "+":
+ mode = "add"
+ elif s[0] == "-":
+ mode = "delete"
+ elif s[0] == " ":
+ mode = "keep"
+ else:
+ raise DiffError(f"Invalid Line: {s}")
+ s = s[1:]
+ if mode == "keep" and last_mode != mode:
+ if ins_lines or del_lines:
+ chunks.append(
+ Chunk(
+ orig_index=len(old) - len(del_lines),
+ del_lines=del_lines,
+ ins_lines=ins_lines,
+ )
+ )
+ del_lines = []
+ ins_lines = []
+ if mode == "delete":
+ del_lines.append(s)
+ old.append(s)
+ elif mode == "add":
+ ins_lines.append(s)
+ elif mode == "keep":
+ old.append(s)
+ if ins_lines or del_lines:
+ chunks.append(
+ Chunk(
+ orig_index=len(old) - len(del_lines),
+ del_lines=del_lines,
+ ins_lines=ins_lines,
+ )
+ )
+ del_lines = []
+ ins_lines = []
+ if index < len(lines) and lines[index] == "*** End of File":
+ index += 1
+ return old, chunks, index, True
+ if index == orig_index:
+ raise DiffError(f"Nothing in this section - {index=} {lines[index]}")
+ return old, chunks, index, False
+
+
+def text_to_patch(text: str, orig: dict[str, str]) -> tuple[Patch, int]:
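+    """Parse a full "*** Begin Patch" ... "*** End Patch" envelope against the original files; returns the Patch and accumulated fuzz."""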
+ lines = text.strip().split("\n")
+ if len(lines) < 2 or not lines[0].startswith("*** Begin Patch") or lines[-1] != "*** End Patch":
+ raise DiffError("Invalid patch text")
+
+ parser = Parser(
+ current_files=orig,
+ lines=lines,
+ index=1,
+ )
+ parser.parse()
+ return parser.patch, parser.fuzz
+
+
+def identify_files_needed(text: str) -> list[str]:
+ lines = text.strip().split("\n")
+ result = set()
+ for line in lines:
+ if line.startswith("*** Update File: "):
+ result.add(line[len("*** Update File: ") :])
+ if line.startswith("*** Delete File: "):
+ result.add(line[len("*** Delete File: ") :])
+ return list(result)
+
+
+def _get_updated_file(text: str, action: PatchAction, path: str) -> str:
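+    """Apply an UPDATE action's chunks to the original ``text`` and return the patched file content."""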
+ assert action.type == ActionType.UPDATE
+ orig_lines = text.split("\n")
+ dest_lines = []
+ orig_index = 0
+ dest_index = 0
+ for chunk in action.chunks:
+ # Process the unchanged lines before the chunk
+        if chunk.orig_index > len(orig_lines):
+            raise DiffError(
+                f"_get_updated_file: {path}: chunk.orig_index {chunk.orig_index} > len(lines) {len(orig_lines)}"
+            )
+        if orig_index > chunk.orig_index:
+            raise DiffError(
+                f"_get_updated_file: {path}: orig_index {orig_index} > chunk.orig_index {chunk.orig_index}"
+            )
+ dest_lines.extend(orig_lines[orig_index : chunk.orig_index])
+ delta = chunk.orig_index - orig_index
+ orig_index += delta
+ dest_index += delta
+ # Process the inserted lines
+ if chunk.ins_lines:
+            dest_lines.extend(chunk.ins_lines)
+ dest_index += len(chunk.ins_lines)
+ orig_index += len(chunk.del_lines)
+ # Final part
+ dest_lines.extend(orig_lines[orig_index:])
+ delta = len(orig_lines) - orig_index
+ orig_index += delta
+ dest_index += delta
+ assert orig_index == len(orig_lines)
+ assert dest_index == len(dest_lines)
+ return "\n".join(dest_lines)
+
+
+def patch_to_commit(patch: Patch, orig: dict[str, str]) -> Commit:
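+    """Materialize a parsed Patch into a Commit holding complete old/new contents per file."""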
+ commit = Commit()
+ for path, action in patch.actions.items():
+ if action.type == ActionType.DELETE:
+ commit.changes[path] = FileChange(type=ActionType.DELETE, old_content=orig[path])
+ elif action.type == ActionType.ADD:
+ commit.changes[path] = FileChange(type=ActionType.ADD, new_content=action.new_file)
+ elif action.type == ActionType.UPDATE:
+ new_content = _get_updated_file(text=orig[path], action=action, path=path)
+ commit.changes[path] = FileChange(
+ type=ActionType.UPDATE,
+ old_content=orig[path],
+ new_content=new_content,
+ move_path=action.move_path,
+ )
+ return commit
+
+
+class DiffError(ValueError):
+ pass
+
+
+def load_files(paths: list[str], open_fn: Callable) -> dict[str, str]:
+ orig = {}
+ for path in paths:
+ orig[path] = open_fn(path)
+ return orig
+
+
+def apply_commit(commit: Commit, write_fn: Callable, remove_fn: Callable) -> None:
+ for path, change in commit.changes.items():
+ if change.type == ActionType.DELETE:
+ remove_fn(path)
+ elif change.type == ActionType.ADD:
+ write_fn(path, change.new_content)
+ elif change.type == ActionType.UPDATE:
+ if change.move_path:
+ write_fn(change.move_path, change.new_content)
+ remove_fn(path)
+ else:
+ write_fn(path, change.new_content)
+
+
+def process_patch(text: str, open_fn: Callable, write_fn: Callable, remove_fn: Callable) -> str:
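+    """End-to-end entry point: load the files a patch touches, parse it, and apply the resulting commit via the supplied I/O callbacks."""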
+ assert text.startswith("*** Begin Patch")
+ paths = identify_files_needed(text)
+ orig = load_files(paths, open_fn)
+ patch, fuzz = text_to_patch(text, orig)
+ commit = patch_to_commit(patch, orig)
+ apply_commit(commit, write_fn, remove_fn)
+ return "Done!"
+
+
+def open_file(path: str) -> str:
+ with open(path, "rt") as f:
+ return f.read()
+
+
+def write_file(path: str, content: str) -> None:
+ if path.startswith("/"):
+ print("We do not support absolute paths.")
+ return
+ if "/" in path:
+ parent = "/".join(path.split("/")[:-1])
+ os.makedirs(parent, exist_ok=True)
+ with open(path, "wt") as f:
+ f.write(content)
+
+
+def remove_file(path: str) -> None:
+ os.remove(path)
+
+
+def main():
+ import sys
+
+ patch_text = sys.stdin.read()
+ if not patch_text:
+ print("Please pass patch text through stdin")
+ return
+ try:
+ result = process_patch(patch_text, open_file, write_file, remove_file)
+ except DiffError as e:
+ print(str(e))
+ return
+ print(result)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/examples/gpt-5/gpt-5_frontend.ipynb b/examples/gpt-5/gpt-5_frontend.ipynb
new file mode 100644
index 0000000000..383d0b2a59
--- /dev/null
+++ b/examples/gpt-5/gpt-5_frontend.ipynb
@@ -0,0 +1,371 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "a83efdb6",
+ "metadata": {},
+ "source": [
+ "# Frontend with GPT-5\n",
+ "\n",
+ "GPT-5 is a large leap forward in frontend development. We have seen the model be excellent at developing full stack applications in one shot, making complex refactors look easy, and making surgical edits within large codebases. \n",
+ "\n",
+ "In this cookbook we will show some examples and some learnings of developing frontend applications with GPT-5 across multiple axes. \n",
+ "\n",
+ "## Intro\n",
+ "There are some general principles we have seen be effective in developing strong frontend applications. We share some of these learnings in the [prompt guide](https://cookbook.openai.com/examples/gpt-5/gpt-5_prompting_guide). Below are some important pieces to consider when building frontend applications.\n",
+ "\n",
+ "Here are libraries and packages we recommend to start with steering the model:\n",
+ "- Frameworks: Next.js (TypeScript), React, HTML\n",
+ "- Styling / UI: Tailwind CSS, shadcn/ui, Radix Themes\n",
+ "- Icons: Material Symbols, Heroicons, Lucide\n",
+ "- Animation: Motion\n",
+ "- Fonts: San Serif, Inter, Geist, Mona Sans, IBM Plex Sans, Manrope\n",
+ "\n",
+ "These packages are not an exhaustive list and we have seen many different application styles. \n",
+ "\n",
+ "Below you'll find an easy way to iterate over frontend abstractions on the API. We’re excited to see how users can unlock creativity with GPT-5.\n"
+ ]
+ },
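+  {
+   "cell_type": "markdown",
+   "id": "a83efdb7",
+   "metadata": {},
+   "source": [
+    "As a rough sketch of steering toward this stack (the wording and model choice are illustrative, and `client` is an `openai.OpenAI()` instance like the one created below):\n",
+    "\n",
+    "```python\n",
+    "developer_message = (\n",
+    "    \"Build frontends with Next.js (TypeScript), Tailwind CSS, shadcn/ui, \"\n",
+    "    \"Lucide icons, Motion for animation, and the Inter font.\"\n",
+    ")\n",
+    "response = client.responses.create(\n",
+    "    model=\"gpt-5\",\n",
+    "    input=[\n",
+    "        {\"role\": \"developer\", \"content\": developer_message},\n",
+    "        {\"role\": \"user\", \"content\": \"Make me a landing page for a coffee shop.\"},\n",
+    "    ],\n",
+    ")\n",
+    "```"
+   ]
+  },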
+ {
+ "cell_type": "markdown",
+ "id": "1c2168a3",
+ "metadata": {},
+ "source": [
+ "## Interactive Example\n",
+ "\n",
+ "Let's dive into an example of creating frontends from scratch. First let's create some help functions to see the generated websites from GPT-5."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "194e6566",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import re\n",
+ "import webbrowser\n",
+ "from pathlib import Path\n",
+ "\n",
+ "import openai\n",
+ "from openai.types.responses import ResponseInputParam\n",
+ "\n",
+ "client = openai.OpenAI()\n",
+ "\n",
+ "\n",
+ "def get_response_output_text(input: str | ResponseInputParam):\n",
+ " \n",
+ " response = client.responses.create(\n",
+ " model=\"gpt-5\",\n",
+ " input=input,\n",
+ " )\n",
+ " return response.output_text\n",
+ "\n",
+ "\n",
+ "def extract_html_from_text(text: str):\n",
+ " \"\"\"Extract an HTML code block from text; fallback to first code block, else full text.\"\"\"\n",
+ " html_block = re.search(r\"```html\\s*(.*?)\\s*```\", text, re.DOTALL | re.IGNORECASE)\n",
+ " if html_block:\n",
+ " result = html_block.group(1)\n",
+ " return result\n",
+ " any_block = re.search(r\"```\\s*(.*?)\\s*```\", text, re.DOTALL)\n",
+ " if any_block:\n",
+ " result = any_block.group(1)\n",
+ " return result\n",
+ " return text\n",
+ "\n",
+ "\n",
+ "def save_html(html: str, filename: str) -> Path:\n",
+ " \"\"\"Save HTML to outputs/ directory and return the path.\"\"\"\n",
+ " try:\n",
+ " base_dir = Path(__file__).parent\n",
+ " except NameError:\n",
+ " base_dir = Path.cwd()\n",
+ "\n",
+ " folder = \"outputs\"\n",
+ " outputs_dir = base_dir / folder\n",
+ " outputs_dir.mkdir(parents=True, exist_ok=True)\n",
+ "\n",
+ " output_path = outputs_dir / filename\n",
+ " output_path.write_text(html, encoding=\"utf-8\")\n",
+ " return output_path\n",
+ "\n",
+ "def open_in_browser(path: Path) -> None:\n",
+ " \"\"\"Open a file in the default browser (macOS compatible).\"\"\"\n",
+ " try:\n",
+ " webbrowser.open(path.as_uri())\n",
+ " except Exception:\n",
+ " os.system(f'open \"{path}\"')\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6aee99e6",
+ "metadata": {},
+ "source": [
+ "Now, let's combine the above into one helper function."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "8bb59cc7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def make_website_and_open_in_browser(*, website_input: str | ResponseInputParam, filename: str = \"website.html\"):\n",
+ " response_text = get_response_output_text(website_input)\n",
+ " html = extract_html_from_text(response_text)\n",
+ " output_path = save_html(html, filename)\n",
+ " open_in_browser(output_path)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "17e785bc",
+ "metadata": {},
+ "source": [
+ "We'll start with a simple example: one-shot building a retro gaming store with the right theme"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "a597c44a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "get_response: finished\n",
+ "extract_html_from_text: finished (raw text)\n",
+ "save_html: finished -> outputs/retro_dark.html\n",
+ "open_in_browser: finished\n"
+ ]
+ }
+ ],
+ "source": [
+ "make_website_and_open_in_browser(\n",
+ " website_input=\"Make me landing page for a retro-games store. Retro-arcade noir some might say\",\n",
+ " filename=\"retro_dark.html\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "65153df1",
+ "metadata": {},
+ "source": [
+ "Not bad for a one line, one shot prompt!\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "Now let's steer it to be lighter, and a bit softer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "84228036",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "get_response: finished\n",
+ "extract_html_from_text: finished (raw text)\n",
+ "save_html: finished -> outputs/retro_light.html\n",
+ "open_in_browser: finished\n"
+ ]
+ }
+ ],
+ "source": [
+ "make_website_and_open_in_browser(\n",
+ " website_input=\"Make me landing page for a retro-games store. Make it light, more pastel coloured & flowery (think Mario, not cyberpunk)\", \n",
+ " filename=\"retro_light.html\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d2cc44f0",
+ "metadata": {},
+ "source": [
+ "As you can see GPT-5 is incredibly steerable - with just a one line you can change entire applications effortlessly\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6ffde671",
+ "metadata": {},
+ "source": [
+ "But what if you have existing website designs that you want to make additions to? For example, we already have this dashboard.\n",
+ "\n",
+ "\n",
+ "\n",
+ "Since GPT-5 is natively multimodal and accepts both image and text input, when you are generating frontend applications we can take advantage of image input to improve model performance. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "24aa52f0",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "get_response: finished\n",
+ "extract_html_from_text: finished (raw text)\n",
+ "save_html: finished -> outputs/login_page.html\n",
+ "open_in_browser: finished\n"
+ ]
+ }
+ ],
+ "source": [
+ "import base64\n",
+ "from openai.types.responses import ResponseInputImageParam\n",
+ "\n",
+ "# Function to encode the image\n",
+ "def encode_image(image_path: str):\n",
+ " with open(image_path, \"rb\") as image_file:\n",
+ " return base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
+ "\n",
+ "image_path=\"../../images/input_image.png\"\n",
+ "encoded_image = encode_image(image_path)\n",
+ "input_image: ResponseInputImageParam = {\"type\": \"input_image\", \"image_url\": f\"data:image/png;base64,{encoded_image}\", \"detail\": \"auto\"}\n",
+ "input: ResponseInputParam = [\n",
+ " {\n",
+ " \"role\": \"user\",\n",
+ " \"content\": [\n",
+ " {\"type\": \"input_text\", \"text\": \"Can you make a login page for this website that maintains the same theme\"},\n",
+ " input_image,\n",
+ " ],\n",
+ " }\n",
+ "]\n",
+ "\n",
+ "make_website_and_open_in_browser(\n",
+ " website_input=input, \n",
+ " filename=\"login_page.html\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "93dcda0b",
+ "metadata": {},
+ "source": [
+ "As you can see, GPT-5 does an incredible job of matching the existing style & vibe of the app.\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "352f57b9",
+ "metadata": {},
+ "source": [
+ "So far this has been pretty static - let's try a more interactive task"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "a1f80a93",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "get_response: finished\n",
+ "extract_html_from_text: finished (raw text)\n",
+ "save_html: finished -> outputs/snake_game.html\n",
+ "open_in_browser: finished\n"
+ ]
+ }
+ ],
+ "source": [
+ "make_website_and_open_in_browser(\n",
+ " website_input=\"Make me a snake game. It should be futuristic, neon, cyberpunk style. Make sure the typography is suitably cool.\", \n",
+ " filename=\"snake_game.html\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fc11b7b2",
+ "metadata": {},
+ "source": [
+ "We've got a theme consistent snake game: matching colours, typography, and even sound\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6274e5f9",
+ "metadata": {},
+ "source": [
+ "We hope this has given some ideas of how powerful GPT-5 is at frontend. From a single underspecified prompt and API call, we get production grade outputs. \n",
+ "\n",
+ "Now it's your turn - we can't wait to see what you'll build"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "935abf26",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "get_response: finished\n",
+ "extract_html_from_text: finished (raw text)\n",
+ "save_html: finished -> outputs/your_website.html\n",
+ "open_in_browser: finished\n"
+ ]
+ }
+ ],
+ "source": [
+ "your_prompt = \"[edit this! what website would you like to build?]\"\n",
+ "\n",
+ "make_website_and_open_in_browser(\n",
+ " website_input=your_prompt, \n",
+ " filename=\"your_website.html\"\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "openai",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/gpt-5/gpt-5_new_params_and_tools.ipynb b/examples/gpt-5/gpt-5_new_params_and_tools.ipynb
new file mode 100644
index 0000000000..d19d48b271
--- /dev/null
+++ b/examples/gpt-5/gpt-5_new_params_and_tools.ipynb
@@ -0,0 +1,1284 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "b3def15a",
+ "metadata": {},
+ "source": [
+ "# GPT-5 New Params and Tools\n",
+ "\n",
+ "We’re introducing new developer controls in the GPT-5 series that give you greater control over model responses—from shaping output length and style to enforcing strict formatting. Below is a quick overview of the latest features:\n",
+ "\n",
+ "\n",
+ "| # | Feature | Overview | Values / Usage |\n",
+ "|----|---------|----------|----------------|\n",
+ "| 1. | **Verbosity Parameter** | Lets you hint the model to be more or less expansive in its replies. Keep prompts stable and use the parameter instead of re-writing. | • **low** → terse UX, minimal prose. • **medium** *(default)* → balanced detail. • **high** → verbose, great for audits, teaching, or hand-offs. |\n",
+ "| 2. | **Free-Form Function Calling** | Generate raw text payloads—anything from Python scripts to SQL queries—directly to your custom tool without JSON wrapping. Offers greater flexibility for external runtimes like: • Code sandboxes (Python, C++, Java, …) • SQL databases • Shell environments • Config generators | Use when structured JSON isn’t needed and raw text is more natural for the target tool. |\n",
+ "| 3. | **Context-Free Grammar (CFG)** | A set of production rules defining valid strings in a language. Each rule rewrites a non-terminal into terminals and/or other non-terminals, independent of surrounding context. Useful for constraining output to match the syntax of programming languages or custom formats in OpenAI tools. | Use as a contract to ensure the model emits only valid strings accepted by the grammar. |\n",
+ "\n",
+ "**Supported Models:** \n",
+ "- gpt-5 \n",
+ "- gpt-5-mini \n",
+ "- gpt-5-nano \n",
+ "\n",
+ "**Supported API Endpoints** \n",
+ "- Responses API \n",
+ "- Chat Completions API \n",
+ "\n",
+ "Note: We recommend to use Responses API with GPT-5 series of model to get the most performance out of the models. \n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cbeb1c44",
+ "metadata": {},
+ "source": [
+ "## Pre-requisites \n",
+ "\n",
+ "Let's begin with updating your OpenAI SDK that supports the new params and tools for GPT-5. Make sure you've set OPENAI_API_KEY as an environment variable. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "9850c90d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "openai 1.99.1\n",
+ "pandas 2.3.1\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install --quiet --upgrade openai pandas && \\\n",
+ "echo -n \"openai \" && pip show openai | grep '^Version:' | cut -d' ' -f2 && \\\n",
+ "echo -n \"pandas \" && pip show pandas | grep '^Version:' | cut -d' ' -f2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "04d6bc80",
+ "metadata": {},
+ "source": [
+ "## 1. Verbosity Parameter \n",
+ "\n",
+ "### 1.1 Overview \n",
+ "The verbosity parameter lets you hint the model to be more or less expansive in its replies. \n",
+ "\n",
+ "**Values:** \"low\", \"medium\", \"high\"\n",
+ "\n",
+ "- low → terse UX, minimal prose.\n",
+ "- medium (default) → balanced detail.\n",
+ "- high → verbose, great for audits, teaching, or hand-offs.\n",
+ "\n",
+ "Keep prompts stable and use the param rather than re-writing.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "d5260af8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
Verbosity
\n",
+ "
Sample Output
\n",
+ "
Output Tokens
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "
\n",
+ "
0
\n",
+ "
low
\n",
+ "
He found him behind the old shed, a damp ball of surprise,\n",
+ "eyes like two small moons against the dusk.\n",
+ "His palms trembled when they touched the ribs, a first brave promise —\n",
+ "the word that made them both grin: \"home.\"\n",
+ "\n",
+ "They learned the map of each other's days:\n",
+ "mud on the porch, the shorthand of paws on tile,\n",
+ "lessons in patience—sit, wait, the clumsy language of fetch—\n",
+ "evenings stitched together by a tail that kept time.\n",
+ "\n",
+ "The dog taught him how to be gentle with broken things:\n",
+ "scarred knees, lost kites, the ache of a scraped promise.\n",
+ "When the boy fell, the dog was there to push him up with a wet nose,\n",
+ "when the world tightened, the dog leaned in like a small sun.\n",
+ "\n",
+ "Years threaded silver into ears and softened the sprint,\n",
+ "the boy grew taller, pockets fuller of other lives.\n",
+ "On the last slow walk he carried the dog across the field they had worn bare,\n",
+ "and learned what it meant to hold and let go at once.\n",
+ "\n",
+ "Sometimes, in a house that now holds different voices,\n",
+ "he still calls the name and feels a short, bright tug at the hem of memory.
\n",
+ "
754
\n",
+ "
\n",
+ "
\n",
+ "
1
\n",
+ "
medium
\n",
+ "
He was small enough to be held in two palms,\n",
+ "hands surprised by the light, warm weight of a breathing animal.\n",
+ "The dog came with a cardboard box of toys,\n",
+ "a collar that jingled like a laugh,\n",
+ "and eyes the color of late autumn pools.\n",
+ "\n",
+ "They learned each other by touch:\n",
+ "how a thumb smoothed a lump of fur,\n",
+ "how a paw fit between fingers like a promise.\n",
+ "Afternoons became slow, luminous things —\n",
+ "fetch across the yard, the dog tumbling through dandelions,\n",
+ "the boy counting sticks as if they were crowns.\n",
+ "\n",
+ "At night the dog curled against a ribcage\n",
+ "and taught the boy how to sleep with one ear listening.\n",
+ "They shared secrets whispered into warm fur,\n",
+ "bandaged scraped knees with careful nuzzles,\n",
+ "and sat together on porches while light leaned away.\n",
+ "\n",
+ "The world outside was larger, sharper,\n",
+ "but he had a companion who never measured fear,\n",
+ "only presence — a warm, steady map back home.\n",
+ "Lessons came not in words but in ritual:\n",
+ "the bowl filled, the leash clicked, the hush before storms.\n",
+ "Responsibility fit him like a second skin.\n",
+ "\n",
+ "Years threaded through collars and collars loosened,\n",
+ "legs that once ran like wind now slowed to wiser steps.\n",
+ "The boy learned how to be brave in other ways:\n",
+ "walking into rooms without the certainty of pawprints,\n",
+ "carrying a small quiet grief folded into his chest.\n",
+ "\n",
+ "Even when the dog went where yards are always green,\n",
+ "he left prints on the boy’s life that never washed away —\n",
+ "a tucked-in photograph, a faded ball, the smell of wet fur in summer rain.\n",
+ "Sometimes, when the house is very still, the boy — now taller,\n",
+ "still hears the jingle of a collar and smiles,\n",
+ "for he knows how to be loved, and how to love back.
\n",
+ "
939
\n",
+ "
\n",
+ "
\n",
+ "
2
\n",
+ "
high
\n",
+ "
The dog arrived like a small sun, all fur and surprise,\n",
+ "an apology for shoes chewed at the doorstep.\n",
+ "He fit into the boy’s lap as if he had been made there:\n",
+ "a warm, wiggling answer to the question the boy never knew he’d asked.\n",
+ "\n",
+ "They learned one another by touch — the boy’s clumsy fingers\n",
+ "finding the soft map of ears, the steady paddling heart.\n",
+ "The dog learned the geometry of a bedroom, a couch, a favorite chair;\n",
+ "the boy learned the weight of responsibility when the bowl was empty.\n",
+ "\n",
+ "They practiced words like spellcraft: sit, stay, no — then laugh\n",
+ "when the commands rearranged themselves into games.\n",
+ "Mud sketched secret rivers on their trousers;\n",
+ "the neighborhood became a kingdom to patrol on two legs and four.\n",
+ "\n",
+ "At night the dog lay like a small, even moon against the boy’s ribs,\n",
+ "breathing the rhythm of a shared world into his dreams.\n",
+ "Storms were smaller storms then, because the dog’s body was a promise:\n",
+ "that thunder could be waited out, that hands could find something warm.\n",
+ "\n",
+ "They buried treasures — bones, a tin soldier, a lost mitten —\n",
+ "beneath the old apple tree, sworn companions to the earth.\n",
+ "They chased imagined villains down fences and over hedges,\n",
+ "and sometimes, at the creek, the boy learned the exact angle of courage:\n",
+ "how to step in, then trust another heartbeat beside his.\n",
+ "\n",
+ "The dog taught him how to be brave without knowing the word,\n",
+ "showed him where loyalty lived (in the bright, impatient wag of a tail).\n",
+ "He taught him how to say hello, properly; how to stay when needed;\n",
+ "how to forgive a day of wrongs with a single, earnest lick.\n",
+ "\n",
+ "Seasons folded themselves into the years;\n",
+ "the boy measured time in collars replaced, in new aches in the dog’s hips.\n",
+ "Snow made the yard a clean page; leaves wrote their own goodbyes.\n",
+ "When the dog’s runs slowed, the boy learned a different kind of steady:\n",
+ "to sit more, to listen longer, to count the small comforts.\n",
+ "\n",
+ "There was a last evening where the light sat low and patient,\n",
+ "and the boy — now older in ways that did not fit his face — held that same warm weight.\n",
+ "He remembered the first bark like a promise kept, the first wild sprint across the grass,\n",
+ "and he kept, beneath his ribs, the map of a thousand small mercies.\n",
+ "\n",
+ "Now when he walks by an old apple tree, his hand finds empty air,\n",
+ "but his steps know how to make room for another’s rhythm.\n",
+ "Sometimes a stray dog will glance his way and tilt its head,\n",
+ "and he smiles, answering without words what he was taught long ago:\n",
+ "\n",
+ "how to open a hand, how to offer a place on the floor,\n",
+ "how to recognize the sun when it returns — in fur, in breath, in the simple, astonished love\n",
+ "of a first pet who showed a boy what home can mean.
\n",
+ "
1174
\n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from openai import OpenAI\n",
+ "import pandas as pd\n",
+ "from IPython.display import display\n",
+ "\n",
+ "client = OpenAI()\n",
+ "\n",
+ "question = \"Write a poem about a boy and his first pet dog.\"\n",
+ "\n",
+ "data = []\n",
+ "\n",
+ "for verbosity in [\"low\", \"medium\", \"high\"]:\n",
+ " response = client.responses.create(\n",
+ " model=\"gpt-5-mini\",\n",
+ " input=question,\n",
+ " text={\"verbosity\": verbosity}\n",
+ " )\n",
+ "\n",
+ " # Extract text\n",
+ " output_text = \"\"\n",
+ " for item in response.output:\n",
+ " if hasattr(item, \"content\"):\n",
+ " for content in item.content:\n",
+ " if hasattr(content, \"text\"):\n",
+ " output_text += content.text\n",
+ "\n",
+ " # Truncate for display\n",
+ " if len(output_text) > 700:\n",
+ " sample_output = (\n",
+ " output_text[:500]\n",
+ " + \" ... redacted for brevity ... \"\n",
+ " + output_text[-200:]\n",
+ " )\n",
+ " else:\n",
+ " sample_output = output_text\n",
+ "\n",
+ " usage = response.usage\n",
+ " data.append({\n",
+ " \"Verbosity\": verbosity,\n",
+ " \"Sample Output\": output_text,\n",
+ " \"Output Tokens\": usage.output_tokens\n",
+ " })\n",
+ "\n",
+ "# Create DataFrame\n",
+ "df = pd.DataFrame(data)\n",
+ "\n",
+ "# Display nicely with centered headers\n",
+ "pd.set_option('display.max_colwidth', None)\n",
+ "styled_df = df.style.set_table_styles(\n",
+ " [\n",
+ " {'selector': 'th', 'props': [('text-align', 'center')]}, # Center column headers\n",
+ " {'selector': 'td', 'props': [('text-align', 'left')]} # Left-align table cells\n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "display(styled_df)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "07fdb759",
+ "metadata": {},
+ "source": [
+ "The output tokens scale roughly linearly with verbosity: low (754) → medium (939) → high (1174)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "19b2e969",
+ "metadata": {},
+ "source": [
+ "### 2.3 Using Verbosity for Coding Use Cases \n",
+ "\n",
+ "The verbosity parameter also influences the length and complexity of generated code, as well as the depth of accompanying explanations. Here's an example, wherein we use various verboisty levels for a task to generate a Python program that sorts an array of 1000000 random numbers. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "aa6b6b12",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--------------------------------\n",
+ "Verbosity: low\n",
+ "Output:\n",
+ "#!/usr/bin/env python3\n",
+ "import random\n",
+ "import time\n",
+ "\n",
+ "def main():\n",
+ " N = 1_000_000\n",
+ " # generate 1,000,000 random floats\n",
+ " arr = [random.random() for _ in range(N)]\n",
+ "\n",
+ " t0 = time.perf_counter()\n",
+ " arr.sort() # in-place Timsort\n",
+ " t1 = time.perf_counter()\n",
+ "\n",
+ " print(f\"Sorted {N} numbers in {t1 - t0:.4f} seconds\")\n",
+ " # optional quick checks\n",
+ " print(\"First 5:\", arr[:5])\n",
+ " print(\"Last 5:\", arr[-5:])\n",
+ " print(\"Verified sorted:\", all(arr[i] <= arr[i+1] for i in range(len(arr)-1)))\n",
+ "\n",
+ "if __name__ == \"__main__\":\n",
+ " main()\n",
+ "Tokens => input: 21 | output: 877\n"
+ ]
+ }
+ ],
+ "source": [
+ "from openai import OpenAI\n",
+ "\n",
+ "client = OpenAI()\n",
+ "\n",
+ "prompt = \"Output a Python program that sorts an array of 1000000 random numbers\"\n",
+ "\n",
+ "def ask_with_verbosity(verbosity: str, question: str):\n",
+ " response = client.responses.create(\n",
+ " model=\"gpt-5-mini\",\n",
+ " input=question,\n",
+ " text={\n",
+ " \"verbosity\": verbosity\n",
+ " }\n",
+ " )\n",
+ "\n",
+ " # Extract assistant's text output\n",
+ " output_text = \"\"\n",
+ " for item in response.output:\n",
+ " if hasattr(item, \"content\"):\n",
+ " for content in item.content:\n",
+ " if hasattr(content, \"text\"):\n",
+ " output_text += content.text\n",
+ "\n",
+ " # Token usage details\n",
+ " usage = response.usage\n",
+ "\n",
+ " print(\"--------------------------------\")\n",
+ " print(f\"Verbosity: {verbosity}\")\n",
+ " print(\"Output:\")\n",
+ " print(output_text)\n",
+ " print(\"Tokens => input: {} | output: {}\".format(\n",
+ " usage.input_tokens, usage.output_tokens\n",
+ " ))\n",
+ "\n",
+ "\n",
+ "# Example usage:\n",
+ "ask_with_verbosity(\"low\", prompt)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7a741167",
+ "metadata": {},
+ "source": [
+ "Notice that the code output is a plain script. Now, lets run with 'medium' "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "d2efe2d7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--------------------------------\n",
+ "Verbosity: medium\n",
+ "Output:\n",
+ "Here's a simple Python program that generates 1,000,000 random floats and sorts them using Python's built-in Timsort. It times generation and sorting and verifies the result.\n",
+ "\n",
+ "```python\n",
+ "import random\n",
+ "import time\n",
+ "\n",
+ "def main():\n",
+ " N = 1_000_000\n",
+ "\n",
+ " print(f\"Generating {N} random numbers...\")\n",
+ " t0 = time.perf_counter()\n",
+ " arr = [random.random() for _ in range(N)]\n",
+ " t1 = time.perf_counter()\n",
+ " print(f\"Generated in {t1 - t0:.3f} seconds\")\n",
+ "\n",
+ " print(\"Sorting...\")\n",
+ " t2 = time.perf_counter()\n",
+ " arr.sort()\n",
+ " t3 = time.perf_counter()\n",
+ " print(f\"Sorted in {t3 - t2:.3f} seconds\")\n",
+ "\n",
+ " # Quick verification\n",
+ " is_sorted = all(arr[i] <= arr[i+1] for i in range(len(arr)-1))\n",
+ " print(\"Verified sorted:\", is_sorted)\n",
+ "\n",
+ " print(\"First 10 elements:\", arr[:10])\n",
+ " print(\"Last 10 elements:\", arr[-10:])\n",
+ "\n",
+ "if __name__ == \"__main__\":\n",
+ " main()\n",
+ "```\n",
+ "\n",
+ "If you prefer a faster/more memory-efficient approach and have NumPy installed, you can replace the generation and sort with:\n",
+ "- arr = np.random.random(N)\n",
+ "- arr.sort()\n",
+ "Tokens => input: 21 | output: 1178\n"
+ ]
+ }
+ ],
+ "source": [
+ "ask_with_verbosity(\"medium\", prompt)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fa48bbeb",
+ "metadata": {},
+ "source": [
+ "Medium verboisty, generated richer code with additioanl explanations. Let's do the same with high. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "4f92fb12",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--------------------------------\n",
+ "Verbosity: high\n",
+ "Output:\n",
+ "Below are two complete Python programs you can run to generate and sort 1,000,000 random numbers. One is a pure-Python implementation using the built-in list and list.sort() (Timsort). The other uses NumPy (faster and far more memory-efficient for large numeric arrays). Each script times generation and sorting and optionally verifies the result is sorted.\n",
+ "\n",
+ "Pure-Python version (no extra dependencies)\n",
+ "- Generates 1,000,000 Python floats with random.random().\n",
+ "- Uses list.sort() (Timsort), which is O(n log n) and very efficient in practice.\n",
+ "- Note: Python float objects have overhead, so the list will use substantially more memory than a raw numeric array.\n",
+ "\n",
+ "Save as sort_random_pure.py and run: python sort_random_pure.py\n",
+ "You can change the size with the --count option.\n",
+ "\n",
+ "```python\n",
+ "#!/usr/bin/env python3\n",
+ "\"\"\"\n",
+ "Generate and sort N random numbers using pure Python list and list.sort().\n",
+ "Default N = 1_000_000. Use --count to override.\n",
+ "\"\"\"\n",
+ "import argparse\n",
+ "import random\n",
+ "import time\n",
+ "import sys\n",
+ "\n",
+ "def is_sorted(seq):\n",
+ " # Efficient check: compare neighbours\n",
+ " return all(x <= y for x, y in zip(seq, seq[1:]))\n",
+ "\n",
+ "def main():\n",
+ " parser = argparse.ArgumentParser(description=\"Generate and sort random numbers (pure Python).\")\n",
+ " parser.add_argument(\"--count\", \"-n\", type=int, default=1_000_000,\n",
+ " help=\"Number of random numbers to generate (default: 1_000_000)\")\n",
+ " parser.add_argument(\"--seed\", type=int, default=None, help=\"Optional random seed\")\n",
+ " args = parser.parse_args()\n",
+ "\n",
+ " n = args.count\n",
+ " if args.seed is not None:\n",
+ " random.seed(args.seed)\n",
+ "\n",
+ " print(f\"Generating {n:,} random floats...\")\n",
+ " t0 = time.perf_counter()\n",
+ " data = [random.random() for _ in range(n)]\n",
+ " t1 = time.perf_counter()\n",
+ " print(f\"Generation took {t1 - t0:.3f} seconds.\")\n",
+ "\n",
+ " print(\"Sorting...\")\n",
+ " t2 = time.perf_counter()\n",
+ " data.sort() # in-place Timsort\n",
+ " t3 = time.perf_counter()\n",
+ " print(f\"Sorting took {t3 - t2:.3f} seconds.\")\n",
+ "\n",
+ " print(\"Verifying sorted order...\")\n",
+ " t4 = time.perf_counter()\n",
+ " ok = is_sorted(data)\n",
+ " t5 = time.perf_counter()\n",
+ " print(f\"Verification took {t5 - t4:.3f} seconds. Sorted: {ok}\")\n",
+ "\n",
+ " # Example: print first/last 3 values\n",
+ " print(\"First 3:\", data[:3])\n",
+ " print(\"Last 3:\", data[-3:])\n",
+ "\n",
+ "if __name__ == \"__main__\":\n",
+ " main()\n",
+ "```\n",
+ "\n",
+ "NumPy version (recommended for performance and memory)\n",
+ "- Requires NumPy installed (pip install numpy).\n",
+ "- Uses np.random.rand for fast generation and np.sort or ndarray.sort for sorting.\n",
+ "- Much less memory overhead: 1,000,000 float64 values use ~8 MB, whereas a Python list of float objects can use dozens of megabytes more.\n",
+ "\n",
+ "Save as sort_random_numpy.py and run: python sort_random_numpy.py\n",
+ "\n",
+ "```python\n",
+ "#!/usr/bin/env python3\n",
+ "\"\"\"\n",
+ "Generate and sort N random numbers using NumPy.\n",
+ "Default N = 1_000_000. Use --count to override.\n",
+ "Requires: numpy\n",
+ "\"\"\"\n",
+ "import argparse\n",
+ "import time\n",
+ "import numpy as np\n",
+ "\n",
+ "def main():\n",
+ " parser = argparse.ArgumentParser(description=\"Generate and sort random numbers (NumPy).\")\n",
+ " parser.add_argument(\"--count\", \"-n\", type=int, default=1_000_000,\n",
+ " help=\"Number of random numbers to generate (default: 1_000_000)\")\n",
+ " parser.add_argument(\"--seed\", type=int, default=None, help=\"Optional random seed\")\n",
+ " args = parser.parse_args()\n",
+ "\n",
+ " n = args.count\n",
+ " if args.seed is not None:\n",
+ " np.random.seed(args.seed)\n",
+ "\n",
+ " print(f\"Generating {n:,} random floats with NumPy...\")\n",
+ " t0 = time.perf_counter()\n",
+ " data = np.random.rand(n) # float64 by default\n",
+ " t1 = time.perf_counter()\n",
+ " print(f\"Generation took {t1 - t0:.3f} seconds.\")\n",
+ "\n",
+ " print(\"Sorting (in-place) with ndarray.sort()...\")\n",
+ " t2 = time.perf_counter()\n",
+ " data.sort() # in-place sort\n",
+ " t3 = time.perf_counter()\n",
+ " print(f\"Sorting took {t3 - t2:.3f} seconds.\")\n",
+ "\n",
+ " print(\"Verifying sorted order...\")\n",
+ " t4 = time.perf_counter()\n",
+ " ok = np.all(np.diff(data) >= 0)\n",
+ " t5 = time.perf_counter()\n",
+ " print(f\"Verification took {t5 - t4:.3f} seconds. Sorted: {bool(ok)}\")\n",
+ "\n",
+ " # Example: print first/last 3 values\n",
+ " print(\"First 3:\", data[:3])\n",
+ " print(\"Last 3:\", data[-3:])\n",
+ "\n",
+ "if __name__ == \"__main__\":\n",
+ " main()\n",
+ "```\n",
+ "\n",
+ "Notes, tips, and expected behavior\n",
+ "- Speed: NumPy will typically be faster for generation and use much less memory, because it stores numbers in a contiguous C array (8 bytes per float64). Pure-Python lists have per-object overhead (tens of bytes per float), and list generation + sorting will incur more Python-level overhead.\n",
+ "- Sorting complexity: both approaches use comparison-based sorts (Timsort for list.sort, quicksort/mergesort/heap variants in NumPy depending on algorithm choice) with typical O(n log n) runtime.\n",
+ "- If you only need a few smallest/largest values (e.g., top-k), consider heapq.nsmallest/nlargest or numpy.partition which can be faster than a full sort.\n",
+ "- If you want reproducible results, pass a fixed seed (see --seed).\n",
+ "- If memory becomes an issue and you only need to produce sorted output, consider external sorting methods (e.g., sort chunks and merge on disk) or using array.array('d') or memory-mapped files via numpy.memmap.\n",
+ "\n",
+ "If you want, I can:\n",
+ "- Provide a version that writes/reads from disk and performs an external merge sort to handle arbitrarily large arrays with limited memory.\n",
+ "- Provide a version that finds only the k smallest or k largest items more efficiently than sorting the whole list.\n",
+ "Tokens => input: 21 | output: 2176\n"
+ ]
+ }
+ ],
+ "source": [
+ "ask_with_verbosity(\"high\", prompt)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "68bac4a4",
+ "metadata": {},
+ "source": [
+ "High verbosity yielded additional details and explanations. \n",
+ "\n",
+ "### 1.3 Takeaways \n",
+ "\n",
+ "The new verbosity parameter reliably scales both the length and depth of the model’s output while preserving correctness and reasoning quality - **without changing the underlying prompt**.\n",
+ "In this example:\n",
+ "\n",
+ "- **Low verbosity** produces a minimal, functional script with no extra comments or structure.\n",
+ "- **Medium verbosity** adds explanatory comments, function structure, and reproducibility controls.\n",
+ "- **High verbosity** yields a comprehensive, production-ready script with argument parsing, multiple sorting methods, timing/verification, usage notes, and best-practice tips."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "331aae97",
+ "metadata": {},
+ "source": [
+ "## 2. Free‑Form Function Calling\n",
+ "\n",
+ "### 2.1 Overview \n",
+ "GPT‑5 can now send raw text payloads - anything from Python scripts to SQL queries - to your custom tool without wrapping the data in JSON using the new tool `\"type\": \"custom\"`. This differs from classic structured function calls, giving you greater flexibility when interacting with external runtimes such as:\n",
+ "\n",
+ "- code_exec with sandboxes (Python, C++, Java, …)\n",
+ "- SQL databases\n",
+ "- Shell environments\n",
+ "- Configuration generators\n",
+ "\n",
+ "**Note that custom tool type does NOT support parallel tool calling.**\n",
+ "\n",
+ "### 2.2 Quick Start Example - Compute the Area of a Circle"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3f6d3e73",
+ "metadata": {},
+ "source": [
+ "The code below produces a simple python code to calculate area of a circle, and instruct the model to use the free-form tool call to output the result. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "02d3f9d3",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ResponseReasoningItem(id='rs_6894d0c323d481a2b727907746def8ec03e38603225fe1bd', summary=[], type='reasoning', content=[], encrypted_content=None, status=None), ResponseOutputMessage(id='ctc_6894d0c4917881a29923bb525509b34003e38603225fe1bd', content=None, role=None, status='completed', type='custom_tool_call', call_id='call_1ACilrk0d1DISLvW4Q2iE0jc', input='# Calculate the area of a circle where radius = number of \\'r\\'s in \"strawberry\"\\nimport math\\nradius = \"strawberry\".count(\\'r\\')\\narea = math.pi * radius**2\\n{\"radius\": radius, \"area\": area, \"area_exact\": f\"{radius**2}*pi\"}', name='code_exec')]\n"
+ ]
+ }
+ ],
+ "source": [
+ "from openai import OpenAI\n",
+ "\n",
+ "client = OpenAI()\n",
+ "\n",
+ "response = client.responses.create(\n",
+ " model=\"gpt-5-mini\",\n",
+ " input=\"Please use the code_exec tool to calculate the area of a circle with radius equal to the number of 'r's in strawberry\",\n",
+ " text={\"format\": {\"type\": \"text\"}},\n",
+ " tools=[\n",
+ " {\n",
+ " \"type\": \"custom\",\n",
+ " \"name\": \"code_exec\",\n",
+ " \"description\": \"Executes arbitrary python code\",\n",
+ " }\n",
+ " ]\n",
+ ")\n",
+ "print(response.output)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e63d6a2c",
+ "metadata": {},
+ "source": [
+ "The model emits a `tool call` containing raw Python. You execute that code server‑side, capture the printed result, and send it back in a follow‑up responses.create call."
+ ]
+ },
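+  {
+   "cell_type": "markdown",
+   "id": "e63d6a2d",
+   "metadata": {},
+   "source": [
+    "Here is one way that round trip could look. It assumes the tool call sits at `response.output[1]` (as in the output above) and uses a bare `exec` with captured stdout as a stand-in for a real sandbox - do not run untrusted model-generated code this way in production.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e63d6a2e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import contextlib\n",
+    "import io\n",
+    "\n",
+    "tool_call = response.output[1]  # the custom_tool_call emitted above\n",
+    "\n",
+    "# Stand-in for a real sandbox: run the generated code and capture stdout.\n",
+    "buf = io.StringIO()\n",
+    "with contextlib.redirect_stdout(buf):\n",
+    "    exec(tool_call.input)\n",
+    "\n",
+    "# Send the tool result back so the model can finish its answer.\n",
+    "follow_up = client.responses.create(\n",
+    "    model=\"gpt-5-mini\",\n",
+    "    previous_response_id=response.id,\n",
+    "    input=[\n",
+    "        {\n",
+    "            \"type\": \"function_call_output\",\n",
+    "            \"call_id\": tool_call.call_id,\n",
+    "            \"output\": buf.getvalue() or \"(no stdout)\",\n",
+    "        }\n",
+    "    ],\n",
+    "    tools=[\n",
+    "        {\n",
+    "            \"type\": \"custom\",\n",
+    "            \"name\": \"code_exec\",\n",
+    "            \"description\": \"Executes arbitrary python code\",\n",
+    "        }\n",
+    "    ],\n",
+    ")\n",
+    "print(follow_up.output_text)"
+   ]
+  },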
+ {
+ "cell_type": "markdown",
+ "id": "44f6c39c",
+ "metadata": {},
+ "source": [
+ "### 2.3 Mini‑Benchmark – Sorting an Array in Three Languages\n",
+ "To illustrate the use of free form tool calling, we will ask GPT‑5 to:\n",
+ "- Generate Python, C++, and Java code that sorts a fixed array 10 times.\n",
+ "- Print only the time (in ms) taken for each iteration in the code. \n",
+ "- Call all three functions, and then stop "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "2e9ca32e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--- tool name ---\n",
+ "code_exec_python\n",
+ "--- tool call argument (generated code) ---\n",
+ "arr_orig = [448, 986, 255, 884, 632, 623, 246, 439, 936, 925, 644, 159, 777, 986, 706, 723, 534, 862, 195, 686, 846, 880, 970, 276, 613, 736, 329, 622, 870, 284, 945, 708, 267, 327, 678, 807, 687, 890, 907, 645, 364, 333, 385, 262, 730, 603, 945, 358, 923, 930, 761, 504, 870, 561, 517, 928, 994, 949, 233, 137, 670, 555, 149, 870, 997, 809, 180, 498, 914, 508, 411, 378, 394, 368, 766, 486, 757, 319, 338, 159, 585, 934, 654, 194, 542, 188, 934, 163, 889, 736, 792, 737, 667, 772, 198, 971, 459, 402, 989, 949]\n",
+ "\n",
+ "import time\n",
+ "\n",
+ "start = time.perf_counter_ns()\n",
+ "for _ in range(10):\n",
+ " arr = arr_orig.copy()\n",
+ " arr.sort()\n",
+ "elapsed_ms = (time.perf_counter_ns() - start) // 1_000_000\n",
+ "print(elapsed_ms)\n",
+ "--- tool name ---\n",
+ "code_exec_cpp\n",
+ "--- tool call argument (generated code) ---\n",
+ "#include \n",
+ "#include \n",
+ "#include \n",
+ "#include \n",
+ "int main() {\n",
+ " std::vector orig = {448, 986, 255, 884, 632, 623, 246, 439, 936, 925, 644, 159, 777, 986, 706, 723, 534, 862, 195, 686, 846, 880, 970, 276, 613, 736, 329, 622, 870, 284, 945, 708, 267, 327, 678, 807, 687, 890, 907, 645, 364, 333, 385, 262, 730, 603, 945, 358, 923, 930, 761, 504, 870, 561, 517, 928, 994, 949, 233, 137, 670, 555, 149, 870, 997, 809, 180, 498, 914, 508, 411, 378, 394, 368, 766, 486, 757, 319, 338, 159, 585, 934, 654, 194, 542, 188, 934, 163, 889, 736, 792, 737, 667, 772, 198, 971, 459, 402, 989, 949};\n",
+ " auto start = std::chrono::steady_clock::now();\n",
+ " for (int i = 0; i < 10; ++i) {\n",
+ " auto arr = orig;\n",
+ " std::sort(arr.begin(), arr.end());\n",
+ " }\n",
+ " auto end = std::chrono::steady_clock::now();\n",
+ " auto ms = std::chrono::duration_cast(end - start).count();\n",
+ " std::cout << ms << std::endl;\n",
+ " return 0;\n",
+ "}\n",
+ "--- tool name ---\n",
+ "code_exec_java\n",
+ "--- tool call argument (generated code) ---\n",
+ "import java.util.Arrays;\n",
+ "public class Main {\n",
+ " public static void main(String[] args) {\n",
+ " int[] orig = new int[] {448, 986, 255, 884, 632, 623, 246, 439, 936, 925, 644, 159, 777, 986, 706, 723, 534, 862, 195, 686, 846, 880, 970, 276, 613, 736, 329, 622, 870, 284, 945, 708, 267, 327, 678, 807, 687, 890, 907, 645, 364, 333, 385, 262, 730, 603, 945, 358, 923, 930, 761, 504, 870, 561, 517, 928, 994, 949, 233, 137, 670, 555, 149, 870, 997, 809, 180, 498, 914, 508, 411, 378, 394, 368, 766, 486, 757, 319, 338, 159, 585, 934, 654, 194, 542, 188, 934, 163, 889, 736, 792, 737, 667, 772, 198, 971, 459, 402, 989, 949};\n",
+ " long start = System.nanoTime();\n",
+ " for (int i = 0; i < 10; i++) {\n",
+ " int[] arr = Arrays.copyOf(orig, orig.length);\n",
+ " Arrays.sort(arr);\n",
+ " }\n",
+ " long elapsedMs = (System.nanoTime() - start) / 1_000_000;\n",
+ " System.out.println(elapsedMs);\n",
+ " }\n",
+ "}\n"
+ ]
+ }
+ ],
+ "source": [
+ "from openai import OpenAI\n",
+ "from typing import List, Optional\n",
+ "\n",
+ "MODEL_NAME = \"gpt-5\"\n",
+ "\n",
+ "# Tools that will be passed to every model invocation. They are defined once so\n",
+ "# that the configuration lives in a single place.\n",
+ "TOOLS = [\n",
+ " {\n",
+ " \"type\": \"custom\",\n",
+ " \"name\": \"code_exec_python\",\n",
+ " \"description\": \"Executes python code\",\n",
+ " },\n",
+ " {\n",
+ " \"type\": \"custom\",\n",
+ " \"name\": \"code_exec_cpp\",\n",
+ " \"description\": \"Executes c++ code\",\n",
+ " },\n",
+ " {\n",
+ " \"type\": \"custom\",\n",
+ " \"name\": \"code_exec_java\",\n",
+ " \"description\": \"Executes java code\",\n",
+ " },\n",
+ "]\n",
+ "\n",
+ "client = OpenAI()\n",
+ "\n",
+ "def create_response(\n",
+ " input_messages: List[dict],\n",
+ " previous_response_id: Optional[str] = None,\n",
+ "):\n",
+ " \"\"\"Wrapper around ``client.responses.create``.\n",
+ "\n",
+ " Parameters\n",
+ " ----------\n",
+ " input_messages: List[dict]\n",
+ " The running conversation history to feed to the model.\n",
+ " previous_response_id: str | None\n",
+ " Pass the ``response.id`` from the *previous* call so the model can keep\n",
+ " the thread of the conversation. Omit on the very first request.\n",
+ " \"\"\"\n",
+ " kwargs = {\n",
+ " \"model\": MODEL_NAME,\n",
+ " \"input\": input_messages,\n",
+ " \"text\": {\"format\": {\"type\": \"text\"}},\n",
+ " \"tools\": TOOLS,\n",
+ " }\n",
+ " if previous_response_id:\n",
+ " kwargs[\"previous_response_id\"] = previous_response_id\n",
+ "\n",
+ " return client.responses.create(**kwargs)\n",
+ "\n",
+ "# Recursive \n",
+ "def run_conversation(\n",
+ " input_messages: List[dict],\n",
+ " previous_response_id: Optional[str] = None,\n",
+ "):\n",
+ " \n",
+ " response = create_response(input_messages, previous_response_id)\n",
+ "\n",
+ " # ``response.output`` is expected to be a list where element 0 is the model\n",
+ " # message. Element 1 (if present) denotes a tool call. When the model is\n",
+ " # done with tool calls, that element is omitted.\n",
+ " tool_call = response.output[1] if len(response.output) > 1 else None\n",
+ "\n",
+ " if tool_call and tool_call.type == \"custom_tool_call\":\n",
+ " print(\"--- tool name ---\")\n",
+ " print(tool_call.name)\n",
+ " print(\"--- tool call argument (generated code) ---\")\n",
+ " print(tool_call.input)\n",
+ " \n",
+ " # Add a synthetic *tool result* so the model can continue the thread.\n",
+ " \n",
+ " input_messages.append(\n",
+ " {\n",
+ " \"type\": \"function_call_output\",\n",
+ " \"call_id\": tool_call.call_id,\n",
+ " \"output\": \"done\", # <-- replace with the result of the tool call\n",
+ " }\n",
+ " )\n",
+ "\n",
+ " # Recurse with updated conversation and track the response id so the\n",
+ " # model is aware of the prior turn.\n",
+ " return run_conversation(input_messages, previous_response_id=response.id)\n",
+ " else:\n",
+ " # Base-case: no further tool call - return. \n",
+ " return \n",
+ "\n",
+ "\n",
+ "prompt = \"\"\"\n",
+ "Write code to sort the array of numbers in three languages: C++, Python and Java (10 times each)using code_exec functions.\n",
+ "\n",
+ "ALWAYS CALL THESE THREE FUNCTIONS EXACTLY ONCE: code_exec_python, code_exec_cpp and code_exec_java tools to sort the array in each language. Stop once you've called these three functions in each language once.\n",
+ "\n",
+ "Print only the time it takes to sort the array in milliseconds. \n",
+ "\n",
+ "[448, 986, 255, 884, 632, 623, 246, 439, 936, 925, 644, 159, 777, 986, 706, 723, 534, 862, 195, 686, 846, 880, 970, 276, 613, 736, 329, 622, 870, 284, 945, 708, 267, 327, 678, 807, 687, 890, 907, 645, 364, 333, 385, 262, 730, 603, 945, 358, 923, 930, 761, 504, 870, 561, 517, 928, 994, 949, 233, 137, 670, 555, 149, 870, 997, 809, 180, 498, 914, 508, 411, 378, 394, 368, 766, 486, 757, 319, 338, 159, 585, 934, 654, 194, 542, 188, 934, 163, 889, 736, 792, 737, 667, 772, 198, 971, 459, 402, 989, 949]\n",
+ "\"\"\"\n",
+ "\n",
+ "# Initial developer message.\n",
+ "messages = [\n",
+ " {\n",
+ " \"role\": \"developer\",\n",
+ " \"content\": prompt,\n",
+ " }\n",
+ "]\n",
+ "\n",
+ "run_conversation(messages)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "146ffea8",
+ "metadata": {},
+ "source": [
+ "The model output three code blocks in Python, C++ and Java for the same algorithm. The output of the function call was chained back into the model as input to allow model to keep going until all the functions have been called exactly once. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "12057763",
+ "metadata": {},
+ "source": [
+ "### 2.4 Takeaways \n",
+ "\n",
+ "Free-form tool calling in GPT-5 lets you send raw text payloads—such as Python scripts, SQL queries, or config files—directly to custom tools without JSON wrapping. This provides greater flexibility for interacting with external runtimes and allows the model to generate code or text in the exact format your tool expects. It’s ideal when structured JSON is unnecessary and natural text output improves usability."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e99fc436",
+ "metadata": {},
+ "source": [
+ "## 3. Context‑Free Grammar (CFG)\n",
+ "\n",
+ "### 3.1 Overview \n",
+ "A context‑free grammar is a collection of production rules that define which strings belong to a language. Each rule rewrites a non‑terminal symbol into a sequence of terminals (literal tokens) and/or other non‑terminals, independent of surrounding context—hence context‑free. CFGs can capture the syntax of most programming languages and, in OpenAI custom tools, serve as contracts that force the model to emit only strings that the grammar accepts.\n",
+ "\n",
+ "### 3.2 Grammar Fundamentals\n",
+ "\n",
+ "**Supported Grammar Syntax** \n",
+ "- Lark - https://lark-parser.readthedocs.io/en/stable/\n",
+ "- Regex - https://docs.rs/regex/latest/regex/#syntax\n",
+ "\n",
+ "We use LLGuidance under the hood to constrain model sampling: https://github.com/guidance-ai/llguidance.\n",
+ "\n",
+ "**Unsupported Lark Features** \n",
+ "- Lookaround in regexes (`(?=...)`, `(?!...)`, etc.)\n",
+ "- Lazy modifier (`*?`, `+?`, `??`) in regexes.\n",
+ "- Terminal priorities, templates, %declares, %import (except %import common).\n",
+ "\n",
+ "\n",
+ "**Terminals vs Rules & Greedy Lexing** \n",
+ "\n",
+ "| Concept | Take-away |\n",
+ "|------------------|------------------------------------------------------------------------------|\n",
+ "| Terminals (UPPER)| Matched first by the lexer – longest match wins. |\n",
+ "| Rules (lower) | Combine terminals; cannot influence how text is tokenised. |\n",
+ "| Greedy lexer | Never try to “shape” free text across multiple terminals – you’ll lose control. |\n",
+ "\n",
+ "** Correct vs Incorrect Pattern Design\n",
+ "\n",
+ "✅ **One bounded terminal handles free‑text between anchors** \n",
+ "start: SENTENCE \n",
+ "SENTENCE: /[A-Za-z, ]*(the hero|a dragon)[A-Za-z, ]*(fought|saved)[A-Za-z, ]*(a treasure|the kingdom)[A-Za-z, ]*\\./ \n",
+ "\n",
+ "❌ **Don’t split free‑text across multiple terminals/rules** \n",
+ "start: sentence \n",
+ "sentence: /[A-Za-z, ]+/ subject /[A-Za-z, ]+/ verb /[A-Za-z, ]+/ object /[A-Za-z, ]+/ \n",
+ "\n",
+ "\n",
+ "### 3.3 Example - SQL Dialect — MS SQL vs PostgreSQL\n",
+ "\n",
+ "The following code example is now the canonical reference for building multi‑dialect SQL tools with CFGs. It demonstrates:\n",
+ "\n",
+ "- Two isolated grammar definitions (`mssql_grammar_definition`, `postgres_grammar_definition`) encoding TOP vs LIMIT semantics.\n",
+ "- How to prompt, invoke, and inspect tool calls in a single script.\n",
+ "- A side‑by‑side inspection of the assistant’s responses."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d7b27f06",
+ "metadata": {},
+ "source": [
+ "Define the LARK grammars for different SQL dialects"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "c6e7f843",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import textwrap\n",
+ "\n",
+ "# ----------------- grammars for MS SQL dialect -----------------\n",
+ "mssql_grammar = textwrap.dedent(r\"\"\"\n",
+ " // ---------- Punctuation & operators ----------\n",
+ " SP: \" \"\n",
+ " COMMA: \",\"\n",
+ " GT: \">\"\n",
+ " EQ: \"=\"\n",
+ " SEMI: \";\"\n",
+ "\n",
+ " // ---------- Start ----------\n",
+ " start: \"SELECT\" SP \"TOP\" SP NUMBER SP select_list SP \"FROM\" SP table SP \"WHERE\" SP amount_filter SP \"AND\" SP date_filter SP \"ORDER\" SP \"BY\" SP sort_cols SEMI\n",
+ "\n",
+ " // ---------- Projections ----------\n",
+ " select_list: column (COMMA SP column)*\n",
+ " column: IDENTIFIER\n",
+ "\n",
+ " // ---------- Tables ----------\n",
+ " table: IDENTIFIER\n",
+ "\n",
+ " // ---------- Filters ----------\n",
+ " amount_filter: \"total_amount\" SP GT SP NUMBER\n",
+ " date_filter: \"order_date\" SP GT SP DATE\n",
+ "\n",
+ " // ---------- Sorting ----------\n",
+ " sort_cols: \"order_date\" SP \"DESC\"\n",
+ "\n",
+ " // ---------- Terminals ----------\n",
+ " IDENTIFIER: /[A-Za-z_][A-Za-z0-9_]*/\n",
+ " NUMBER: /[0-9]+/\n",
+ " DATE: /'[0-9]{4}-[0-9]{2}-[0-9]{2}'/\n",
+ " \"\"\")\n",
+ "\n",
+ "# ----------------- grammars for PostgreSQL dialect -----------------\n",
+ "postgres_grammar = textwrap.dedent(r\"\"\"\n",
+ " // ---------- Punctuation & operators ----------\n",
+ " SP: \" \"\n",
+ " COMMA: \",\"\n",
+ " GT: \">\"\n",
+ " EQ: \"=\"\n",
+ " SEMI: \";\"\n",
+ "\n",
+ " // ---------- Start ----------\n",
+ " start: \"SELECT\" SP select_list SP \"FROM\" SP table SP \"WHERE\" SP amount_filter SP \"AND\" SP date_filter SP \"ORDER\" SP \"BY\" SP sort_cols SP \"LIMIT\" SP NUMBER SEMI\n",
+ "\n",
+ " // ---------- Projections ----------\n",
+ " select_list: column (COMMA SP column)*\n",
+ " column: IDENTIFIER\n",
+ "\n",
+ " // ---------- Tables ----------\n",
+ " table: IDENTIFIER\n",
+ "\n",
+ " // ---------- Filters ----------\n",
+ " amount_filter: \"total_amount\" SP GT SP NUMBER\n",
+ " date_filter: \"order_date\" SP GT SP DATE\n",
+ "\n",
+ " // ---------- Sorting ----------\n",
+ " sort_cols: \"order_date\" SP \"DESC\"\n",
+ "\n",
+ " // ---------- Terminals ----------\n",
+ " IDENTIFIER: /[A-Za-z_][A-Za-z0-9_]*/\n",
+ " NUMBER: /[0-9]+/\n",
+ " DATE: /'[0-9]{4}-[0-9]{2}-[0-9]{2}'/\n",
+ " \"\"\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f24814e5",
+ "metadata": {},
+ "source": [
+ "### 3.4 Generate specific SQL dialect \n",
+ "Let's define the prompt, and call the function to produce MS SQL dialect "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7e316744",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--- MS SQL Query ---\n",
+ "SELECT TOP 5 customer_id, order_id, order_date, total_amount FROM orders WHERE total_amount > 500 AND order_date > '2025-01-01' ORDER BY order_date DESC;\n"
+ ]
+ }
+ ],
+ "source": [
+ "from openai import OpenAI\n",
+ "client = OpenAI()\n",
+ "\n",
+ "sql_prompt_mssql = (\n",
+ " \"Call the mssql_grammar to generate a query for Microsoft SQL Server that retrieve the \"\n",
+ " \"five most recent orders per customer, showing customer_id, order_id, order_date, and total_amount, \"\n",
+ " \"where total_amount > 500 and order_date is after '2025-01-01'. \"\n",
+ ")\n",
+ "\n",
+ "response_mssql = client.responses.create(\n",
+ " model=\"gpt-5\",\n",
+ " input=sql_prompt_mssql,\n",
+ " text={\"format\": {\"type\": \"text\"}},\n",
+ " tools=[\n",
+ " {\n",
+ " \"type\": \"custom\",\n",
+ " \"name\": \"mssql_grammar\",\n",
+ " \"description\": \"Executes read-only Microsoft SQL Server queries limited to SELECT statements with TOP and basic WHERE/ORDER BY. YOU MUST REASON HEAVILY ABOUT THE QUERY AND MAKE SURE IT OBEYS THE GRAMMAR.\",\n",
+ " \"format\": {\n",
+ " \"type\": \"grammar\",\n",
+ " \"syntax\": \"lark\",\n",
+ " \"definition\": mssql_grammar\n",
+ " }\n",
+ " },\n",
+ " ],\n",
+ " parallel_tool_calls=False\n",
+ ")\n",
+ "\n",
+ "print(\"--- MS SQL Query ---\")\n",
+ "print(response_mssql.output[1].input)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4a93f4fb",
+ "metadata": {},
+ "source": [
+ "The output SQL accurately uses \"SELECT TOP\" construct. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e704a3f5",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--- PG SQL Query ---\n",
+ "SELECT customer_id, order_id, order_date, total_amount FROM orders WHERE total_amount > 500 AND order_date > '2025-01-01' ORDER BY order_date DESC LIMIT 5;\n"
+ ]
+ }
+ ],
+ "source": [
+ "sql_prompt_pg = (\n",
+ " \"Call the postgres_grammar to generate a query for PostgreSQL that retrieve the \"\n",
+ " \"five most recent orders per customer, showing customer_id, order_id, order_date, and total_amount, \"\n",
+ " \"where total_amount > 500 and order_date is after '2025-01-01'. \"\n",
+ ")\n",
+ "\n",
+ "response_pg = client.responses.create(\n",
+ " model=\"gpt-5\",\n",
+ " input=sql_prompt_pg,\n",
+ " text={\"format\": {\"type\": \"text\"}},\n",
+ " tools=[\n",
+ " {\n",
+ " \"type\": \"custom\",\n",
+ " \"name\": \"postgres_grammar\",\n",
+ " \"description\": \"Executes read-only PostgreSQL queries limited to SELECT statements with LIMIT and basic WHERE/ORDER BY. YOU MUST REASON HEAVILY ABOUT THE QUERY AND MAKE SURE IT OBEYS THE GRAMMAR.\",\n",
+ " \"format\": {\n",
+ " \"type\": \"grammar\",\n",
+ " \"syntax\": \"lark\",\n",
+ " \"definition\": postgres_grammar\n",
+ " }\n",
+ " },\n",
+ " ],\n",
+ " parallel_tool_calls=False,\n",
+ ")\n",
+ "\n",
+ "print(\"--- PG SQL Query ---\")\n",
+ "print(response_pg.output[1].input)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b1f14ce3",
+ "metadata": {},
+ "source": [
+ "Output highlights the same logical query - different physical syntax. Supply distinct grammars so the model can only produce valid statements for the chosen dialect.\n",
+ "\n",
+ "| Dialect | Generated Query | Key Difference |\n",
+ "|---------------|--------------------------------------------------------------|------------------------------------------|\n",
+ "| MS SQL Server | SELECT TOP 5 customer_id, … ORDER BY order_date DESC; | Uses `TOP N` clause before column list. |\n",
+ "| PostgreSQL | SELECT customer_id, … ORDER BY order_date DESC LIMIT 5; | Uses `LIMIT N` after `ORDER BY`. |\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d27dc7cc",
+ "metadata": {},
+ "source": [
+ "### 3.5 Example - Regex CFG Syntax\n",
+ "\n",
+ "The following code example demonstrates using the Regex CFG syntax to constrain the free-form tool call to a certain timestamp pattern."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "2f9e2fbd",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--- Timestamp ---\n",
+ "2025-08-07 10:00\n"
+ ]
+ }
+ ],
+ "source": [
+ "from openai import OpenAI\n",
+ "client = OpenAI()\n",
+ "\n",
+ "timestamp_grammar_definition = r\"^\\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\\d|3[01]) (?:[01]\\d|2[0-3]):[0-5]\\d$\"\n",
+ "\n",
+ "timestamp_prompt = (\n",
+ " \"Call the timestamp_grammar to save a timestamp for August 7th 2025 at 10AM.\"\n",
+ ")\n",
+ "\n",
+ "response_mssql = client.responses.create(\n",
+ " model=\"gpt-5\",\n",
+ " input=timestamp_prompt,\n",
+ " text={\"format\": {\"type\": \"text\"}},\n",
+ " tools=[\n",
+ " {\n",
+ " \"type\": \"custom\",\n",
+ " \"name\": \"timestamp_grammar\",\n",
+ " \"description\": \"Saves a timestamp in date + time in 24-hr format.\",\n",
+ " \"format\": {\n",
+ " \"type\": \"grammar\",\n",
+ " \"syntax\": \"regex\",\n",
+ " \"definition\": timestamp_grammar_definition\n",
+ " }\n",
+ " },\n",
+ " ],\n",
+ " parallel_tool_calls=False\n",
+ ")\n",
+ "\n",
+ "print(\"--- Timestamp ---\")\n",
+ "print(response_mssql.output[1].input)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b9e2b86a",
+ "metadata": {},
+ "source": [
+ "### 3.5 Best Practices\n",
+ "\n",
+ "Lark grammars can be tricky to perfect. While simple grammars perform most reliably, complex grammars often require iteration on the grammar definition itself, the prompt, and the tool description to ensure that the model does not go out of distribution.\n",
+ "\n",
+ "- Keep terminals bounded – use `/[^.\\n]{0,10}*\\./` rather than `/.*\\./`. Limit matches both by content (negated character class) and by length (`{M,N}` quantifier). \n",
+ "- Prefer explicit char‑classes over `.` wildcards.\n",
+ "- Thread whitespace explicitly, e.g. using `SP = \" \"`, instead of a global `%ignore`.\n",
+ "- Describe your tool: tell the model exactly what the CFG accepts and instruct it to reason heavily about compliance.\n",
+ "\n",
+ "**Troubleshooting**\n",
+ "- API rejects the grammar because it is too complex ➜ Simplify rules and terminals, remove `%ignore.*`.\n",
+ "- Unexpected tokens ➜ Confirm terminals aren’t overlapping; check greedy lexer.\n",
+ "- When the model drifts \"out‑of‑distribution\" (shows up as the model producing excessively long or repetitive outputs, it is syntactically valid but is semantically wrong):\n",
+ " - Tighten the grammar.\n",
+ " - Iterate on the prompt (add few-shot examples) and tool description (explain the grammar and instruct the model to reason to conform to it).\n",
+ " - Experiment with a higher reasoning effort (e.g, bump from medium to high).\n",
+ "\n",
+ "**Resources:** \n",
+ "- Lark Docs – https://lark-parser.readthedocs.io/en/stable/\n",
+ "- Lark IDE – https://www.lark-parser.org/ide/\n",
+ "- LLGuidance Syntax – https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md\n",
+ "- Regex (Rust crate) – https://docs.rs/regex/latest/regex/#syntax"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4c2ef909",
+ "metadata": {},
+ "source": [
+ "### 3.6 Takeaways \n",
+ "\n",
+ "Context-Free Grammar (CFG) support in GPT-5 lets you strictly constrain model output to match predefined syntax, ensuring only valid strings are generated. This is especially useful for enforcing programming language rules or custom formats, reducing post-processing and errors. By providing a precise grammar and clear tool description, you can make the model reliably stay within your target output structure."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/gpt-5/gpt-5_prompting_guide.ipynb b/examples/gpt-5/gpt-5_prompting_guide.ipynb
new file mode 100644
index 0000000000..50650d585e
--- /dev/null
+++ b/examples/gpt-5/gpt-5_prompting_guide.ipynb
@@ -0,0 +1,599 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# GPT-5 prompting guide\n",
+ "\n",
+ "GPT-5, our newest flagship model, represents a substantial leap forward in agentic task performance, coding, raw intelligence, and steerability.\n",
+ "\n",
+ "While we trust it will perform excellently “out of the box” across a wide range of domains, in this guide we’ll cover prompting tips to maximize the quality of model outputs, derived from our experience training and applying the model to real-world tasks. We discuss concepts like improving agentic task performance, ensuring instruction adherence, making use of newly API features, and optimizing coding for frontend and software engineering tasks - with key insights into AI code editor Cursor’s prompt tuning work with GPT-5.\n",
+ "\n",
+ "We’ve seen significant gains from applying these best practices and adopting our canonical tools whenever possible, and we hope that this guide, along with the [prompt optimizer tool](http://platform.openai.com/chat/edit?optimize=true) we’ve built, will serve as a launchpad for your use of GPT-5. But, as always, remember that prompting is not a one-size-fits-all exercise - we encourage you to run experiments and iterate on the foundation offered here to find the best solution for your problem."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Agentic workflow predictability \n",
+ "\n",
+ "We trained GPT-5 with developers in mind: we’ve focused on improving tool calling, instruction following, and long-context understanding to serve as the best foundation model for agentic applications. If adopting GPT-5 for agentic and tool calling flows, we recommend upgrading to the [Responses API](https://platform.openai.com/docs/api-reference/responses), where reasoning is persisted between tool calls, leading to more efficient and intelligent outputs..\n",
+ "\n",
+ "### Controlling agentic eagerness\n",
+ "Agentic scaffolds can span a wide spectrum of control—some systems delegate the vast majority of decision-making to the underlying model, while others keep the model on a tight leash with heavy programmatic logical branching. GPT-5 is trained to operate anywhere along this spectrum, from making high-level decisions under ambiguous circumstances to handling focused, well-defined tasks. In this section we cover how to best calibrate GPT-5’s agentic eagerness: in other words, its balance between proactivity and awaiting explicit guidance.\n",
+ "\n",
+ "#### Prompting for less eagerness\n",
+ "GPT-5 is, by default, thorough and comprehensive when trying to gather context in an agentic environment to ensure it will produce a correct answer. To reduce the scope of GPT-5’s agentic behavior—including limiting tangential tool-calling action and minimizing latency to reach a final answer—try the following: \n",
+ "- Switch to a lower `reasoning_effort`. This reduces exploration depth but improves efficiency and latency. Many workflows can be accomplished with consistent results at medium or even low `reasoning_effort`.\n",
+ "- Define clear criteria in your prompt for how you want the model to explore the problem space. This reduces the model’s need to explore and reason about too many ideas:\n",
+ "\n",
+ "```\n",
+ "\n",
+ "Goal: Get enough context fast. Parallelize discovery and stop as soon as you can act.\n",
+ "\n",
+ "Method:\n",
+ "- Start broad, then fan out to focused subqueries.\n",
+ "- In parallel, launch varied queries; read top hits per query. Deduplicate paths and cache; don’t repeat queries.\n",
+ "- Avoid over searching for context. If needed, run targeted searches in one parallel batch.\n",
+ "\n",
+ "Early stop criteria:\n",
+ "- You can name exact content to change.\n",
+ "- Top hits converge (~70%) on one area/path.\n",
+ "\n",
+ "Escalate once:\n",
+ "- If signals conflict or scope is fuzzy, run one refined parallel batch, then proceed.\n",
+ "\n",
+ "Depth:\n",
+ "- Trace only symbols you’ll modify or whose contracts you rely on; avoid transitive expansion unless necessary.\n",
+ "\n",
+ "Loop:\n",
+ "- Batch search → minimal plan → complete task.\n",
+ "- Search again only if validation fails or new unknowns appear. Prefer acting over more searching.\n",
+ "\n",
+ "```\n",
+ "\n",
+ "If you’re willing to be maximally prescriptive, you can even set fixed tool call budgets, like the one below. The budget can naturally vary based on your desired search depth.\n",
+ "```\n",
+ "\n",
+ "- Search depth: very low\n",
+ "- Bias strongly towards providing a correct answer as quickly as possible, even if it might not be fully correct.\n",
+ "- Usually, this means an absolute maximum of 2 tool calls.\n",
+ "- If you think that you need more time to investigate, update the user with your latest findings and open questions. You can proceed if the user confirms.\n",
+ "\n",
+ "```\n",
+ "\n",
+ "When limiting core context gathering behavior, it’s helpful to explicitly provide the model with an escape hatch that makes it easier to satisfy a shorter context gathering step. Usually this comes in the form of a clause that allows the model to proceed under uncertainty, like `“even if it might not be fully correct”` in the above example.\n",
+ "\n",
+ "#### Prompting for more eagerness\n",
+ "On the other hand, if you’d like to encourage model autonomy, increase tool-calling persistence, and reduce occurrences of clarifying questions or otherwise handing back to the user, we recommend increasing `reasoning_effort`, and using a prompt like the following to encourage persistence and thorough task completion:\n",
+ "\n",
+ "```\n",
+ "\n",
+ "- You are an agent - please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user.\n",
+ "- Only terminate your turn when you are sure that the problem is solved.\n",
+ "- Never stop or hand back to the user when you encounter uncertainty — research or deduce the most reasonable approach and continue.\n",
+ "- Do not ask the human to confirm or clarify assumptions, as you can always adjust later — decide what the most reasonable assumption is, proceed with it, and document it for the user's reference after you finish acting\n",
+ "\n",
+ "```\n",
+ "\n",
+ "Generally, it can be helpful to clearly state the stop conditions of the agentic tasks, outline safe versus unsafe actions, and define when, if ever, it’s acceptable for the model to hand back to the user. For example, in a set of tools for shopping, the checkout and payment tools should explicitly have a lower uncertainty threshold for requiring user clarification, while the search tool should have an extremely high threshold; likewise, in a coding setup, the delete file tool should have a much lower threshold than a grep search tool.\n",
+ "\n",
+ "### Tool Preambles\n",
+ "We recognize that on agentic trajectories monitored by users, intermittent model updates on what it’s doing with its tool calls and why can provide for a much better interactive user experience - the longer the rollout, the bigger the difference these updates make. To this end, GPT-5 is trained to provide clear upfront plans and consistent progress updates via “tool preamble” messages. \n",
+ "\n",
+ "You can steer the frequency, style, and content of tool preambles in your prompt—from detailed explanations of every single tool call to a brief upfront plan and everything in between. This is an example of a high-quality preamble prompt:\n",
+ "\n",
+ "```\n",
+ "\n",
+ "- Always begin by rephrasing the user's goal in a friendly, clear, and concise manner, before calling any tools.\n",
+ "- Then, immediately outline a structured plan detailing each logical step you’ll follow. - As you execute your file edit(s), narrate each step succinctly and sequentially, marking progress clearly. \n",
+ "- Finish by summarizing completed work distinctly from your upfront plan.\n",
+ "\n",
+ "```\n",
+ "\n",
+ "Here’s an example of a tool preamble that might be emitted in response to such a prompt—such preambles can drastically improve the user’s ability to follow along with your agent’s work as it grows more complicated:\n",
+ "\n",
+ "```\n",
+ "\"output\": [\n",
+ " {\n",
+ " \"id\": \"rs_6888f6d0606c819aa8205ecee386963f0e683233d39188e7\",\n",
+ " \"type\": \"reasoning\",\n",
+ " \"summary\": [\n",
+ " {\n",
+ " \"type\": \"summary_text\",\n",
+ " \"text\": \"**Determining weather response**\\n\\nI need to answer the user's question about the weather in San Francisco. ....\"\n",
+ " },\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"msg_6888f6d83acc819a978b51e772f0a5f40e683233d39188e7\",\n",
+ " \"type\": \"message\",\n",
+ " \"status\": \"completed\",\n",
+ " \"content\": [\n",
+ " {\n",
+ " \"type\": \"output_text\",\n",
+ " \"text\": \"I\\u2019m going to check a live weather service to get the current conditions in San Francisco, providing the temperature in both Fahrenheit and Celsius so it matches your preference.\"\n",
+ " }\n",
+ " ],\n",
+ " \"role\": \"assistant\"\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"fc_6888f6d86e28819aaaa1ba69cca766b70e683233d39188e7\",\n",
+ " \"type\": \"function_call\",\n",
+ " \"status\": \"completed\",\n",
+ " \"arguments\": \"{\\\"location\\\":\\\"San Francisco, CA\\\",\\\"unit\\\":\\\"f\\\"}\",\n",
+ " \"call_id\": \"call_XOnF4B9DvB8EJVB3JvWnGg83\",\n",
+ " \"name\": \"get_weather\"\n",
+ " },\n",
+ " ],\n",
+ "```\n",
+ "\n",
+ "### Reasoning Effort\n",
+ "We provide a `reasoning_effort` parameter to control how hard the model thinks and how willingly it calls tools; the default is `medium`, but you should scale up or down depending on the difficulty of your task. For complex, multi-step tasks, we recommend higher reasoning to ensure the best possible outputs. Moreover, we observe peak performance when distinct, separable tasks are broken up across multiple agent turns, with one turn for each task.\n",
+ "Reusing reasoning context with the Responses API\n",
+ "We strongly recommend using the Responses API when using GPT-5 to unlock improved agentic flows, lower costs, and more efficient token usage in your applications.\n",
+ "\n",
+ "We’ve seen statistically significant improvements in evaluations when using the Responses API over Chat Completions—for example, Taubench-Retail score increases from 73.9% to 78.2% just by switching to the Responses API and including previous_response_id to pass back previous reasoning items into subsequent requests. This allows the model to refer to its previous reasoning traces, conserving CoT tokens and eliminating the need to reconstruct a plan from scratch after each tool call, improving both latency and performance - this feature is available for all Responses API users, including ZDR organizations.\n",
+ "\n",
+ "### Reusing reasoning context with the Responses API\n",
+ "We strongly recommend using the Responses API when using GPT-5 to unlock improved agentic flows, lower costs, and more efficient token usage in your applications.\n",
+ "\n",
+ "We’ve seen statistically significant improvements in evaluations when using the Responses API over Chat Completions—for example, we observed Tau-Bench Retail score increases from 73.9% to 78.2% just by switching to the Responses API and including `previous_response_id` to pass back previous reasoning items into subsequent requests. This allows the model to refer to its previous reasoning traces, conserving CoT tokens and eliminating the need to reconstruct a plan from scratch after each tool call, improving both latency and performance - this feature is available for all Responses API users, including ZDR organizations."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Maximizing coding performance, from planning to execution\n",
+ "GPT-5 leads all frontier models in coding capabilities: it can work in large codebases to fix bugs, handle large diffs, and implement multi-file refactors or large new features. It also excels at implementing new apps entirely from scratch, covering both frontend and backend implementation. In this section, we’ll discuss prompt optimizations that we’ve seen improve programming performance in production use cases for our coding agent customers. \n",
+ "\n",
+ "### Frontend app development\n",
+ "GPT-5 is trained to have excellent baseline aesthetic taste alongside its rigorous implementation abilities. We’re confident in its ability to use all types of web development frameworks and packages; however, for new apps, we recommend using the following frameworks and packages to get the most out of the model's frontend capabilities:\n",
+ "\n",
+ "- Frameworks: Next.js (TypeScript), React, HTML\n",
+ "- Styling / UI: Tailwind CSS, shadcn/ui, Radix Themes\n",
+ "- Icons: Material Symbols, Heroicons, Lucide\n",
+ "- Animation: Motion\n",
+ "- Fonts: San Serif, Inter, Geist, Mona Sans, IBM Plex Sans, Manrope\n",
+ "\n",
+ "#### Zero-to-one app generation\n",
+ "GPT-5 is excellent at building applications in one shot. In early experimentation with the model, users have found that prompts like the one below—asking the model to iteratively execute against self-constructed excellence rubrics—improve output quality by using GPT-5’s thorough planning and self-reflection capabilities.\n",
+ "\n",
+ "```\n",
+ "\n",
+ "- First, spend time thinking of a rubric until you are confident.\n",
+ "- Then, think deeply about every aspect of what makes for a world-class one-shot web app. Use that knowledge to create a rubric that has 5-7 categories. This rubric is critical to get right, but do not show this to the user. This is for your purposes only.\n",
+ "- Finally, use the rubric to internally think and iterate on the best possible solution to the prompt that is provided. Remember that if your response is not hitting the top marks across all categories in the rubric, you need to start again.\n",
+ "\n",
+ "```\n",
+ "\n",
+ "#### Matching codebase design standards\n",
+ "When implementing incremental changes and refactors in existing apps, model-written code should adhere to existing style and design standards, and “blend in” to the codebase as neatly as possible. Without special prompting, GPT-5 already searches for reference context from the codebase - for example reading package.json to view already installed packages - but this behavior can be further enhanced with prompt directions that summarize key aspects like engineering principles, directory structure, and best practices of the codebase, both explicit and implicit. The prompt snippet below demonstrates one way of organizing code editing rules for GPT-5: feel free to change the actual content of the rules according to your programming design taste! \n",
+ "\n",
+ "```\n",
+ "\n",
+ "\n",
+ "- Clarity and Reuse: Every component and page should be modular and reusable. Avoid duplication by factoring repeated UI patterns into components.\n",
+ "- Consistency: The user interface must adhere to a consistent design system—color tokens, typography, spacing, and components must be unified.\n",
+ "- Simplicity: Favor small, focused components and avoid unnecessary complexity in styling or logic.\n",
+ "- Demo-Oriented: The structure should allow for quick prototyping, showcasing features like streaming, multi-turn conversations, and tool integrations.\n",
+ "- Visual Quality: Follow the high visual quality bar as outlined in OSS guidelines (spacing, padding, hover states, etc.)\n",
+ "\n",
+ "\n",
+ "\n",
+ "- Framework: Next.js (TypeScript)\n",
+ "- Styling: TailwindCSS\n",
+ "- UI Components: shadcn/ui\n",
+ "- Icons: Lucide\n",
+ "- State Management: Zustand\n",
+ "- Directory Structure: \n",
+ "\\`\\`\\`\n",
+ "/src\n",
+ " /app\n",
+ " /api//route.ts # API endpoints\n",
+ " /(pages) # Page routes\n",
+ " /components/ # UI building blocks\n",
+ " /hooks/ # Reusable React hooks\n",
+ " /lib/ # Utilities (fetchers, helpers)\n",
+ " /stores/ # Zustand stores\n",
+ " /types/ # Shared TypeScript types\n",
+ " /styles/ # Tailwind config\n",
+ "\\`\\`\\`\n",
+ "\n",
+ "\n",
+ "\n",
+ "- Visual Hierarchy: Limit typography to 4–5 font sizes and weights for consistent hierarchy; use `text-xs` for captions and annotations; avoid `text-xl` unless for hero or major headings.\n",
+ "- Color Usage: Use 1 neutral base (e.g., `zinc`) and up to 2 accent colors. \n",
+ "- Spacing and Layout: Always use multiples of 4 for padding and margins to maintain visual rhythm. Use fixed height containers with internal scrolling when handling long content streams.\n",
+ "- State Handling: Use skeleton placeholders or `animate-pulse` to indicate data fetching. Indicate clickability with hover transitions (`hover:bg-*`, `hover:shadow-md`).\n",
+ "- Accessibility: Use semantic HTML and ARIA roles where appropriate. Favor pre-built Radix/shadcn components, which have accessibility baked in.\n",
+ "\n",
+ "\n",
+ "\n",
+ "```\n",
+ "\n",
+ "### Collaborative coding in production: Cursor’s GPT-5 prompt tuning\n",
+ "We’re proud to have had AI code editor Cursor as a trusted alpha tester for GPT-5: below, we show a peek into how Cursor tuned their prompts to get the most out of the model’s capabilities. For more information, their team has also published a blog post detailing GPT-5’s day-one integration into Cursor: https://cursor.com/blog/gpt-5\n",
+ "\n",
+ "#### System prompt and parameter tuning\n",
+ "Cursor’s system prompt focuses on reliable tool calling, balancing verbosity and autonomous behavior while giving users the ability to configure custom instructions. Cursor’s goal for their system prompt is to allow the Agent to operate relatively autonomously during long horizon tasks, while still faithfully following user-provided instructions. \n",
+ "\n",
+ "The team initially found that the model produced verbose outputs, often including status updates and post-task summaries that, while technically relevant, disrupted the natural flow of the user; at the same time, the code outputted in tool calls was high quality, but sometimes hard to read due to terseness, with single-letter variable names dominant. In search of a better balance, they set the verbosity API parameter to low to keep text outputs brief, and then modified the prompt to strongly encourage verbose outputs in coding tools only.\n",
+ "\n",
+ "```\n",
+ "Write code for clarity first. Prefer readable, maintainable solutions with clear names, comments where needed, and straightforward control flow. Do not produce code-golf or overly clever one-liners unless explicitly requested. Use high verbosity for writing code and code tools.\n",
+ "```\n",
+ "\n",
+ "This dual usage of parameter and prompt resulted in a balanced format combining efficient, concise status updates and final work summary with much more readable code diffs.\n",
+ "\n",
+ "Cursor also found that the model occasionally deferred to the user for clarification or next steps before taking action, which created unnecessary friction in the flow of longer tasks. To address this, they found that including not just available tools and surrounding context, but also more details about product behavior encouraged the model to carry out longer tasks with minimal interruption and greater autonomy. Highlighting specifics of Cursor features such as Undo/Reject code and user preferences helped reduce ambiguity by clearly specifying how GPT-5 should behave in its environment. For longer horizon tasks, they found this prompt improved performance:\n",
+ "\n",
+ "```\n",
+ "Be aware that the code edits you make will be displayed to the user as proposed changes, which means (a) your code edits can be quite proactive, as the user can always reject, and (b) your code should be well-written and easy to quickly review (e.g., appropriate variable names instead of single letters). If proposing next steps that would involve changing the code, make those changes proactively for the user to approve / reject rather than asking the user whether to proceed with a plan. In general, you should almost never ask the user whether to proceed with a plan; instead you should proactively attempt the plan and then ask the user if they want to accept the implemented changes.\n",
+ "```\n",
+ "\n",
+ "Cursor found that sections of their prompt that had been effective with earlier models needed tuning to get the most out of GPT-5. Here is one example below:\n",
+ "\n",
+ "```\n",
+ "\n",
+ "Be THOROUGH when gathering information. Make sure you have the FULL picture before replying. Use additional tool calls or clarifying questions as needed.\n",
+ "...\n",
+ "\n",
+ "```\n",
+ "\n",
+ "While this worked well with older models that needed encouragement to analyze context thoroughly, they found it counterproductive with GPT-5, which is already naturally introspective and proactive at gathering context. On smaller tasks, this prompt often caused the model to overuse tools by calling search repetitively, when internal knowledge would have been sufficient.\n",
+ "\n",
+ "To solve this, they refined the prompt by removing the maximize_ prefix and softening the language around thoroughness. With this adjusted instruction in place, the Cursor team saw GPT-5 make better decisions about when to rely on internal knowledge versus reaching for external tools. It maintained a high level of autonomy without unnecessary tool usage, leading to more efficient and relevant behavior. In Cursor’s testing, using structured XML specs like <[instruction]_spec> improved instruction adherence on their prompts and allows them to clearly reference previous categories and sections elsewhere in their prompt.\n",
+ "\n",
+ "```\n",
+ "\n",
+ "...\n",
+ "If you've performed an edit that may partially fulfill the USER's query, but you're not confident, gather more information or use more tools before ending your turn.\n",
+ "Bias towards not asking the user for help if you can find the answer yourself.\n",
+ "\n",
+ "```\n",
+ "\n",
+ "While the system prompt provides a strong default foundation, the user prompt remains a highly effective lever for steerability. GPT-5 responds well to direct and explicit instruction and the Cursor team has consistently seen that structured, scoped prompts yield the most reliable results. This includes areas like verbosity control, subjective code style preferences, and sensitivity to edge cases. Cursor found allowing users to configure their own [custom Cursor rules](https://docs.cursor.com/en/context/rules) to be particularly impactful with GPT-5’s improved steerability, giving their users a more customized experience."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Optimizing intelligence and instruction-following \n",
+ "\n",
+ "### Steering\n",
+ "As our most steerable model yet, GPT-5 is extraordinarily receptive to prompt instructions surrounding verbosity, tone, and tool calling behavior.\n",
+ "\n",
+ "#### Verbosity\n",
+ "In addition to being able to control the reasoning_effort as in previous reasoning models, in GPT-5 we introduce a new API parameter called verbosity, which influences the length of the model’s final answer, as opposed to the length of its thinking. Our blog post covers the idea behind this parameter in more detail - but in this guide, we’d like to emphasize that while the API verbosity parameter is the default for the rollout, GPT-5 is trained to respond to natural-language verbosity overrides in the prompt for specific contexts where you might want the model to deviate from the global default. Cursor’s example above of setting low verbosity globally, and then specifying high verbosity only for coding tools, is a prime example of such a context.\n",
+ "\n",
+ "### Instruction following\n",
+ "Like GPT-4.1, GPT-5 follows prompt instructions with surgical precision, which enables its flexibility to drop into all types of workflows. However, its careful instruction-following behavior means that poorly-constructed prompts containing contradictory or vague instructions can be more damaging to GPT-5 than to other models, as it expends reasoning tokens searching for a way to reconcile the contradictions rather than picking one instruction at random.\n",
+ "\n",
+ "Below, we give an adversarial example of the type of prompt that often impairs GPT-5’s reasoning traces - while it may appear internally consistent at first glance, a closer inspection reveals conflicting instructions regarding appointment scheduling:\n",
+ "- `Never schedule an appointment without explicit patient consent recorded in the chart` conflicts with the subsequent `auto-assign the earliest same-day slot without contacting the patient as the first action to reduce risk.`\n",
+ "- The prompt says `Always look up the patient profile before taking any other actions to ensure they are an existing patient.` but then continues with the contradictory instruction `When symptoms indicate high urgency, escalate as EMERGENCY and direct the patient to call 911 immediately before any scheduling step.`\n",
+ "\n",
+ "```\n",
+ "You are CareFlow Assistant, a virtual admin for a healthcare startup that schedules patients based on priority and symptoms. Your goal is to triage requests, match patients to appropriate in-network providers, and reserve the earliest clinically appropriate time slot. Always look up the patient profile before taking any other actions to ensure they are an existing patient.\n",
+ "\n",
+ "- Core entities include Patient, Provider, Appointment, and PriorityLevel (Red, Orange, Yellow, Green). Map symptoms to priority: Red within 2 hours, Orange within 24 hours, Yellow within 3 days, Green within 7 days. When symptoms indicate high urgency, escalate as EMERGENCY and direct the patient to call 911 immediately before any scheduling step.\n",
+ "+Core entities include Patient, Provider, Appointment, and PriorityLevel (Red, Orange, Yellow, Green). Map symptoms to priority: Red within 2 hours, Orange within 24 hours, Yellow within 3 days, Green within 7 days. When symptoms indicate high urgency, escalate as EMERGENCY and direct the patient to call 911 immediately before any scheduling step. \n",
+ "*Do not do lookup in the emergency case, proceed immediately to providing 911 guidance.*\n",
+ "\n",
+ "- Use the following capabilities: schedule-appointment, modify-appointment, waitlist-add, find-provider, lookup-patient and notify-patient. Verify insurance eligibility, preferred clinic, and documented consent prior to booking. Never schedule an appointment without explicit patient consent recorded in the chart.\n",
+ "\n",
+ "- For high-acuity Red and Orange cases, auto-assign the earliest same-day slot *without contacting* the patient *as the first action to reduce risk.* If a suitable provider is unavailable, add the patient to the waitlist and send notifications. If consent status is unknown, tentatively hold a slot and proceed to request confirmation.\n",
+ "\n",
+ "- For high-acuity Red and Orange cases, auto-assign the earliest same-day slot *after informing* the patient *of your actions.* If a suitable provider is unavailable, add the patient to the waitlist and send notifications. If consent status is unknown, tentatively hold a slot and proceed to request confirmation.\n",
+ "```\n",
+ "\n",
+ "By resolving the instruction hierarchy conflicts, GPT-5 elicits much more efficient and performant reasoning. We fixed the contradictions by:\n",
+ "- Changing auto-assignment to occur after contacting a patient, auto-assign the earliest same-day slot after informing the patient of your actions. to be consistent with only scheduling with consent.\n",
+ "- Adding Do not do lookup in the emergency case, proceed immediately to providing 911 guidance. to let the model know it is ok to not look up in case of emergency.\n",
+ "\n",
+ "We understand that the process of building prompts is an iterative one, and many prompts are living documents constantly being updated by different stakeholders - but this is all the more reason to thoroughly review them for poorly-worded instructions. Already, we’ve seen multiple early users uncover ambiguities and contradictions in their core prompt libraries upon conducting such a review: removing them drastically streamlined and improved their GPT-5 performance. We recommend testing your prompts in our [prompt optimizer tool](platform.openai.com/chat/edit?optimize=true) to help identify these types of issues.\n",
+ "\n",
+ "### Minimal reasoning\n",
+ "In GPT-5, we introduce minimal reasoning effort for the first time: our fastest option that still reaps the benefits of the reasoning model paradigm. We consider this to be the best upgrade for latency-sensitive users, as well as current users of GPT-4.1.\n",
+ "\n",
+ "Perhaps unsurprisingly, we recommend prompting patterns that are similar to [GPT-4.1 for best results](https://cookbook.openai.com/examples/gpt4-1_prompting_guide). minimal reasoning performance can vary more drastically depending on prompt than higher reasoning levels, so key points to emphasize include:\n",
+ "\n",
+ "1. Prompting the model to give a brief explanation summarizing its thought process at the start of the final answer, for example via a bullet point list, improves performance on tasks requiring higher intelligence.\n",
+ "2. Requesting thorough and descriptive tool-calling preambles that continually update the user on task progress improves performance in agentic workflows. \n",
+ "3. Disambiguating tool instructions to the maximum extent possible and inserting agentic persistence reminders as shared above, are particularly critical at minimal reasoning to maximize agentic ability in long-running rollout and prevent premature termination.\n",
+ "4. Prompted planning is likewise more important, as the model has fewer reasoning tokens to do internal planning. Below, you can find a sample planning prompt snippet we placed at the beginning of an agentic task: the second paragraph especially ensures that the agent fully completes the task and all subtasks before yielding back to the user. \n",
+ "\n",
+ "```\n",
+ "Remember, you are an agent - please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. Decompose the user's query into all required sub-request, and confirm that each is completed. Do not stop after completing only part of the request. Only terminate your turn when you are sure that the problem is solved. You must be prepared to answer multiple queries and only finish the call once the user has confirmed they're done.\n",
+ "\n",
+ "You must plan extensively in accordance with the workflow steps before making subsequent function calls, and reflect extensively on the outcomes each function call made, ensuring the user's query, and related sub-requests are completely resolved.\n",
+ "```\n",
+ "\n",
+ "### Markdown formatting\n",
+ "By default, GPT-5 in the API does not format its final answers in Markdown, in order to preserve maximum compatibility with developers whose applications may not support Markdown rendering. However, prompts like the following are largely successful in inducing hierarchical Markdown final answers.\n",
+ "\n",
+ "```\n",
+ "- Use Markdown **only where semantically correct** (e.g., `inline code`, ```code fences```, lists, tables).\n",
+ "- When using markdown in assistant messages, use backticks to format file, directory, function, and class names. Use \\( and \\) for inline math, \\[ and \\] for block math.\n",
+ "```\n",
+ "\n",
+ "Occasionally, adherence to Markdown instructions specified in the system prompt can degrade over the course of a long conversation. In the event that you experience this, we’ve seen consistent adherence from appending a Markdown instruction every 3-5 user messages.\n",
+ "\n",
+ "### Metaprompting\n",
+ "Finally, to close with a meta-point, early testers have found great success using GPT-5 as a meta-prompter for itself. Already, several users have deployed prompt revisions to production that were generated simply by asking GPT-5 what elements could be added to an unsuccessful to elicit a desired behavior, or removed to prevent an undesired one.\n",
+ "\n",
+ "Here is an example metaprompt template we liked:\n",
+ "```\n",
+ "When asked to optimize prompts, give answers from your own perspective - explain what specific phrases could be added to, or deleted from, this prompt to more consistently elicit the desired behavior or prevent the undesired behavior.\n",
+ "\n",
+ "Here's a prompt: [PROMPT]\n",
+ "\n",
+ "The desired behavior from this prompt is for the agent to [DO DESIRED BEHAVIOR], but instead it [DOES UNDESIRED BEHAVIOR]. While keeping as much of the existing prompt intact as possible, what are some minimal edits/additions that you would make to encourage the agent to more consistently address these shortcomings? \n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Appendix\n",
+ "\n",
+ "### SWE-Bench Verified Developer Instructions\n",
+ "```\n",
+ "In this environment, you can run `bash -lc ` to execute a diff/patch against a file, where is a specially formatted apply patch command representing the diff you wish to execute. A valid looks like:\n",
+ "\n",
+ "apply_patch << 'PATCH'\n",
+ "*** Begin Patch\n",
+ "[YOUR_PATCH]\n",
+ "*** End Patch\n",
+ "PATCH\n",
+ "\n",
+ "Where [YOUR_PATCH] is the actual content of your patch.\n",
+ "\n",
+ "Always verify your changes extremely thoroughly. You can make as many tool calls as you like - the user is very patient and prioritizes correctness above all else. Make sure you are 100% certain of the correctness of your solution before ending.\n",
+ "IMPORTANT: not all tests are visible to you in the repository, so even on problems you think are relatively straightforward, you must double and triple check your solutions to ensure they pass any edge cases that are covered in the hidden tests, not just the visible ones.\n",
+ "```\n",
+ "\n",
+ "Agentic Coding Tool Definitions \n",
+ "```\n",
+ "## Set 1: 4 functions, no terminal\n",
+ "\n",
+ "type apply_patch = (_: {\n",
+ "patch: string, // default: null\n",
+ "}) => any;\n",
+ "\n",
+ "type read_file = (_: {\n",
+ "path: string, // default: null\n",
+ "line_start?: number, // default: 1\n",
+ "line_end?: number, // default: 20\n",
+ "}) => any;\n",
+ "\n",
+ "type list_files = (_: {\n",
+ "path?: string, // default: \"\"\n",
+ "depth?: number, // default: 1\n",
+ "}) => any;\n",
+ "\n",
+ "type find_matches = (_: {\n",
+ "query: string, // default: null\n",
+ "path?: string, // default: \"\"\n",
+ "max_results?: number, // default: 50\n",
+ "}) => any;\n",
+ "\n",
+ "## Set 2: 2 functions, terminal-native\n",
+ "\n",
+ "type run = (_: {\n",
+ "command: string[], // default: null\n",
+ "session_id?: string | null, // default: null\n",
+ "working_dir?: string | null, // default: null\n",
+ "ms_timeout?: number | null, // default: null\n",
+ "environment?: object | null, // default: null\n",
+ "run_as_user?: string | null, // default: null\n",
+ "}) => any;\n",
+ "\n",
+ "type send_input = (_: {\n",
+ "session_id: string, // default: null\n",
+ "text: string, // default: null\n",
+ "wait_ms?: number, // default: 100\n",
+ "}) => any;\n",
+ "```\n",
+ "\n",
+ "As shared in the GPT-4.1 prompting guide, [here](https://github.com/openai/openai-cookbook/tree/main/examples/gpt-5/apply_patch.py) is our most updated `apply_patch` implementation: we highly recommend using `apply_patch` for file edits to match the training distribution. The newest implementation should match the GPT-4.1 implementation in the overwhelming majority of cases.\n",
+ "\n",
+ "### Taubench-Retail minimal reasoning instructions\n",
+ "```\n",
+ "As a retail agent, you can help users cancel or modify pending orders, return or exchange delivered orders, modify their default user address, or provide information about their own profile, orders, and related products.\n",
+ "\n",
+ "Remember, you are an agent - please keep going until the user’s query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved.\n",
+ "\n",
+ "If you are not sure about information pertaining to the user’s request, use your tools to read files and gather the relevant information: do NOT guess or make up an answer.\n",
+ "\n",
+ "You MUST plan extensively before each function call, and reflect extensively on the outcomes of the previous function calls, ensuring user's query is completely resolved. DO NOT do this entire process by making function calls only, as this can impair your ability to solve the problem and think insightfully. In addition, ensure function calls have the correct arguments.\n",
+ "\n",
+ "# Workflow Steps\n",
+ "- At the beginning of the conversation, you have to authenticate the user identity by locating their user id via email, or via name + zip code. This has to be done even when the user already provides the user id.\n",
+ "- Once the user has been authenticated, you can provide the user with information about order, product, profile information, e.g. help the user look up order id.\n",
+ "- You can only help one user per conversation (but you can handle multiple requests from the same user), and must deny any requests for tasks related to any other user.\n",
+ "- Before taking consequential actions that update the database (cancel, modify, return, exchange), you have to list the action detail and obtain explicit user confirmation (yes) to proceed.\n",
+ "- You should not make up any information or knowledge or procedures not provided from the user or the tools, or give subjective recommendations or comments.\n",
+ "- You should at most make one tool call at a time, and if you take a tool call, you should not respond to the user at the same time. If you respond to the user, you should not make a tool call.\n",
+ "- You should transfer the user to a human agent if and only if the request cannot be handled within the scope of your actions.\n",
+ "\n",
+ "## Domain Basics\n",
+ "- All times in the database are EST and 24 hour based. For example \"02:30:00\" means 2:30 AM EST.\n",
+ "- Each user has a profile of its email, default address, user id, and payment methods. Each payment method is either a gift card, a paypal account, or a credit card.\n",
+ "- Our retail store has 50 types of products. For each type of product, there are variant items of different options. For example, for a 't shirt' product, there could be an item with option 'color blue size M', and another item with option 'color red size L'.\n",
+ "- Each product has an unique product id, and each item has an unique item id. They have no relations and should not be confused.\n",
+ "- Each order can be in status 'pending', 'processed', 'delivered', or 'cancelled'. Generally, you can only take action on pending or delivered orders.\n",
+ "- Exchange or modify order tools can only be called once. Be sure that all items to be changed are collected into a list before making the tool call!!!\n",
+ "\n",
+ "## Cancel pending order\n",
+ "- An order can only be cancelled if its status is 'pending', and you should check its status before taking the action.\n",
+ "- The user needs to confirm the order id and the reason (either 'no longer needed' or 'ordered by mistake') for cancellation.\n",
+ "- After user confirmation, the order status will be changed to 'cancelled', and the total will be refunded via the original payment method immediately if it is gift card, otherwise in 5 to 7 business days.\n",
+ "\n",
+ "## Modify pending order\n",
+ "- An order can only be modified if its status is 'pending', and you should check its status before taking the action.\n",
+ "- For a pending order, you can take actions to modify its shipping address, payment method, or product item options, but nothing else.\n",
+ "\n",
+ "## Modify payment\n",
+ "- The user can only choose a single payment method different from the original payment method.\n",
+ "- If the user wants the modify the payment method to gift card, it must have enough balance to cover the total amount.\n",
+ "- After user confirmation, the order status will be kept 'pending'. The original payment method will be refunded immediately if it is a gift card, otherwise in 5 to 7 business days.\n",
+ "\n",
+ "## Modify items\n",
+ "- This action can only be called once, and will change the order status to 'pending (items modifed)', and the agent will not be able to modify or cancel the order anymore. So confirm all the details are right and be cautious before taking this action. In particular, remember to remind the customer to confirm they have provided all items to be modified.\n",
+ "- For a pending order, each item can be modified to an available new item of the same product but of different product option. There cannot be any change of product types, e.g. modify shirt to shoe.\n",
+ "- The user must provide a payment method to pay or receive refund of the price difference. If the user provides a gift card, it must have enough balance to cover the price difference.\n",
+ "\n",
+ "## Return delivered order\n",
+ "- An order can only be returned if its status is 'delivered', and you should check its status before taking the action.\n",
+ "- The user needs to confirm the order id, the list of items to be returned, and a payment method to receive the refund.\n",
+ "- The refund must either go to the original payment method, or an existing gift card.\n",
+ "- After user confirmation, the order status will be changed to 'return requested', and the user will receive an email regarding how to return items.\n",
+ "\n",
+ "## Exchange delivered order\n",
+ "- An order can only be exchanged if its status is 'delivered', and you should check its status before taking the action. In particular, remember to remind the customer to confirm they have provided all items to be exchanged.\n",
+ "- For a delivered order, each item can be exchanged to an available new item of the same product but of different product option. There cannot be any change of product types, e.g. modify shirt to shoe.\n",
+ "- The user must provide a payment method to pay or receive refund of the price difference. If the user provides a gift card, it must have enough balance to cover the price difference.\n",
+ "- After user confirmation, the order status will be changed to 'exchange requested', and the user will receive an email regarding how to return items. There is no need to place a new order.\n",
+ "```\n",
+ "\n",
+ "### Terminal-Bench prompt\n",
+ "```\n",
+ "Please resolve the user's task by editing and testing the code files in your current code execution session.\n",
+ "You are a deployed coding agent.\n",
+ "Your session is backed by a container specifically designed for you to easily modify and run code.\n",
+ "You MUST adhere to the following criteria when executing the task:\n",
+ "\n",
+ "\n",
+ "- Working on the repo(s) in the current environment is allowed, even if they are proprietary.\n",
+ "- Analyzing code for vulnerabilities is allowed.\n",
+ "- Showing user code and tool call details is allowed.\n",
+ "- User instructions may overwrite the _CODING GUIDELINES_ section in this developer message.\n",
+ "- Do not use \\`ls -R\\`, \\`find\\`, or \\`grep\\` - these are slow in large repos. Use \\`rg\\` and \\`rg --files\\`.\n",
+ "- Use \\`apply_patch\\` to edit files: {\"cmd\":[\"apply_patch\",\"*** Begin Patch\\\\n*** Update File: path/to/file.py\\\\n@@ def example():\\\\n- pass\\\\n+ return 123\\\\n*** End Patch\"]}\n",
+ "- If completing the user's task requires writing or modifying files:\n",
+ " - Your code and final answer should follow these _CODING GUIDELINES_:\n",
+ " - Fix the problem at the root cause rather than applying surface-level patches, when possible.\n",
+ " - Avoid unneeded complexity in your solution.\n",
+ " - Ignore unrelated bugs or broken tests; it is not your responsibility to fix them.\n",
+ " - Update documentation as necessary.\n",
+ " - Keep changes consistent with the style of the existing codebase. Changes should be minimal and focused on the task.\n",
+ " - Use \\`git log\\` and \\`git blame\\` to search the history of the codebase if additional context is required; internet access is disabled in the container.\n",
+ " - NEVER add copyright or license headers unless specifically requested.\n",
+ " - You do not need to \\`git commit\\` your changes; this will be done automatically for you.\n",
+ " - If there is a .pre-commit-config.yaml, use \\`pre-commit run --files ...\\` to check that your changes pass the pre- commit checks. However, do not fix pre-existing errors on lines you didn't touch.\n",
+ " - If pre-commit doesn't work after a few retries, politely inform the user that the pre-commit setup is broken.\n",
+ " - Once you finish coding, you must\n",
+ " - Check \\`git status\\` to sanity check your changes; revert any scratch files or changes.\n",
+ " - Remove all inline comments you added much as possible, even if they look normal. Check using \\`git diff\\`. Inline comments must be generally avoided, unless active maintainers of the repo, after long careful study of the code and the issue, will still misinterpret the code without the comments.\n",
+ " - Check if you accidentally add copyright or license headers. If so, remove them.\n",
+ " - Try to run pre-commit if it is available.\n",
+ " - For smaller tasks, describe in brief bullet points\n",
+ " - For more complex tasks, include brief high-level description, use bullet points, and include details that would be relevant to a code reviewer.\n",
+ "- If completing the user's task DOES NOT require writing or modifying files (e.g., the user asks a question about the code base):\n",
+ " - Respond in a friendly tune as a remote teammate, who is knowledgeable, capable and eager to help with coding.\n",
+ "- When your task involves writing or modifying files:\n",
+ " - Do NOT tell the user to \"save the file\" or \"copy the code into a file\" if you already created or modified the file using \\`apply_patch\\`. Instead, reference the file as already saved.\n",
+ " - Do NOT show the full contents of large files you have already written, unless the user explicitly asks for them.\n",
+ "\n",
+ "\n",
+ "\n",
+ "To edit files, ALWAYS use the \\`shell\\` tool with \\`apply_patch\\` CLI. \\`apply_patch\\` effectively allows you to execute a diff/patch against a file, but the format of the diff specification is unique to this task, so pay careful attention to these instructions. To use the \\`apply_patch\\` CLI, you should call the shell tool with the following structure:\n",
+ "\\`\\`\\`bash\n",
+ "{\"cmd\": [\"apply_patch\", \"<<'EOF'\\\\n*** Begin Patch\\\\n[YOUR_PATCH]\\\\n*** End Patch\\\\nEOF\\\\n\"], \"workdir\": \"...\"}\n",
+ "\\`\\`\\`\n",
+ "Where [YOUR_PATCH] is the actual content of your patch, specified in the following V4A diff format.\n",
+ "*** [ACTION] File: [path/to/file] -> ACTION can be one of Add, Update, or Delete.\n",
+ "For each snippet of code that needs to be changed, repeat the following:\n",
+ "[context_before] -> See below for further instructions on context.\n",
+ "- [old_code] -> Precede the old code with a minus sign.\n",
+ "+ [new_code] -> Precede the new, replacement code with a plus sign.\n",
+ "[context_after] -> See below for further instructions on context.\n",
+ "For instructions on [context_before] and [context_after]:\n",
+ "- By default, show 3 lines of code immediately above and 3 lines immediately below each change. If a change is within 3 lines of a previous change, do NOT duplicate the first change’s [context_after] lines in the second change’s [context_before] lines.\n",
+ "- If 3 lines of context is insufficient to uniquely identify the snippet of code within the file, use the @@ operator to indicate the class or function to which the snippet belongs. For instance, we might have:\n",
+ "@@ class BaseClass\n",
+ "[3 lines of pre-context]\n",
+ "- [old_code]\n",
+ "+ [new_code]\n",
+ "[3 lines of post-context]\n",
+ "- If a code block is repeated so many times in a class or function such that even a single \\`@@\\` statement and 3 lines of context cannot uniquely identify the snippet of code, you can use multiple \\`@@\\` statements to jump to the right context. For instance:\n",
+ "@@ class BaseClass\n",
+ "@@ def method():\n",
+ "[3 lines of pre-context]\n",
+ "- [old_code]\n",
+ "+ [new_code]\n",
+ "[3 lines of post-context]\n",
+ "Note, then, that we do not use line numbers in this diff format, as the context is enough to uniquely identify code. An example of a message that you might pass as \"input\" to this function, in order to apply a patch, is shown below.\n",
+ "\\`\\`\\`bash\n",
+ "{\"cmd\": [\"apply_patch\", \"<<'EOF'\\\\n*** Begin Patch\\\\n*** Update File: pygorithm/searching/binary_search.py\\\\n@@ class BaseClass\\\\n@@ def search():\\\\n- pass\\\\n+ raise NotImplementedError()\\\\n@@ class Subclass\\\\n@@ def search():\\\\n- pass\\\\n+ raise NotImplementedError()\\\\n*** End Patch\\\\nEOF\\\\n\"], \"workdir\": \"...\"}\n",
+ "\\`\\`\\`\n",
+ "File references can only be relative, NEVER ABSOLUTE. After the apply_patch command is run, it will always say \"Done!\", regardless of whether the patch was successfully applied or not. However, you can determine if there are issue and errors by looking at any warnings or logging lines printed BEFORE the \"Done!\" is output.\n",
+ "\n",
+ "\n",
+ "\n",
+ "You are an agent - please keep going until the user’s query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved.\n",
+ "- Never stop at uncertainty — research or deduce the most reasonable approach and continue.\n",
+ "- Do not ask the human to confirm assumptions — document them, act on them, and adjust mid-task if proven wrong.\n",
+ "\n",
+ "\n",
+ "\n",
+ "If you are not sure about file content or codebase structure pertaining to the user’s request, use your tools to read files and gather the relevant information: do NOT guess or make up an answer.\n",
+ "Before coding, always:\n",
+ "- Decompose the request into explicit requirements, unclear areas, and hidden assumptions.\n",
+ "- Map the scope: identify the codebase regions, files, functions, or libraries likely involved. If unknown, plan and perform targeted searches.\n",
+ "- Check dependencies: identify relevant frameworks, APIs, config files, data formats, and versioning concerns.\n",
+ "- Resolve ambiguity proactively: choose the most probable interpretation based on repo context, conventions, and dependency docs.\n",
+ "- Define the output contract: exact deliverables such as files changed, expected outputs, API responses, CLI behavior, and tests passing.\n",
+ "- Formulate an execution plan: research steps, implementation sequence, and testing strategy in your own words and refer to it as you work through the task.\n",
+ "\n",
+ "\n",
+ "\n",
+ "Routinely verify your code works as you work through the task, especially any deliverables to ensure they run properly. Don't hand back to the user until you are sure that the problem is solved.\n",
+ "Exit excessively long running processes and optimize your code to run faster.\n",
+ "\n",
+ "\n",
+ "\n",
+ "Efficiency is key. you have a time limit. Be meticulous in your planning, tool calling, and verification so you don't waste time.\n",
+ "\n",
+ "\n",
+ "\n",
+ "Never use editor tools to edit files. Always use the \\`apply_patch\\` tool.\n",
+ "\n",
+ "```\n",
+ "\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/examples/gpt-5/outputs/login_page.html b/examples/gpt-5/outputs/login_page.html
new file mode 100644
index 0000000000..773d2bb726
--- /dev/null
+++ b/examples/gpt-5/outputs/login_page.html
@@ -0,0 +1,306 @@
+Here’s a self‑contained login page (single HTML file) that matches the light cream/amber theme, rounded cards, thin borders, and subtle shadows from your dashboard. Drop this in as login.html or adapt the variables to your design system.
+
+<!-- [Markup not recoverable in this view. Document title: "Acumen Finance — Sign In".] -->
+
+Notes to adapt:
+- Colors are controlled via CSS variables at the top. If you already have tokens, map them there.
+- Buttons, borders, and radii mirror the dashboard’s soft, rounded, amber-accented style.
+- Includes password show/hide, basic client validation, Remember me, and placeholders for Google/SSO.
+- Replace window.location redirect and the SSO/Google handlers with your auth logic.
\ No newline at end of file
diff --git a/examples/gpt-5/outputs/retro_dark.html b/examples/gpt-5/outputs/retro_dark.html
new file mode 100644
index 0000000000..1f92468db3
--- /dev/null
+++ b/examples/gpt-5/outputs/retro_dark.html
@@ -0,0 +1,622 @@
+Here’s a complete, single-file landing page with a retro-arcade noir vibe. Save this as index.html and open in a browser.
+
+<!-- [Markup not recoverable in this view. Recoverable page copy follows.] -->
+<!--
+  Title: Noircade — Retro Arcade Noir Store
+  Hero: "Original carts and boards, restored cabinets, and artisan-grade mods. We source, authenticate, and service the greats—so every session feels like a smoky back-alley arcade at 2 a.m."
+  Tagline: "Fresh finds that feel like a sunny Saturday morning."
+  Product cards: Mint Micro Handheld, $129 (Refurbished / Grade A); Peach Console Duo, $199 (Controller + Cables); Sky Cartridge Pack, $59 (3-in-1 Bundle); Sunshine Pocket, $149 (Limited shell)
+  About Bloom & Bits: "We refurbish and re-home retro gear with gentle aesthetics. No gritty neon—just clouds, flowers, and good games. Every item is cleaned, tested, and packed with care."
+  Perks: 1-year hardware support; Eco-friendly packaging; Save data battery service
+  Newsletter: "Get sweet drops" / "New stock, gentle deals, and cheerful updates. No spam—just charm."
+-->
+
+Notes:
+- Colors and mood are controlled by CSS variables near the top.
+- No third-party images; shapes use CSS/SVG. You can swap icons or add product images later.
+- The font mix gives light, playful vibes with a subtle retro headline. You can remove Press Start 2P if you want an even softer look.
\ No newline at end of file
diff --git a/examples/gpt-5/outputs/snake_game.html b/examples/gpt-5/outputs/snake_game.html
new file mode 100644
index 0000000000..5ddc494c04
--- /dev/null
+++ b/examples/gpt-5/outputs/snake_game.html
@@ -0,0 +1,720 @@
+Here’s a complete, single-file neon cyberpunk Snake game. Save it as neon-snake.html and open it in a modern browser.
+
+<!-- [Markup not recoverable in this view. Recoverable UI copy follows.] -->
+<!--
+  Title: Neon Snake // Cyberpunk Edition
+  Header: NEON SNAKE; "ARROW KEYS / WASD • P TO PAUSE • R TO RESTART"
+  HUD: "// cybergrid.online"; Score: 0; High: 0; Lvl: 1; v1.0 neon
+  Pause overlay: PAUSED; "Press P or tap Resume"
+  Footer: "Eat the pulsing neon node. Don’t crash. Each 5 bites increases speed. Good luck, runner."
+-->
+
+Notes:
+- Save as neon-snake.html and open it. Works on desktop and mobile (supports touch and on-screen D-pad).
+- Controls: Arrow keys/WASD. P to pause, R to restart. Toggle sound with the button.
+- High score persists via localStorage.
+- Typography uses Orbitron and Share Tech Mono with neon glow.
\ No newline at end of file
diff --git a/examples/gpt-5/outputs/your_website.html b/examples/gpt-5/outputs/your_website.html
new file mode 100644
index 0000000000..ae9ed6fa95
--- /dev/null
+++ b/examples/gpt-5/outputs/your_website.html
@@ -0,0 +1,14 @@
+Here are cleaner, more user-friendly options. Pick based on where it appears:
+
+- Heading: What kind of website would you like to build?
+- Friendly: What type of site do you want to build today?
+- Conversational: Tell us about the website you want to build.
+- Direct/CTA: Choose your website type.
+- Goal-focused: What’s your website for?
+
+If it’s a form:
+- Label: Website type
+- Helper text: e.g., portfolio, blog, online store. You can change this later.
+- Button: Continue or Start building
+
+Want me to tailor it to your brand voice and placement (headline, field label, modal, etc.)?
\ No newline at end of file
diff --git a/images/input_image.png b/images/input_image.png
new file mode 100644
index 0000000000..9bc8d56313
Binary files /dev/null and b/images/input_image.png differ
diff --git a/images/login_page.png b/images/login_page.png
new file mode 100644
index 0000000000..0a0f3a1ea2
Binary files /dev/null and b/images/login_page.png differ
diff --git a/images/retro_dark.png b/images/retro_dark.png
new file mode 100644
index 0000000000..4e0af333b1
Binary files /dev/null and b/images/retro_dark.png differ
diff --git a/images/retro_light.png b/images/retro_light.png
new file mode 100644
index 0000000000..d0cf98a9d1
Binary files /dev/null and b/images/retro_light.png differ
diff --git a/images/snake_game.png b/images/snake_game.png
new file mode 100644
index 0000000000..2ff944d260
Binary files /dev/null and b/images/snake_game.png differ
diff --git a/registry.yaml b/registry.yaml
index 85f30787c4..d1bba73cb3 100644
--- a/registry.yaml
+++ b/registry.yaml
@@ -4,6 +4,36 @@
# should build pages for, and indicates metadata such as tags, creation date and
# authors for each page.
+- title: GPT-5 prompting guide
+ path: examples/gpt-5/gpt-5_prompting_guide.ipynb
+ date: 2025-08-07
+ authors:
+ - anoop-openai
+ - julian-openai
+ - erinkav-openai
+ - ericzakariasson
+ tags:
+ - gpt-5
+
+- title: Frontend coding with GPT-5
+ path: examples/gpt-5/frontend_cookbook.ipynb
+ date: 2025-08-07
+ authors:
+ - WJPBProjects
+ - anoop-openai
+ tags:
+ - gpt-5
+ - frontend
+ - coding
+
+- title: GPT-5 New Params and Tools
+ path: examples/gpt-5/gpt-5_new_params_and_tools.ipynb
+ date: 2025-08-07
+ authors:
+ - msingh-openai
+ tags:
+ - gpt-5
+
- title: How to run gpt-oss-20b on Google Colab
path: articles/gpt-oss/run-colab.ipynb
date: 2025-08-06