diff --git a/assess.py b/assess.py index ef8e838..becf817 100644 --- a/assess.py +++ b/assess.py @@ -157,8 +157,6 @@ def slugify(value): "OpenAI O4 Mini (High Reasoning)": "https://openai.com/favicon.ico", "OpenAI O3 (High Reasoning)": "https://openai.com/favicon.ico", "OpenAI O3 (Medium Reasoning)": "https://openai.com/favicon.ico", - "ChatGPT-4o (Medium Reasoning)": "https://openai.com/favicon.ico", - "ChatGPT-4o (High Reasoning)": "https://openai.com/favicon.ico", "OpenAI O1 Pro": "https://openai.com/favicon.ico", "GPT-4.1 Mini": "https://openai.com/favicon.ico", "GPT-4.1 Nano": "https://openai.com/favicon.ico", @@ -261,6 +259,8 @@ def run_model_with_prompt(model_name, model, assessment): with open("./model_results.json", "r") as file: final_results = orjson.loads(file.read()) + # Legacy normalization removed per request; load raw results as-is. + model_providers = { "OpenAI O4 Mini": "", "GPT-4.1": "", @@ -320,8 +320,8 @@ def run_model_with_prompt(model_name, model, assessment): "OpenAI O4 Mini (Medium Reasoning)": OpenAIModel(model_id="o4-mini"), "OpenAI O3 (Medium Reasoning)": OpenAIModel(model_id="o3"), "GPT-4.1": OpenAIModel(model_id="gpt-4.1"), - "ChatGPT-4o (Medium Reasoning)": OpenAIModel(model_id="chatgpt-4o-latest"), - "ChatGPT-4o (High Reasoning)": OpenAIModel(model_id="chatgpt-4o-latest", reasoning_effort="high"), + # 4o is not a reasoning model; expose a single canonical name without reasoning level. + "ChatGPT-4o": OpenAIModel(model_id="chatgpt-4o-latest"), "GPT-4.1 Mini": OpenAIModel(model_id="gpt-4.1-mini"), "GPT-4.1 Nano": OpenAIModel(model_id="gpt-4.1-nano"), "OpenAI O1": OpenAIModel(model_id="o1"), @@ -1041,4 +1041,4 @@ def on_created(self, event): except KeyboardInterrupt: observer.stop() print("Stopping observer...") - observer.join() \ No newline at end of file + observer.join() diff --git a/models/openai.py b/models/openai.py index 41b22ee..c611545 100644 --- a/models/openai.py +++ b/models/openai.py @@ -5,7 +5,33 @@ from .model import Model OPENAI_TEMPERATURE = 0.1 -SKIP_TEMPERATURE = ["gpt-5-2025-08-07", "o4-mini", "chatgpt-4o-latest", "o3", "o1", "o3-pro", "o4-mini-high", "gpt-5-mini", "gpt-5-nano", "gpt-5-chat", "gpt-5"] +SKIP_TEMPERATURE = [ + "gpt-5-2025-08-07", + "o4-mini", + "chatgpt-4o-latest", + "o3", + "o1", + "o3-pro", + "o4-mini-high", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-chat", + "gpt-5", +] + +# Models that support OpenAI "reasoning_effort" parameter. +# Includes O-series and GPT-5 family. +ALLOWED_REASONING_MODELS = { + "o3", + "o4-mini", + "o1", + "o3-pro", + "gpt-5-2025-08-07", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-chat", + "gpt-5", +} class OpenAIModel(Model): @@ -25,33 +51,32 @@ def run( image_dtype = "image/" + image_name.split(".")[-1].replace("jpg", "jpeg") if structured_output_format: try: - return ( - self.client.beta.chat.completions.parse( - model=self.model_id, - messages=[ - { - "role": "user", - "content": [ - {"type": "text", "text": prompt}, - { - "type": "image_url", - "image_url": { - "url": f"data:{image_dtype};base64,{base64.b64encode(image).decode()}" - }, + kwargs = { + "model": self.model_id, + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": prompt}, + { + "type": "image_url", + "image_url": { + "url": f"data:{image_dtype};base64,{base64.b64encode(image).decode()}" }, - ], - }, - ], - temperature=( - OPENAI_TEMPERATURE - if self.model_id not in SKIP_TEMPERATURE - else 1 - ), - reasoning_effort=reasoning_effort, - response_format=structured_output_format, - ) - .choices[0] - .message.parsed + }, + ], + } + ], + "temperature": ( + OPENAI_TEMPERATURE if self.model_id not in SKIP_TEMPERATURE else 1 + ), + "response_format": structured_output_format, + } + if self.model_id in ALLOWED_REASONING_MODELS: + kwargs["reasoning_effort"] = reasoning_effort + + return ( + self.client.beta.chat.completions.parse(**kwargs).choices[0].message.parsed or {} ) except BadRequestError as e: @@ -59,25 +84,30 @@ def run( pass # if reasoning_effort != "medium": - completion = self.client.chat.completions.create( - model=self.model_id, - messages=[ + kwargs = { + "model": self.model_id, + "messages": [ { "role": "user", "content": [ {"type": "text", "text": prompt}, { "type": "image_url", - "image_url": {"url": f"data:{image_dtype};base64,{base64.b64encode(image).decode()}"} + "image_url": { + "url": f"data:{image_dtype};base64,{base64.b64encode(image).decode()}" + }, }, ], - }, + } ], - reasoning_effort=reasoning_effort, - temperature=( + "temperature": ( OPENAI_TEMPERATURE if self.model_id not in SKIP_TEMPERATURE else 1 ), - ) + } + if self.model_id in ALLOWED_REASONING_MODELS: + kwargs["reasoning_effort"] = reasoning_effort + + completion = self.client.chat.completions.create(**kwargs) return completion.choices[0].message.content # else: