
Commit b3c3f9c

Merge branch 'dev' into kpczerwinski/secrt-1731-builder-search-history

2 parents 06d20e7 + 1851264


58 files changed: +3102 additions, −544 deletions


.github/workflows/claude-dependabot.yml

Lines changed: 1 addition & 1 deletion
@@ -80,7 +80,7 @@ jobs:
       - name: Set up Node.js
         uses: actions/setup-node@v4
         with:
-          node-version: "21"
+          node-version: "22"

       - name: Enable corepack
         run: corepack enable

.github/workflows/claude.yml

Lines changed: 1 addition & 1 deletion
@@ -90,7 +90,7 @@ jobs:
       - name: Set up Node.js
         uses: actions/setup-node@v4
         with:
-          node-version: "21"
+          node-version: "22"

       - name: Enable corepack
         run: corepack enable

.github/workflows/copilot-setup-steps.yml

Lines changed: 2 additions & 2 deletions
@@ -78,7 +78,7 @@ jobs:
       - name: Set up Node.js
         uses: actions/setup-node@v4
         with:
-          node-version: "21"
+          node-version: "22"

       - name: Enable corepack
         run: corepack enable
@@ -299,4 +299,4 @@ jobs:
           echo "✅ AutoGPT Platform development environment setup complete!"
           echo "🚀 Ready for development with Docker services running"
           echo "📝 Backend server: poetry run serve (port 8000)"
-          echo "🌐 Frontend server: pnpm dev (port 3000)"
+          echo "🌐 Frontend server: pnpm dev (port 3000)"

autogpt_platform/backend/backend/blocks/ai_image_customizer.py

Lines changed: 1 addition & 0 deletions
@@ -24,6 +24,7 @@

 class GeminiImageModel(str, Enum):
     NANO_BANANA = "google/nano-banana"
+    NANO_BANANA_PRO = "google/nano-banana-pro"


 class OutputFormat(str, Enum):

autogpt_platform/backend/backend/blocks/ai_image_generator_block.py

Lines changed: 23 additions & 0 deletions
@@ -60,6 +60,14 @@ class ImageSize(str, Enum):
     ImageSize.TALL: "1024x1536",
 }

+SIZE_TO_NANO_BANANA_RATIO = {
+    ImageSize.SQUARE: "1:1",
+    ImageSize.LANDSCAPE: "4:3",
+    ImageSize.PORTRAIT: "3:4",
+    ImageSize.WIDE: "16:9",
+    ImageSize.TALL: "9:16",
+}
+

 class ImageStyle(str, Enum):
     """
@@ -98,6 +106,7 @@ class ImageGenModel(str, Enum):
     FLUX_ULTRA = "Flux 1.1 Pro Ultra"
     RECRAFT = "Recraft v3"
     SD3_5 = "Stable Diffusion 3.5 Medium"
+    NANO_BANANA_PRO = "Nano Banana Pro"


 class AIImageGeneratorBlock(Block):
@@ -261,6 +270,20 @@ async def generate_image(self, input_data: Input, credentials: APIKeyCredentials
             )
             return output

+        elif input_data.model == ImageGenModel.NANO_BANANA_PRO:
+            # Use Nano Banana Pro (Google Gemini 3 Pro Image)
+            input_params = {
+                "prompt": modified_prompt,
+                "aspect_ratio": SIZE_TO_NANO_BANANA_RATIO[input_data.size],
+                "resolution": "2K",  # Default to 2K for good quality/cost balance
+                "output_format": "jpg",
+                "safety_filter_level": "block_only_high",  # Most permissive
+            }
+            output = await self._run_client(
+                credentials, "google/nano-banana-pro", input_params
+            )
+            return output
+
         except Exception as e:
             raise RuntimeError(f"Failed to generate image: {str(e)}")
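Note: the Nano Banana Pro branch above routes the request through the block's internal _run_client helper. As a minimal standalone sketch of the equivalent request, assuming the public replicate Python client (this snippet is illustrative and not part of the commit):

# Minimal sketch, assuming the public `replicate` client; the block itself
# goes through its internal _run_client helper instead.
import replicate

output = replicate.run(
    "google/nano-banana-pro",
    input={
        "prompt": "A lighthouse at dusk, cinematic",
        "aspect_ratio": "16:9",  # e.g. SIZE_TO_NANO_BANANA_RATIO[ImageSize.WIDE]
        "resolution": "2K",  # the block's default quality/cost balance
        "output_format": "jpg",
        "safety_filter_level": "block_only_high",
    },
)
print(output)  # URL(s) of the generated image

The aspect_ratio value corresponds to the SIZE_TO_NANO_BANANA_RATIO mapping introduced in the first hunk.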

Lines changed: 224 additions & 0 deletions
@@ -0,0 +1,224 @@
from dataclasses import dataclass
from enum import Enum
from typing import Any, Literal

from openai import AsyncOpenAI
from openai.types.responses import Response as OpenAIResponse
from pydantic import SecretStr

from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
    BlockSchemaInput,
    BlockSchemaOutput,
)
from backend.data.model import (
    APIKeyCredentials,
    CredentialsField,
    CredentialsMetaInput,
    NodeExecutionStats,
    SchemaField,
)
from backend.integrations.providers import ProviderName


@dataclass
class CodexCallResult:
    """Structured response returned by Codex invocations."""

    response: str
    reasoning: str
    response_id: str


class CodexModel(str, Enum):
    """Codex-capable OpenAI models."""

    GPT5_1_CODEX = "gpt-5.1-codex"


class CodexReasoningEffort(str, Enum):
    """Configuration for the Responses API reasoning effort."""

    NONE = "none"
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"


CodexCredentials = CredentialsMetaInput[
    Literal[ProviderName.OPENAI], Literal["api_key"]
]

TEST_CREDENTIALS = APIKeyCredentials(
    id="e2fcb203-3f2d-4ad4-a344-8df3bc7db36b",
    provider="openai",
    api_key=SecretStr("mock-openai-api-key"),
    title="Mock OpenAI API key",
    expires_at=None,
)
TEST_CREDENTIALS_INPUT = {
    "provider": TEST_CREDENTIALS.provider,
    "id": TEST_CREDENTIALS.id,
    "type": TEST_CREDENTIALS.type,
    "title": TEST_CREDENTIALS.title,
}


def CodexCredentialsField() -> CodexCredentials:
    return CredentialsField(
        description="OpenAI API key with access to Codex models (Responses API).",
    )


class CodeGenerationBlock(Block):
    """Block that talks to Codex models via the OpenAI Responses API."""

    class Input(BlockSchemaInput):
        prompt: str = SchemaField(
            description="Primary coding request passed to the Codex model.",
            placeholder="Generate a Python function that reverses a list.",
        )
        system_prompt: str = SchemaField(
            title="System Prompt",
            default=(
                "You are Codex, an elite software engineer. "
                "Favor concise, working code and highlight important caveats."
            ),
            description="Optional instructions injected via the Responses API instructions field.",
            advanced=True,
        )
        model: CodexModel = SchemaField(
            title="Codex Model",
            default=CodexModel.GPT5_1_CODEX,
            description="Codex-optimized model served via the Responses API.",
            advanced=False,
        )
        reasoning_effort: CodexReasoningEffort = SchemaField(
            title="Reasoning Effort",
            default=CodexReasoningEffort.MEDIUM,
            description="Controls the Responses API reasoning budget. Select 'none' to skip reasoning configs.",
            advanced=True,
        )
        max_output_tokens: int | None = SchemaField(
            title="Max Output Tokens",
            default=2048,
            description="Upper bound for generated tokens (hard limit 128,000). Leave blank to let OpenAI decide.",
            advanced=True,
        )
        credentials: CodexCredentials = CodexCredentialsField()

    class Output(BlockSchemaOutput):
        response: str = SchemaField(
            description="Code-focused response returned by the Codex model."
        )
        reasoning: str = SchemaField(
            description="Reasoning summary returned by the model, if available.",
            default="",
        )
        response_id: str = SchemaField(
            description="ID of the Responses API call for auditing/debugging.",
            default="",
        )

    def __init__(self):
        super().__init__(
            id="86a2a099-30df-47b4-b7e4-34ae5f83e0d5",
            description="Generate or refactor code using OpenAI's Codex (Responses API).",
            categories={BlockCategory.AI, BlockCategory.DEVELOPER_TOOLS},
            input_schema=CodeGenerationBlock.Input,
            output_schema=CodeGenerationBlock.Output,
            test_input=[
                {
                    "prompt": "Write a TypeScript function that deduplicates an array.",
                    "credentials": TEST_CREDENTIALS_INPUT,
                }
            ],
            test_output=[
                ("response", str),
                ("reasoning", str),
                ("response_id", str),
            ],
            test_mock={
                "call_codex": lambda *_args, **_kwargs: CodexCallResult(
                    response="function dedupe<T>(items: T[]): T[] { return [...new Set(items)]; }",
                    reasoning="Used Set to remove duplicates in O(n).",
                    response_id="resp_test",
                )
            },
            test_credentials=TEST_CREDENTIALS,
        )
        self.execution_stats = NodeExecutionStats()

    async def call_codex(
        self,
        *,
        credentials: APIKeyCredentials,
        model: CodexModel,
        prompt: str,
        system_prompt: str,
        max_output_tokens: int | None,
        reasoning_effort: CodexReasoningEffort,
    ) -> CodexCallResult:
        """Invoke the OpenAI Responses API."""
        client = AsyncOpenAI(api_key=credentials.api_key.get_secret_value())

        request_payload: dict[str, Any] = {
            "model": model.value,
            "input": prompt,
        }
        if system_prompt:
            request_payload["instructions"] = system_prompt
        if max_output_tokens is not None:
            request_payload["max_output_tokens"] = max_output_tokens
        if reasoning_effort != CodexReasoningEffort.NONE:
            request_payload["reasoning"] = {"effort": reasoning_effort.value}

        response = await client.responses.create(**request_payload)
        if not isinstance(response, OpenAIResponse):
            raise TypeError(f"Expected OpenAIResponse, got {type(response).__name__}")

        # Extract data directly from typed response
        text_output = response.output_text or ""
        reasoning_summary = (
            str(response.reasoning.summary)
            if response.reasoning and response.reasoning.summary
            else ""
        )
        response_id = response.id or ""

        # Update usage stats
        self.execution_stats.input_token_count = (
            response.usage.input_tokens if response.usage else 0
        )
        self.execution_stats.output_token_count = (
            response.usage.output_tokens if response.usage else 0
        )
        self.execution_stats.llm_call_count += 1

        return CodexCallResult(
            response=text_output,
            reasoning=reasoning_summary,
            response_id=response_id,
        )

    async def run(
        self,
        input_data: Input,
        *,
        credentials: APIKeyCredentials,
        **_kwargs,
    ) -> BlockOutput:
        result = await self.call_codex(
            credentials=credentials,
            model=input_data.model,
            prompt=input_data.prompt,
            system_prompt=input_data.system_prompt,
            max_output_tokens=input_data.max_output_tokens,
            reasoning_effort=input_data.reasoning_effort,
        )

        yield "response", result.response
        yield "reasoning", result.reasoning
        yield "response_id", result.response_id
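For reference, a minimal standalone sketch of the Responses API request that call_codex assembles, assuming openai>=1.x with the Responses API (this snippet is illustrative, not part of the commit):

# Hypothetical standalone sketch of the request call_codex builds;
# assumes openai>=1.x with the Responses API.
import asyncio
from openai import AsyncOpenAI

async def main() -> None:
    client = AsyncOpenAI(api_key="sk-...")  # real key required
    response = await client.responses.create(
        model="gpt-5.1-codex",
        input="Write a TypeScript function that deduplicates an array.",
        instructions="You are Codex, an elite software engineer.",
        max_output_tokens=2048,
        reasoning={"effort": "medium"},  # omitted when effort is "none"
    )
    print(response.output_text)  # the "response" output of the block
    print(response.id)           # the "response_id" output of the block

asyncio.run(main())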
