Commit 78a2178

test: refactor pod/job into abstractions

Signed-off-by: vsoch <[email protected]>
1 parent 7987320 commit 78a2178
16 files changed: +808 -501 lines changed
examples/agent/README.md
Lines changed: 6 additions & 0 deletions

@@ -72,11 +72,17 @@ We haven't hit the case yet where the manager needs to take over - that needs fu
 #### To do items

 - Figure out optimization agent (with some goal)
+- Right now when we restart, we do so with a fresh slate (no log memory) - should there be?
+- We likely want some way to quantify the amount of change between prompts, and the difficulty of the task.
+- I think that when we return to the manager, the last response (which might say why it is returning) should inform step selection. But not just step selection - also the updated prompt to the step that is missing something.
+- Right now we rely on random sampling of the space to avoid whatever the issue might be.

 #### Research Questions

 **And experiment ideas**

+- Why does it make the same mistakes? E.g., always forgetting ca-certificates. Did it learn from data where that was OK to do, such that errors result from inconsistencies between the way things used to work and the way they do now?
+- Insight: if I don't know how to run an app, it's unlikely the LLM can do it, because I can't give any guidance (and it guesses)
 - How do we define stability?
 - What are the increments of change (e.g., "adding a library")? We should be able to keep track of times for each stage and what changed, and an analyzer LLM can look at the result and understand (categorize) the most salient contributions to change.
 - We can also time how long it takes to do subsequent changes, when relevant. For example, if we are building, we should be able to use cached layers (and the build times speed up) if the LLM is changing content later in the Dockerfile.
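The timing items above (tracking per-stage durations and what changed) are what the metadata refactor in this commit starts to support: agents now keep a `times` entry in their metadata and wrap steps with a `timed` decorator. As a rough standalone sketch of the idea, with illustrative names (`stage_times`, `record_stage`) that are not part of the codebase:

```python
import time

# Illustrative only: a tiny per-stage timer in the spirit of metadata["times"].
stage_times = {}

def record_stage(name, func, *args, **kwargs):
    # Time one stage and store the elapsed seconds under its name,
    # so an analyzer can later correlate duration with what changed.
    start = time.perf_counter()
    result = func(*args, **kwargs)
    stage_times[name] = time.perf_counter() - start
    return result

def fake_build():
    # Stand-in for a real build or generation step.
    time.sleep(0.1)
    return 0

record_stage("build", fake_build)
print(stage_times)  # e.g. {'build': 0.1003...}
```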

fractale/agent/base.py
Lines changed: 73 additions & 30 deletions

@@ -1,13 +1,16 @@
+import copy
 import os
+import re
 import sys
+import time

 import google.generativeai as genai

-from fractale.agent.decorators import callback, save_logs
 import fractale.agent.defaults as defaults
 import fractale.agent.logger as logger
 import fractale.utils as utils
 from fractale.agent.context import get_context
+from fractale.agent.decorators import save_result, timed


 class Agent:
@@ -22,37 +25,37 @@ class Agent:
     """

     # name and description should be on the class
+    state_variables = ["result", "error_message"]

-    def __init__(self, use_cache=False, results_dir=None, incremental=False):
-
-        # Max attempts defaults to unlimited
-        # We start counting at 1 for the user to see.
-        # Eat your heart out, Matlab.
-        self.attempts = 1
-        self.max_attempts = None
+    def __init__(
+        self, use_cache=False, results_dir=None, save_incremental=False, max_attempts=None
+    ):
+        self.attempts = 0
+        self.max_attempts = max_attempts

-        # For now, assume this is for the manager.
+        # For now, assume these are for the manager.
+        # They get added to other agents via the step creation
         # We can optionally save incremental result objects
         self.results_dir = results_dir or os.getcwd()
-        self.save_incremental = incremental
+        self.save_incremental = save_incremental

         # The user can save if desired - caching the context to skip steps that already run.
         self.setup_cache(use_cache)

         # This supports saving custom logs and step (attempt) metadata
-        self.metadata = {}
+        self.init_metadata()

         # Custom initialization functions
         self.init()

-    @callback(save_logs)
+    def init_metadata(self):
+        self.metadata = {"times": {}, "assets": {}, "ask_gemini": [], "retries": 0, "failures": []}
+
+    @save_result
     def run(self, context):
         """
         Run the agent - a wrapper around internal function _run that prepares it.
         """
-        # Init attempts. Each agent has an internal counter for total attempts
-        self.attempts = self.attempts or 1
-
         # Load cached context. This is assumed to override user provided args
         # If we have a saved context, we assume we want to use it, return early
         cached_context = self.load_cache()
@@ -66,7 +69,8 @@ def run(self, context):
         context = get_context(context)

         # Run, wrapping with a load and save of cache
-        context = self._run(context)
+        # This will return here when the internal loop is done
+        context = self.run_step(context)
         self.save_cache(context)
         return context

@@ -79,6 +83,32 @@ def print_result(self, result):
         """
         pass

+    def reset_context(self, context):
+        """
+        Remove output and any stateful variables. This is assuming we
+        are starting again.
+        """
+        for key in self.state_variables:
+            if key in context:
+                del context[key]
+
+        # Since we will try again, let's move current metadata into a subsection
+        metadata = copy.deepcopy(self.metadata)
+
+        # We don't want this to recurse forever
+        failures = metadata.get("failures") or []
+        if "failures" in metadata:
+            del metadata["failures"]
+        failures.append(metadata)
+
+        # Reset metadata, save retries
+        self.init_metadata()
+        self.metadata["failures"] = failures
+        self.metadata["retries"] = metadata["retries"]
+
+        # We don't need a return here, but let's be explicit
+        return context
+
     def setup_cache(self, use_cache=False):
         """
         Setup (or load) a cache.
@@ -132,10 +162,7 @@ def reached_max_attempts(self):
         # Unset (None) or 1.
         if not self.max_attempts:
             return False
-        return self.attempts >= self.max_attempts
-
-    def set_max_attempts(self, max_attempts):
-        self.max_attempts = max_attempts
+        return self.attempts > self.max_attempts

     def add_shared_arguments(self, agent):
         """
@@ -199,29 +226,25 @@ def get_code_block(self, content, code_type):
         """
         Parse a code block from the response
         """
+        pattern = f"```(?:{code_type})?\n(.*?)```"
+        match = re.search(pattern, content, re.DOTALL)
+        if match:
+            return match.group(1).strip()
         if content.startswith(f"```{code_type}"):
             content = content[len(f"```{code_type}") :]
         if content.startswith("```"):
             content = content[len("```") :]
         if content.endswith("```"):
             content = content[: -len("```")]
-        return content
+        return content.strip()

-    def _run(self, context):
+    def run_step(self, context):
         """
         Run the agent. This expects to be called with a loaded context.
         """
         assert context
         raise NotImplementedError(f"The {self.name} agent is missing internal 'run' function")

-    def get_initial_prompt(self, context):
-        """
-        Get the initial prompt (with details) to provide context to the manager.
-
-        If we don't do this, the manager can provide a bad instruction for how to fix the error.
-        """
-        return self.get_prompt(context)
-
     def get_prompt(self, context):
         """
         This function should take the same context as run and return the parsed prompt that
@@ -244,19 +267,39 @@ def init(self):
         except KeyError:
             sys.exit("ERROR: GEMINI_API_KEY environment variable not set.")

+    # We don't add timed here because we do it custom
     def ask_gemini(self, prompt, with_history=True):
        """
        Ask gemini adds a wrapper with some error handling.
        """
         try:
+            start = time.perf_counter()
             if with_history:
                 response = self.chat.send_message(prompt)
             else:
                 response = self.model.generate_content(prompt)
+            end = time.perf_counter()
+
+            if self.save_incremental:
+                self.save_gemini_metadata(end - start, response, with_history)

             # This line can fail. If it succeeds, return entire response
             return response.text.strip()

         except ValueError as e:
             print(f"[Error] The API response was blocked and contained no text: {str(e)}")
             return "GEMINI ERROR: The API returned an error (or stop) and we need to try again."
+
+    def save_gemini_metadata(self, elapsed_time, response, with_history):
+        """
+        Save gemini response metadata and elapsed time
+        """
+        self.metadata["ask_gemini"].append(
+            {
+                "conversation_history": with_history,
+                "prompt_token_count": response.usage_metadata.prompt_token_count,
+                "candidates_token_count": response.usage_metadata.candidates_token_count,
+                "total_token_count": response.usage_metadata.total_token_count,
+                "time_seconds": elapsed_time,
+            }
+        )
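The reworked `get_code_block` above now tries a fenced-block regex first and falls back to stripping stray fences, returning stripped content either way. A standalone sketch of that same logic (the `reply` string and `FENCE` constant are made up for illustration):

```python
import re

FENCE = "`" * 3  # avoid writing a literal fence inside this example

def get_code_block(content, code_type):
    # Mirrors the logic added above: prefer a fenced block, else strip fences.
    pattern = f"{FENCE}(?:{code_type})?\n(.*?){FENCE}"
    match = re.search(pattern, content, re.DOTALL)
    if match:
        return match.group(1).strip()
    if content.startswith(f"{FENCE}{code_type}"):
        content = content[len(f"{FENCE}{code_type}") :]
    if content.startswith(FENCE):
        content = content[len(FENCE) :]
    if content.endswith(FENCE):
        content = content[: -len(FENCE)]
    return content.strip()

reply = f"Here you go:\n{FENCE}dockerfile\nFROM ubuntu:24.04\nRUN apt-get update\n{FENCE}\nEnjoy!"
print(get_code_block(reply, "dockerfile"))
# FROM ubuntu:24.04
# RUN apt-get update
```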

fractale/agent/build/agent.py
Lines changed: 55 additions & 15 deletions

@@ -1,5 +1,6 @@
 from fractale.agent.base import GeminiAgent
 import fractale.agent.build.prompts as prompts
+from fractale.agent.decorators import timed
 from fractale.agent.context import get_context
 from fractale.agent.errors import DebugAgent
 import fractale.agent.logger as logger
@@ -18,10 +19,6 @@
 import textwrap


-# regular expression in case LLM does not follow my instructions!
-dockerfile_pattern = r"```(?:dockerfile)?\n(.*?)```"
-
-
 class BuildAgent(GeminiAgent):
     """
     Builder agent.
@@ -33,6 +30,8 @@ class BuildAgent(GeminiAgent):

     name = "build"
     description = "builder agent"
+    state_variables = ["result", "dockerfile", "error_message"]
+    result_type = "dockerfile"

     def _add_arguments(self, subparser):
         """
@@ -55,6 +54,12 @@ def _add_arguments(self, subparser):
             "--environment",
             help="Environment description to build for (defaults to generic)",
         )
+        build.add_argument(
+            "--load",
+            help="Load into kind on success.",
+            default=False,
+            action="store_true",
+        )
         return build

     def get_prompt(self, context):
@@ -85,7 +90,8 @@ def filter_output(self, output):
         regex = "(%s)" % "|".join(skips)
         return "\n".join([x for x in output.split("\n") if not re.search(regex, x)])

-    def _run(self, context):
+    @timed
+    def run_step(self, context):
         """
         Run the agent.

@@ -113,6 +119,7 @@ def _run(self, context):
         if return_code == 0:
             self.print_result(context.result)
             logger.success(f"Build complete in {self.attempts} attempts")
+            self.load(context)
         else:
             # Filter out likely not needed lines (ubuntu install)
             output = self.filter_output(output)
@@ -122,12 +129,14 @@ def _run(self, context):
             # Ask the debug agent to better instruct the error message
             # This becomes a more guided output
             context.error_message = output
-            agent = DebugAgent()
+
             # This updates the error message to be the output
-            context = agent.run(context, requires=prompts.requires)
+            context = DebugAgent().run(context, requires=prompts.requires)
+            print("\n[bold cyan] Requesting Correction from Build Agent[/bold cyan]")

             # If we have reached the max attempts...
-            if self.reached_max_attempts():
+            if self.reached_max_attempts() or context.get("return_to_manager") is True:
+                context.return_to_manager = False

                 # If we are being managed, return the result
                 if context.is_managed():
@@ -139,10 +148,9 @@ def _run(self, context):
                 logger.exit(f"Max attempts {self.max_attempts} reached.", title="Agent Failure")

             self.attempts += 1
-            print("\n[bold cyan] Requesting Correction from Build Agent[/bold cyan]")

             # Update the context with error message
-            return self.run(context)
+            return self.run_step(context)

         # Add generation line
         self.write_file(context, context.result)
@@ -151,6 +159,25 @@ def _run(self, context):
         # unless we are being managed
         return context

+    @timed
+    def load(self, context):
+        """
+        If specified, load into kind.
+        """
+        if not context.get("load") is True:
+            return
+
+        logger.info("Loading into kind...")
+        p = subprocess.run(
+            ["kind", "load", "docker-image", context.container],
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+        if p.returncode != 0:
+            output = p.stdout + p.stderr
+            logger.warning(f"Issue with kind load: {output}")
+
     def print_result(self, dockerfile):
         """
         Print Dockerfile with highlighted Syntax
@@ -183,6 +210,7 @@ def generate_name(self, name):
             name = name + "c"
         return name.lower()

+    @timed
     def build(self, context):
         """
         Build the Dockerfile! Yolo!
@@ -219,6 +247,18 @@ def build(self, context):
         shutil.rmtree(build_dir, ignore_errors=True)
         return (p.returncode, p.stdout + p.stderr)

+    def save_dockerfile(self, dockerfile):
+        """
+        Save logs to metadata
+        """
+        if self.save_incremental:
+            if "dockerfile" not in self.metadata["assets"]:
+                self.metadata["assets"]["dockerfile"] = []
+            self.metadata["assets"]["dockerfile"].append(
+                {"item": dockerfile, "attempt": self.attempts}
+            )
+
+    @timed
     def generate_dockerfile(self, context):
         """
         Generates or refines a Dockerfile using the Gemini API.
@@ -233,14 +273,14 @@ def generate_dockerfile(self, context):

         # Try to remove Dockerfile from code block
         try:
-            content = self.get_code_block(content, "dockerfile")
-
-            # If we are getting commentary...
-            match = re.search(dockerfile_pattern, content, re.DOTALL)
+            # This can be provided as docker or dockerfile
+            pattern = "```(?:docker|dockerfile)?\n(.*?)```"
+            match = re.search(pattern, content, re.DOTALL)
             if match:
                 dockerfile = match.group(1).strip()
             else:
-                dockerfile = content.strip()
+                dockerfile = self.get_code_block(content, "dockerfile")
+            self.save_dockerfile(dockerfile)

             # The result is saved as a build step
             # The dockerfile is the argument used internally
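One detail that ties this retry loop to the base class changes: `attempts` now starts at 0 and `reached_max_attempts` uses a strict `>` comparison, so with `max_attempts=3` the check only trips once the counter has passed 3. A toy reproduction of just that comparison (illustrative, not from the diff):

```python
# Toy stand-in for Agent.reached_max_attempts with the new counting.
class Toy:
    def __init__(self, max_attempts=None):
        self.attempts = 0
        self.max_attempts = max_attempts

    def reached_max_attempts(self):
        # Unset (None) means unlimited attempts.
        if not self.max_attempts:
            return False
        return self.attempts > self.max_attempts

toy = Toy(max_attempts=3)
for _ in range(5):
    toy.attempts += 1
    print(toy.attempts, toy.reached_max_attempts())
# 1 False, 2 False, 3 False, 4 True, 5 True
```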

fractale/agent/build/prompts.py
Lines changed: 1 addition & 0 deletions

@@ -9,6 +9,7 @@
 - Assume a default of CPU if GPU or CPU is not stated.
 - Do not do a multi-stage build, and do not COPY or ADD anything.
 - Try to place executables on the PATH so they do not need to be discovered.
+- You are only scoped to edit a Dockerfile to build the image.
 """

 common_instructions = (
