Skip to content

Commit 4feeba3

Browse files
committed
test: refactor pod/job into abstractions
Signed-off-by: vsoch <[email protected]>
1 parent 7987320 commit 4feeba3

File tree

16 files changed

+796
-481
lines changed

16 files changed

+796
-481
lines changed

examples/agent/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,16 @@ We haven't hit the case yet where the manager needs to take over - that needs fu
7272
#### To do items
7373

7474
- Figure out optimization agent (with some goal)
75+
- Right now when we restart, we do so with a fresh slate (no log memory) - should there be memory of the previous logs?
76+
- I think that when we return to the manager, the last response (which might say why it is returning) should inform step selection. And not just step selection: it should also inform the updated prompt for the step that was missing something.
77+
- Right now we rely on random sampling of the space to avoid whatever the issue might be.
7578

7679
#### Research Questions
7780

7881
**And experiment ideas**
7982

83+
- Why does it make the same mistakes? E.g., always forgetting ca-certificates. Did it learn from data where that was OK to do, so that the errors result from inconsistencies between the way things used to work and the way they work now?
84+
- Insight: if I don't know how to run an app, it's unlikely the LLM can do it, because I can't give any guidance (and it guesses)
8085
- How do we define stability?
8186
- What are the increments of change (e.g., "adding a library")? We should be able to keep track of times for each stage and what changed, and an analyzer LLM can look at the result and understand (categorize) the most salient contributions to change.
8287
- We can also measure the time it takes to make subsequent changes, when relevant. For example, if we are building, we should be able to use cached layers (and the build times speed up) if the LLM is changing content later in the Dockerfile.

fractale/agent/base.py

Lines changed: 67 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1+
import copy
12
import os
23
import sys
4+
import time
35

46
import google.generativeai as genai
57

6-
from fractale.agent.decorators import callback, save_logs
78
import fractale.agent.defaults as defaults
89
import fractale.agent.logger as logger
910
import fractale.utils as utils
1011
from fractale.agent.context import get_context
12+
from fractale.agent.decorators import save_result, timed
1113

1214

1315
class Agent:
@@ -22,37 +24,37 @@ class Agent:
2224
"""
2325

2426
# name and description should be on the class
27+
state_variables = ["result", "error_message"]
2528

26-
def __init__(self, use_cache=False, results_dir=None, incremental=False):
27-
28-
# Max attempts defaults to unlimited
29-
# We start counting at 1 for the user to see.
30-
# Eat your heart out, Matlab.
31-
self.attempts = 1
32-
self.max_attempts = None
29+
def __init__(
30+
self, use_cache=False, results_dir=None, save_incremental=False, max_attempts=None
31+
):
32+
self.attempts = 0
33+
self.max_attempts = max_attempts
3334

34-
# For now, assume this is for the manager.
35+
# For now, assume these are for the manager.
36+
# They get added to other agents via the step creation
3537
# We can optionally save incremental result objects
3638
self.results_dir = results_dir or os.getcwd()
37-
self.save_incremental = incremental
39+
self.save_incremental = save_incremental
3840

3941
# The user can save if desired - caching the context to skip steps that already run.
4042
self.setup_cache(use_cache)
4143

4244
# This supports saving custom logs and step (attempt) metadata
43-
self.metadata = {}
45+
self.init_metadata()
4446

4547
# Custom initialization functions
4648
self.init()
4749

48-
@callback(save_logs)
50+
def init_metadata(self):
51+
self.metadata = {"times": {}, "assets": {}, "ask_gemini": [], "retries": 0, "failures": []}
52+
53+
@save_result
4954
def run(self, context):
5055
"""
5156
Run the agent - a wrapper around internal function _run that prepares it.
5257
"""
53-
# Init attempts. Each agent has an internal counter for total attempts
54-
self.attempts = self.attempts or 1
55-
5658
# Load cached context. This is assumed to override user provided args
5759
# If we have a saved context, we assume we want to use it, return early
5860
cached_context = self.load_cache()
@@ -66,7 +68,8 @@ def run(self, context):
6668
context = get_context(context)
6769

6870
# Run, wrapping with a load and save of cache
69-
context = self._run(context)
71+
# This will return here when the internal loop is done
72+
context = self.run_step(context)
7073
self.save_cache(context)
7174
return context
7275

@@ -79,6 +82,32 @@ def print_result(self, result):
7982
"""
8083
pass
8184

85+
def reset_context(self, context):
86+
"""
87+
Remove output and any stateful variables. This is assuming we
88+
are starting again.
89+
"""
90+
for key in self.state_variables:
91+
if key in context:
92+
del context[key]
93+
94+
# Since we will try again, let's move current metadata into a subsection
95+
metadata = copy.deepcopy(self.metadata)
96+
97+
# We don't want this to recurse forever
98+
failures = metadata.get("failures") or []
99+
if "failures" in metadata:
100+
del metadata["failures"]
101+
failures.append(metadata)
102+
103+
# Reset metadata, save retries
104+
self.init_metadata()
105+
self.metadata["failures"] = failures
106+
self.metadata["retries"] = metadata["retries"]
107+
108+
# We don't need a return here, but let's be explicit
109+
return context
110+
82111
def setup_cache(self, use_cache=False):
83112
"""
84113
Setup (or load) a cache.
@@ -132,10 +161,7 @@ def reached_max_attempts(self):
132161
# Unset (None) or 1.
133162
if not self.max_attempts:
134163
return False
135-
return self.attempts >= self.max_attempts
136-
137-
def set_max_attempts(self, max_attempts):
138-
self.max_attempts = max_attempts
164+
return self.attempts > self.max_attempts
139165

140166
def add_shared_arguments(self, agent):
141167
"""
@@ -207,21 +233,13 @@ def get_code_block(self, content, code_type):
207233
content = content[: -len("```")]
208234
return content
209235

210-
def _run(self, context):
236+
def run_step(self, context):
211237
"""
212238
Run the agent. This expects to be called with a loaded context.
213239
"""
214240
assert context
215241
raise NotImplementedError(f"The {self.name} agent is missing internal 'run' function")
216242

217-
def get_initial_prompt(self, context):
218-
"""
219-
Get the initial prompt (with details) to provide context to the manager.
220-
221-
If we don't do this, the manager can provide a bad instruction for how to fix the error.
222-
"""
223-
return self.get_prompt(context)
224-
225243
def get_prompt(self, context):
226244
"""
227245
This function should take the same context as run and return the parsed prompt that
@@ -244,19 +262,39 @@ def init(self):
244262
except KeyError:
245263
sys.exit("ERROR: GEMINI_API_KEY environment variable not set.")
246264

265+
# We don't add timed here because we do it custom
247266
def ask_gemini(self, prompt, with_history=True):
248267
"""
249268
Ask gemini adds a wrapper with some error handling.
250269
"""
251270
try:
271+
start = time.perf_counter()
252272
if with_history:
253273
response = self.chat.send_message(prompt)
254274
else:
255275
response = self.model.generate_content(prompt)
276+
end = time.perf_counter()
277+
278+
if self.save_incremental:
279+
self.save_gemini_metadata(end - start, response, with_history)
256280

257281
# This line can fail. If it succeeds, return entire response
258282
return response.text.strip()
259283

260284
except ValueError as e:
261285
print(f"[Error] The API response was blocked and contained no text: {str(e)}")
262286
return "GEMINI ERROR: The API returned an error (or stop) and we need to try again."
287+
288+
def save_gemini_metadata(self, elapsed_time, response, with_history):
289+
"""
290+
Save gemini response metadata and elapsed time
291+
"""
292+
self.metadata["ask_gemini"].append(
293+
{
294+
"conversation_history": with_history,
295+
"prompt_token_count": response.usage_metadata.prompt_token_count,
296+
"candidates_token_count": response.usage_metadata.candidates_token_count,
297+
"total_token_count": response.usage_metadata.total_token_count,
298+
"time_seconds": elapsed_time,
299+
}
300+
)

fractale/agent/build/agent.py

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from fractale.agent.base import GeminiAgent
22
import fractale.agent.build.prompts as prompts
3+
from fractale.agent.decorators import timed
34
from fractale.agent.context import get_context
45
from fractale.agent.errors import DebugAgent
56
import fractale.agent.logger as logger
@@ -33,6 +34,8 @@ class BuildAgent(GeminiAgent):
3334

3435
name = "build"
3536
description = "builder agent"
37+
state_variables = ["result", "dockerfile", "error_message"]
38+
result_type = "dockerfile"
3639

3740
def _add_arguments(self, subparser):
3841
"""
@@ -55,6 +58,12 @@ def _add_arguments(self, subparser):
5558
"--environment",
5659
help="Environment description to build for (defaults to generic)",
5760
)
61+
build.add_argument(
62+
"--load",
63+
help="Load into kind on success.",
64+
default=False,
65+
action="store_true",
66+
)
5867
return build
5968

6069
def get_prompt(self, context):
@@ -85,7 +94,8 @@ def filter_output(self, output):
8594
regex = "(%s)" % "|".join(skips)
8695
return "\n".join([x for x in output.split("\n") if not re.search(regex, x)])
8796

88-
def _run(self, context):
97+
@timed
98+
def run_step(self, context):
8999
"""
90100
Run the agent.
91101
@@ -113,6 +123,7 @@ def _run(self, context):
113123
if return_code == 0:
114124
self.print_result(context.result)
115125
logger.success(f"Build complete in {self.attempts} attempts")
126+
self.load(context)
116127
else:
117128
# Filter out likely not needed lines (ubuntu install)
118129
output = self.filter_output(output)
@@ -122,12 +133,14 @@ def _run(self, context):
122133
# Ask the debug agent to better instruct the error message
123134
# This becomes a more guided output
124135
context.error_message = output
125-
agent = DebugAgent()
136+
126137
# This updates the error message to be the output
127-
context = agent.run(context, requires=prompts.requires)
138+
context = DebugAgent().run(context, requires=prompts.requires)
139+
print("\n[bold cyan] Requesting Correction from Build Agent[/bold cyan]")
128140

129141
# If we have reached the max attempts...
130-
if self.reached_max_attempts():
142+
if self.reached_max_attempts() or context.get("return_to_manager") is True:
143+
context.return_to_manager = False
131144

132145
# If we are being managed, return the result
133146
if context.is_managed():
@@ -139,10 +152,9 @@ def _run(self, context):
139152
logger.exit(f"Max attempts {self.max_attempts} reached.", title="Agent Failure")
140153

141154
self.attempts += 1
142-
print("\n[bold cyan] Requesting Correction from Build Agent[/bold cyan]")
143155

144156
# Update the context with error message
145-
return self.run(context)
157+
return self.run_step(context)
146158

147159
# Add generation line
148160
self.write_file(context, context.result)
@@ -151,6 +163,25 @@ def _run(self, context):
151163
# unless we are being managed
152164
return context
153165

166+
@timed
167+
def load(self, context):
168+
"""
169+
If specified, load into kind.
170+
"""
171+
if not context.get("load") is True:
172+
return
173+
174+
logger.info("Loading into kind...")
175+
p = subprocess.run(
176+
["kind", "load", "docker-image", context.container],
177+
capture_output=True,
178+
text=True,
179+
check=False,
180+
)
181+
if p.returncode != 0:
182+
output = p.stdout + p.stderr
183+
logger.warning(f"Issue with kind load: {output}")
184+
154185
def print_result(self, dockerfile):
155186
"""
156187
Print Dockerfile with highlighted Syntax
@@ -183,6 +214,7 @@ def generate_name(self, name):
183214
name = name + "c"
184215
return name.lower()
185216

217+
@timed
186218
def build(self, context):
187219
"""
188220
Build the Dockerfile! Yolo!
@@ -219,6 +251,18 @@ def build(self, context):
219251
shutil.rmtree(build_dir, ignore_errors=True)
220252
return (p.returncode, p.stdout + p.stderr)
221253

254+
def save_dockerfile(self, dockerfile):
255+
"""
256+
Save logs to metadata
257+
"""
258+
if self.save_incremental:
259+
if "dockerfile" not in self.metadata["assets"]:
260+
self.metadata["assets"]["dockerfile"] = []
261+
self.metadata["assets"]["dockerfile"].append(
262+
{"item": dockerfile, "attempt": self.attempts}
263+
)
264+
265+
@timed
222266
def generate_dockerfile(self, context):
223267
"""
224268
Generates or refines a Dockerfile using the Gemini API.
@@ -241,6 +285,7 @@ def generate_dockerfile(self, context):
241285
dockerfile = match.group(1).strip()
242286
else:
243287
dockerfile = content.strip()
288+
self.save_dockerfile(dockerfile)
244289

245290
# The result is saved as a build step
246291
# The dockerfile is the argument used internally

fractale/agent/build/prompts.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
- Assume a default of CPU if GPU or CPU is not stated.
1010
- Do not do a multi-stage build, and do not COPY or ADD anything.
1111
- Try to place executables on the PATH so they do not need to be discovered.
12+
- You are only scoped to edit a Dockerfile to build the image.
1213
"""
1314

1415
common_instructions = (

0 commit comments

Comments
 (0)