Skip to content

Commit a5120b0

Browse files
committed
test: refactor pod/job into abstractions
Signed-off-by: vsoch <[email protected]>
1 parent 7987320 commit a5120b0

File tree

16 files changed

+826
-503
lines changed

16 files changed

+826
-503
lines changed

examples/agent/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,17 @@ We haven't hit the case yet where the manager needs to take over - that needs fu
7272
#### To do items
7373

7474
- Figure out optimization agent (with some goal)
75+
- Right now when we restart, we do with fresh slate (no log memory) - should there be?
76+
- We likely want some way to quantify the amount of change between prompts, and the difficulty of the task.
77+
- Likely, when we return to the manager, the last response (which might say why it is returning) should inform step selection. And not just step selection — it should also inform the updated prompt, in case the prompt for the step is missing something.
78+
- Right now we rely on random sampling of the space to avoid whatever the issue might be.
7579

7680
#### Research Questions
7781

7882
**And experiment ideas**
7983

84+
- Why does it make the same mistakes? E.g., always forgetting ca-certificates. Did it learn from data that was OK to do and thus errors result from inconsistencies between the way things used to work and the way they do now?
85+
- Insight: if I don't know how to run an app, it's unlikely the LLM can do it, because I can't give any guidance (and it guesses)
8086
- How do we define stability?
8187
- What are the increments of change (e.g., "adding a library")? We should be able to keep track of times for each stage and what changed, and an analyzer LLM can look at result and understand (categorize) most salient contributions to change.
8288
- We can also measure the time it takes to do subsequent changes, when relevant. For example, if we are building, we should be able to use cached layers (and the build times speed up) if the LLM is changing content later in the Dockerfile.

fractale/agent/base.py

Lines changed: 79 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
1+
import copy
12
import os
3+
import re
24
import sys
5+
import time
36

47
import google.generativeai as genai
58

6-
from fractale.agent.decorators import callback, save_logs
79
import fractale.agent.defaults as defaults
810
import fractale.agent.logger as logger
911
import fractale.utils as utils
1012
from fractale.agent.context import get_context
13+
from fractale.agent.decorators import save_result, timed
1114

1215

1316
class Agent:
@@ -22,37 +25,37 @@ class Agent:
2225
"""
2326

2427
# name and description should be on the class
28+
state_variables = ["result", "error_message"]
2529

26-
def __init__(self, use_cache=False, results_dir=None, incremental=False):
27-
28-
# Max attempts defaults to unlimited
29-
# We start counting at 1 for the user to see.
30-
# Eat your heart out, Matlab.
31-
self.attempts = 1
32-
self.max_attempts = None
30+
def __init__(
31+
self, use_cache=False, results_dir=None, save_incremental=False, max_attempts=None
32+
):
33+
self.attempts = 0
34+
self.max_attempts = max_attempts
3335

34-
# For now, assume this is for the manager.
36+
# For now, assume these are for the manager.
37+
# They get added to other agents via the step creation
3538
# We can optionally save incremental result objects
3639
self.results_dir = results_dir or os.getcwd()
37-
self.save_incremental = incremental
40+
self.save_incremental = save_incremental
3841

3942
# The user can save if desired - caching the context to skip steps that already run.
4043
self.setup_cache(use_cache)
4144

4245
# This supports saving custom logs and step (attempt) metadata
43-
self.metadata = {}
46+
self.init_metadata()
4447

4548
# Custom initialization functions
4649
self.init()
4750

48-
@callback(save_logs)
51+
def init_metadata(self):
52+
self.metadata = {"times": {}, "assets": {}, "ask_gemini": [], "retries": 0, "failures": []}
53+
54+
@save_result
4955
def run(self, context):
5056
"""
5157
Run the agent - a wrapper around internal function _run that prepares it.
5258
"""
53-
# Init attempts. Each agent has an internal counter for total attempts
54-
self.attempts = self.attempts or 1
55-
5659
# Load cached context. This is assumed to override user provided args
5760
# If we have a saved context, we assume we want to use it, return early
5861
cached_context = self.load_cache()
@@ -66,7 +69,8 @@ def run(self, context):
6669
context = get_context(context)
6770

6871
# Run, wrapping with a load and save of cache
69-
context = self._run(context)
72+
# This will return here when the internal loop is done
73+
context = self.run_step(context)
7074
self.save_cache(context)
7175
return context
7276

@@ -79,6 +83,32 @@ def print_result(self, result):
7983
"""
8084
pass
8185

86+
def reset_context(self, context):
87+
"""
88+
Remove output and any stateful variables. This is assuming we
89+
are starting again.
90+
"""
91+
for key in self.state_variables:
92+
if key in context:
93+
del context[key]
94+
95+
# Since we will try again, let's move current metadata into a subsection
96+
metadata = copy.deepcopy(self.metadata)
97+
98+
# We don't want this to recurse forever
99+
failures = metadata.get("failures") or []
100+
if "failures" in metadata:
101+
del metadata["failures"]
102+
failures.append(metadata)
103+
104+
# Reset metadata, save retries
105+
self.init_metadata()
106+
self.metadata["failures"] = failures
107+
self.metadata["retries"] = metadata["retries"]
108+
109+
# We don't need a return here, but let's be explicit
110+
return context
111+
82112
def setup_cache(self, use_cache=False):
83113
"""
84114
Setup (or load) a cache.
@@ -132,10 +162,7 @@ def reached_max_attempts(self):
132162
# Unset (None) or 1.
133163
if not self.max_attempts:
134164
return False
135-
return self.attempts >= self.max_attempts
136-
137-
def set_max_attempts(self, max_attempts):
138-
self.max_attempts = max_attempts
165+
return self.attempts > self.max_attempts
139166

140167
def add_shared_arguments(self, agent):
141168
"""
@@ -199,29 +226,25 @@ def get_code_block(self, content, code_type):
199226
"""
200227
Parse a code block from the response
201228
"""
229+
pattern = f"```(?:{code_type})?\n(.*?)```"
230+
match = re.search(pattern, content, re.DOTALL)
231+
if match:
232+
return match.group(1).strip()
202233
if content.startswith(f"```{code_type}"):
203234
content = content[len(f"```{code_type}") :]
204235
if content.startswith("```"):
205236
content = content[len("```") :]
206237
if content.endswith("```"):
207238
content = content[: -len("```")]
208-
return content
239+
return content.strip()
209240

210-
def _run(self, context):
241+
def run_step(self, context):
211242
"""
212243
Run the agent. This expects to be called with a loaded context.
213244
"""
214245
assert context
215246
raise NotImplementedError(f"The {self.name} agent is missing internal 'run' function")
216247

217-
def get_initial_prompt(self, context):
218-
"""
219-
Get the initial prompt (with details) to provide context to the manager.
220-
221-
If we don't do this, the manager can provide a bad instruction for how to fix the error.
222-
"""
223-
return self.get_prompt(context)
224-
225248
def get_prompt(self, context):
226249
"""
227250
This function should take the same context as run and return the parsed prompt that
@@ -244,19 +267,45 @@ def init(self):
244267
except KeyError:
245268
sys.exit("ERROR: GEMINI_API_KEY environment variable not set.")
246269

270+
# We don't add timed here because we do it custom
247271
def ask_gemini(self, prompt, with_history=True):
248272
"""
249273
Ask gemini adds a wrapper with some error handling.
250274
"""
275+
# Always remove lines with empty spaces
276+
if len(prompt) > 15000:
277+
print("FOUND CHONKER PROMPT")
278+
import IPython
279+
280+
IPython.embed()
251281
try:
282+
start = time.perf_counter()
252283
if with_history:
253284
response = self.chat.send_message(prompt)
254285
else:
255286
response = self.model.generate_content(prompt)
287+
end = time.perf_counter()
288+
289+
if self.save_incremental:
290+
self.save_gemini_metadata(end - start, response, with_history)
256291

257292
# This line can fail. If it succeeds, return entire response
258293
return response.text.strip()
259294

260295
except ValueError as e:
261296
print(f"[Error] The API response was blocked and contained no text: {str(e)}")
262297
return "GEMINI ERROR: The API returned an error (or stop) and we need to try again."
298+
299+
def save_gemini_metadata(self, elapsed_time, response, with_history):
300+
"""
301+
Save gemini response metadata and elapsed time
302+
"""
303+
self.metadata["ask_gemini"].append(
304+
{
305+
"conversation_history": with_history,
306+
"prompt_token_count": response.usage_metadata.prompt_token_count,
307+
"candidates_token_count": response.usage_metadata.candidates_token_count,
308+
"total_token_count": response.usage_metadata.total_token_count,
309+
"time_seconds": elapsed_time,
310+
}
311+
)

0 commit comments

Comments
 (0)