Skip to content

Commit d233656

Browse files
committed
try asking manager for a fix
Signed-off-by: vsoch <[email protected]>
1 parent a0f0e45 commit d233656

File tree

11 files changed

+151
-163
lines changed

11 files changed

+151
-163
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ See [examples/agent](examples/agent) for an example.
2828

2929
**And experiment ideas**
3030

31+
- How do we define stability?
3132
- What are the increments of change (e.g., "adding a library")? We should be able to keep track of times for each stage and what changed, and an analyzer LLM can look at result and understand (categorize) most salient contributions to change.
3233
- We also can time the time it takes to do subsequent changes, when relevant. For example, if we are building, we should be able to use cached layers (and the build times speed up) if the LLM is changing content later in the Dockerfile.
3334
- We can also save the successful results (Dockerfile builds, for example) and compare for similarity. How consistent is the LLM?

examples/agent/plans/run-lammps.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
name: Build and Deploy LAMMPS
22
description: Build a Docker container and deploy it as a Kubernetes Job.
33
plan:
4+
5+
# Important: everything you want to provide to the manager agent should be defined.
6+
# Agents can pass steps in between, but the manager is always given stateless context.
47
- agent: build
58
context:
69
environment: "google cloud CPU instance in Kubernetes"
710
application: lammps
8-
# Testing max attempts for help from LLM manager
911
max_attempts: 1
1012
details: |
1113
Please build with the reaxff HNS example located in examples/reaxff/HNS.
@@ -18,6 +20,7 @@ plan:
1820
context:
1921
no_pull: true
2022
environment: "google cloud CPU instance in Kubernetes"
23+
max_attempts: 1
2124
details: |
2225
Please execute the reaxff HNS example, and assume the data is in the PWD,
2326
Run lammps with params -v x 2 -v y 2 -v z 2 -in ./in.reaxff.hns

fractale/agent/base.py

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import fractale.utils as utils
22

3-
43
class Agent:
54
"""
65
A base for an agent. Each agent should:
@@ -36,12 +35,11 @@ def return_on_failure(self):
3635
"""
3736
On failure, have we reached max attempts and should return?
3837
"""
39-
print('CHECK')
40-
import IPython
41-
IPython.embed()
38+
# Unset or 0.
4239
if not self.max_attempts:
4340
return False
44-
return self.attempts > self.max_attempts
41+
# This starts counting at 1, so we check >=
42+
return self.attempts >= self.max_attempts
4543

4644
def set_max_attempts(self, max_attempts):
4745
self.max_attempts = max_attempts
@@ -66,31 +64,34 @@ def write_file(self, context, content, add_comment=True):
6664
content += f"\n# Generated by fractale {self.name} agent"
6765
utils.write_file(content, outfile)
6866

69-
def ask_gemini(self, prompt):
67+
def get_code_block(self, content, code_type):
68+
"""
69+
Parse a code block from the response
70+
"""
71+
if content.startswith(f"```{code_type}"):
72+
content = content[len(f"```{code_type}") :]
73+
if content.startswith("```"):
74+
content = content[len("```") :]
75+
if content.endswith("```"):
76+
content = content[: -len("```")]
77+
return content
78+
79+
def ask_gemini(self, prompt, with_history=True):
7080
"""
7181
Ask gemini adds a wrapper with some error handling.
7282
"""
7383
try:
74-
response = self.chat.send_message(prompt)
84+
if with_history:
85+
response = self.chat.send_message(prompt)
86+
else:
87+
response = self.model.generate_content(prompt)
7588

7689
# This line can fail. If it succeeds, return entire response
77-
text_content = response.text
78-
assert text_content
79-
return response
90+
return response.text.strip()
8091

8192
except ValueError as e:
8293
print(f"[Error] The API response was blocked and contained no text: {str(e)}")
83-
84-
print("VANESSA DEBUG WHAT TO DO")
85-
import IPython
86-
87-
IPython.embed()
88-
# We probably want to retry if it is 1 (STOP) and empty.
89-
# Otherwise we need to somehow retry fixing the dockerfile.
90-
# For robust logging, you can inspect the reason.
91-
if response.candidates:
92-
finish_reason = response.candidates[0].finish_reason.name
93-
print(f"Finish Reason: {finish_reason}")
94+
return "GEMINI ERROR: The API returned an error (or stop) and we need to try again."
9495

9596
def run(self, context):
9697
"""
@@ -99,6 +100,14 @@ def run(self, context):
99100
assert context
100101
raise NotImplementedError(f"The {self.name} agent is missing a 'run' function")
101102

103+
def get_initial_prompt(self, context):
104+
"""
105+
Get the initial prompt (with details) to provide context to the manager.
106+
107+
If we don't do this, the manager can provide a bad instruction for how to fix the error.
108+
"""
109+
return self.get_prompt(context)
110+
102111
def get_prompt(self, context):
103112
"""
104113
This function should take the same context as run and return the parsed prompt that

fractale/agent/build/agent.py

Lines changed: 19 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,6 @@ def init(self):
4141
model = genai.GenerativeModel("gemini-2.5-pro")
4242
self.chat = model.start_chat()
4343

44-
def requires(self):
45-
"""
46-
Each agent has a requires function to tell the manager what
47-
they do and what is required in the context to run them.
48-
"""
49-
return prompts.requires
50-
5144
def add_arguments(self, subparser):
5245
"""
5346
Add arguments for the plugin to show up in argparse
@@ -125,7 +118,11 @@ def run(self, context):
125118
# This will either generate fresh or rebuild erroneous Dockerfile
126119
# We don't return the dockerfile because it is updated in the context
127120
self.generate_dockerfile(context)
128-
print(Panel(context.dockerfile, title="[green]Dockerfile[/green]", border_style="green"))
121+
print(Panel(context.dockerfile, title="[green]Dockerfile or Response[/green]", border_style="green"))
122+
123+
# Set the container on the context for a next step to use it...
124+
container = context.get("container") or self.generate_name(context.application)
125+
context.container = container
129126

130127
# Build it! We might want to only allow a certain number of retries or incremental changes.
131128
return_code, output = self.build(context)
@@ -170,7 +167,7 @@ def get_result(self, context):
170167
"""
171168
Return either the entire context or single result.
172169
"""
173-
if context.get("managed") is True:
170+
if context.is_managed:
174171
return context
175172
return context.dockerfile
176173

@@ -216,10 +213,6 @@ def build(self, context):
216213
Build the Dockerfile! Yolo!
217214
"""
218215
dockerfile = context.get("dockerfile")
219-
image_name = context.get("container") or self.generate_name(context.application)
220-
221-
# Set the container on the context for follow up steps.
222-
context.container = image_name
223216

224217
# Not sure if this can happen, assume it can
225218
if not dockerfile:
@@ -230,17 +223,20 @@ def build(self, context):
230223

231224
# Write the Dockerfile to the temporary directory
232225
utils.write_file(dockerfile, os.path.join(build_dir, "Dockerfile"))
233-
print(
234-
Panel(
235-
f"Attempt {self.attempts} to build image: [bold cyan]{image_name}[/bold cyan]",
236-
title="[blue]Docker Build[/blue]",
237-
border_style="blue",
226+
227+
# If only one max attempt, don't print here, not important to show.
228+
if self.max_attempts is not None and self.max_attempts > 1:
229+
print(
230+
Panel(
231+
f"Attempt {self.attempts} to build image: [bold cyan]{context.container}[/bold cyan]",
232+
title="[blue]Docker Build[/blue]",
233+
border_style="blue",
234+
)
238235
)
239-
)
240236

241237
# Run the build process using the temporary directory as context
242238
p = subprocess.run(
243-
["docker", "build", "--network", "host", "-t", image_name, "."],
239+
["docker", "build", "--network", "host", "-t", context.container, "."],
244240
capture_output=True,
245241
text=True,
246242
cwd=build_dir,
@@ -259,18 +255,12 @@ def generate_dockerfile(self, context):
259255
print(textwrap.indent(prompt, "> ", predicate=lambda _: True))
260256

261257
# The API can error and not return a response.text.
262-
response = self.ask_gemini(prompt)
258+
content = self.ask_gemini(prompt)
263259
print("Received Dockerfile response from Gemini...")
264260

265261
# Try to remove Dockerfile from code block
266262
try:
267-
content = response.text.strip()
268-
if content.startswith("```dockerfile"):
269-
content = content[len("```dockerfile") :]
270-
if content.startswith("```"):
271-
content = content[len("```") :]
272-
if content.endswith("```"):
273-
content = content[: -len("```")]
263+
content = self.get_code_block(content, 'dockerfile')
274264

275265
# If we are getting commentary...
276266
match = re.search(dockerfile_pattern, content, re.DOTALL)
@@ -283,4 +273,4 @@ def generate_dockerfile(self, context):
283273
context.dockerfile = dockerfile
284274
context.result = dockerfile
285275
except Exception as e:
286-
sys.exit(f"Error parsing response from Gemini: {e}\n{response.text}")
276+
sys.exit(f"Error parsing response from Gemini: {e}\n{content}")

fractale/agent/build/prompts.py

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -25,33 +25,27 @@
2525
- Don't worry about users/permissions - just be root.
2626
"""
2727

28+
29+
# TODO: do we want to add back common instructions here?
2830
rebuild_prompt = (
29-
f"""Act as a Dockerfile builder service expert. I am trying to build a Docker image named for the application '%s' in an environment for '%s'. The previous attempt to build or run the Dockerfile failed. Here is the problematic Dockerfile:
31+
f"""Your previous Dockerfile build has failed. Here is an instruction for how to fix it.
3032
31-
```dockerfile
32-
%s
33-
```
33+
Please analyze the instruction and your previous Dockerfile, and provide a corrected version.
34+
- The response should only contain the complete, corrected Dockerfile inside a single markdown code block.
35+
- Use succinct comments in the Dockerfile to explain build logic and changes.
36+
- Follow the same guidelines as previously instructed.
3437
35-
Here is the error message I received:
36-
```
3738
%s
38-
```
39-
40-
Please analyze the error and the Dockerfile, and provide a corrected version.
41-
- The response should only contain the complete, corrected Dockerfile inside a single markdown code block.
42-
- Use succinct comments in the Dockerfile to explain build logic and changes.
4339
"""
44-
+ common_instructions
4540
)
4641

4742

4843
def get_rebuild_prompt(context):
49-
environment = context.get("environment", defaults.environment)
50-
application = context.get("application", required=True)
51-
return prompt_wrapper(
52-
rebuild_prompt % (application, environment, context.dockerfile, context.error_message),
53-
context=context,
54-
)
44+
"""
45+
The rebuild prompt will either be the entire error output, or the parsed error
46+
output with help from the agent manager.
47+
"""
48+
return prompt_wrapper(rebuild_prompt % context.error_message, context=context)
5549

5650

5751
build_prompt = (

fractale/agent/context.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,15 @@ def reset(self):
3232
"""
3333
Reset the return code and result.
3434
"""
35-
for key in ["return_code", "result"]:
35+
for key in ["return_code", "result", "error_message"]:
3636
self.data[key] = None
3737

38+
def is_managed(self):
39+
"""
40+
Is the context being managed?
41+
"""
42+
return self.get("managed") is True
43+
3844
def __getattribute__(self, name):
3945
"""
4046
Intercepts all attribute lookups (including methods/functions)

fractale/agent/kubernetes_job/agent.py

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from rich.panel import Panel
1515
from rich.syntax import Syntax
1616

17-
import fractale.agent.defaults as defaults
1817
import fractale.agent.kubernetes_job.prompts as prompts
1918
import fractale.utils as utils
2019
from fractale.agent.base import Agent
@@ -77,13 +76,6 @@ def init(self):
7776
model = genai.GenerativeModel("gemini-2.5-pro")
7877
self.chat = model.start_chat()
7978

80-
def requires(self):
81-
"""
82-
Each agent has a requires function to tell the manager what
83-
they do and what is required in the context to run them.
84-
"""
85-
return prompts.requires
86-
8779
def get_prompt(self, context):
8880
"""
8981
Get the prompt for the LLM. We expose this so the manager can take it
@@ -144,16 +136,31 @@ def run(self, context):
144136
print("\n[bold cyan] Requesting Correction from Kubernetes Job Agent[/bold cyan]")
145137
self.attempts += 1
146138

139+
# Return early based on max attempts
140+
if self.return_on_failure():
141+
context.return_code = -1
142+
context.result = output
143+
return self.get_result(context)
144+
147145
# Trigger again, provide initial context and error message
146+
# This is the internal loop running, no manager agent
148147
context.error_message = output
149148
context.job_crd = job_crd
150149
return self.run(context)
151150

152151
self.write_file(context, job_crd)
153152
self.print_crd(job_crd)
154-
if context.get("managed") is True:
153+
return self.get_result(context)
154+
155+
156+
def get_result(self, context):
157+
"""
158+
Return either the entire context or single result.
159+
"""
160+
if context.is_managed:
155161
return context
156-
return job_crd
162+
return context.job_crd
163+
157164

158165
def print_crd(self, job_crd):
159166
"""
@@ -424,26 +431,20 @@ def deploy(self, job_crd, image_name, cleanup=True):
424431
shutil.rmtree(deploy_dir, ignore_errors=True)
425432
return (0, "Success")
426433

427-
def generate_crd(self, context, template=None):
434+
def generate_crd(self, context):
428435
"""
429436
Generates or refines an existing Job CRD using the Gemini API.
430437
"""
431438
prompt = self.get_prompt(context)
432439
print("Sending generation prompt to Gemini...")
433440
print(textwrap.indent(prompt, "> ", predicate=lambda _: True))
434441

435-
response = self.ask_gemini(prompt)
442+
content = self.ask_gemini(prompt)
436443
print("Received response from Gemini...")
437444

438445
# Try to remove Dockerfile from code block
439446
try:
440-
content = response.text.strip()
441-
if content.startswith("```yaml"):
442-
content = content[len("```yaml") :]
443-
if content.startswith("```"):
444-
content = content[len("```") :]
445-
if content.endswith("```"):
446-
content = content[: -len("```")]
447+
content = self.get_code_block(content, 'yaml')
447448

448449
# If we are getting commentary...
449450
match = re.search(yaml_pattern, content, re.DOTALL)
@@ -455,4 +456,4 @@ def generate_crd(self, context, template=None):
455456
return job_crd
456457

457458
except Exception as e:
458-
sys.exit(f"Error parsing response from Gemini: {e}\n{response.text}")
459+
sys.exit(f"Error parsing response from Gemini: {e}\n{content}")

0 commit comments

Comments
 (0)