Skip to content

Commit 7276138

Browse files
committed
Update coc_plugin.py
1 parent 2107637 commit 7276138

File tree

1 file changed

+182
-83
lines changed

1 file changed

+182
-83
lines changed

optillm/plugins/coc_plugin.py

Lines changed: 182 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -12,39 +12,70 @@
1212
# Plugin identifier
1313
SLUG = "coc"
1414

15+
# Maximum attempts to fix code
16+
MAX_FIX_ATTEMPTS = 3
17+
1518
# List of allowed modules for execution
1619
ALLOWED_MODULES = {
1720
'math': math,
1821
}
1922

20-
# Prompts
23+
# Initial code generation prompt
2124
CHAIN_OF_CODE_PROMPT = '''
22-
You are an AI assistant that uses Chain of Code (CoC) approach to solve problems. Follow these steps:
23-
24-
1. Write Python code that breaks down the problem into clear steps
25-
2. Each step should either be:
26-
- Executable Python code that performs computations
27-
- Pseudocode that you will simulate with natural language understanding
28-
3. Track final result in an 'answer' variable
29-
4. Return the final answer within the <output> tags
25+
Write Python code to solve this problem. The code should:
26+
1. Break down the problem into clear computational steps
27+
2. Use standard Python features and math operations
28+
3. Store the final result in a variable named 'answer'
29+
4. Include error handling where appropriate
30+
5. Be complete and executable
3031
3132
Format your response using:
3233
```python
3334
[Your complete Python program here]
3435
```
36+
'''
37+
38+
# Code fix prompt
39+
CODE_FIX_PROMPT = '''
40+
The following Python code failed to execute. Fix the code to make it work.
41+
Original code:
42+
```python
43+
{code}
44+
```
3545
36-
Finally provide output as:
37-
<output>
38-
[Your final answer]
39-
</output>
46+
Error encountered:
47+
{error}
48+
49+
Please provide a complete, fixed version of the code that:
50+
1. Addresses the error message
51+
2. Maintains the same logic and approach
52+
3. Stores the final result in 'answer'
53+
4. Is complete and executable
54+
55+
Return only the fixed code in a code block:
56+
```python
57+
[Your fixed code here]
58+
```
4059
'''
4160

42-
STATE_SIMULATION_PROMPT = '''You are simulating the execution of a Python program.
43-
Given the code below, simulate its execution and return the final value that would be in the 'answer' variable.
44-
Return ONLY the final value, no explanations or additional text.
61+
# Simulation prompt
62+
SIMULATION_PROMPT = '''
63+
The following Python code could not be executed after several attempts.
64+
Please simulate its execution and determine the final value that would be in the 'answer' variable.
4565
4666
Code to simulate:
67+
```python
4768
{code}
69+
```
70+
71+
Last error encountered:
72+
{error}
73+
74+
Important:
75+
1. Follow the logic of the code exactly
76+
2. Perform all calculations carefully
77+
3. Return ONLY the final numeric or string value, no explanations
78+
4. If the code contains semantic functions (like text analysis), use your judgment to simulate them
4879
'''
4980

5081
def extract_code_blocks(text: str) -> List[str]:
@@ -57,100 +88,168 @@ def extract_code_blocks(text: str) -> List[str]:
5788
logger.info(f"Code block {i+1}:\n{block}")
5889
return blocks
5990

60-
def extract_output(text: str) -> str:
61-
"""Extract content from output tags."""
62-
pattern = r'<output>(.*?)</output>'
63-
match = re.search(pattern, text, re.DOTALL)
64-
result = match.group(1).strip() if match else text.strip()
65-
logger.info(f"Extracted output: {result}")
66-
return result
67-
68-
def execute_code(code: str, client, model: str) -> Tuple[Any, int]:
69-
"""Execute full code block either with Python or LM simulation."""
70-
logger.info("Attempting to execute complete code block")
91+
def sanitize_code(code: str) -> str:
    """Wrap generated code for exec(): add allowed imports and an 'answer' guard.

    The model's code is indented into a ``safe_execute()`` function whose
    trailing ``return answer if 'answer' in locals() else None`` turns a
    missing 'answer' variable into ``None`` instead of a ``NameError``.

    Args:
        code: Python source produced by the model.

    Returns:
        A self-contained program string suitable for ``exec()``.
    """
    # One import line per module listed in ALLOWED_MODULES.
    imports = "\n".join(f"import {mod}" for mod in ALLOWED_MODULES)

    # BUG FIX: compute the indented body *outside* the f-string. A backslash
    # inside an f-string expression ({code.replace('\n', ...)}) is a
    # SyntaxError on every Python before 3.12.
    indented_body = code.replace("\n", "\n    ")

    wrapper = f"""
{imports}

def safe_execute():
    {indented_body}
    return answer if 'answer' in locals() else None

result = safe_execute()
answer = result
"""
    return wrapper
108+
109+
def execute_code(code: str) -> "Tuple[Any, Optional[str]]":
    """Attempt to execute generated code and capture its 'answer' variable.

    Args:
        code: Python source produced by the model.

    Returns:
        ``(answer, None)`` on success, or ``(None, error_message)`` when the
        code raises or finishes without producing an answer.  (The original
        annotation claimed ``Tuple[Any, str]`` although ``None`` is returned
        for the error on success.)
    """
    logger.info("Attempting to execute code")
    logger.info(f"Code:\n{code}")

    execution_env = {}
    try:
        sanitized_code = sanitize_code(code)
        # SECURITY: exec() runs model-generated code with full interpreter
        # privileges -- ALLOWED_MODULES only controls which imports are
        # prepended; this is NOT a sandbox.
        exec(sanitized_code, execution_env)
        answer = execution_env.get('answer')

        if answer is not None:
            logger.info(f"Execution successful. Answer: {answer}")
            return answer, None
        else:
            error = "Code executed but did not produce an answer"
            logger.warning(error)
            return None, error

    except Exception as e:
        # Broad catch is intentional: arbitrary generated code can raise
        # anything, and the full traceback is fed back to the model so it
        # can repair the code.
        error = f"{type(e).__name__}: {str(e)}\n{traceback.format_exc()}"
        logger.error(f"Execution failed: {error}")
        return None, error
132+
133+
def generate_fixed_code(original_code: str, error: str, client, model: str) -> "Tuple[Optional[str], int]":
    """Ask the LLM to repair code that failed to execute.

    Args:
        original_code: The code that raised an error.
        error: Error message / traceback from the failed run.
        client: Chat-completions client used to reach the model.
        model: Model identifier passed through to the client.

    Returns:
        ``(fixed_code, completion_tokens)``; ``fixed_code`` is ``None`` when
        the response contains no code block.  (The original annotation
        claimed ``Tuple[str, int]`` despite the ``None`` path.)
    """
    logger.info("Requesting code fix from LLM")
    logger.info(f"Original error: {error}")

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": CODE_FIX_PROMPT.format(
                code=original_code, error=error)},
            {"role": "user", "content": "Fix the code to make it work."}
        ],
        temperature=0.2
    )

    # Guard against a null completion before regex extraction.
    fixed_code = response.choices[0].message.content or ""
    code_blocks = extract_code_blocks(fixed_code)

    if code_blocks:
        logger.info("Received fixed code from LLM")
        return code_blocks[0], response.usage.completion_tokens
    else:
        logger.warning("No code block found in LLM response")
        return None, response.usage.completion_tokens
157+
158+
def simulate_execution(code: str, error: str, client, model: str) -> Tuple[Any, int]:
    """Ask the LLM to simulate code execution and report the final 'answer'.

    Used as the fallback strategy after all real execution attempts failed.

    Args:
        code: The (still failing) code to simulate.
        error: The last execution error, included in the prompt for context.
        client: Chat-completions client used to reach the model.
        model: Model identifier passed through to the client.

    Returns:
        ``(answer, completion_tokens)``; ``answer`` is ``None`` when the
        response cannot be read at all.
    """
    logger.info("Attempting code simulation with LLM")

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": SIMULATION_PROMPT.format(
                code=code, error=error)},
            {"role": "user", "content": "Simulate this code and return the final answer value."}
        ],
        temperature=0.2
    )

    try:
        result = response.choices[0].message.content.strip()
        # Try to convert the reply to a Python literal (number, list, ...).
        try:
            answer = ast.literal_eval(result)
        # BUG FIX: was a bare `except:` (swallows KeyboardInterrupt etc.);
        # catch only what literal_eval documents raising for bad input.
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a literal -- keep the raw string reply.
            answer = result
        logger.info(f"Simulation successful. Result: {answer}")
        return answer, response.usage.completion_tokens
    except Exception as e:
        logger.error(f"Failed to parse simulation result: {str(e)}")
        return None, response.usage.completion_tokens
115184

116185
def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str, int]:
    """Chain of Code entry point: generate code, execute, repair, simulate.

    Pipeline:
      1. Ask the model for a Python solution to *initial_query*.
      2. Try to execute it; after each failure, ask the model to repair the
         code (up to MAX_FIX_ATTEMPTS execute/fix rounds).
      3. If execution never succeeds, fall back to LLM simulation of the
         last code version.

    Returns:
        ``(answer_text, total_completion_tokens)``.
    """
    logger.info("Starting Chain of Code execution")
    logger.info(f"Query: {initial_query}")

    # Initial code generation
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt + "\n" + CHAIN_OF_CODE_PROMPT},
            {"role": "user", "content": initial_query},
        ],
        temperature=0.7,
    )
    total_tokens = response.usage.completion_tokens

    # Extract initial code
    reply = response.choices[0].message.content
    blocks = extract_code_blocks(reply)
    if not blocks:
        # Nothing executable came back; surface the raw reply instead.
        logger.warning("No code blocks found in response")
        return reply, total_tokens

    current_code = blocks[0]
    last_error = None

    # Strategy 1: direct execution, with LLM-driven repair between attempts.
    for attempt in range(1, MAX_FIX_ATTEMPTS + 1):
        logger.info(f"Execution attempt {attempt}/{MAX_FIX_ATTEMPTS}")

        answer, error = execute_code(current_code)
        if error is None:
            logger.info(f"Successful execution on attempt {attempt}")
            return str(answer), total_tokens

        last_error = error

        if attempt >= MAX_FIX_ATTEMPTS:
            # No repair round after the final failed attempt.
            logger.warning(f"Failed after {attempt} fix attempts")
            break

        logger.info(f"Requesting code fix, attempt {attempt}")
        fixed_code, fix_tokens = generate_fixed_code(current_code, error, client, model)
        total_tokens += fix_tokens

        if not fixed_code:
            logger.error("Failed to get fixed code from LLM")
            break
        current_code = fixed_code

    # Strategy 2: every execution attempt failed -- simulate the code instead.
    logger.info("All execution attempts failed, trying simulation")
    simulated_answer, sim_tokens = simulate_execution(current_code, last_error, client, model)
    total_tokens += sim_tokens

    if simulated_answer is not None:
        logger.info("Successfully got answer from simulation")
        return str(simulated_answer), total_tokens

    # Nothing worked; report the most recent execution error.
    logger.warning("All strategies failed")
    return f"Error: Could not solve problem after all attempts. Last error: {last_error}", total_tokens

0 commit comments

Comments
 (0)