diff --git a/.env.template b/.env.template
index ffdc5bb6e2..a765bdb379 100644
--- a/.env.template
+++ b/.env.template
@@ -3,3 +3,4 @@
# OPENAI_API_KEY=Your personal OpenAI API key from https://platform.openai.com/account/api-keys
OPENAI_API_KEY=...
ANTHROPIC_API_KEY=...
+MORPH_API_KEY=...
\ No newline at end of file
diff --git a/gpt_engineer/core/default/steps.py b/gpt_engineer/core/default/steps.py
index 6b46263424..2088695a45 100644
--- a/gpt_engineer/core/default/steps.py
+++ b/gpt_engineer/core/default/steps.py
@@ -25,9 +25,11 @@
setup_sys_prompt_existing_code : function
Sets up the system prompt for improving existing code.
-
improve : function
Improves the code based on user input and returns the updated files.
+
+apply_morph_edit : function
+ Send code editing instructions to Morph API and return the merged code.
"""
import inspect
@@ -35,6 +37,10 @@
import re
import sys
import traceback
+import json
+import os
+from openai import OpenAI
+
from pathlib import Path
from typing import List, MutableMapping, Union
@@ -95,7 +101,7 @@ def setup_sys_prompt(preprompts: MutableMapping[Union[str, Path], str]) -> str:
def setup_sys_prompt_existing_code(
- preprompts: MutableMapping[Union[str, Path], str]
+ preprompts: MutableMapping[Union[str, Path], str],
) -> str:
"""
Sets up the system prompt for improving existing code.
@@ -268,6 +274,46 @@ def execute_entrypoint(
return files_dict
+def apply_morph_edit(instructions: str, initial_code: str, code_edit: str) -> str:
+    """
+    Send code editing instructions to Morph API and return the merged code.
+
+    Args:
+        instructions (str): The instructions for the edit.
+        initial_code (str): The original code to modify.
+        code_edit (str): The proposed code changes.
+
+    Returns:
+        str: The merged code from Morph, or initial_code if the key is unset, the reply is empty, or the call fails.
+    """
+    api_key = os.getenv("MORPH_API_KEY")
+    if not api_key:
+        print("Error: MORPH_API_KEY not set.")
+        return initial_code
+
+    client = OpenAI(api_key=api_key, base_url="https://api.morphllm.com/v1")
+    # Morph's apply prompt wraps each part in its own tag so the three inputs cannot be confused.
+    try:
+        response = client.chat.completions.create(
+            model="morph-v3-large",
+            messages=[
+                {
+                    "role": "user",
+                    "content": f"<instruction>{instructions}</instruction>\n<code>{initial_code}</code>\n<update>{code_edit}</update>",
+                }
+            ],
+        )
+        merged_code = response.choices[0].message.content
+        if not merged_code:
+            print("Warning: Morph returned empty content. Using original code.")
+            return initial_code
+        return merged_code
+
+    except Exception as e:
+        print(f"Unexpected error applying Morph edit: {e}")
+        return initial_code
+
+
def improve_fn(
ai: AI,
prompt: Prompt,
@@ -316,50 +362,32 @@ def _improve_loop(
ai: AI, files_dict: FilesDict, memory: BaseMemory, messages: List, diff_timeout=3
) -> FilesDict:
messages = ai.next(messages, step_name=curr_fn())
- files_dict, errors = salvage_correct_hunks(
- messages, files_dict, memory, diff_timeout=diff_timeout
- )
+ # Log AI raw response
+ ai_response = messages[-1].content.strip()
- retries = 0
- while errors and retries < MAX_EDIT_REFINEMENT_STEPS:
- messages.append(
- HumanMessage(
- content="Some previously produced diffs were not on the requested format, or the code part was not found in the code. Details:\n"
- + "\n".join(errors)
- + "\n Only rewrite the problematic diffs, making sure that the failing ones are now on the correct format and can be found in the code. Make sure to not repeat past mistakes. \n"
+ try:
+ data = json.loads(ai_response)
+ except json.JSONDecodeError as e:
+ # Log the error and the response for debugging
+ print(f"Failed to parse AI response as JSON: {e}")
+ print("AI response:", ai_response)
+ return files_dict # Return the original files_dict unchanged
+
+ for file in data:
+ if file["target_file"] not in files_dict:
+ files_dict[file["target_file"]] = file["code_edit"]
+ else:
+ # Use Morph to apply the edit
+ merged_code = apply_morph_edit(
+ instructions=file["instructions"],
+ initial_code=files_dict[file["target_file"]],
+ code_edit=file["code_edit"],
)
- )
- messages = ai.next(messages, step_name=curr_fn())
- files_dict, errors = salvage_correct_hunks(
- messages, files_dict, memory, diff_timeout
- )
- retries += 1
+ files_dict[file["target_file"]] = merged_code
return files_dict
-def salvage_correct_hunks(
- messages: List, files_dict: FilesDict, memory: BaseMemory, diff_timeout=3
-) -> tuple[FilesDict, List[str]]:
- error_messages = []
- ai_response = messages[-1].content.strip()
-
- diffs = parse_diffs(ai_response, diff_timeout=diff_timeout)
- # validate and correct diffs
-
- for _, diff in diffs.items():
- # if diff is a new file, validation and correction is unnecessary
- if not diff.is_new_file():
- problems = diff.validate_and_correct(
- file_to_lines_dict(files_dict[diff.filename_pre])
- )
- error_messages.extend(problems)
- files_dict = apply_diffs(diffs, files_dict)
- memory.log(IMPROVE_LOG_FILE, "\n\n".join(x.pretty_repr() for x in messages))
- memory.log(DIFF_LOG_FILE, "\n\n".join(error_messages))
- return files_dict, error_messages
-
-
class Tee(object):
def __init__(self, *files):
self.files = files
diff --git a/gpt_engineer/preprompts/file_format_diff b/gpt_engineer/preprompts/file_format_diff
index 486e76a2e9..dae5cea22d 100644
--- a/gpt_engineer/preprompts/file_format_diff
+++ b/gpt_engineer/preprompts/file_format_diff
@@ -1,41 +1,39 @@
-You will output the content of each file necessary to achieve the goal, including ALL code.
-Output requested code changes and new code in the unified "git diff" syntax. Example:
-
-```diff
---- example.txt
-+++ example.txt
-@@ -6,3 +6,4 @@
- line content A
- line content B
-+ new line added
-- original line X
-+ modified line X with changes
-@@ -26,4 +27,5 @@
- condition check:
-- action for condition A
-+ if certain condition is met:
-+ alternative action for condition A
- another condition check:
-- action for condition B
-+ modified action for condition B
-```
-
-Example of a git diff creating a new file:
-
-```diff
---- /dev/null
-+++ new_file.txt
-@@ -0,0 +1,3 @@
-+First example line
-+
-+Last example line
-```
-
-RULES:
--A program will apply the diffs you generate exactly to the code, so diffs must be precise and unambiguous!
--Every diff must be fenced with triple backtick ```.
--The file names at the beginning of a diff, (lines starting with --- and +++) is the relative path to the file before and after the diff.
--LINES TO BE REMOVED (starting with single -) AND LINES TO BE RETAIN (no starting symbol) HAVE TO REPLICATE THE DIFFED HUNK OF THE CODE EXACTLY LINE BY LINE. KEEP THE NUMBER OF RETAIN LINES SMALL IF POSSIBLE.
--EACH LINE IN THE SOURCE FILES STARTS WITH A LINE NUMBER, WHICH IS NOT PART OF THE SOURCE CODE. NEVER TRANSFER THESE LINE NUMBERS TO THE DIFF HUNKS.
--AVOID STARTING A HUNK WITH AN EMPTY LINE.
--ENSURE ALL CHANGES ARE PROVIDED IN A SINGLE DIFF CHUNK PER FILE TO PREVENT MULTIPLE DIFFS ON THE SAME FILE.
+You must produce your output as a JSON array of objects. Each object must have exactly three fields:
+
+1. **target_file**: the relative path of the file to edit.
+2. **instructions**: a brief description of the changes made to this file.
+3. **code_edit**: the edited content of the file, showing all changes in context. Use the special comment `// ... existing code ...` to represent unchanged spans of code.
+
+Example JSON output:
+[
+ {
+ "target_file": "path/to/file.ext",
+ "instructions": "Brief summary of changes",
+ "code_edit": "// ... existing code ...\nFIRST_EDIT\n// ... existing code ...\nSECOND_EDIT\n// ... existing code ..."
+ }
+]
+
+Use this tool to make an edit to an existing file.
+
+This will be read by a less intelligent model, which will quickly apply the edit. You should make it clear what the edit is, while also minimizing the unchanged code you write.
+When writing the edit, you should specify each edit in sequence, with the special comment // ... existing code ... to represent unchanged code in between edited lines.
+
+For example:
+
+// ... existing code ...
+FIRST_EDIT
+// ... existing code ...
+SECOND_EDIT
+// ... existing code ...
+THIRD_EDIT
+// ... existing code ...
+
+You should still bias towards repeating as few lines of the original file as possible to convey the change.
+But, each edit should contain minimally sufficient context of unchanged lines around the code you're editing to resolve ambiguity.
+DO NOT omit spans of pre-existing code (or comments) without using the // ... existing code ... comment to indicate its absence. If you omit the existing code comment, the model may inadvertently delete these lines.
+If you plan on deleting a section, you must provide context before and after to delete it. If the initial code is ```code \n Block 1 \n Block 2 \n Block 3 \n code```, and you want to remove Block 2, you would output ```// ... existing code ... \n Block 1 \n Block 3 \n // ... existing code ...```.
+Make sure it is clear what the edit should be, and where it should be applied.
+Make all edits to a file in a single JSON object instead of multiple objects with the same target_file. The apply model can handle many distinct edits at once.
+
+
+RULES: Return only the JSON array and nothing else. Do not use markdown syntax, and do not add any other text, instructions, or formatting.
\ No newline at end of file
diff --git a/gpt_engineer/preprompts/improve b/gpt_engineer/preprompts/improve
index a810b42666..38757ca4e9 100644
--- a/gpt_engineer/preprompts/improve
+++ b/gpt_engineer/preprompts/improve
@@ -1,5 +1,5 @@
Think step by step and reason yourself to the correct decisions to make sure we get it right.
-Make changes to existing code and implement new code in the unified git diff syntax. When implementing new code, First lay out the names of the core classes, functions, methods that will be necessary, As well as a quick comment on their purpose.
+Make changes to existing code. When implementing new code, First lay out the names of the core classes, functions, methods that will be necessary, As well as a quick comment on their purpose.
FILE_FORMAT
diff --git a/tests/applications/cli/test_cli_agent.py b/tests/applications/cli/test_cli_agent.py
index 8bc6e93875..1f54206045 100644
--- a/tests/applications/cli/test_cli_agent.py
+++ b/tests/applications/cli/test_cli_agent.py
@@ -1,7 +1,9 @@
import os
import tempfile
+import json
import pytest
+from unittest.mock import patch
from langchain.schema import AIMessage
@@ -112,43 +114,45 @@ def test_init_clarified_gen_config(monkeypatch):
assert code[outfile].strip() == "Hello World!"
-def test_improve_standard_config(monkeypatch):
+@patch("gpt_engineer.core.default.steps.apply_morph_edit")
+def test_improve_standard_config(mock_apply_morph, monkeypatch): # 2. Pytest gives you the mock as an argument
+ # 3. Tell your fake function what to return when it's called
+ mock_apply_morph.return_value = "!dlroW olleH"
+
+ # The rest of the test setup
monkeypatch.setattr("builtins.input", lambda _: "y")
temp_dir = tempfile.mkdtemp()
code = FilesDict(
{
- "main.py": "def write_hello_world_to_file(filename):\n \"\"\"\n Writes 'Hello World!' to the specified file.\n \n :param filename: The name of the file to write to.\n \"\"\"\n with open(filename, 'w') as file:\n file.write('Hello World!')\n\nif __name__ == \"__main__\":\n output_filename = 'output.txt'\n write_hello_world_to_file(output_filename)",
+ "main.py": "def write_hello_world_to_file(filename):\n file.write('Hello World!')",
"requirements.txt": "# No dependencies required",
"run.sh": "python3 main.py\n",
}
)
memory = DiskMemory(memory_path(temp_dir))
- # version_manager = GitVersionManager(temp_dir)
execution_env = DiskExecutionEnv()
- mock_ai = MockAI(
- [
- AIMessage(
- "```diff\n--- main.py\n+++ main.py\n@@ -7,3 +7,3 @@\n with open(filename, 'w') as file:\n- file.write('Hello World!')\n+ file.write('!dlroW olleH')\n```"
- )
- ]
- )
+
+    # Mocked AI response: a JSON list of edit objects (target_file, instructions, code_edit)
+ mock_response_data = [{
+ "target_file": "main.py",
+ "instructions": "Reverse the string written to the file",
+ "code_edit": "...", # The content here doesn't matter, as the call is mocked
+ }]
+ mock_ai = MockAI([AIMessage(content=json.dumps(mock_response_data))])
+
cli_agent = CliAgent.with_default_config(memory, execution_env, ai=mock_ai)
- code = cli_agent.improve(
+ # When this improve function is called, it will use your MOCKED apply_morph_edit
+ improved_code = cli_agent.improve(
code,
- Prompt(
- "Change the program so that it prints '!dlroW olleH' instead of 'Hello World!'"
- ),
+ Prompt("Change the program so that it prints '!dlroW olleH' instead of 'Hello World!'"),
)
- env = DiskExecutionEnv()
- env.upload(code).run(f"bash {ENTRYPOINT_FILE}")
- code = env.download()
-
- outfile = "output.txt"
- assert outfile in code
- assert code[outfile] == "!dlroW olleH"
-
+ # 4. The result of the `improve` call is now the direct output of your mock
+ assert improved_code["main.py"] == "!dlroW olleH"
+
+ # 5. (Best Practice) We can also check that the fake API call was made
+ mock_apply_morph.assert_called_once()
if __name__ == "__main__":
pytest.main()
diff --git a/tests/core/default/test_simple_agent.py b/tests/core/default/test_simple_agent.py
index 7c75210de2..cabc0f4540 100644
--- a/tests/core/default/test_simple_agent.py
+++ b/tests/core/default/test_simple_agent.py
@@ -1,6 +1,7 @@
import tempfile
import pytest
+import json
from langchain.schema import AIMessage
@@ -47,11 +48,16 @@ def test_improve():
"run.sh": "python3 main.py\n",
}
)
+ mock_response_data = [
+ {
+ "target_file": "main.py",
+ "instructions": "Reverse the string written to the file",
+ "code_edit": "with open(filename, 'w') as file:\n file.write('!dlroW olleH')",
+ }
+ ]
mock_ai = MockAI(
[
- AIMessage(
- "```diff\n--- main.py\n+++ main.py\n@@ -7,3 +7,3 @@\n with open(filename, 'w') as file:\n- file.write('Hello World!')\n+ file.write('!dlroW olleH')\n```"
- )
+ AIMessage(content=json.dumps(mock_response_data))
]
)
lean_agent = SimpleAgent.with_default_config(temp_dir, mock_ai)
diff --git a/tests/core/default/test_steps.py b/tests/core/default/test_steps.py
index 37e4784b1e..2798fc2bc5 100644
--- a/tests/core/default/test_steps.py
+++ b/tests/core/default/test_steps.py
@@ -4,6 +4,8 @@
from unittest.mock import MagicMock
import pytest
+import json
+from unittest.mock import patch
from langchain.schema import SystemMessage
@@ -266,51 +268,27 @@ def test_empty_codebase_returns_empty_entrypoint(self):
class TestImprove:
- def test_improve_existing_code(self, tmp_path):
- # Mock the AI class
- ai_patch = """
-Some introductory text.
-```diff
---- main.py
-+++ main.py
-@@ -1,1 +1,1 @@
--print('Hello, World!')
-+print('Goodbye, World!')
-```
-"""
- ai_mock = MagicMock(spec=AI)
- ai_mock.next.return_value = [SystemMessage(content=ai_patch)]
-
- # Create a Code object with existing code
- code = FilesDict(
- {
- "main.py": "print('Hello, World!')",
- "requirements.txt": "numpy==1.18.1",
- "README.md": "This is a sample code repository.",
- }
- )
+ @patch("gpt_engineer.core.default.steps.apply_morph_edit")
+ def test_improve_existing_code(self, mock_apply_morph, tmp_path):
+ expected_final_code = "print('Goodbye, World!')"
- # Create a BaseRepository object for memory
- memory = DiskMemory(tmp_path)
+ mock_apply_morph.return_value = expected_final_code
- # Define the user prompt
- prompt = Prompt(
- "Change the program to print 'Goodbye, World!' instead of 'Hello, World!'"
+ mock_ai_response_json = json.dumps(
+ [{"target_file": "main.py", "instructions": "...", "code_edit": "..."}]
)
+ ai_mock = MagicMock(spec=AI)
+ ai_mock.next.return_value = [SystemMessage(content=mock_ai_response_json)]
- # Call the improve function
+ code = FilesDict({"main.py": "print('Hello, World!')"})
+ memory = DiskMemory(tmp_path)
+ prompt = Prompt("Change the program...")
preprompts_holder = PrepromptsHolder(PREPROMPTS_PATH)
- improved_code = improve_fn(ai_mock, prompt, code, memory, preprompts_holder)
- # Assert that the code was improved correctly
- expected_code = FilesDict(
- {
- "main.py": "print('Goodbye, World!')",
- "requirements.txt": "numpy==1.18.1",
- "README.md": "This is a sample code repository.",
- }
- )
- assert improved_code == expected_code
+ # Call the function, which will now use the FAKE Morph call
+ improved_code = improve_fn(ai_mock, prompt, code, memory, preprompts_holder)
+ assert improved_code["main.py"] == expected_final_code
+ mock_apply_morph.assert_called_once()
def test_lint_python(self):
linting = Linting()
diff --git a/tests/core/test_salvage_correct_hunks.py b/tests/core/test_salvage_correct_hunks.py
index 218ace422c..e69de29bb2 100644
--- a/tests/core/test_salvage_correct_hunks.py
+++ b/tests/core/test_salvage_correct_hunks.py
@@ -1,118 +0,0 @@
-import os
-import shutil
-
-from typing import List
-
-import pytest
-
-from langchain_core.messages import AIMessage
-
-from gpt_engineer.core.default.disk_memory import DiskMemory
-from gpt_engineer.core.default.paths import memory_path
-from gpt_engineer.core.default.steps import salvage_correct_hunks
-from gpt_engineer.core.files_dict import FilesDict
-
-TEST_FILES_DIR = os.path.dirname(os.path.abspath(__file__))
-memory = DiskMemory(memory_path("."))
-
-
-def get_file_content(file_path: str) -> str:
- with open(
- os.path.join(TEST_FILES_DIR, "improve_function_test_cases", file_path), "r"
- ) as f:
- return f.read()
-
-
-def message_builder(chat_path: str) -> List[AIMessage]:
- chat_content = get_file_content(chat_path)
-
- json = {
- "lc": 1,
- "type": "constructor",
- "id": ["langchain", "schema", "messages", "AIMessage"],
- "kwargs": {
- "content": chat_content,
- "additional_kwargs": {},
- "response_metadata": {"finish_reason": "stop"},
- "name": None,
- "id": None,
- "example": False,
- },
- }
-
- return [AIMessage(**json["kwargs"])]
-
-
-def test_validation_and_apply_complex_diff():
- files = FilesDict({"taskmaster.py": get_file_content("task_master_code")})
- salvage_correct_hunks(message_builder("task_master_chat"), files, memory)
-
-
-def test_validation_and_apply_long_diff():
- files = FilesDict({"VMClonetest.ps1": get_file_content("wheaties_example_code")})
- salvage_correct_hunks(message_builder("wheaties_example_chat"), files, memory)
-
-
-def test_validation_and_apply_wrong_diff():
- files = FilesDict(
- {"src/components/SocialLinks.tsx": get_file_content("vgvishesh_example_code")}
- )
- salvage_correct_hunks(message_builder("vgvishesh_example_chat"), files, memory)
-
-
-def test_validation_and_apply_non_change_diff():
- files = FilesDict({"src/App.tsx": get_file_content("vgvishesh_example_2_code")})
- salvage_correct_hunks(message_builder("vgvishesh_example_2_chat"), files, memory)
-
-
-def test_validation_and_apply_diff_on_apps_benchmark_6():
- files = FilesDict({"main.py": get_file_content("apps_benchmark_6_code")})
- salvage_correct_hunks(message_builder("apps_benchmark_6_chat"), files, memory)
-
-
-def test_validation_and_apply_diff_on_apps_benchmark_6_v2():
- files = FilesDict({"main.py": get_file_content("apps_benchmark_6_v2_code")})
- salvage_correct_hunks(message_builder("apps_benchmark_6_v2_chat"), files, memory)
-
-
-def test_create_two_new_files():
- files = FilesDict({"main.py": get_file_content("create_two_new_files_code")})
- salvage_correct_hunks(message_builder("create_two_new_files_chat"), files, memory)
-
-
-def test_theo_case():
- files = FilesDict({"dockerfile": get_file_content("theo_case_code")})
- updated_files, _ = salvage_correct_hunks(
- message_builder("theo_case_chat"), files, memory
- )
- print(updated_files["dockerfile"])
- print(updated_files["run.py"])
-
-
-def test_zbf_yml_missing():
- files = FilesDict(
- {"src/main/resources/application.yml": get_file_content("zbf_yml_missing_code")}
- )
- updated_files, _ = salvage_correct_hunks(
- message_builder("zbf_yml_missing_chat"), files, memory
- )
- print(updated_files["src/main/resources/application.yml"])
- print(updated_files["src/main/resources/application-local.yml"])
-
-
-def test_clean_up_folder(clean_up_folder):
- # The folder should be deleted after the test is run
- assert True
-
-
-@pytest.fixture
-def clean_up_folder():
- yield
- # Teardown code: delete a folder and all its contents
- print("cleaning up")
- folder_path = os.path.join(os.path.dirname(__file__), ".gpteng")
- shutil.rmtree(folder_path, ignore_errors=True)
-
-
-if __name__ == "__main__":
- pytest.main()