13 changes: 10 additions & 3 deletions docker/Dockerfile
@@ -1,7 +1,12 @@
 # Stage 1: Builder stage
-FROM python:3.11-alpine AS builder
+FROM python:3.11-slim AS builder
 
-RUN apk update && apk add --no-cache tk tcl curl
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    tk \
+    tcl \
+    curl \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
 
 WORKDIR /app
@@ -10,7 +15,9 @@ COPY . .
 RUN pip install --no-cache-dir -e .
 
 # Stage 2: Final stage
-FROM python:3.11-alpine
+FROM python:3.11-slim
 
+RUN apt-get update && apt-get install -y --no-install-recommends git
Consider cleaning the apt cache (e.g. remove /var/lib/apt/lists/*) after installing git in the final stage to reduce image size.

Suggested change:
-RUN apt-get update && apt-get install -y --no-install-recommends git
+RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*


WORKDIR /app

62 changes: 46 additions & 16 deletions docker/README.md
@@ -1,24 +1,54 @@
-# Getting Started using Docker
+# Getting Started Using Docker
 
-**Running using docker cli**:
+This guide provides step-by-step instructions on how to set up and run the Docker environment for your GPT-Engineer project.
 
-Building the image:
-- `git clone https://github.com/gpt-engineer-org/gpt-engineer.git`
-- `cd gpt-engineer`
-- `docker build --rm -t gpt-engineer -f docker/Dockerfile .`
+## Prerequisites
 
-Running the container:
-- `docker run -it --rm -e OPENAI_API_KEY="YOUR OPENAI KEY" -v ./your-project:/project gpt-engineer`
+- Docker installed on your machine.
+- Git (for cloning the repository).
 
-The `-v` flag mounts the `your-project` folder into the container. Make sure to have a `prompt` file in there.
+## Setup Instructions
 
-**Running using docker-compose cli**:
+### Using Docker CLI
 
-Building the image:
-- `git clone https://github.com/gpt-engineer-org/gpt-engineer.git`
-- `cd gpt-engineer`
-- `docker-compose -f docker-compose.yml build`
-- `docker-compose run --rm gpt-engineer`
+1. **Clone the Repository**
+   ```
+   git clone https://github.com/gpt-engineer-org/gpt-engineer.git
+   cd gpt-engineer
+   ```
+
+2. **Build the Docker Image**
+   ```
+   docker build --rm -t gpt-engineer -f docker/Dockerfile .
+   ```
+
-Set the OPENAI_API_KEY in docker/docker-compose.yml using .env file or environment variable, and mount your project folder into the container using volumes. for example "./projects/example:/project" ./projects/example is the path to your project folder.
+3. **Run the Docker Container**
+   ```
+   docker run -it --rm -e OPENAI_API_KEY="YOUR_OPENAI_KEY" -v ./your-project:/project gpt-engineer
+   ```
+   Replace `YOUR_OPENAI_KEY` with your actual OpenAI API key. The `-v` flag mounts your local `your-project` directory inside the container. Replace this with your actual project directory. Ensure this directory contains all necessary files, including the `prompt` file.
+
+### Using Docker Compose
+
+1. **Clone the Repository** (if not already done)
+   ```
+   git clone https://github.com/gpt-engineer-org/gpt-engineer.git
+   cd gpt-engineer
+   ```
+
+2. **Build and Run using Docker Compose**
+   ```
+   docker-compose -f docker-compose.yml build
+   docker-compose run --rm gpt-engineer
+   ```
+   Set the `OPENAI_API_KEY` in the `docker/docker-compose.yml` using an `.env` file or as an environment variable. Mount your project directory to the container using volumes, e.g., `"./projects/example:/project"` where `./projects/example` is the path to your project directory.
+
+## Debugging
+
+To facilitate debugging, you can run a shell inside the built Docker image:
+
+```
+docker run -it --entrypoint /bin/bash gpt-engineer
+```
+
+This opens a shell inside the Docker container, allowing you to execute commands and inspect the environment manually.
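The `docker run` step above expects the mounted project directory to contain a `prompt` file. A minimal illustration of preparing one (the directory name and prompt text here are only examples, not part of the repository):

```shell
# Create a project directory containing the prompt file gpt-engineer reads.
mkdir -p your-project
echo "Write a CLI tool that prints hello world" > your-project/prompt
ls your-project
```

You would then mount this directory with `-v ./your-project:/project` as shown in the run command above.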
77 changes: 18 additions & 59 deletions gpt_engineer/benchmark/benchmarks/mbpp/load.py
@@ -13,54 +13,46 @@
 from pathlib import Path
 from subprocess import TimeoutExpired
 from typing import Union
 
 from datasets import Dataset, DatasetDict, load_dataset, load_from_disk
 
 from gpt_engineer.benchmark.bench_config import MbppConfig
 from gpt_engineer.benchmark.benchmarks.mbpp.problem import Problem
 from gpt_engineer.benchmark.types import Assertable, Benchmark, Task
 from gpt_engineer.core.default.disk_execution_env import DiskExecutionEnv
 from gpt_engineer.core.files_dict import FilesDict
 from gpt_engineer.core.prompt import Prompt
 
-DATASET_PATH = Path(__file__).parent / "dataset"
-
+DATASET_PATH = Path(__file__).parent / 'dataset'
 
 class MbppAssertion:
+
     def __init__(self, assertion: str):
+        """Auto-generated docstring for function __init__."""
Replace auto-generated docstrings with more descriptive explanations to better document the function behavior.

Suggested change:
-        """Auto-generated docstring for function __init__."""
+        """Initialize MbppAssertion with a given assertion string."""

         self.assertion = assertion
 
     def evaluate(self, assertable: Assertable) -> bool:
-        generated_code = assertable.files["main.py"]
-        code_with_assertion = f"{generated_code}\n{self.assertion}"
-
-        # Create new execution environment for every run to avoid side effects
+        """Auto-generated docstring for function evaluate."""
+        generated_code = assertable.files['main.py']
+        code_with_assertion = f'{generated_code}\n{self.assertion}'
         env = DiskExecutionEnv()
-        env.upload(FilesDict({"main.py": code_with_assertion}))
-        pro = env.popen("python main.py")
-
+        env.upload(FilesDict({'main.py': code_with_assertion}))
+        pro = env.popen('python main.py')
         try:
             stdout, stderr = pro.communicate(timeout=2)
-            stdout, stderr = stdout.decode("utf-8"), stderr.decode("utf-8")
+            stdout, stderr = (stdout.decode('utf-8'), stderr.decode('utf-8'))
         except TimeoutExpired:
-            print("Execution Timeout")
+            print('Execution Timeout')
             return False
 
         return not stderr
 
 
 def _get_dataset() -> Union[Dataset, DatasetDict]:
+    """Auto-generated docstring for function _get_dataset."""
     try:
         return load_from_disk(str(DATASET_PATH))
     except FileNotFoundError:
-        print("Dataset not found locally, downloading...")
-
-        dataset = load_dataset("mbpp", "sanitized", trust_remote_code=True)
+        print('Dataset not found locally, downloading...')
+        dataset = load_dataset('mbpp', 'sanitized', trust_remote_code=True)
         dataset.save_to_disk(str(DATASET_PATH))
 
     return dataset
 
 
 def load_mbpp(config: MbppConfig) -> Benchmark:
     """
     Loads the MBPP benchmark, which consists of a series of coding problems.
@@ -73,42 +65,9 @@ def load_mbpp(config: MbppConfig) -> Benchmark:
     dataset = _get_dataset()
     tasks = []
    problems = []
-    for dataset_type in ["test", "train"]:
-        problems += [
-            Problem(
-                source_file=problem["source_file"],
-                task_id=problem["task_id"],
-                prompt=problem["prompt"],
-                code=problem["code"],
-                test_imports=problem["test_imports"],
-                test_list=problem["test_list"],
-            )
-            for index, problem in enumerate(dataset[dataset_type])
-            if index < config.__getattribute__(dataset_type + "_len")
-        ]
-
+    for dataset_type in ['test', 'train']:
+        problems += [Problem(source_file=problem['source_file'], task_id=problem['task_id'], prompt=problem['prompt'], code=problem['code'], test_imports=problem['test_imports'], test_list=problem['test_list']) for index, problem in enumerate(dataset[dataset_type]) if index < config.__getattribute__(dataset_type + '_len')]
     for problem in problems:
-        prompt = Prompt(
-            problem.prompt
-            + "Please extend given function without changing it's declaration including arguments."
-        )
-
-        tasks.append(
-            Task(
-                name=str(problem.task_id),
-                initial_code=FilesDict({"main.py": problem.starting_code}),
-                command=None,  # Explicitly setting `None` because each assertion runs code
-                prompt=prompt,
-                assertions={
-                    f"correct assertion {i}": MbppAssertion(
-                        assertion=assertion
-                    ).evaluate
-                    for i, assertion in enumerate(problem.test_list)
-                },
-            )
-        )
-
-    return Benchmark(
-        name="mbpp",
-        tasks=tasks,
-    )
+        prompt = Prompt(problem.prompt + "Please extend given function without changing it's declaration including arguments.")
+        tasks.append(Task(name=str(problem.task_id), initial_code=FilesDict({'main.py': problem.starting_code}), command=None, prompt=prompt, assertions={f'correct assertion {i}': MbppAssertion(assertion=assertion).evaluate for i, assertion in enumerate(problem.test_list)}))
+    return Benchmark(name='mbpp', tasks=tasks)
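The `MbppAssertion.evaluate` flow in the diff above (write the generated code plus an assertion to `main.py`, run it, and treat an empty stderr as a pass) can be sketched without the `DiskExecutionEnv` dependency. This is a simplified stand-in for illustration, not the benchmark's actual harness:

```python
import subprocess
import sys
import tempfile
from pathlib import Path


def passes_assertion(generated_code: str, assertion: str, timeout: float = 2.0) -> bool:
    """Run generated_code with an appended assertion; pass iff stderr stays empty."""
    with tempfile.TemporaryDirectory() as tmp:
        # Fresh directory per run to avoid side effects, mirroring the original's
        # fresh execution environment per evaluation.
        (Path(tmp) / "main.py").write_text(f"{generated_code}\n{assertion}\n")
        proc = subprocess.Popen(
            [sys.executable, "main.py"],
            cwd=tmp,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        try:
            _stdout, stderr = proc.communicate(timeout=timeout)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
            return False
    # A failing assert writes a traceback to stderr, so empty stderr means success.
    return not stderr


code = "def add(a, b):\n    return a + b"
print(passes_assertion(code, "assert add(1, 2) == 3"))  # True
print(passes_assertion(code, "assert add(1, 2) == 4"))  # False
```

As in the original, a timeout counts as failure; the real implementation runs the code inside gpt-engineer's `DiskExecutionEnv` rather than a plain temporary directory.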