1515logger = logging .getLogger (__name__ )
1616
1717# Initialize OpenAI client
18- client = OpenAI (api_key = os .environ .get ("OPENAI_API_KEY" ), base_url = "http://localhost:8000/v1" )
18+ client = OpenAI (api_key = os .environ .get ("OPENAI_API_KEY" ), base_url = "https://ot7nh9nqf4l7b43s.us-east-1.aws.endpoints.huggingface.cloud/v1/" )
1919
2020SYSTEM_PROMPT = '''You are solving AIME (American Invitational Mathematics Examination) problems.
2121
@@ -104,10 +104,11 @@ def get_llm_response(problem: str, model: str) -> Union[str, List[Dict]]:
104104 try :
105105 response = client .with_options (timeout = 1000.0 ).chat .completions .create (
106106 model = model ,
107+ temperature = 0.2 ,
107108 messages = [
108109 {"role" : "user" , "content" : SYSTEM_PROMPT + problem }
109110 ],
110- max_tokens = 8192 ,
111+ max_tokens = 40000 ,
111112 )
112113
113114 # If there's more than one choice, format as attempts
@@ -241,18 +242,21 @@ def analyze_results(results: List[Dict], n: int):
241242 print ("---" )
242243
243244def main (model : str , n_attempts : int ):
244- """Main evaluation function."""
245+ """Main evaluation function that handles gaps in processed indexes."""
245246 os .makedirs ("results" , exist_ok = True )
246247
247- # Include n_attempts in filename to keep separate results for different n values
248248 results_file = f"evaluation_results_{ model .replace ('/' , '_' )} _pass_at_{ n_attempts } .json"
249249
250250 dataset = load_2024_dataset ()
251251 existing_results = load_existing_results (results_file )
252- last_processed_index = get_last_processed_index (existing_results )
253252
254- for idx , item in enumerate (tqdm (dataset , desc = "Evaluating problems" )):
255- if idx <= last_processed_index :
253+ # Create a set of already processed indexes for efficient lookup
254+ processed_indexes = {result ['index' ] for result in existing_results }
255+
256+ for _ , item in enumerate (tqdm (dataset , desc = "Evaluating problems" )):
257+ id = int (item ['id' ])
258+ # Skip if this index has already been processed
259+ if id in processed_indexes :
256260 continue
257261
258262 problem_text = item ['problem' ]
@@ -263,7 +267,7 @@ def main(model: str, n_attempts: int):
263267 is_correct , first_correct = evaluate_pass_at_n (attempts , correct_answer )
264268
265269 result = {
266- "index" : idx ,
270+ "index" : id ,
267271 "problem" : problem_text ,
268272 "attempts" : attempts ,
269273 "correct_answer" : correct_answer ,
@@ -275,6 +279,7 @@ def main(model: str, n_attempts: int):
275279 final_results = load_existing_results (results_file )
276280 analyze_results (final_results , n_attempts )
277281
282+
278283if __name__ == "__main__" :
279284 parser = argparse .ArgumentParser (description = "Evaluate LLM performance on AIME 2024 problems" )
280285 parser .add_argument ("--model" , type = str , required = True , help = "OpenAI model to use (e.g., gpt-4, gpt-3.5-turbo)" )
0 commit comments