
Commit d6c1151

add plugindir flag and env var
1 parent 75a5d21 commit d6c1151

File tree: Dockerfile.proxy_only, optillm.py, requirements_proxy_only.txt, scripts/eval_aime_benchmark.py

4 files changed: 92 additions & 12 deletions


Dockerfile.proxy_only

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
+# Build stage
+FROM python:3.12-slim AS builder
+
+# Define build argument with default value
+ARG PORT=8000
+# Make it available as env variable at runtime
+ENV OPTILLM_PORT=$PORT
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    python3-dev \
+    gcc \
+    g++ \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy only the requirements file first to leverage Docker cache
+COPY requirements_proxy_only.txt .
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements_proxy_only.txt
+
+# Final stage
+FROM python:3.12-slim
+
+# Install curl for the healthcheck
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Set working directory
+WORKDIR /app
+
+# Copy installed dependencies from builder stage
+COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+# Copy application code
+COPY . .
+
+# Create a non-root user and switch to it
+RUN useradd -m appuser
+USER appuser
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+
+# Use the ARG in EXPOSE
+EXPOSE ${PORT}
+
+# Run the application
+ENTRYPOINT ["python", "optillm.py"]
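A possible build-and-run sequence for this new image; the image tag optillm-proxy is only illustrative, and the port values simply restate the Dockerfile's default:

# build the proxy-only image, passing the PORT build argument explicitly
docker build -f Dockerfile.proxy_only --build-arg PORT=8000 -t optillm-proxy .
# run it, mapping the proxy port to the host
docker run --rm -p 8000:8000 optillm-proxy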

optillm.py

Lines changed: 5 additions & 4 deletions
@@ -158,7 +158,7 @@ def load_plugins():
     package_plugin_dir = os.path.join(os.path.dirname(optillm.__file__), 'plugins')

     # Get local project plugins directory
-    current_dir = os.getcwd()
+    current_dir = os.getcwd() if server_config.get("plugins_dir", "") == "" else server_config["plugins_dir"]
     local_plugin_dir = os.path.join(current_dir, 'optillm', 'plugins')

     plugin_dirs = []
@@ -664,7 +664,8 @@ def parse_args():
         ("--return-full-response", "OPTILLM_RETURN_FULL_RESPONSE", bool, False, "Return the full response including the CoT with <thinking> tags"),
         ("--port", "OPTILLM_PORT", int, 8000, "Specify the port to run the proxy"),
         ("--log", "OPTILLM_LOG", str, "info", "Specify the logging level", list(logging_levels.keys())),
-        ("--launch-gui", "OPTILLM_LAUNCH_GUI", bool, False, "Launch a Gradio chat interface")
+        ("--launch-gui", "OPTILLM_LAUNCH_GUI", bool, False, "Launch a Gradio chat interface"),
+        ("--plugins-dir", "OPTILLM_PLUGINS_DIR", str, "", "Path to the plugins directory"),
     ]

     for arg, env, type_, default, help_text, *extra in args_env:
@@ -704,11 +705,11 @@ def main():
     global server_config
     # Call this function at the start of main()
     args = parse_args()
-    load_plugins()
-
     # Update server_config with all argument values
     server_config.update(vars(args))

+    load_plugins()
+
     port = server_config['port']

     # Set logging level from user request
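With this reordering, load_plugins() now runs after server_config.update(vars(args)), so the plugins_dir value supplied via the new flag or environment variable is already in server_config when plugins are discovered. Illustrative usage of the option added in this commit (the directory path is only a placeholder):

# via the new CLI flag
python optillm.py --plugins-dir /path/to/plugins
# or via the equivalent environment variable
OPTILLM_PLUGINS_DIR=/path/to/plugins python optillm.py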

requirements_proxy_only.txt

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+numpy
+networkx
+openai
+z3-solver
+aiohttp
+flask
+azure.identity
+scikit-learn
+litellm
+requests
+beautifulsoup4
+lxml
+presidio_analyzer
+presidio_anonymizer
+nbformat
+nbconvert
+ipython
+ipykernel
+gradio
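This proxy-only dependency set can also be installed outside Docker with the same command the builder stage runs:

pip install -r requirements_proxy_only.txt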

scripts/eval_aime_benchmark.py

Lines changed: 13 additions & 8 deletions
@@ -15,7 +15,7 @@
 logger = logging.getLogger(__name__)

 # Initialize OpenAI client
-client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="http://localhost:8000/v1")
+client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="https://ot7nh9nqf4l7b43s.us-east-1.aws.endpoints.huggingface.cloud/v1/")

 SYSTEM_PROMPT = '''You are solving AIME (American Invitational Mathematics Examination) problems.
@@ -104,10 +104,11 @@ def get_llm_response(problem: str, model: str) -> Union[str, List[Dict]]:
     try:
         response = client.with_options(timeout=1000.0).chat.completions.create(
             model=model,
+            temperature=0.2,
             messages=[
                 {"role": "user", "content": SYSTEM_PROMPT + problem}
             ],
-            max_tokens=8192,
+            max_tokens=40000,
         )

         # If there's more than one choice, format as attempts
@@ -241,18 +242,21 @@ def analyze_results(results: List[Dict], n: int):
     print("---")

 def main(model: str, n_attempts: int):
-    """Main evaluation function."""
+    """Main evaluation function that handles gaps in processed indexes."""
     os.makedirs("results", exist_ok=True)

-    # Include n_attempts in filename to keep separate results for different n values
     results_file = f"evaluation_results_{model.replace('/', '_')}_pass_at_{n_attempts}.json"

     dataset = load_2024_dataset()
     existing_results = load_existing_results(results_file)
-    last_processed_index = get_last_processed_index(existing_results)

-    for idx, item in enumerate(tqdm(dataset, desc="Evaluating problems")):
-        if idx <= last_processed_index:
+    # Create a set of already processed indexes for efficient lookup
+    processed_indexes = {result['index'] for result in existing_results}
+
+    for _, item in enumerate(tqdm(dataset, desc="Evaluating problems")):
+        id = int(item['id'])
+        # Skip if this index has already been processed
+        if id in processed_indexes:
             continue

         problem_text = item['problem']
@@ -263,7 +267,7 @@ def main(model: str, n_attempts: int):
         is_correct, first_correct = evaluate_pass_at_n(attempts, correct_answer)

         result = {
-            "index": idx,
+            "index": id,
             "problem": problem_text,
             "attempts": attempts,
             "correct_answer": correct_answer,
@@ -275,6 +279,7 @@ def main(model: str, n_attempts: int):
     final_results = load_existing_results(results_file)
     analyze_results(final_results, n_attempts)

+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Evaluate LLM performance on AIME 2024 problems")
     parser.add_argument("--model", type=str, required=True, help="OpenAI model to use (e.g., gpt-4, gpt-3.5-turbo)")
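An illustrative invocation of the updated benchmark script; OPENAI_API_KEY is read from the environment as shown in the first hunk, the model name is only an example, and any additional flags (such as the number of attempts) are omitted here:

OPENAI_API_KEY=sk-... python scripts/eval_aime_benchmark.py --model gpt-4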
