@@ -48,18 +48,33 @@ routing:
4848### 2. Start OptiLLM Server
4949
5050```bash
51- # Start server normally
51+ # Option A: Use proxy as default for ALL requests (recommended)
52+ optillm --approach proxy
53+
54+ # Option B: Start server normally (use model prefix or extra_body per request)
5255optillm
5356
5457# With custom port
55- optillm --port 8000
58+ optillm --approach proxy --port 8000
5659```
5760
58- > **Note**: The `--approach proxy` flag is not currently supported in the command-line interface.
59-
6061### 3. Usage Examples
6162
62- #### Method 1: Using Model Prefix
63+ #### Method 1: Using --approach proxy (Recommended)
64+ ``` bash
65+ # Start server with proxy as default approach
66+ optillm --approach proxy
67+
68+ # Then make normal requests - proxy handles all routing automatically!
69+ curl -X POST http://localhost:8000/v1/chat/completions \
70+ -H "Content-Type: application/json" \
71+ -d '{
72+ "model": "gpt-4",
73+ "messages": [{"role": "user", "content": "Hello"}]
74+ }'
75+ ```
76+
77+ #### Method 2: Using Model Prefix (when server started without --approach proxy)
6378``` bash
6479# Use "proxy-" prefix to activate the proxy plugin
6580curl -X POST http://localhost:8000/v1/chat/completions \
@@ -70,7 +85,7 @@ curl -X POST http://localhost:8000/v1/chat/completions \
7085 }'
7186```
7287
73- #### Method 2: Using extra_body (Recommended for SDK usage)
88+ #### Method 3: Using extra_body (when server started without --approach proxy)
7489``` bash
7590# Use extra_body parameter
7691curl -X POST http://localhost:8000/v1/chat/completions \
@@ -337,13 +352,20 @@ client = OpenAI(
337352 api_key="dummy" # Can be any string when using proxy
338353)
339354
340- # Method 1: Use proxy with model prefix
355+ # Method 1: Server started with --approach proxy (recommended)
356+ # Just make normal requests - proxy handles everything!
357+ response = client.chat.completions.create(
358+ model="gpt-4",
359+ messages=[{"role": "user", "content": "Hello"}]
360+ )
361+
362+ # Method 2: Use proxy with model prefix
341363response = client.chat.completions.create(
342364 model="proxy-gpt-4", # Use "proxy-" prefix
343365 messages=[{"role": "user", "content": "Hello"}]
344366)
345367
346- # Method 2: Use extra_body (recommended)
368+ # Method 3: Use extra_body
347369response = client.chat.completions.create(
348370 model="gpt-4",
349371 messages=[{"role": "user", "content": "Hello"}],
@@ -352,7 +374,7 @@ response = client.chat.completions.create(
352374 }
353375)
354376
355- # Method 3: Proxy wrapping another approach
377+ # Method 4: Proxy wrapping another approach
356378response = client.chat.completions.create(
357379 model="gpt-4",
358380 messages=[{"role": "user", "content": "Hello"}],
@@ -367,7 +389,13 @@ response = client.chat.completions.create(
367389```python
368390from langchain.llms import OpenAI
369391
370- # Use proxy with model prefix
392+ # If server started with --approach proxy (recommended)
393+ llm = OpenAI(
394+ openai_api_base="http://localhost:8000/v1",
395+ model_name="gpt-4" # Proxy handles routing automatically
396+ )
397+
398+ # Or use proxy with model prefix
371399llm = OpenAI(
372400 openai_api_base="http://localhost:8000/v1",
373401 model_name="proxy-gpt-4" # Use "proxy-" prefix