Skip to content

Commit d748617

Browse files
yossiovadia and claude
committed
feat: enable E2E testing with LLM Katan and fix configuration
- Remove Ollama dependencies from E2E config as requested
- Update config.e2e.yaml to use only LLM Katan models (Qwen/Qwen2-0.5B-Instruct, TinyLlama/TinyLlama-1.1B-Chat-v1.0)
- Fix bash 3.2 compatibility in start-llm-katan.sh (replace associative arrays)
- Add required use_reasoning fields to all model entries for validation
- Fix zero scores in model configurations (0.0 → 0.1)

Testing Status:
- ✅ Router: Successfully starts with E2E config (ExtProc on :50051, API on :8080)
- ✅ LLM Katan: Running on ports 8000/8001 with correct model mapping
- ✅ Envoy: Running on port 8801
- ✅ Test: 00-client-request-test.py passes with 200 OK responses
- ✅ Pipeline: Full end-to-end flow working (Client → Envoy → ExtProc → LLM Katan)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
Signed-off-by: Yossi Ovadia <[email protected]>
1 parent 9868dbb commit d748617

File tree

6 files changed

+124
-131
lines changed

6 files changed

+124
-131
lines changed

config/config.e2e.yaml

Lines changed: 91 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -39,28 +39,6 @@ prompt_guard:
3939

4040
# vLLM Endpoints Configuration - supports multiple endpoints, each can serve multiple models
4141
vllm_endpoints:
42-
- name: "endpoint1"
43-
address: "127.0.0.1"
44-
port: 11434
45-
models:
46-
- "phi4"
47-
- "gemma3:27b"
48-
weight: 1 # Load balancing weight
49-
health_check_path: "/health" # Optional health check endpoint
50-
- name: "endpoint2"
51-
address: "127.0.0.1"
52-
port: 11434
53-
models:
54-
- "mistral-small3.1"
55-
weight: 1
56-
health_check_path: "/health"
57-
- name: "endpoint3"
58-
address: "127.0.0.1"
59-
port: 11434
60-
models:
61-
- "phi4" # Same model can be served by multiple endpoints for redundancy
62-
- "mistral-small3.1"
63-
weight: 2 # Higher weight for more powerful endpoint
6442
- name: "qwen-endpoint"
6543
address: "127.0.0.1"
6644
port: 8000
@@ -77,63 +55,16 @@ vllm_endpoints:
7755
health_check_path: "/health"
7856

7957
model_config:
80-
phi4:
81-
pricing:
82-
currency: USD
83-
prompt_per_1m: 0.07
84-
completion_per_1m: 0.35
85-
pii_policy:
86-
allow_by_default: false # Deny all PII by default
87-
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
88-
# Specify which endpoints can serve this model (optional - if not specified, uses all endpoints that list this model)
89-
preferred_endpoints: ["endpoint1", "endpoint3"]
90-
# Reasoning family - phi4 doesn't support reasoning, so omit this field
9158

92-
# Example: DeepSeek model with custom name
93-
"ds-v31-custom":
94-
reasoning_family: "deepseek" # This model uses DeepSeek reasoning syntax
95-
preferred_endpoints: ["endpoint1"]
96-
pii_policy:
97-
allow_by_default: true
98-
99-
# Example: Qwen3 model with custom name
100-
"my-qwen3-model":
101-
reasoning_family: "qwen3" # This model uses Qwen3 reasoning syntax
102-
preferred_endpoints: ["endpoint2"]
103-
pii_policy:
104-
allow_by_default: true
105-
106-
# Example: GPT-OSS model with custom name
107-
"custom-gpt-oss":
108-
reasoning_family: "gpt-oss" # This model uses GPT-OSS reasoning syntax
109-
preferred_endpoints: ["endpoint1"]
110-
pii_policy:
111-
allow_by_default: true
112-
gemma3:27b:
113-
pricing:
114-
currency: USD
115-
prompt_per_1m: 0.067
116-
completion_per_1m: 0.267
117-
pii_policy:
118-
allow_by_default: false # Deny all PII by default
119-
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
120-
preferred_endpoints: ["endpoint1"]
121-
"mistral-small3.1":
122-
pricing:
123-
currency: USD
124-
prompt_per_1m: 0.1
125-
completion_per_1m: 0.3
126-
pii_policy:
127-
allow_by_default: false # Deny all PII by default
128-
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
129-
preferred_endpoints: ["endpoint2", "endpoint3"]
13059
"Qwen/Qwen2-0.5B-Instruct":
60+
use_reasoning: false
13161
reasoning_family: "qwen3" # This model uses Qwen reasoning syntax
13262
preferred_endpoints: ["qwen-endpoint"]
13363
pii_policy:
13464
allow_by_default: true
13565
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"]
13666
"TinyLlama/TinyLlama-1.1B-Chat-v1.0":
67+
use_reasoning: false
13768
preferred_endpoints: ["tinyllama-endpoint"]
13869
pii_policy:
13970
allow_by_default: true
@@ -159,148 +90,191 @@ categories:
15990
reasoning_description: "Business content is typically conversational"
16091
reasoning_effort: low # Business conversations need low reasoning effort
16192
model_scores:
162-
- model: phi4
93+
- model: "Qwen/Qwen2-0.5B-Instruct"
16394
score: 0.8
164-
- model: gemma3:27b
95+
use_reasoning: false
96+
- model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
16597
score: 0.4
166-
- model: mistral-small3.1
98+
use_reasoning: false
99+
- model: "Qwen/Qwen2-0.5B-Instruct"
167100
score: 0.2
101+
use_reasoning: false
168102
- name: law
169103
use_reasoning: false
170104
reasoning_description: "Legal content is typically explanatory"
171105
model_scores:
172-
- model: gemma3:27b
106+
- model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
173107
score: 0.8
174-
- model: phi4
108+
use_reasoning: false
109+
- model: "Qwen/Qwen2-0.5B-Instruct"
175110
score: 0.6
176-
- model: mistral-small3.1
111+
use_reasoning: false
112+
- model: "Qwen/Qwen2-0.5B-Instruct"
177113
score: 0.4
114+
use_reasoning: false
178115
- name: psychology
179116
use_reasoning: false
180117
reasoning_description: "Psychology content is usually explanatory"
181118
model_scores:
182-
- model: mistral-small3.1
119+
- model: "Qwen/Qwen2-0.5B-Instruct"
183120
score: 0.6
184-
- model: gemma3:27b
121+
use_reasoning: false
122+
- model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
185123
score: 0.4
186-
- model: phi4
124+
use_reasoning: false
125+
- model: "Qwen/Qwen2-0.5B-Instruct"
187126
score: 0.4
127+
use_reasoning: false
188128
- name: biology
189129
use_reasoning: true
190130
reasoning_description: "Biological processes benefit from structured analysis"
191131
model_scores:
192-
- model: mistral-small3.1
132+
- model: "Qwen/Qwen2-0.5B-Instruct"
193133
score: 0.8
194-
- model: gemma3:27b
134+
use_reasoning: false
135+
- model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
195136
score: 0.6
196-
- model: phi4
137+
use_reasoning: false
138+
- model: "Qwen/Qwen2-0.5B-Instruct"
197139
score: 0.2
140+
use_reasoning: false
198141
- name: chemistry
199142
use_reasoning: true
200143
reasoning_description: "Chemical reactions and formulas require systematic thinking"
201144
reasoning_effort: high # Chemistry requires high reasoning effort
202145
model_scores:
203-
- model: mistral-small3.1
146+
- model: "Qwen/Qwen2-0.5B-Instruct"
204147
score: 0.8
205-
- model: gemma3:27b
148+
use_reasoning: true
149+
- model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
206150
score: 0.6
207-
- model: phi4
151+
use_reasoning: false
152+
- model: "Qwen/Qwen2-0.5B-Instruct"
208153
score: 0.6
154+
use_reasoning: false
209155
- name: history
210156
use_reasoning: false
211157
reasoning_description: "Historical content is narrative-based"
212158
model_scores:
213-
- model: mistral-small3.1
159+
- model: "Qwen/Qwen2-0.5B-Instruct"
214160
score: 0.8
215-
- model: phi4
161+
use_reasoning: false
162+
- model: "Qwen/Qwen2-0.5B-Instruct"
216163
score: 0.6
217-
- model: gemma3:27b
164+
use_reasoning: false
165+
- model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
218166
score: 0.4
167+
use_reasoning: false
219168
- name: other
220169
use_reasoning: false
221170
reasoning_description: "General content doesn't require reasoning"
222171
model_scores:
223-
- model: gemma3:27b
172+
- model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
224173
score: 0.8
225-
- model: phi4
174+
use_reasoning: false
175+
- model: "Qwen/Qwen2-0.5B-Instruct"
226176
score: 0.6
227-
- model: mistral-small3.1
177+
use_reasoning: false
178+
- model: "Qwen/Qwen2-0.5B-Instruct"
228179
score: 0.6
180+
use_reasoning: false
229181
- name: health
230182
use_reasoning: false
231183
reasoning_description: "Health information is typically informational"
232184
model_scores:
233-
- model: gemma3:27b
185+
- model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
234186
score: 0.8
235-
- model: phi4
187+
use_reasoning: false
188+
- model: "Qwen/Qwen2-0.5B-Instruct"
236189
score: 0.8
237-
- model: mistral-small3.1
190+
use_reasoning: false
191+
- model: "Qwen/Qwen2-0.5B-Instruct"
238192
score: 0.6
193+
use_reasoning: false
239194
- name: economics
240195
use_reasoning: false
241196
reasoning_description: "Economic discussions are usually explanatory"
242197
model_scores:
243-
- model: gemma3:27b
198+
- model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
244199
score: 0.8
245-
- model: mistral-small3.1
200+
use_reasoning: false
201+
- model: "Qwen/Qwen2-0.5B-Instruct"
246202
score: 0.8
247-
- model: phi4
248-
score: 0.0
203+
use_reasoning: false
204+
- model: "Qwen/Qwen2-0.5B-Instruct"
205+
score: 0.1
206+
use_reasoning: false
249207
- name: math
250208
use_reasoning: true
251209
reasoning_description: "Mathematical problems require step-by-step reasoning"
252210
reasoning_effort: high # Math problems need high reasoning effort
253211
model_scores:
254-
- model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
212+
- model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
255213
score: 1.0
256-
- model: phi4
214+
use_reasoning: true
215+
- model: "Qwen/Qwen2-0.5B-Instruct"
257216
score: 0.9
258-
- model: mistral-small3.1
217+
use_reasoning: true
218+
- model: "Qwen/Qwen2-0.5B-Instruct"
259219
score: 0.8
260-
- model: gemma3:27b
220+
use_reasoning: false
221+
- model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
261222
score: 0.6
223+
use_reasoning: false
262224
- name: physics
263225
use_reasoning: true
264226
reasoning_description: "Physics concepts need logical analysis"
265227
model_scores:
266-
- model: gemma3:27b
228+
- model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
267229
score: 0.4
268-
- model: phi4
230+
use_reasoning: true
231+
- model: "Qwen/Qwen2-0.5B-Instruct"
269232
score: 0.4
270-
- model: mistral-small3.1
233+
use_reasoning: false
234+
- model: "Qwen/Qwen2-0.5B-Instruct"
271235
score: 0.4
236+
use_reasoning: false
272237
- name: computer science
273238
use_reasoning: true
274239
reasoning_description: "Programming and algorithms need logical reasoning"
275240
model_scores:
276-
- model: gemma3:27b
241+
- model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
277242
score: 0.6
278-
- model: mistral-small3.1
243+
use_reasoning: false
244+
- model: "Qwen/Qwen2-0.5B-Instruct"
279245
score: 0.6
280-
- model: phi4
281-
score: 0.0
246+
use_reasoning: false
247+
- model: "Qwen/Qwen2-0.5B-Instruct"
248+
score: 0.1
249+
use_reasoning: false
282250
- name: philosophy
283251
use_reasoning: false
284252
reasoning_description: "Philosophical discussions are conversational"
285253
model_scores:
286-
- model: phi4
254+
- model: "Qwen/Qwen2-0.5B-Instruct"
287255
score: 0.6
288-
- model: gemma3:27b
256+
use_reasoning: false
257+
- model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
289258
score: 0.2
290-
- model: mistral-small3.1
259+
use_reasoning: false
260+
- model: "Qwen/Qwen2-0.5B-Instruct"
291261
score: 0.2
262+
use_reasoning: false
292263
- name: engineering
293264
use_reasoning: true
294265
reasoning_description: "Engineering problems require systematic problem-solving"
295266
model_scores:
296-
- model: gemma3:27b
267+
- model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
297268
score: 0.6
298-
- model: mistral-small3.1
269+
use_reasoning: false
270+
- model: "Qwen/Qwen2-0.5B-Instruct"
299271
score: 0.6
300-
- model: phi4
272+
use_reasoning: false
273+
- model: "Qwen/Qwen2-0.5B-Instruct"
301274
score: 0.2
275+
use_reasoning: false
302276

303-
default_model: mistral-small3.1
277+
default_model: "Qwen/Qwen2-0.5B-Instruct"
304278

305279
# API Configuration
306280
api:

e2e-tests/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ llm-katan --model Qwen/Qwen3-0.6B --port 8001 --served-model-name "TinyLlama/Tin
5555
make run-envoy
5656

5757
# Terminal 3: Start semantic router
58-
make run-router
58+
make run-router-e2e
5959

6060
# Terminal 4: Run tests
6161
python e2e-tests/00-client-request-test.py # Individual test

e2e-tests/llm-katan/llm_katan/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@
88
Signed-off-by: Yossi Ovadia <[email protected]>
99
"""
1010

11-
__version__ = "0.1.4"
11+
try:
12+
from importlib.metadata import version, PackageNotFoundError
13+
__version__ = version("llm-katan")
14+
except PackageNotFoundError:
15+
__version__ = "unknown"
1216
__author__ = "Yossi Ovadia"
1317
__email__ = "[email protected]"
1418

e2e-tests/llm-katan/llm_katan/cli.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@
1616
from .config import ServerConfig
1717
from .server import run_server
1818

19+
try:
20+
from importlib.metadata import version, PackageNotFoundError
21+
__version__ = version("llm-katan")
22+
except PackageNotFoundError:
23+
__version__ = "unknown"
24+
1925
# Set up logging
2026
logging.basicConfig(
2127
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
@@ -83,7 +89,7 @@
8389
default="INFO",
8490
help="Log level (default: INFO)",
8591
)
86-
@click.version_option(version="0.1.4", prog_name="LLM Katan")
92+
@click.version_option(version=__version__, prog_name="LLM Katan")
8793
def main(
8894
model: str,
8995
served_model_name: Optional[str],

0 commit comments

Comments (0)