Commit 0292153

refactor: move use_reasoning from the category level to the model level to support non-reasoning models (#178)
Signed-off-by: Huamin Chen <[email protected]>
1 parent 951617b commit 0292153
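
In short: the per-category reasoning fields (use_reasoning, reasoning_description, reasoning_effort) are folded into each model_scores entry, so the decision to reason is made per model rather than per category, and models without a reasoning family can simply opt out. A minimal before/after sketch, condensed from the diff below:

    # Before (category level)
    - name: math
      use_reasoning: true
      reasoning_effort: high
      model_scores:
      - model: phi4
        score: 1.0

    # After (model level)
    - name: math
      model_scores:
      - model: openai/gpt-oss-20b
        score: 1.0
        use_reasoning: true # Enable reasoning for complex math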

9 files changed (+317, -278 lines)

config/config.yaml

Lines changed: 71 additions & 214 deletions
@@ -2,34 +2,22 @@ bert_model:
   model_id: sentence-transformers/all-MiniLM-L12-v2
   threshold: 0.6
   use_cpu: true
+
 semantic_cache:
   enabled: true
   backend_type: "memory" # Options: "memory" or "milvus"
   similarity_threshold: 0.8
   max_entries: 1000 # Only applies to memory backend
   ttl_seconds: 3600
-  eviction_policy: "fifo" # "fifo", "lru", "lfu", currently only supports memory backend
-
-  # For production environments, use Milvus for scalable caching:
-  # backend_type: "milvus"
-  # backend_config_path: "config/cache/milvus.yaml"
+  eviction_policy: "fifo"
 
-# Development/Testing: Use in-memory cache (current configuration)
-# - Fast startup and no external dependencies
-# - Limited to single instance scaling
-# - Data lost on restart
-
-# Production: Use Milvus vector database
-# - Horizontally scalable and persistent
-# - Supports distributed deployments
-# - Requires Milvus cluster setup
-# - To enable: uncomment the lines above and install Milvus dependencies
 tools:
-  enabled: true # Set to true to enable automatic tool selection
-  top_k: 3 # Number of most relevant tools to select
-  similarity_threshold: 0.2 # Threshold for tool similarity
+  enabled: true
+  top_k: 3
+  similarity_threshold: 0.2
   tools_db_path: "config/tools_db.json"
-  fallback_to_empty: true # If true, return no tools on failure; if false, return error
+  fallback_to_empty: true
+
 prompt_guard:
   enabled: true
   use_modernbert: true
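
Note on the hunk above: the deleted comments described switching the semantic cache from the in-memory backend to Milvus for production. That option is still implied by backend_type; a minimal sketch reconstructed from the removed comment lines (backend_config_path is the value those comments referenced):

    semantic_cache:
      enabled: true
      backend_type: "milvus"
      backend_config_path: "config/cache/milvus.yaml"
      similarity_threshold: 0.8
      ttl_seconds: 3600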
@@ -38,258 +26,114 @@ prompt_guard:
   use_cpu: true
   jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"
 
-# vLLM Endpoints Configuration - supports multiple endpoints, each can serve multiple models
+# vLLM Endpoints Configuration
 vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
-    port: 11434
-    models:
-      - "phi4"
-      - "gemma3:27b"
-    weight: 1 # Load balancing weight
-    health_check_path: "/health" # Optional health check endpoint
-  - name: "endpoint2"
-    address: "127.0.0.1"
-    port: 11434
+    port: 8000
     models:
-      - "mistral-small3.1"
+      - "openai/gpt-oss-20b"
     weight: 1
     health_check_path: "/health"
-  - name: "endpoint3"
-    address: "127.0.0.1"
-    port: 11434
-    models:
-      - "phi4" # Same model can be served by multiple endpoints for redundancy
-      - "mistral-small3.1"
-    weight: 2 # Higher weight for more powerful endpoint
 
 model_config:
-  phi4:
-    pricing:
-      currency: USD
-      prompt_per_1m: 0.07
-      completion_per_1m: 0.35
-    pii_policy:
-      allow_by_default: false # Deny all PII by default
-      pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
-    # Specify which endpoints can serve this model (optional - if not specified, uses all endpoints that list this model)
-    preferred_endpoints: ["endpoint1", "endpoint3"]
-    # Reasoning family - phi4 doesn't support reasoning, so omit this field
-
-  # Example: DeepSeek model with custom name
-  "ds-v31-custom":
-    reasoning_family: "deepseek" # This model uses DeepSeek reasoning syntax
+  "openai/gpt-oss-20b":
+    reasoning_family: "gpt-oss" # This model uses GPT-OSS reasoning syntax
     preferred_endpoints: ["endpoint1"]
     pii_policy:
       allow_by_default: true
 
-  # Example: Qwen3 model with custom name
-  "my-qwen3-model":
-    reasoning_family: "qwen3" # This model uses Qwen3 reasoning syntax
-    preferred_endpoints: ["endpoint2"]
-    pii_policy:
-      allow_by_default: true
-
-  # Example: GPT-OSS model with custom name
-  "custom-gpt-oss":
-    reasoning_family: "gpt-oss" # This model uses GPT-OSS reasoning syntax
-    preferred_endpoints: ["endpoint1"]
-    pii_policy:
-      allow_by_default: true
-  gemma3:27b:
-    pricing:
-      currency: USD
-      prompt_per_1m: 0.067
-      completion_per_1m: 0.267
-    pii_policy:
-      allow_by_default: false # Deny all PII by default
-      pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
-    preferred_endpoints: ["endpoint1"]
-  "mistral-small3.1":
-    pricing:
-      currency: USD
-      prompt_per_1m: 0.1
-      completion_per_1m: 0.3
-    pii_policy:
-      allow_by_default: false # Deny all PII by default
-      pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
-    preferred_endpoints: ["endpoint2", "endpoint3"]
-
-# Classifier configuration for text classification
+# Classifier configuration
 classifier:
   category_model:
-    model_id: "models/category_classifier_modernbert-base_model" # TODO: Use local model for now before the code can download the entire model from huggingface
+    model_id: "models/category_classifier_modernbert-base_model"
     use_modernbert: true
     threshold: 0.6
     use_cpu: true
     category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
   pii_model:
-    model_id: "models/pii_classifier_modernbert-base_presidio_token_model" # TODO: Use local model for now before the code can download the entire model from huggingface
+    model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
     use_modernbert: true
     threshold: 0.7
     use_cpu: true
     pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
+
+# Categories with new use_reasoning field structure
 categories:
 - name: business
-  use_reasoning: false
-  reasoning_description: "Business content is typically conversational"
-  reasoning_effort: low # Business conversations need low reasoning effort
   model_scores:
-  - model: phi4
-    score: 0.8
-  - model: gemma3:27b
-    score: 0.4
-  - model: mistral-small3.1
-    score: 0.2
+  - model: openai/gpt-oss-20b
+    score: 0.7
+    use_reasoning: false # Business performs better without reasoning
 - name: law
-  use_reasoning: false
-  reasoning_description: "Legal content is typically explanatory"
   model_scores:
-  - model: gemma3:27b
-    score: 0.8
-  - model: phi4
-    score: 0.6
-  - model: mistral-small3.1
+  - model: openai/gpt-oss-20b
     score: 0.4
+    use_reasoning: false
 - name: psychology
-  use_reasoning: false
-  reasoning_description: "Psychology content is usually explanatory"
   model_scores:
-  - model: mistral-small3.1
+  - model: openai/gpt-oss-20b
     score: 0.6
-  - model: gemma3:27b
-    score: 0.4
-  - model: phi4
-    score: 0.4
+    use_reasoning: false
 - name: biology
-  use_reasoning: true
-  reasoning_description: "Biological processes benefit from structured analysis"
   model_scores:
-  - model: mistral-small3.1
-    score: 0.8
-  - model: gemma3:27b
-    score: 0.6
-  - model: phi4
-    score: 0.2
+  - model: openai/gpt-oss-20b
+    score: 0.9
+    use_reasoning: false
 - name: chemistry
-  use_reasoning: true
-  reasoning_description: "Chemical reactions and formulas require systematic thinking"
-  reasoning_effort: high # Chemistry requires high reasoning effort
   model_scores:
-  - model: mistral-small3.1
-    score: 0.8
-  - model: gemma3:27b
-    score: 0.6
-  - model: phi4
+  - model: openai/gpt-oss-20b
     score: 0.6
+    use_reasoning: true # Enable reasoning for complex chemistry
 - name: history
-  use_reasoning: false
-  reasoning_description: "Historical content is narrative-based"
   model_scores:
-  - model: mistral-small3.1
-    score: 0.8
-  - model: phi4
-    score: 0.6
-  - model: gemma3:27b
-    score: 0.4
+  - model: openai/gpt-oss-20b
+    score: 0.7
+    use_reasoning: false
 - name: other
-  use_reasoning: false
-  reasoning_description: "General content doesn't require reasoning"
   model_scores:
-  - model: gemma3:27b
-    score: 0.8
-  - model: phi4
-    score: 0.6
-  - model: mistral-small3.1
-    score: 0.6
+  - model: openai/gpt-oss-20b
+    score: 0.7
+    use_reasoning: false
 - name: health
-  use_reasoning: false
-  reasoning_description: "Health information is typically informational"
   model_scores:
-  - model: gemma3:27b
-    score: 0.8
-  - model: phi4
-    score: 0.8
-  - model: mistral-small3.1
-    score: 0.6
+  - model: openai/gpt-oss-20b
+    score: 0.5
+    use_reasoning: false
 - name: economics
-  use_reasoning: false
-  reasoning_description: "Economic discussions are usually explanatory"
   model_scores:
-  - model: gemma3:27b
-    score: 0.8
-  - model: mistral-small3.1
-    score: 0.8
-  - model: phi4
-    score: 0.0
+  - model: openai/gpt-oss-20b
+    score: 1.0
+    use_reasoning: false
 - name: math
-  use_reasoning: true
-  reasoning_description: "Mathematical problems require step-by-step reasoning"
-  reasoning_effort: high # Math problems need high reasoning effort
   model_scores:
-  - model: phi4
+  - model: openai/gpt-oss-20b
     score: 1.0
-  - model: mistral-small3.1
-    score: 0.8
-  - model: gemma3:27b
-    score: 0.6
+    use_reasoning: true # Enable reasoning for complex math
 - name: physics
-  use_reasoning: true
-  reasoning_description: "Physics concepts need logical analysis"
   model_scores:
-  - model: gemma3:27b
-    score: 0.4
-  - model: phi4
-    score: 0.4
-  - model: mistral-small3.1
-    score: 0.4
+  - model: openai/gpt-oss-20b
+    score: 0.7
+    use_reasoning: true # Enable reasoning for physics
 - name: computer science
-  use_reasoning: true
-  reasoning_description: "Programming and algorithms need logical reasoning"
   model_scores:
-  - model: gemma3:27b
+  - model: openai/gpt-oss-20b
     score: 0.6
-  - model: mistral-small3.1
-    score: 0.6
-  - model: phi4
-    score: 0.0
+    use_reasoning: false
 - name: philosophy
-  use_reasoning: false
-  reasoning_description: "Philosophical discussions are conversational"
   model_scores:
-  - model: phi4
-    score: 0.6
-  - model: gemma3:27b
-    score: 0.2
-  - model: mistral-small3.1
-    score: 0.2
+  - model: openai/gpt-oss-20b
+    score: 0.5
+    use_reasoning: false
 - name: engineering
-  use_reasoning: true
-  reasoning_description: "Engineering problems require systematic problem-solving"
   model_scores:
-  - model: gemma3:27b
-    score: 0.6
-  - model: mistral-small3.1
-    score: 0.6
-  - model: phi4
-    score: 0.2
-
-default_model: mistral-small3.1
+  - model: openai/gpt-oss-20b
+    score: 0.7
+    use_reasoning: false
 
-# API Configuration
-api:
-  batch_classification:
-    # Metrics configuration for monitoring batch classification performance
-    metrics:
-      enabled: true # Enable comprehensive metrics collection
-      detailed_goroutine_tracking: true # Track individual goroutine lifecycle
-      high_resolution_timing: false # Use nanosecond precision timing
-      sample_rate: 1.0 # Collect metrics for all requests (1.0 = 100%, 0.5 = 50%)
-      # Histogram buckets for metrics (directly configure what you need)
-      duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
-      size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
+default_model: openai/gpt-oss-20b
 
-# Reasoning family configurations - define how different model families handle reasoning syntax
+# Reasoning family configurations
 reasoning_families:
   deepseek:
     type: "chat_template_kwargs"
@@ -302,10 +146,23 @@ reasoning_families:
   gpt-oss:
     type: "reasoning_effort"
     parameter: "reasoning_effort"
-
   gpt:
     type: "reasoning_effort"
     parameter: "reasoning_effort"
 
 # Global default reasoning effort level
-default_reasoning_effort: medium # Default reasoning effort level (low, medium, high)
+default_reasoning_effort: high
+
+# API Configuration
+api:
+  batch_classification:
+    max_batch_size: 100
+    concurrency_threshold: 5
+    max_concurrency: 8
+    metrics:
+      enabled: true
+      detailed_goroutine_tracking: true
+      high_resolution_timing: false
+      sample_rate: 1.0
+      duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
+      size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
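
Taken together with the model_config hunk above: a request only gets reasoning enabled when its routed model declares a reasoning_family and the matched category's model_scores entry sets use_reasoning: true; the family entry names the parameter to set, and default_reasoning_effort supplies its value. A hypothetical effective request for a math query, assuming the router injects the gpt-oss family's parameter into an OpenAI-style body:

    # Illustrative only - parameter injection is an assumption about router behavior
    model: openai/gpt-oss-20b
    reasoning_effort: high  # gpt-oss family parameter, value from default_reasoning_effort
    messages:
    - role: user
      content: "Solve x^2 - 5x + 6 = 0"

A model with no reasoning_family (like the removed phi4, whose comment read "phi4 doesn't support reasoning") would leave the request untouched even where use_reasoning is true, which is what the commit title means by supporting non-reasoning models.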
