You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# vLLM Endpoints Configuration - supports multiple endpoints, each can serve multiple models
vllm_endpoints:
  - name: "endpoint1"
    address: "127.0.0.1"
    port: 8000
    models:
      - "openai/gpt-oss-20b"
    weight: 1  # Load balancing weight
    health_check_path: "/health"  # Optional health check endpoint

# Per-model settings: reasoning family, endpoint preferences, and PII policy.
model_config:
  "openai/gpt-oss-20b":
    reasoning_family: "gpt-oss"  # This model uses GPT-OSS reasoning syntax
    # Endpoints allowed to serve this model (optional - if not specified,
    # uses all endpoints that list this model)
    preferred_endpoints: ["endpoint1"]
    pii_policy:
      allow_by_default: true

# Classifier configuration for text classification
46
+
# Classifier configuration
119
47
classifier:
120
48
category_model:
121
-
model_id: "models/category_classifier_modernbert-base_model"# TODO: Use local model for now before the code can download the entire model from huggingface
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"# TODO: Use local model for now before the code can download the entire model from huggingface
0 commit comments