Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ __pycache__/
.venv/
pii_env/

# Python build artifacts
dist/
build/
*.egg-info/
*.whl
*.tar.gz

# Go
*.exe
*.exe~
Expand Down Expand Up @@ -117,4 +124,7 @@ results/
.cursorrules.*

# augment editor rules
.augment
.augment

# Claude Code configuration (should not be committed)
CLAUDE.md
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ repos:
entry: bash -c "make markdown-lint"
language: system
files: \.md$
exclude: ^(\node_modules/)
exclude: ^(node_modules/|CLAUDE\.md)

# Yaml specific hooks
- repo: local
Expand Down
337 changes: 337 additions & 0 deletions config/config.e2e.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,337 @@
bert_model:
model_id: sentence-transformers/all-MiniLM-L12-v2
threshold: 0.6
use_cpu: true
semantic_cache:
enabled: true
backend_type: "memory" # Options: "memory" or "milvus"
similarity_threshold: 0.8
max_entries: 1000 # Only applies to memory backend
ttl_seconds: 3600

# For production environments, use Milvus for scalable caching:
# backend_type: "milvus"
# backend_config_path: "config/cache/milvus.yaml"

# Development/Testing: Use in-memory cache (current configuration)
# - Fast startup and no external dependencies
# - Limited to single instance scaling
# - Data lost on restart

# Production: Use Milvus vector database
# - Horizontally scalable and persistent
# - Supports distributed deployments
# - Requires Milvus cluster setup
# - To enable: uncomment the lines above and install Milvus dependencies
tools:
enabled: true # Set to true to enable automatic tool selection
top_k: 3 # Number of most relevant tools to select
similarity_threshold: 0.2 # Threshold for tool similarity
tools_db_path: "config/tools_db.json"
fallback_to_empty: true # If true, return no tools on failure; if false, return error
prompt_guard:
enabled: true
use_modernbert: true
model_id: "models/jailbreak_classifier_modernbert-base_model"
threshold: 0.7
use_cpu: true
jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"

# vLLM Endpoints Configuration - supports multiple endpoints, each can serve multiple models
vllm_endpoints:
- name: "endpoint1"
address: "127.0.0.1"
port: 11434
models:
- "phi4"
- "gemma3:27b"
weight: 1 # Load balancing weight
health_check_path: "/health" # Optional health check endpoint
- name: "endpoint2"
address: "127.0.0.1"
port: 11434
models:
- "mistral-small3.1"
weight: 1
health_check_path: "/health"
- name: "endpoint3"
address: "127.0.0.1"
port: 11434
models:
- "phi4" # Same model can be served by multiple endpoints for redundancy
- "mistral-small3.1"
weight: 2 # Higher weight for more powerful endpoint
- name: "qwen-endpoint"
address: "127.0.0.1"
port: 8000
models:
- "Qwen/Qwen2-0.5B-Instruct"
weight: 1
health_check_path: "/health"
- name: "tinyllama-endpoint"
address: "127.0.0.1"
port: 8001
models:
- "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
weight: 1
health_check_path: "/health"

model_config:
phi4:
pricing:
currency: USD
prompt_per_1m: 0.07
completion_per_1m: 0.35
pii_policy:
allow_by_default: false # Deny all PII by default
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
# Specify which endpoints can serve this model (optional - if not specified, uses all endpoints that list this model)
preferred_endpoints: ["endpoint1", "endpoint3"]
# Reasoning family - phi4 doesn't support reasoning, so omit this field

# Example: DeepSeek model with custom name
"ds-v31-custom":
reasoning_family: "deepseek" # This model uses DeepSeek reasoning syntax
preferred_endpoints: ["endpoint1"]
pii_policy:
allow_by_default: true

# Example: Qwen3 model with custom name
"my-qwen3-model":
reasoning_family: "qwen3" # This model uses Qwen3 reasoning syntax
preferred_endpoints: ["endpoint2"]
pii_policy:
allow_by_default: true

# Example: GPT-OSS model with custom name
"custom-gpt-oss":
reasoning_family: "gpt-oss" # This model uses GPT-OSS reasoning syntax
preferred_endpoints: ["endpoint1"]
pii_policy:
allow_by_default: true
"gemma3:27b":
pricing:
currency: USD
prompt_per_1m: 0.067
completion_per_1m: 0.267
pii_policy:
allow_by_default: false # Deny all PII by default
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
preferred_endpoints: ["endpoint1"]
"mistral-small3.1":
pricing:
currency: USD
prompt_per_1m: 0.1
completion_per_1m: 0.3
pii_policy:
allow_by_default: false # Deny all PII by default
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
preferred_endpoints: ["endpoint2", "endpoint3"]
"Qwen/Qwen2-0.5B-Instruct":
reasoning_family: "qwen3" # This model uses Qwen reasoning syntax
preferred_endpoints: ["qwen-endpoint"]
pii_policy:
allow_by_default: true
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"]
"TinyLlama/TinyLlama-1.1B-Chat-v1.0":
preferred_endpoints: ["tinyllama-endpoint"]
pii_policy:
allow_by_default: true
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"]

# Classifier configuration for text classification
classifier:
category_model:
model_id: "models/category_classifier_modernbert-base_model" # TODO: use the local model until the code can download the full model from Hugging Face
use_modernbert: true
threshold: 0.6
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model" # TODO: use the local model until the code can download the full model from Hugging Face
use_modernbert: true
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
categories:
- name: business
use_reasoning: false
reasoning_description: "Business content is typically conversational"
reasoning_effort: low # Business conversations need low reasoning effort
model_scores:
- model: phi4
score: 0.8
- model: gemma3:27b
score: 0.4
- model: mistral-small3.1
score: 0.2
- name: law
use_reasoning: false
reasoning_description: "Legal content is typically explanatory"
model_scores:
- model: gemma3:27b
score: 0.8
- model: phi4
score: 0.6
- model: mistral-small3.1
score: 0.4
- name: psychology
use_reasoning: false
reasoning_description: "Psychology content is usually explanatory"
model_scores:
- model: mistral-small3.1
score: 0.6
- model: gemma3:27b
score: 0.4
- model: phi4
score: 0.4
- name: biology
use_reasoning: true
reasoning_description: "Biological processes benefit from structured analysis"
model_scores:
- model: mistral-small3.1
score: 0.8
- model: gemma3:27b
score: 0.6
- model: phi4
score: 0.2
- name: chemistry
use_reasoning: true
reasoning_description: "Chemical reactions and formulas require systematic thinking"
reasoning_effort: high # Chemistry requires high reasoning effort
model_scores:
- model: mistral-small3.1
score: 0.8
- model: gemma3:27b
score: 0.6
- model: phi4
score: 0.6
- name: history
use_reasoning: false
reasoning_description: "Historical content is narrative-based"
model_scores:
- model: mistral-small3.1
score: 0.8
- model: phi4
score: 0.6
- model: gemma3:27b
score: 0.4
- name: other
use_reasoning: false
reasoning_description: "General content doesn't require reasoning"
model_scores:
- model: gemma3:27b
score: 0.8
- model: phi4
score: 0.6
- model: mistral-small3.1
score: 0.6
- name: health
use_reasoning: false
reasoning_description: "Health information is typically informational"
model_scores:
- model: gemma3:27b
score: 0.8
- model: phi4
score: 0.8
- model: mistral-small3.1
score: 0.6
- name: economics
use_reasoning: false
reasoning_description: "Economic discussions are usually explanatory"
model_scores:
- model: gemma3:27b
score: 0.8
- model: mistral-small3.1
score: 0.8
- model: phi4
score: 0.0
- name: math
use_reasoning: true
reasoning_description: "Mathematical problems require step-by-step reasoning"
reasoning_effort: high # Math problems need high reasoning effort
model_scores:
- model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
score: 1.0
- model: phi4
score: 0.9
- model: mistral-small3.1
score: 0.8
- model: gemma3:27b
score: 0.6
- name: physics
use_reasoning: true
reasoning_description: "Physics concepts need logical analysis"
model_scores:
- model: gemma3:27b
score: 0.4
- model: phi4
score: 0.4
- model: mistral-small3.1
score: 0.4
- name: computer science
use_reasoning: true
reasoning_description: "Programming and algorithms need logical reasoning"
model_scores:
- model: gemma3:27b
score: 0.6
- model: mistral-small3.1
score: 0.6
- model: phi4
score: 0.0
- name: philosophy
use_reasoning: false
reasoning_description: "Philosophical discussions are conversational"
model_scores:
- model: phi4
score: 0.6
- model: gemma3:27b
score: 0.2
- model: mistral-small3.1
score: 0.2
- name: engineering
use_reasoning: true
reasoning_description: "Engineering problems require systematic problem-solving"
model_scores:
- model: gemma3:27b
score: 0.6
- model: mistral-small3.1
score: 0.6
- model: phi4
score: 0.2

default_model: mistral-small3.1

# API Configuration
api:
batch_classification:
# Metrics configuration for monitoring batch classification performance
metrics:
enabled: true # Enable comprehensive metrics collection
detailed_goroutine_tracking: true # Track individual goroutine lifecycle
high_resolution_timing: false # Use nanosecond precision timing
sample_rate: 1.0 # Collect metrics for all requests (1.0 = 100%, 0.5 = 50%)
# Histogram buckets for metrics (directly configure what you need)
duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]

# Reasoning family configurations - define how different model families handle reasoning syntax
reasoning_families:
deepseek:
type: "chat_template_kwargs"
parameter: "thinking"

qwen3:
type: "chat_template_kwargs"
parameter: "enable_thinking"

gpt-oss:
type: "reasoning_effort"
parameter: "reasoning_effort"

gpt:
type: "reasoning_effort"
parameter: "reasoning_effort"

# Global default reasoning effort level
default_reasoning_effort: medium # Default reasoning effort level (low, medium, high)
Loading
Loading