Commit 6cb1b55

feat(mock): llm-katan support
Signed-off-by: FeiDaLI <[email protected]>
1 parent 211b547

10 files changed: +64, -84 lines

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
@@ -3,7 +3,7 @@
 repos:
   # Basic hooks for Go, Rust, Python And JavaScript files only
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v6.0.0
+    rev: v4.4.0
     hooks:
       - id: trailing-whitespace
         files: \.(go|rs|py|js)$
@@ -71,15 +71,15 @@ repos:
 
   # Python specific hooks
   - repo: https://github.com/psf/black
-    rev: 25.1.0
+    rev: 22.12.0
     hooks:
       - id: black
         language_version: python3
         files: \.py$
         exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
 
   - repo: https://github.com/PyCQA/isort
-    rev: 6.0.1
+    rev: 5.12.0
     hooks:
       - id: isort
         args: ["--profile", "black"]

e2e-tests/06-pii-detection-test.py

Lines changed: 4 additions & 4 deletions
@@ -652,7 +652,9 @@ def test_extproc_pii_detection_comprehensive(self):
             status = (
                 "🔒"
                 if result["is_blocked"]
-                else "✅" if result["request_allowed"] else "❌"
+                else "✅"
+                if result["request_allowed"]
+                else "❌"
             )
             print(f" {status} {result['test_case']}")
             print(f" Content: {result['content']}")
@@ -671,9 +673,7 @@ def test_extproc_pii_detection_comprehensive(self):
                 passed=False,
                 message="⚠️ No clear evidence of ExtProc PII detection in production pipeline",
             )
-            print(
-                "📝 NOTE: This may indicate PII detection is not active in ExtProc or"
-            )
+            print("📝 NOTE: This may indicate PII detection is not active in ExtProc or")
             print(" PII policies are configured to allow all content through")
 
     def test_multiple_pii_types_analysis(self):
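
For reference, the chained conditional expression being reformatted here evaluates left to right, so the change above is layout only, not behavior. A tiny standalone sketch with a hypothetical result dict (not taken from the test suite):

```python
# Hypothetical result dict mirroring the fields used by the test above.
result = {"is_blocked": False, "request_allowed": True, "test_case": "email redaction"}

# Chained conditional: "🔒" when blocked, otherwise "✅" if allowed, otherwise "❌".
status = "🔒" if result["is_blocked"] else "✅" if result["request_allowed"] else "❌"
print(status, result["test_case"])  # ✅ email redaction
```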

src/training/classifier_model_fine_tuning/ft_linear.py

Lines changed: 14 additions & 8 deletions
@@ -508,19 +508,25 @@ def tokenize_function(examples):
 
     # Evaluate on validation set
     logger.info("Evaluating on validation set...")
-    val_accuracy, val_report, val_conf_matrix, val_predictions = (
-        evaluate_category_classifier(
-            model, tokenizer, val_texts, val_categories, idx_to_category, device
-        )
+    (
+        val_accuracy,
+        val_report,
+        val_conf_matrix,
+        val_predictions,
+    ) = evaluate_category_classifier(
+        model, tokenizer, val_texts, val_categories, idx_to_category, device
     )
     logger.info(f"Validation accuracy: {val_accuracy:.4f}")
 
     # Evaluate on test set
     logger.info("Evaluating on test set...")
-    test_accuracy, test_report, test_conf_matrix, test_predictions = (
-        evaluate_category_classifier(
-            model, tokenizer, test_texts, test_categories, idx_to_category, device
-        )
+    (
+        test_accuracy,
+        test_report,
+        test_conf_matrix,
+        test_predictions,
+    ) = evaluate_category_classifier(
+        model, tokenizer, test_texts, test_categories, idx_to_category, device
     )
     logger.info(f"Test accuracy: {test_accuracy:.4f}")
src/training/prompt_guard_fine_tuning/jailbreak_bert_finetuning.py

Lines changed: 17 additions & 11 deletions
@@ -100,9 +100,9 @@
 import torch
 
 # Suppress common non-critical warnings
-os.environ["TOKENIZERS_PARALLELISM"] = (
-    "false"  # Suppress tokenizer parallelism warnings
-)
+os.environ[
+    "TOKENIZERS_PARALLELISM"
+] = "false"  # Suppress tokenizer parallelism warnings
 warnings.filterwarnings(
     "ignore", message=".*TensorFloat32.*"
 )  # Suppress TF32 performance hints
@@ -2343,19 +2343,25 @@ def tokenize_function(examples):
 
     # Evaluate on validation set
     logger.info("Evaluating on validation set...")
-    val_accuracy, val_report, val_conf_matrix, val_predictions = (
-        evaluate_jailbreak_classifier(
-            model, tokenizer, val_texts, val_categories, idx_to_category, device
-        )
+    (
+        val_accuracy,
+        val_report,
+        val_conf_matrix,
+        val_predictions,
+    ) = evaluate_jailbreak_classifier(
+        model, tokenizer, val_texts, val_categories, idx_to_category, device
     )
     logger.info(f"Validation accuracy: {val_accuracy:.4f}")
 
     # Evaluate on test set
     logger.info("Evaluating on test set...")
-    test_accuracy, test_report, test_conf_matrix, test_predictions = (
-        evaluate_jailbreak_classifier(
-            model, tokenizer, test_texts, test_categories, idx_to_category, device
-        )
+    (
+        test_accuracy,
+        test_report,
+        test_conf_matrix,
+        test_predictions,
+    ) = evaluate_jailbreak_classifier(
+        model, tokenizer, test_texts, test_categories, idx_to_category, device
     )
     logger.info(f"Test accuracy: {test_accuracy:.4f}")

src/training/training_lora/classifier_model_fine_tuning_lora/ft_linear_lora.py

Lines changed: 1 addition & 7 deletions
@@ -69,13 +69,7 @@
 import torch
 import torch.nn as nn
 from datasets import Dataset, load_dataset
-from peft import (
-    LoraConfig,
-    PeftConfig,
-    PeftModel,
-    TaskType,
-    get_peft_model,
-)
+from peft import LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model
 from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support
 from sklearn.model_selection import train_test_split
 from transformers import (

src/training/training_lora/pii_model_fine_tuning_lora/pii_bert_finetuning_lora.py

Lines changed: 1 addition & 7 deletions
@@ -70,13 +70,7 @@
 import torch
 import torch.nn as nn
 from datasets import Dataset, load_dataset
-from peft import (
-    LoraConfig,
-    PeftConfig,
-    PeftModel,
-    TaskType,
-    get_peft_model,
-)
+from peft import LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model
 from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support
 from sklearn.model_selection import train_test_split
 from transformers import (

src/training/training_lora/prompt_guard_fine_tuning_lora/jailbreak_bert_finetuning_lora.py

Lines changed: 1 addition & 7 deletions
@@ -77,13 +77,7 @@
 import torch
 import torch.nn as nn
 from datasets import Dataset, load_dataset
-from peft import (
-    LoraConfig,
-    PeftConfig,
-    PeftModel,
-    TaskType,
-    get_peft_model,
-)
+from peft import LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model
 from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support
 from sklearn.model_selection import train_test_split
 from transformers import (
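
The consolidated `peft` import used in these three LoRA training scripts pulls in the usual adapter entry points. A minimal sketch of how those names typically fit together, with illustrative hyperparameters and a small checkpoint chosen only for the example (not the settings used by the scripts in this commit):

```python
# Hedged sketch: wrap a small sequence-classification model with a LoRA adapter
# using the names from the consolidated import above.
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForSequenceClassification

base_model = AutoModelForSequenceClassification.from_pretrained(
    "prajjwal1/bert-tiny", num_labels=2  # tiny checkpoint, illustration only
)
lora_config = LoraConfig(task_type=TaskType.SEQ_CLS, r=8, lora_alpha=16, lora_dropout=0.1)
peft_model = get_peft_model(base_model, lora_config)
peft_model.print_trainable_parameters()  # LoRA trains only a small fraction of weights
```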

tools/llm-katan-server/README.md

Lines changed: 1 addition & 21 deletions
@@ -5,6 +5,7 @@ A FastAPI wrapper around [llm-katan](https://pypi.org/project/llm-katan/) that p
 ## Architecture
 
 This server acts as a proxy that:
+
 1. Receives OpenAI-compatible API requests
 2. Forwards them to a running `llm-katan` instance
 3. Returns the responses with proper model name mapping
@@ -78,24 +79,3 @@
     depends_on:
       - llm-katan-backend
 ```
-
-### Testing
-
-```bash
-# Health check
-curl http://localhost:8000/health
-
-# List models
-curl http://localhost:8000/v1/models
-
-# Chat completion (uses real LLM)
-curl -X POST http://localhost:8000/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model": "Qwen/Qwen2-0.5B-Instruct",
-    "messages": [{"role": "user", "content": "Hello!"}],
-    "max_tokens": 50
-  }'
-```
-
-Intended for local testing with Docker Compose profile `testing`.
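
The proxy flow this README describes (accept an OpenAI-compatible request, forward it to `llm-katan`, return the response with the model name mapped) can be exercised with a short client call. A minimal sketch, assuming the wrapper is listening on port 8000 as in the Docker Compose example:

```python
# Hedged sketch: probe the wrapper's health and model list.
# Assumes the llm-katan-server from this commit is running on localhost:8000.
import requests

base = "http://localhost:8000"
print("health:", requests.get(f"{base}/health", timeout=5).status_code)
print("models:", requests.get(f"{base}/v1/models", timeout=5).json())
```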

tools/llm-katan-server/app.py

Lines changed: 18 additions & 16 deletions
@@ -1,9 +1,9 @@
 import math
-import time
 import os
-import requests
+import time
 from typing import List, Optional
 
+import requests
 import uvicorn
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
@@ -18,7 +18,9 @@
 # Check if HuggingFace token is set
 hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")
 if not hf_token:
-    print("Warning: HUGGINGFACE_HUB_TOKEN not set. Some models may require authentication.")
+    print(
+        "Warning: HUGGINGFACE_HUB_TOKEN not set. Some models may require authentication."
+    )
 
 
 class ChatMessage(BaseModel):
@@ -49,37 +51,37 @@ async def chat_completions(req: ChatRequest):
         # Forward request to llm-katan backend
         llm_katan_request = {
             "model": MODEL,
-            "messages": [{"role": msg.role, "content": msg.content} for msg in req.messages],
+            "messages": [
+                {"role": msg.role, "content": msg.content} for msg in req.messages
+            ],
             "temperature": req.temperature,
         }
-
+
         if req.max_tokens:
             llm_katan_request["max_tokens"] = req.max_tokens
-
+
         # Make request to llm-katan
         response = requests.post(
-            f"{LLM_KATAN_URL}/v1/chat/completions",
-            json=llm_katan_request,
-            timeout=30
+            f"{LLM_KATAN_URL}/v1/chat/completions", json=llm_katan_request, timeout=30
         )
-
+
         if response.status_code != 200:
             raise HTTPException(
                 status_code=response.status_code,
-                detail=f"LLM Katan error: {response.text}"
+                detail=f"LLM Katan error: {response.text}",
             )
-
+
         result = response.json()
-
+
         # Update the model name in response to match our served model name
         result["model"] = req.model
-
+
         return result
-
+
     except requests.exceptions.RequestException as e:
         # Fallback to simple echo behavior if llm-katan is not available
         print(f"Warning: LLM Katan not available ({e}), using fallback response")
-
+
         # Simple echo-like behavior as fallback
         last_user = next(
             (m.content for m in reversed(req.messages) if m.role == "user"), ""
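
A usage note on the forwarding code above: the wrapper always sends its configured `MODEL` to the `llm-katan` backend, then rewrites the `model` field of the response to whatever name the client requested (`result["model"] = req.model`). A hedged client-side sketch of that contract, assuming the wrapper runs on localhost:8000 and an OpenAI-style response shape:

```python
# Hedged sketch: request a chat completion through the wrapper and check that
# the response echoes back the model name the client asked for.
import requests

payload = {
    "model": "Qwen/Qwen2-0.5B-Instruct",
    "messages": [{"role": "user", "content": "Hello!"}],
    "max_tokens": 50,
}
resp = requests.post(
    "http://localhost:8000/v1/chat/completions", json=payload, timeout=30
)
resp.raise_for_status()
body = resp.json()
print(body["model"])  # remapped to the requested name by the wrapper
print(body["choices"][0]["message"]["content"])
```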

website/docs/installation/installation.md

Lines changed: 4 additions & 0 deletions
@@ -126,12 +126,14 @@ model_config:
 If you don't have a vLLM backend set up, you can use the provided mock services for testing:
 
 **Option 1: Mock vLLM (Simple Echo Service)**
+
 ```bash
 # Start a simple mock service that echoes back responses
 python tools/mock-vllm/app.py
 ```
 
 **Option 2: LLM Katan Server (Real LLM with Lightweight Backend)**
+
 ```bash
 # First, start llm-katan backend (requires pip install llm-katan)
 llm-katan --model Qwen/Qwen2-0.5B-Instruct --port 8001
@@ -141,6 +143,7 @@ python tools/llm-katan-server/app.py
 ```
 
 For the mock services, update your `config/config.yaml`:
+
 ```yaml
 vllm_endpoints:
   - name: "mock-endpoint"
@@ -152,6 +155,7 @@
       - "Qwen/Qwen2-0.5B-Instruct" # For llm-katan-server
     weight: 1
 ```
+
 :::
 
 :::note[**Important: Address Format Requirements**]
