6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -3,7 +3,7 @@
repos:
# Basic hooks for Go, Rust, Python And JavaScript files only
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
rev: v4.4.0
hooks:
- id: trailing-whitespace
files: \.(go|rs|py|js)$
@@ -71,15 +71,15 @@ repos:

# Python specific hooks
- repo: https://github.com/psf/black
rev: 25.1.0
rev: 22.12.0
hooks:
- id: black
language_version: python3
files: \.py$
exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)

- repo: https://github.com/PyCQA/isort
rev: 6.0.1
rev: 5.12.0
hooks:
- id: isort
args: ["--profile", "black"]
4 changes: 2 additions & 2 deletions candle-binding/src/lib.rs
@@ -495,7 +495,7 @@ impl BertSimilarity {
let mut tokenizer = self.tokenizer.clone();
tokenizer
.with_truncation(Some(TruncationParams {
max_length: max_length.unwrap_or(512),
max_length: max_length.unwrap_or(100000),

Collaborator comment: changes to the candle binding go to feat-candle-refactoring

strategy: TruncationStrategy::LongestFirst,
stride: 0,
direction: TruncationDirection::Right,
@@ -517,7 +517,7 @@ impl BertSimilarity {
let mut tokenizer = self.tokenizer.clone();
tokenizer
.with_truncation(Some(TruncationParams {
max_length: max_length.unwrap_or(512),
max_length: max_length.unwrap_or(100000),
strategy: TruncationStrategy::LongestFirst,
stride: 0,
direction: TruncationDirection::Right,
8 changes: 4 additions & 4 deletions e2e-tests/06-pii-detection-test.py
@@ -652,7 +652,9 @@ def test_extproc_pii_detection_comprehensive(self):
status = (
"🔒"
if result["is_blocked"]
else "✅" if result["request_allowed"] else "❌"
else "✅"
if result["request_allowed"]
else "❌"
)
print(f" {status} {result['test_case']}")
print(f" Content: {result['content']}")
@@ -671,9 +673,7 @@ def test_extproc_pii_detection_comprehensive(self):
passed=False,
message="⚠️ No clear evidence of ExtProc PII detection in production pipeline",
)
print(
"📝 NOTE: This may indicate PII detection is not active in ExtProc or"
)
print("📝 NOTE: This may indicate PII detection is not active in ExtProc or")
print(" PII policies are configured to allow all content through")

def test_multiple_pii_types_analysis(self):
22 changes: 14 additions & 8 deletions src/training/classifier_model_fine_tuning/ft_linear.py
@@ -508,19 +508,25 @@ def tokenize_function(examples):

# Evaluate on validation set
logger.info("Evaluating on validation set...")
val_accuracy, val_report, val_conf_matrix, val_predictions = (
evaluate_category_classifier(
model, tokenizer, val_texts, val_categories, idx_to_category, device
)
(
val_accuracy,
val_report,
val_conf_matrix,
val_predictions,
) = evaluate_category_classifier(
model, tokenizer, val_texts, val_categories, idx_to_category, device
)
logger.info(f"Validation accuracy: {val_accuracy:.4f}")

# Evaluate on test set
logger.info("Evaluating on test set...")
test_accuracy, test_report, test_conf_matrix, test_predictions = (
evaluate_category_classifier(
model, tokenizer, test_texts, test_categories, idx_to_category, device
)
(
test_accuracy,
test_report,
test_conf_matrix,
test_predictions,
) = evaluate_category_classifier(
model, tokenizer, test_texts, test_categories, idx_to_category, device
)
logger.info(f"Test accuracy: {test_accuracy:.4f}")

2 changes: 1 addition & 1 deletion src/training/dual_classifier/dual_classifier.py
@@ -19,7 +19,7 @@ def __init__(
self,
num_categories: int,
model_name: str = "distilbert-base-uncased",
max_length: int = 512,
max_length: int = 100000,
):
super().__init__()

2 changes: 1 addition & 1 deletion src/training/dual_classifier/trainer.py
@@ -22,7 +22,7 @@ def __init__(
category_labels: List[int],
pii_labels: List[List[int]], # Token-level PII labels
tokenizer,
max_length: int = 512,
max_length: int = 100000,
):
self.texts = texts
self.category_labels = category_labels
Changes in an additional file (file name not captured in this view)
@@ -100,9 +100,9 @@
import torch

# Suppress common non-critical warnings
os.environ["TOKENIZERS_PARALLELISM"] = (
"false" # Suppress tokenizer parallelism warnings
)
os.environ[
"TOKENIZERS_PARALLELISM"
] = "false" # Suppress tokenizer parallelism warnings
warnings.filterwarnings(
"ignore", message=".*TensorFloat32.*"
) # Suppress TF32 performance hints
@@ -641,7 +641,7 @@ def _analyze_sequence_lengths(self, texts: List[str], tokenizer) -> Dict[str, in
}

def optimize_sequence_length(
self, texts: List[str], tokenizer, default_max_length: int = 512
self, texts: List[str], tokenizer, default_max_length: int = 100000
) -> int:
"""Find optimal sequence length based on dataset characteristics."""
logger.info("Analyzing sequence length distribution...")
@@ -2343,19 +2343,25 @@ def tokenize_function(examples):

# Evaluate on validation set
logger.info("Evaluating on validation set...")
val_accuracy, val_report, val_conf_matrix, val_predictions = (
evaluate_jailbreak_classifier(
model, tokenizer, val_texts, val_categories, idx_to_category, device
)
(
val_accuracy,
val_report,
val_conf_matrix,
val_predictions,
) = evaluate_jailbreak_classifier(
model, tokenizer, val_texts, val_categories, idx_to_category, device
)
logger.info(f"Validation accuracy: {val_accuracy:.4f}")

# Evaluate on test set
logger.info("Evaluating on test set...")
test_accuracy, test_report, test_conf_matrix, test_predictions = (
evaluate_jailbreak_classifier(
model, tokenizer, test_texts, test_categories, idx_to_category, device
)
(
test_accuracy,
test_report,
test_conf_matrix,
test_predictions,
) = evaluate_jailbreak_classifier(
model, tokenizer, test_texts, test_categories, idx_to_category, device
)
logger.info(f"Test accuracy: {test_accuracy:.4f}")

Changes in an additional file (file name not captured in this view)
@@ -69,13 +69,7 @@
import torch
import torch.nn as nn
from datasets import Dataset, load_dataset
from peft import (
LoraConfig,
PeftConfig,
PeftModel,
TaskType,
get_peft_model,
)
from peft import LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model
from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from transformers import (
Changes in an additional file (file name not captured in this view)
@@ -70,13 +70,7 @@
import torch
import torch.nn as nn
from datasets import Dataset, load_dataset
from peft import (
LoraConfig,
PeftConfig,
PeftModel,
TaskType,
get_peft_model,
)
from peft import LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model
from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from transformers import (
Changes in an additional file (file name not captured in this view)
@@ -77,13 +77,7 @@
import torch
import torch.nn as nn
from datasets import Dataset, load_dataset
from peft import (
LoraConfig,
PeftConfig,
PeftModel,
TaskType,
get_peft_model,
)
from peft import LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model
from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from transformers import (
21 changes: 21 additions & 0 deletions tools/llm-katan-server/Dockerfile
@@ -0,0 +1,21 @@
FROM python:3.11-slim

WORKDIR /app

RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
&& rm -rf /var/lib/apt/lists/*

COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

COPY app.py ./

EXPOSE 8000

# Environment variables for configuration
ENV MODEL=Qwen/Qwen2-0.5B-Instruct
ENV SERVED_MODEL_NAME=Qwen/Qwen2-0.5B-Instruct
ENV LLM_KATAN_URL=http://localhost:8001

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
81 changes: 81 additions & 0 deletions tools/llm-katan-server/README.md
@@ -0,0 +1,81 @@
# LLM Katan Server

A FastAPI wrapper around [llm-katan](https://pypi.org/project/llm-katan/) that provides the same API design as mock-vllm but uses real LLM functionality.

## Architecture

This server acts as a proxy that:

1. Receives OpenAI-compatible API requests
2. Forwards them to a running `llm-katan` instance
3. Returns the responses with proper model name mapping
4. Falls back to echo behavior if `llm-katan` is unavailable
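
A minimal sketch of this flow, assuming `httpx` for the upstream call (the real `app.py` is not shown here; all names below are illustrative, not the actual implementation):

```python
# Hypothetical proxy sketch -- NOT the shipped app.py.
import httpx
from fastapi import FastAPI

app = FastAPI()

# In the real server these presumably come from the environment variables
# documented below; hard-coded here to keep the sketch short.
LLM_KATAN_URL = "http://localhost:8001"
SERVED_MODEL_NAME = "Qwen/Qwen2-0.5B-Instruct"


@app.get("/health")
async def health() -> dict:
    return {"status": "ok"}


@app.get("/v1/models")
async def models() -> dict:
    # Always report the configured model name, independent of the backend.
    return {"object": "list", "data": [{"id": SERVED_MODEL_NAME, "object": "model"}]}


@app.post("/v1/chat/completions")
async def chat_completions(payload: dict) -> dict:
    try:
        # Step 2: forward the OpenAI-style request to the llm-katan backend.
        async with httpx.AsyncClient(timeout=60.0) as client:
            resp = await client.post(
                f"{LLM_KATAN_URL}/v1/chat/completions", json=payload
            )
            resp.raise_for_status()
            body = resp.json()
    except httpx.HTTPError:
        # Step 4: fall back to echoing the last user message if the backend
        # is unreachable.
        messages = payload.get("messages") or [{}]
        last = messages[-1].get("content", "")
        body = {
            "object": "chat.completion",
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": f"echo: {last}"},
                    "finish_reason": "stop",
                }
            ],
        }
    # Step 3: return the response under the served model name (name mapping).
    body["model"] = SERVED_MODEL_NAME
    return body
```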

## Features

- Same API design as mock-vllm (FastAPI-based)
- Proxies requests to real `llm-katan` backend
- OpenAI-compatible API endpoints:
- GET /health
- GET /v1/models
- POST /v1/chat/completions
- Fallback behavior when backend is unavailable
- Configurable via environment variables

## Environment Variables

- `MODEL`: HuggingFace model name for llm-katan (default: `Qwen/Qwen2-0.5B-Instruct`)
- `SERVED_MODEL_NAME`: Model name to expose in API (default: same as MODEL)
- `LLM_KATAN_URL`: URL of the llm-katan backend (default: `http://localhost:8001`)
- `HUGGINGFACE_HUB_TOKEN`: HuggingFace authentication token
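
The `SERVED_MODEL_NAME` default means the wrapper simply reuses whatever `MODEL` resolves to. A sketch of how `app.py` might read this configuration (variable names are assumptions, not the actual code):

```python
import os

# Hypothetical configuration resolution mirroring the table above.
MODEL = os.environ.get("MODEL", "Qwen/Qwen2-0.5B-Instruct")
SERVED_MODEL_NAME = os.environ.get("SERVED_MODEL_NAME", MODEL)  # default: same as MODEL
LLM_KATAN_URL = os.environ.get("LLM_KATAN_URL", "http://localhost:8001")
# HUGGINGFACE_HUB_TOKEN is passed through to the llm-katan backend
# (see the Docker Compose example below) rather than parsed here.
```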

## Setup

### 1. Start llm-katan backend

```bash
# Install llm-katan
pip install llm-katan

# Start llm-katan server on port 8001
llm-katan --model Qwen/Qwen2-0.5B-Instruct --port 8001
```

### 2. Start this FastAPI server

```bash
# Using Docker
docker run -p 8000:8000 llm-katan-server

# Or directly with Python
pip install -r requirements.txt
python app.py
```

## Usage

### Docker Compose (Recommended)

```yaml
services:
llm-katan-backend:
image: python:3.11-slim
command: >
sh -c "pip install llm-katan &&
llm-katan --model Qwen/Qwen2-0.5B-Instruct --port 8001 --host 0.0.0.0"
ports:
- "8001:8001"
environment:
- HUGGINGFACE_HUB_TOKEN=${HUGGINGFACE_HUB_TOKEN}

llm-katan-server:
build: .
ports:
- "8000:8000"
environment:
- MODEL=Qwen/Qwen2-0.5B-Instruct
- SERVED_MODEL_NAME=Qwen/Qwen2-0.5B-Instruct
- LLM_KATAN_URL=http://llm-katan-backend:8001
depends_on:
- llm-katan-backend
```
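
### Quick test

Once both services are up, the proxy can be exercised with any OpenAI-compatible client. A minimal sketch using the `requests` package (an extra dependency; the prompt is purely illustrative):

```python
import requests

BASE_URL = "http://localhost:8000"

# Confirm the wrapper is up and see which model name it exposes.
print(requests.get(f"{BASE_URL}/health", timeout=10).status_code)
print(requests.get(f"{BASE_URL}/v1/models", timeout=10).json())

# Send an OpenAI-style chat completion request through the proxy.
resp = requests.post(
    f"{BASE_URL}/v1/chat/completions",
    json={
        "model": "Qwen/Qwen2-0.5B-Instruct",
        "messages": [{"role": "user", "content": "Say hello in one short sentence."}],
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```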