Skip to content

Commit 55b4265

Browse files
authored
fix(classifiers): add PII NER allowlist and document metal feature for macOS (#2541)
Add configurable pii_ner_allowlist to ClassifiersConfig that prevents tokens matching an allowlist entry (case-insensitive) from being redacted by the piiranha NER model. Suppresses common false positives such as "Zeph" being misclassified as [PII:CITY] by piiranha-v1. Default allowlist entries: ["Zeph", "Rust", "OpenAI", "Ollama", "Claude"]. Configurable via [classifiers] pii_ner_allowlist in config.toml. Set to [] to disable the allowlist entirely. Also document that on macOS Apple Silicon, --features full,metal is required for piiranha NER GPU acceleration. Without metal, the 1.1 GB model times out after 30s on CPU and falls back to regex-only detection. Closes #2537, closes #2538
1 parent fd43f00 commit 55b4265

File tree

5 files changed

+253
-2
lines changed

5 files changed

+253
-2
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
1717

1818
### Fixed
1919

20+
- fix(classifiers): add configurable `pii_ner_allowlist` to `ClassifiersConfig` — tokens matching an allowlist entry (case-insensitive) are never redacted by the piiranha NER model, suppressing false positives such as "Zeph" → `[PII:CITY]`; default entries: `["Zeph", "Rust", "OpenAI", "Ollama", "Claude"]`; set the list to `[]` in config to disable the feature (closes #2537)
21+
- fix(classifiers): document that macOS Apple Silicon requires `--features full,metal` for piiranha NER GPU acceleration; without `metal`, the 1.1 GB model exceeds the 30s timeout on CPU and falls back to regex-only PII detection (closes #2538)
22+
2023
- fix(tools): propagate `claim_source` from `ToolOutput` into the post-execution audit entry in `AdversarialPolicyGateExecutor`; `write_audit` now accepts an explicit `claim_source` parameter so the field is no longer hardcoded to `None` for successful executions (closes #2535)
2124
- fix(tools): `extract_paths` now detects relative path tokens that contain `/` but do not start with `/` or `./` (e.g. `src/main.rs`, `.local/foo/bar`); URL schemes (`://`) and shell variable assignments (`KEY=value`) are excluded from matching (closes #2536)
2225

crates/zeph-config/src/classifiers.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@ fn default_pii_ner_max_chars() -> usize {
3535
8192
3636
}
3737

38+
/// Serde default for `pii_ner_allowlist`.
///
/// Returns the built-in entries `Zeph`, `Rust`, `OpenAI`, `Ollama` and `Claude`,
/// in that order, as owned strings.
fn default_pii_ner_allowlist() -> Vec<String> {
    const BUILT_IN: [&str; 5] = ["Zeph", "Rust", "OpenAI", "Ollama", "Claude"];
    BUILT_IN.iter().map(|&name| name.to_owned()).collect()
}
47+
3848
fn default_three_class_threshold() -> f32 {
3949
0.7
4050
}
@@ -203,6 +213,18 @@ pub struct ClassifiersConfig {
203213
/// timeout on large tool outputs (e.g. `search_code`). Default `8192`.
204214
#[serde(default = "default_pii_ner_max_chars")]
205215
pub pii_ner_max_chars: usize,
216+
217+
/// Allowlist of tokens that are never redacted by the NER PII classifier, regardless
218+
/// of model confidence.
219+
///
220+
/// Matching is case-insensitive and exact: the whole span text, after trimming
/// surrounding whitespace, must equal an allowlist entry.
221+
/// This suppresses common false positives from the piiranha model — for example,
222+
/// "Zeph" is misclassified as a city (PII:CITY) by the base model.
223+
///
224+
/// Default entries: `["Zeph", "Rust", "OpenAI", "Ollama", "Claude"]`.
225+
/// Set to `[]` to disable the allowlist entirely.
226+
#[serde(default = "default_pii_ner_allowlist")]
227+
pub pii_ner_allowlist: Vec<String>,
206228
}
207229

208230
impl Default for ClassifiersConfig {
@@ -225,6 +247,7 @@ impl Default for ClassifiersConfig {
225247
pii_threshold: default_pii_threshold(),
226248
pii_model_sha256: None,
227249
pii_ner_max_chars: default_pii_ner_max_chars(),
250+
pii_ner_allowlist: default_pii_ner_allowlist(),
228251
}
229252
}
230253
}
@@ -258,6 +281,10 @@ mod tests {
258281
);
259282
assert!((cfg.pii_threshold - 0.75).abs() < 1e-6);
260283
assert!(cfg.pii_model_sha256.is_none());
284+
assert_eq!(
285+
cfg.pii_ner_allowlist,
286+
vec!["Zeph", "Rust", "OpenAI", "Ollama", "Claude"]
287+
);
261288
}
262289

263290
#[test]
@@ -337,6 +364,7 @@ mod tests {
337364
pii_threshold: 0.80,
338365
pii_model_sha256: None,
339366
pii_ner_max_chars: 4096,
367+
pii_ner_allowlist: vec!["MyProject".into(), "Rust".into()],
340368
};
341369
let serialized = toml::to_string(&original).unwrap();
342370
let deserialized: ClassifiersConfig = toml::from_str(&serialized).unwrap();
@@ -429,6 +457,30 @@ mod tests {
429457
assert_eq!(cfg.three_class_model_sha256.as_deref(), Some("aabbcc"));
430458
}
431459

460+
/// The default config must carry every built-in allowlist entry.
#[test]
fn pii_ner_allowlist_default_entries() {
    let cfg = ClassifiersConfig::default();
    let allowlist = &cfg.pii_ner_allowlist;
    for expected in ["Zeph", "Rust", "OpenAI", "Ollama", "Claude"] {
        assert!(allowlist.iter().any(|entry| entry == expected));
    }
}
469+
470+
/// A `pii_ner_allowlist` key in TOML replaces the default entries.
#[test]
fn pii_ner_allowlist_configurable() {
    let parsed = toml::from_str::<ClassifiersConfig>(
        r#"pii_ner_allowlist = ["MyProject", "AcmeCorp"]"#,
    )
    .unwrap();
    assert_eq!(parsed.pii_ner_allowlist, ["MyProject", "AcmeCorp"]);
}
476+
477+
/// An explicit empty array in TOML yields an empty allowlist (no defaults re-added).
#[test]
fn pii_ner_allowlist_empty_disables() {
    let parsed = toml::from_str::<ClassifiersConfig>("pii_ner_allowlist = []").unwrap();
    assert_eq!(parsed.pii_ner_allowlist, Vec::<String>::new());
}
483+
432484
#[test]
433485
fn three_class_threshold_validation_rejects_zero() {
434486
let result: Result<ClassifiersConfig, _> = toml::from_str("three_class_threshold = 0.0");

crates/zeph-core/src/agent/builder.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -850,6 +850,23 @@ impl<C: Channel> Agent<C> {
850850
self
851851
}
852852

853+
/// Install the NER PII allowlist on the agent's content sanitizer.
///
/// Span texts that equal an allowlist entry (case-insensitive, exact) are dropped from
/// `detect_pii()` results. Call this after `with_pii_detector`.
#[cfg(feature = "classifiers")]
#[must_use]
pub fn with_pii_ner_allowlist(mut self, entries: Vec<String>) -> Self {
    // The sanitizer's builder method takes `self` by value, so a default-configured
    // placeholder is swapped in while the real sanitizer is taken out and rebuilt.
    let placeholder = zeph_sanitizer::ContentSanitizer::new(
        &zeph_sanitizer::ContentIsolationConfig::default(),
    );
    let current = std::mem::replace(&mut self.security.sanitizer, placeholder);
    self.security.sanitizer = current.with_pii_ner_allowlist(entries);
    self
}
869+
853870
/// Attach a NER classifier backend for PII detection in the union merge pipeline.
854871
///
855872
/// When attached, `sanitize_tool_output()` runs both regex and NER, merges spans, and

crates/zeph-sanitizer/src/lib.rs

Lines changed: 174 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,10 @@ pub struct ContentSanitizer {
328328
pii_detector: Option<std::sync::Arc<dyn zeph_llm::classifier::PiiDetector>>,
329329
#[cfg(feature = "classifiers")]
330330
pii_threshold: f32,
331+
/// Case-folded allowlist — spans whose text (case-insensitive) matches an entry are
332+
/// suppressed before the result is returned from `detect_pii()`.
333+
#[cfg(feature = "classifiers")]
334+
pii_ner_allowlist: Vec<String>,
331335
#[cfg(feature = "classifiers")]
332336
classifier_metrics: Option<std::sync::Arc<zeph_llm::ClassifierMetrics>>,
333337
}
@@ -364,6 +368,8 @@ impl ContentSanitizer {
364368
#[cfg(feature = "classifiers")]
365369
pii_threshold: 0.75,
366370
#[cfg(feature = "classifiers")]
371+
pii_ner_allowlist: Vec::new(),
372+
#[cfg(feature = "classifiers")]
367373
classifier_metrics: None,
368374
}
369375
}
@@ -468,6 +474,20 @@ impl ContentSanitizer {
468474
self
469475
}
470476

477+
/// Set the NER PII allowlist.
///
/// Span texts that match any entry (case-insensitive, exact match) are suppressed
/// from the `detect_pii()` result. Use this to suppress known false positives such
/// as project names misclassified by the base NER model.
///
/// Entries are normalized once here so lookups stay cheap: each entry is trimmed and
/// case-folded, mirroring the `trim().to_lowercase()` that `detect_pii()` applies to
/// span text. Without the trim, an entry such as `" Zeph "` could never match.
///
/// Entries that are empty after trimming are discarded. `detect_pii()` treats a span
/// whose byte range cannot be sliced as the empty string, so a blank entry would
/// silently suppress every such span.
#[cfg(feature = "classifiers")]
#[must_use]
pub fn with_pii_ner_allowlist(mut self, entries: Vec<String>) -> Self {
    self.pii_ner_allowlist = entries
        .iter()
        .map(|entry| entry.trim().to_lowercase())
        .filter(|entry| !entry.is_empty())
        .collect();
    self
}
490+
471491
/// Attach a [`ClassifierMetrics`] instance to record injection and PII latencies.
472492
#[cfg(feature = "classifiers")]
473493
#[must_use]
@@ -483,6 +503,10 @@ impl ContentSanitizer {
483503
///
484504
/// Returns an empty result when no `pii_detector` is attached.
485505
///
506+
/// Spans whose extracted text matches an allowlist entry (case-insensitive, exact match)
507+
/// are removed before returning. This suppresses common false positives from the
508+
/// piiranha model (e.g. "Zeph" being misclassified as a city).
509+
///
486510
/// # Errors
487511
///
488512
/// Returns `LlmError` if the underlying model fails.
@@ -494,10 +518,21 @@ impl ContentSanitizer {
494518
match &self.pii_detector {
495519
Some(detector) => {
496520
let t0 = std::time::Instant::now();
497-
let result = detector.detect_pii(text).await?;
521+
let mut result = detector.detect_pii(text).await?;
498522
if let Some(ref m) = self.classifier_metrics {
499523
m.record(zeph_llm::classifier::ClassifierTask::Pii, t0.elapsed());
500524
}
525+
if !self.pii_ner_allowlist.is_empty() {
526+
result.spans.retain(|span| {
527+
let span_text = text
528+
.get(span.start..span.end)
529+
.unwrap_or("")
530+
.trim()
531+
.to_lowercase();
532+
!self.pii_ner_allowlist.contains(&span_text)
533+
});
534+
result.has_pii = !result.spans.is_empty();
535+
}
501536
Ok(result)
502537
}
503538
None => Ok(zeph_llm::classifier::PiiResult {
@@ -2215,4 +2250,142 @@ mod tests {
22152250
);
22162251
}
22172252
}
2253+
2254+
// --- pii_ner_allowlist filtering ---
2255+
2256+
#[cfg(feature = "classifiers")]
mod pii_allowlist {
    use super::*;
    use std::future::Future;
    use std::pin::Pin;
    use std::sync::Arc;
    use zeph_llm::classifier::{PiiDetector, PiiResult, PiiSpan};

    /// Detector stub that replays a fixed `PiiResult` regardless of the input text.
    struct MockPiiDetector {
        result: PiiResult,
    }

    impl MockPiiDetector {
        /// Wraps `spans`; `has_pii` is set exactly when `spans` is non-empty.
        fn new(spans: Vec<PiiSpan>) -> Self {
            let result = PiiResult {
                has_pii: !spans.is_empty(),
                spans,
            };
            Self { result }
        }
    }

    impl PiiDetector for MockPiiDetector {
        fn detect_pii<'a>(
            &'a self,
            _text: &'a str,
        ) -> Pin<Box<dyn Future<Output = Result<PiiResult, zeph_llm::LlmError>> + Send + 'a>>
        {
            let canned = self.result.clone();
            Box::pin(async move { Ok(canned) })
        }

        fn backend_name(&self) -> &'static str {
            "mock"
        }
    }

    /// Builds a `CITY` span over bytes `start..end` with score `0.99`.
    fn span(start: usize, end: usize) -> PiiSpan {
        PiiSpan {
            entity_type: String::from("CITY"),
            start,
            end,
            score: 0.99,
        }
    }

    /// Runs `detect_pii` on `text` through a sanitizer whose mock detector reports
    /// `spans` and whose allowlist is `allowlist`.
    async fn detect_with(text: &str, spans: Vec<PiiSpan>, allowlist: &[&str]) -> PiiResult {
        let detector = Arc::new(MockPiiDetector::new(spans));
        let entries: Vec<String> = allowlist.iter().map(|&entry| entry.to_owned()).collect();
        let sanitizer = ContentSanitizer::new(&ContentIsolationConfig::default())
            .with_pii_detector(detector, 0.5)
            .with_pii_ner_allowlist(entries);
        sanitizer.detect_pii(text).await.expect("detect_pii failed")
    }

    // T-A1: allowlist entry filtered from detect_pii result.
    #[tokio::test]
    async fn allowlist_entry_is_filtered() {
        // "Zeph" occupies bytes 6..10 in "Hello Zeph"
        let outcome = detect_with("Hello Zeph", vec![span(6, 10)], &["Zeph"]).await;
        assert!(outcome.spans.is_empty());
        assert!(!outcome.has_pii);
    }

    // T-A2: matching is case-insensitive ("zeph" in allowlist filters span "Zeph").
    #[tokio::test]
    async fn allowlist_is_case_insensitive() {
        let outcome = detect_with("Hello Zeph", vec![span(6, 10)], &["zeph"]).await;
        assert!(outcome.spans.is_empty());
        assert!(!outcome.has_pii);
    }

    // T-A3: non-allowlist span preserved when another span is filtered.
    #[tokio::test]
    async fn non_allowlist_span_preserved() {
        // "Zeph" is bytes 0..4, the e-mail address is bytes 5..25.
        let email = PiiSpan {
            entity_type: String::from("EMAIL"),
            start: 5,
            end: 25,
            score: 0.99,
        };
        let outcome = detect_with(
            "Zeph john.doe@example.com",
            vec![span(0, 4), email],
            &["Zeph"],
        )
        .await;
        assert_eq!(outcome.spans.len(), 1);
        assert_eq!(outcome.spans[0].entity_type, "EMAIL");
        assert!(outcome.has_pii);
    }

    // T-A4: empty allowlist passes all spans through (is_empty() guard is respected).
    #[tokio::test]
    async fn empty_allowlist_passes_all_spans() {
        let outcome = detect_with("Hello Zeph", vec![span(6, 10)], &[]).await;
        assert_eq!(outcome.spans.len(), 1);
        assert!(outcome.has_pii);
    }

    // T-A5: no pii_detector attached returns empty PiiResult.
    #[tokio::test]
    async fn no_pii_detector_returns_empty() {
        let sanitizer = ContentSanitizer::new(&ContentIsolationConfig::default());
        let outcome = sanitizer
            .detect_pii("sensitive text")
            .await
            .expect("detect_pii failed");
        assert!(outcome.spans.is_empty());
        assert!(!outcome.has_pii);
    }

    // T-A6: has_pii recalculated to false when all spans are filtered.
    #[tokio::test]
    async fn has_pii_recalculated_after_all_spans_filtered() {
        // Two spans, both matching allowlist entries.
        let outcome = detect_with(
            "Zeph Rust",
            vec![span(0, 4), span(5, 9)],
            &["Zeph", "Rust"],
        )
        .await;
        assert!(outcome.spans.is_empty());
        assert!(!outcome.has_pii);
    }
}
22182391
}

src/agent_setup.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -642,9 +642,15 @@ pub(crate) fn apply_pii_classifier_with_cfg<C: Channel>(
642642
tracing::info!(
643643
repo_id = %classifiers.pii_model,
644644
threshold = classifiers.pii_threshold,
645+
allowlist_len = classifiers.pii_ner_allowlist.len(),
645646
"PII classifier attached (model loads lazily on first use)"
646647
);
647-
agent.with_pii_detector(backend_arc, classifiers.pii_threshold)
648+
let agent = agent.with_pii_detector(backend_arc, classifiers.pii_threshold);
649+
if classifiers.pii_ner_allowlist.is_empty() {
650+
agent
651+
} else {
652+
agent.with_pii_ner_allowlist(classifiers.pii_ner_allowlist.clone())
653+
}
648654
}
649655

650656
/// Wire the `CandleNerClassifier` into the PII union merge pipeline.

0 commit comments

Comments
 (0)