|
23 | 23 |
|
24 | 24 | import json
|
25 | 25 | import sys
|
| 26 | +import time |
26 | 27 | import unittest
|
27 | 28 | import uuid
|
28 | 29 |
|
@@ -467,6 +468,214 @@ def test_extproc_pii_filtering(self):
|
467 | 468 | message="✅ Request blocked: ExtProc may have PII filtering enabled",
|
468 | 469 | )
|
469 | 470 |
|
def test_extproc_pii_detection_comprehensive(self):
    """
    TEST 3.5: ExtProc PII Detection - Comprehensive Production Pipeline Test

    WHAT IS TESTED: Full production pipeline PII detection capabilities
    PIPELINE: client → envoy → extProc → semantic-router → model routing
    EXPECTED RESULT: ⚠️ VALIDATION - Tests actual PII detection in production flow

    Sends one PII-bearing prompt and one safe prompt to the Envoy endpoint
    and looks for *differential* behaviour between the two (blocking,
    model choice, latency, response quality). Any such difference is
    reported as evidence that PII detection is active.

    Comparisons of model, latency and response quality are only made when
    both requests produced a usable result; otherwise an unrelated failure
    of one request would be misreported as PII detection.
    """
    self.print_test_header(
        "TEST 3.5: ExtProc PII Detection - Comprehensive Production Pipeline",
        "Expected: ⚠️ VALIDATION - Tests if ExtProc actually detects PII in production flow",
    )

    # Generate unique PII test cases
    pii_cases, safe_cases = generate_unique_test_cases()

    test_cases = [
        {
            "name": "High-Value PII Content",
            # First generated PII case (labelled "SSN case" by the author).
            "content": pii_cases[0]["text"],
            "expected_pii": True,
            "expected_action": "May block or route to PII-safe model",
        },
        {
            "name": "Safe Content",
            # First generated safe case (labelled "Math problem" by the author).
            "content": safe_cases[0]["text"],
            "expected_pii": False,
            "expected_action": "Should route normally",
        },
    ]

    # Latency above 1.5x this baseline is flagged as "extra processing".
    baseline_time_ms = 1000
    results = []

    for test_case in test_cases:
        print(f"\n🔍 Testing: {test_case['name']}")
        print(f"Content: {test_case['content'][:60]}...")

        # Send request through full ExtProc pipeline
        payload = {
            "model": "auto",
            "messages": [{"role": "user", "content": test_case["content"]}],
            "temperature": 0.1,
        }

        # perf_counter is monotonic, so the elapsed time cannot be skewed
        # by wall-clock adjustments while the request is in flight.
        start_time = time.perf_counter()
        response = requests.post(
            f"{ENVOY_URL}{OPENAI_ENDPOINT}",
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=60,
        )
        processing_time = (time.perf_counter() - start_time) * 1000

        request_allowed = response.status_code == 200
        # 400/403 are treated as a PII policy violation response.
        is_pii_blocked = response.status_code in (400, 403)

        response_content = ""
        model_used = "unknown"
        if request_allowed:
            try:
                response_json = response.json()
                if "choices" in response_json:
                    # Content may be null (e.g. tool-call replies); treat as empty.
                    response_content = (
                        response_json["choices"][0]["message"]["content"] or ""
                    )
                    model_used = response_json.get("model", "unknown")
            except (ValueError, KeyError, IndexError, TypeError):
                # ValueError: body is not JSON; the rest: unexpected shape.
                response_content = "Could not parse response"

        # Check response quality/safety indicators
        lowered_content = response_content.lower()
        response_looks_safe = (
            len(response_content) > 50
            and "sorry" not in lowered_content
            and "can't help" not in lowered_content
        )

        # PII detection may add latency
        has_extra_processing = processing_time > baseline_time_ms * 1.5

        results.append(
            {
                "test_case": test_case["name"],
                "content": test_case["content"][:50] + "...",
                "expected_pii": test_case["expected_pii"],
                "request_allowed": request_allowed,
                "is_blocked": is_pii_blocked,
                "model_used": model_used,
                "processing_time_ms": processing_time,
                "has_extra_processing": has_extra_processing,
                "response_safe": response_looks_safe,
                "response_preview": (
                    response_content[:100] + "..."
                    if response_content
                    else "No content"
                ),
            }
        )

        print(" 📊 Results:")
        print(f" Request Status: {response.status_code}")
        print(f" Model Used: {model_used}")
        print(f" Processing Time: {processing_time:.1f}ms")
        print(f" Expected PII: {test_case['expected_pii']}")
        print(f" Blocked: {is_pii_blocked}")
        print(f" Extra Processing: {has_extra_processing}")

    # Results are appended in test_cases order: PII first, safe second.
    pii_test, safe_test = results

    # Only compare the two runs when both completed successfully; a failed
    # request has no model/content and would look like a "difference".
    both_allowed = pii_test["request_allowed"] and safe_test["request_allowed"]
    models_known = "unknown" not in (
        pii_test["model_used"],
        safe_test["model_used"],
    )

    differential_routing = (
        models_known and pii_test["model_used"] != safe_test["model_used"]
    )
    # NOTE: latency between two LLM calls is noisy; this is a weak signal.
    differential_processing = (
        both_allowed
        and abs(pii_test["processing_time_ms"] - safe_test["processing_time_ms"])
        > 200
    )
    differential_quality = (
        both_allowed
        and not pii_test["response_safe"]
        and safe_test["response_safe"]
    )

    # Overall PII detection indicators
    pii_detection_indicators = []
    if pii_test["is_blocked"]:
        pii_detection_indicators.append("Request blocking")
    if differential_routing:
        pii_detection_indicators.append("Model routing differences")
    if differential_processing:
        pii_detection_indicators.append("Processing time differences")
    if differential_quality:
        pii_detection_indicators.append("Response quality differences")

    pii_detection_evidence = bool(pii_detection_indicators)

    self.print_response_info(
        response,  # Use last response for HTTP details
        {
            "Test Cases": len(test_cases),
            "PII Detection Evidence": (
                "✅ YES" if pii_detection_evidence else "❌ NO"
            ),
            "Detection Indicators": (
                ", ".join(pii_detection_indicators)
                if pii_detection_indicators
                else "None found"
            ),
            "PII Content Model": pii_test["model_used"],
            "Safe Content Model": safe_test["model_used"],
            "Differential Routing": "✅ YES" if differential_routing else "❌ NO",
            "PII Request Blocked": "✅ YES" if pii_test["is_blocked"] else "❌ NO",
            "Overall Assessment": (
                "✅ PII DETECTION ACTIVE"
                if pii_detection_evidence
                else "⚠️ NO CLEAR PII DETECTION"
            ),
        },
    )

    # Print detailed analysis
    print("\n📋 Detailed ExtProc PII Analysis:")
    for result in results:
        status = (
            "🔒"
            if result["is_blocked"]
            else "✅" if result["request_allowed"] else "❌"
        )
        print(f" {status} {result['test_case']}")
        print(f" Content: {result['content']}")
        print(
            f" Model: {result['model_used']}, Time: {result['processing_time_ms']:.1f}ms"
        )
        print(f" Status: {'Blocked' if result['is_blocked'] else 'Allowed'}")

    if pii_detection_evidence:
        self.print_test_result(
            passed=True,
            message=f"✅ ExtProc PII detection evidence found: {', '.join(pii_detection_indicators)}",
        )
    else:
        self.print_test_result(
            passed=False,
            message="⚠️ No clear evidence of ExtProc PII detection in production pipeline",
        )
        print(
            "📝 NOTE: This may indicate PII detection is not active in ExtProc or"
        )
        print(" PII policies are configured to allow all content through")
470 | 679 | def test_multiple_pii_types_analysis(self):
|
471 | 680 | """
|
472 | 681 | TEST 4: Multiple PII Types Pattern Analysis
|
|
0 commit comments