generated from guardrails-ai/validator-template
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathapp.py
More file actions
115 lines (98 loc) · 3.19 KB
/
app.py
File metadata and controls
115 lines (98 loc) · 3.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine
# ASGI application object; the route decorators further down register on it,
# and it is the target named by `uvicorn app:app`.
app = FastAPI()
# One named payload entry. The same model is used for request inputs
# (InputRequest.inputs) and response outputs (OutputResponse.outputs).
# NOTE(review): the name/shape/data/datatype layout resembles a tensor-style
# inference protocol -- confirm which spec clients rely on.
class InferenceData(BaseModel):
    # Identifier of the entry; the /validate handler looks for "text" and
    # "pii_entities".
    name: str
    # Declared dimensions of `data`; nothing in this file validates it.
    shape: List[int]
    # Payload items; element types are not constrained by the model.
    data: List
    # Type tag for the payload; responses built in this file use "BYTES".
    datatype: str
# Request body accepted by POST /validate: a list of named input entries.
class InputRequest(BaseModel):
    inputs: List[InferenceData]
# Response body returned by POST /validate.
class OutputResponse(BaseModel):
    # Name of the model that produced the outputs.
    modelname: str
    # Version label of that model.
    modelversion: str
    # One entry per processed input text.
    outputs: List[InferenceData]
class DetectPII:
    """Anonymize text by chaining a Presidio analyzer and anonymizer."""

    model_name = "presidio-pii"
    validation_method = "sentence"

    def __init__(self):
        """Create the analyzer and anonymizer engines used by every call."""
        self.pii_analyzer = AnalyzerEngine()
        self.pii_anonymizer = AnonymizerEngine()

    def infer(self, text_vals: List[str], entities: List[str]) -> OutputResponse:
        """Anonymize every text and package the results as a response.

        Args:
            text_vals: Texts to process, handled in the order given.
            entities: Entity names passed through to the analyzer unchanged.

        Returns:
            An ``OutputResponse`` holding one ``result{idx}`` entry per input
            text, where ``idx`` is the text's position in ``text_vals``.
        """
        # Lazy generator so each text is anonymized right before its output
        # entry is built, one text at a time.
        anonymized_texts = (
            self.get_anonymized_text(raw_text, entities) for raw_text in text_vals
        )
        # NOTE(review): `shape` is the character count of the anonymized
        # string while `data` holds a single element -- confirm clients expect
        # this.
        outputs = [
            InferenceData(
                name=f"result{position}",
                datatype="BYTES",
                shape=[len(redacted)],
                data=[redacted],
            )
            for position, redacted in enumerate(anonymized_texts)
        ]
        return OutputResponse(
            modelname=DetectPII.model_name,
            modelversion="1",
            outputs=outputs,
        )

    def get_anonymized_text(self, text: str, entities: List[str]) -> str:
        """Analyze ``text`` for ``entities`` and return the anonymizer's text.

        Args:
            text: The text to scan; analysis is always run with language "en".
            entities: Entity names the analyzer is asked to look for.

        Returns:
            The ``text`` attribute of the anonymizer's result.
        """
        findings = self.pii_analyzer.analyze(
            text=text, entities=entities, language="en"
        )
        anonymized = self.pii_anonymizer.anonymize(
            text=text, analyzer_results=findings
        )
        return anonymized.text
# Module-level detector instance created once at import time; the /validate
# handler calls its infer() method for every request.
pii_service = DetectPII()
# Named groups of entity-type identifiers, keyed by a short group name.
# A group name is looked up here to obtain the list of entity names that is
# handed to the analyzer.
PII_ENTITIES_MAP = {
    # Contact, network, time, place and person identifiers.
    "pii": [
        "EMAIL_ADDRESS",
        "PHONE_NUMBER",
        "DOMAIN_NAME",
        "IP_ADDRESS",
        "DATE_TIME",
        "LOCATION",
        "PERSON",
        "URL",
    ],
    # Financial, licence and government-issued identifiers
    # (presumably "sensitive personal information" -- confirm the acronym).
    "spi": [
        "CREDIT_CARD",
        "CRYPTO",
        "IBAN_CODE",
        "NRP",
        "MEDICAL_LICENSE",
        "US_BANK_NUMBER",
        "US_DRIVER_LICENSE",
        "US_ITIN",
        "US_PASSPORT",
        "US_SSN",
    ],
}
# Root endpoint: answers GET / with a fixed service identifier string.
@app.get("/")
async def hello_world():
    return "detect-pii"
def _resolve_entities(pii_entities) -> List[str]:
    """Convert the client's ``pii_entities`` payload into entity names.

    Args:
        pii_entities: Either a group name from ``PII_ENTITIES_MAP`` or a list
            whose items are entity names and/or group names.

    Returns:
        A flat list of entity names with group names expanded and duplicates
        removed, keeping first-seen order.

    Raises:
        HTTPException: 400 when a string is not a known group name, when the
            payload is neither a string nor a list, or when a list item is
            not a string.
    """
    if isinstance(pii_entities, str):
        group = PII_ENTITIES_MAP.get(pii_entities)
        if group is None:
            raise HTTPException(status_code=400, detail="Invalid PII entity type")
        return list(group)

    if not isinstance(pii_entities, list):
        raise HTTPException(status_code=400, detail="Invalid PII entity format")

    resolved = []
    for entity in pii_entities:
        if not isinstance(entity, str):
            raise HTTPException(status_code=400, detail="Invalid PII entity format")
        # The request model types `data` as a list, so a group name can only
        # arrive as a list item; expand it here, otherwise keep the name.
        resolved.extend(PII_ENTITIES_MAP.get(entity, [entity]))
    # dict.fromkeys drops duplicates (e.g. a group plus one of its members)
    # while preserving order.
    return list(dict.fromkeys(resolved))


@app.post("/validate", response_model=OutputResponse)
async def check_pii(input_request: InputRequest):
    """Anonymize the submitted texts for the requested entity types.

    The request must contain an input named ``text`` (the strings to process)
    and an input named ``pii_entities`` (entity names and/or the group names
    defined in ``PII_ENTITIES_MAP``). Inputs with other names are ignored; if
    a name is repeated, the last occurrence wins.

    Raises:
        HTTPException: 400 when either required input is missing, when a text
            item is not a string, or when the entity payload is malformed.
    """
    text_vals = None
    pii_entities = None
    for inp in input_request.inputs:
        if inp.name == "text":
            text_vals = inp.data
        elif inp.name == "pii_entities":
            pii_entities = inp.data

    if text_vals is None or pii_entities is None:
        raise HTTPException(status_code=400, detail="Invalid input format")
    # `data` is an untyped list, so reject non-string items here instead of
    # letting them fail inside the analyzer.
    if not all(isinstance(text, str) for text in text_vals):
        raise HTTPException(status_code=400, detail="Invalid input format")

    entities_to_filter = _resolve_entities(pii_entities)
    return pii_service.infer(text_vals, entities_to_filter)
# Run the app with uvicorn
# Save this script as app.py and run with: uvicorn app:app --reload