Skip to content

Commit ec877d5

Browse files
committed
🎉 initial commit of the regex match detector
0 parents  commit ec877d5

15 files changed

+651
-0
lines changed

.gitignore

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Environment
2+
.venv/
3+
4+
# macOS Finder metadata
5+
.DS_Store
6+
7+
# Byte-compiled / optimized / DLL files
8+
__pycache__/
9+
*.py[cod]
10+
11+
# pyenv environment
12+
.python-version

detectors/Dockerfile.regex

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Base stage: UBI 9 minimal image with Python, an upgraded pip/wheel, and torch.
FROM registry.access.redhat.com/ubi9/ubi-minimal AS base
RUN microdnf update -y && \
    microdnf install -y --nodocs \
        python-pip python-devel && \
    pip install --upgrade --no-cache-dir pip wheel && \
    microdnf clean all
RUN pip install --no-cache-dir torch

# FROM icr.io/fm-stack/ubi9-minimal-py39-torch as builder
# Builder stage: installs the shared and the regex-detector Python requirements.
FROM base AS builder

COPY ./common/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# This overwrites the common requirements.txt copied above (already installed).
COPY ./regex_match/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Final stage: application sources layered on top of the builder stage.
FROM builder

WORKDIR /app

COPY ./common /common
# Trailing slash makes it explicit that /app is a directory destination.
COPY ./regex_match/app.py /app/
COPY ./regex_match/detector.py /app/
COPY ./regex_match/scheme.py /app/

EXPOSE 8000
CMD ["uvicorn", "app:app", "--workers", "4", "--host", "0.0.0.0", "--port", "8000", "--log-config", "/common/log_conf.yaml"]

# gunicorn main:app --workers 4 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:8000

detectors/common/__init__.py

Whitespace-only changes.

detectors/common/app.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
# Standard
2+
import os
3+
import ssl
4+
import sys
5+
6+
import uvicorn
7+
import yaml
8+
from fastapi.exceptions import RequestValidationError
9+
from fastapi.responses import JSONResponse
10+
11+
sys.path.insert(0, os.path.abspath(".."))
12+
13+
import logging
14+
15+
from fastapi import FastAPI, status
16+
from starlette.exceptions import HTTPException as StarletteHTTPException
17+
18+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


# Plain FastAPI instance carrying the API metadata (title, version, contact).
app = FastAPI(
    dependencies=[],
    contact=dict(
        name="Alan Braz",
        url="http://alanbraz.com.br/en/",
    ),
    version="0.0.1",
    title="WxPE Detectors API",
)
30+
31+
32+
class DetectorBaseAPI(FastAPI):
    """FastAPI application that answers errors as ``{"code", "message"}`` JSON.

    On construction it registers handlers for ``RequestValidationError`` and
    Starlette's ``HTTPException`` and adds a ``/health`` route.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``FastAPI`` and install handlers/routes."""
        super().__init__(*args, **kwargs)
        self.add_exception_handler(
            RequestValidationError, self.validation_exception_handler
        )
        self.add_exception_handler(StarletteHTTPException, self.http_exception_handler)
        self.add_api_route("/health", health, description="Check if server is alive")

    @staticmethod
    def _unprocessable_response(message):
        """Build the 422 JSON response shared by all validation-error parsers."""
        return JSONResponse(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            content={
                "code": status.HTTP_422_UNPROCESSABLE_ENTITY,
                "message": message,
            },
        )

    async def validation_exception_handler(self, request, exc):
        """Dispatch a validation error to a parser based on its first error.

        The ``type`` of the first entry of ``exc.errors()`` selects the
        parser: ``"missing"``, a type ending in ``"type"``, or anything else
        (including an empty error list), which gets the generic response.
        """
        errors = exc.errors()
        first_type = errors[0]["type"] if errors else None

        if first_type == "missing":
            return await self.parse_missing_required_parameter_response(request, exc)
        if first_type is not None and first_type.endswith("type"):
            return await self.parse_invalid_type_parameter_response(request, exc)
        return await self.parse_generic_validation_response(request, exc)

    async def parse_missing_required_parameter_response(self, request, exc):
        """Return a 422 response naming every parameter reported as missing."""
        # The last element of "loc" is used as the parameter name.
        names = [
            error["loc"][-1] for error in exc.errors() if error["type"] == "missing"
        ]
        return self._unprocessable_response(f"Missing required parameters: {names}")

    async def parse_invalid_type_parameter_response(self, request, exc):
        """Return a 422 response naming parameters whose error type ends in "type"."""
        names = [
            error["loc"][-1] for error in exc.errors() if error["type"].endswith("type")
        ]
        return self._unprocessable_response(f"Parameters with invalid type: {names}")

    async def parse_generic_validation_response(self, request, exc):
        """Return a 422 response naming every parameter that failed validation."""
        names = [error["loc"][-1] for error in exc.errors()]
        return self._unprocessable_response(f"Invalid parameters: {names}")

    async def http_exception_handler(self, request, exc):
        """Mirror an HTTP exception's status code and detail into the JSON body."""
        return JSONResponse(
            status_code=exc.status_code,
            content={"code": exc.status_code, "message": exc.detail},
        )
93+
94+
95+
async def health():
    # Liveness probe handler: always answers with the constant string "ok".
    alive = "ok"
    return alive
97+
98+
99+
def main(app):
    """Start uvicorn for *app* with layered server settings.

    Settings are taken, in increasing precedence, from:

    1. the ``server`` mapping of the YAML file named by ``CONFIG_FILE_PATH``
       (default ``config.yaml``); the built-in defaults are used instead when
       the file is missing, cannot be parsed, or has no ``server`` mapping;
    2. every ``SERVER_<NAME>`` environment variable, stored as ``<name>``;
    3. the ``HOST`` and ``PORT`` environment variables.

    The resulting mapping is passed to ``uvicorn.run`` as keyword arguments.
    If uvicorn raises, the error is printed and the process exits with
    status 1.
    """
    # Other uvicorn options that may be supplied:
    #   "loop": "uvloop" (that's the default in our setting)
    #   "backlog": 10000
    #   limit_concurrency: Maximum number of concurrent connections or tasks
    #       to allow, before issuing HTTP 503 responses.
    #   timeout_keep_alive: Close Keep-Alive connections if no new data is
    #       received within this timeout.
    defaults = {
        "host": "0.0.0.0",
        "port": "8000",
        "workers": 1,
        "limit_concurrency": 1000,
        "timeout_keep_alive": 30,
    }
    server = dict(defaults)

    config_path = os.getenv("CONFIG_FILE_PATH", "config.yaml")
    try:
        with open(config_path) as stream:
            loaded = yaml.safe_load(stream)
    except FileNotFoundError as fnf:
        print(fnf)
    except yaml.YAMLError as exc:
        print(exc)
    else:
        # An empty file loads as None and a file may lack a "server" mapping;
        # both used to crash on config["server"], so keep the defaults then.
        section = loaded.get("server") if isinstance(loaded, dict) else None
        if isinstance(section, dict):
            server = dict(section)
        else:
            print(f"{config_path}: no 'server' mapping found, using defaults")

    # Environment values are always strings; these uvicorn options must be
    # integers, so convert them instead of handing uvicorn a str.
    int_options = {
        "backlog",
        "limit_concurrency",
        "limit_max_requests",
        "timeout_graceful_shutdown",
        "timeout_keep_alive",
    }
    prefix = "SERVER_"
    for key, value in os.environ.items():
        if not key.startswith(prefix):
            continue
        print(key)
        name = key[len(prefix) :].lower()
        if name in int_options and value.strip():
            value = int(value)
        server[name] = value

    if os.getenv("HOST"):
        server["host"] = os.getenv("HOST")

    # A non-empty PORT wins; otherwise use the configured port, falling back
    # to the default when the key is absent or null.
    port = os.getenv("PORT") or server.get("port")
    if port is None:
        port = defaults["port"]
    server["port"] = int(port)

    workers = server.get("workers")
    if workers is None:
        workers = defaults["workers"]
    # An empty string is passed through unconverted, as before.
    server["workers"] = int(workers) if str(workers) else workers

    if "ssl_ca_certs" in server:
        server["ssl_cert_reqs"] = ssl.CERT_REQUIRED

    # NOTE(review): this logs every server option verbatim; confirm no secret
    # (e.g. a key-file password) is ever supplied through this mapping.
    logger.info("server configuration: %s", server)

    try:
        uvicorn.run(app, **server)
    except Exception as err:
        print(err)
        sys.exit(1)

detectors/common/log_conf.yaml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
version: 1
2+
disable_existing_loggers: False
3+
formatters:
4+
default:
5+
# "()": uvicorn.logging.DefaultFormatter
6+
format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
7+
access:
8+
# "()": uvicorn.logging.AccessFormatter
9+
format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
10+
handlers:
11+
default:
12+
formatter: default
13+
class: logging.StreamHandler
14+
stream: ext://sys.stderr
15+
access:
16+
formatter: access
17+
class: logging.StreamHandler
18+
stream: ext://sys.stdout
19+
loggers:
20+
uvicorn.error:
21+
level: INFO
22+
handlers:
23+
- default
24+
propagate: no
25+
uvicorn.access:
26+
level: INFO
27+
handlers:
28+
- access
29+
propagate: no
30+
root:
31+
level: DEBUG
32+
handlers:
33+
- default
34+
propagate: no

detectors/common/requirements-dev.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
coverage==7.6.1
2+
locust==2.31.1
3+
pre-commit==3.8.0
4+
pytest==8.3.2
5+
tls-test-tools

detectors/common/requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
fastapi==0.112.0
2+
uvicorn==0.30.5
3+
httpx==0.27.0

detectors/common/scheme.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
from enum import Enum
2+
from typing import Dict, List, Optional
3+
4+
from pydantic import BaseModel, Field, RootModel
5+
6+
7+
class RoleEnum(str, Enum):
    # Closed set of message roles; members are also plain strings (str mixin).
    # Documented with comments, not a docstring, so schema output is untouched.
    system = "system"
    assistant = "assistant"
    user = "user"
11+
12+
13+
class Message(BaseModel):
    # A single chat message: who wrote it and its text.
    role: RoleEnum = Field(
        # str(RoleEnum) renders as "<enum 'RoleEnum'>"; list the role values.
        description="Who wrote the message: [{}]".format(
            ", ".join(role.value for role in RoleEnum)
        )
    )
    content: str = Field(description="The text of the message")
18+
19+
20+
class TextDetectionHttpRequest(BaseModel):
    # Request body carrying a single text.
    # NOTE(review): "[email protected]" looks like an e-mail obfuscation
    # placeholder rather than a real sample address - confirm the literal.
    text: str = Field(
        # Extra Field keywords are deprecated in pydantic v2; the example is
        # passed via json_schema_extra, which yields the same schema entry.
        json_schema_extra={
            "example": "Your email is [email protected]! Only the next instance of email will be processed. [email protected]. Your SSN is 123-45-6789."
        }
    )
24+
25+
26+
class TextDetectionResponse(BaseModel):
    # One detection: start/end positions, detection name and type, and score.
    # Examples go through json_schema_extra because extra Field keywords are
    # deprecated in pydantic v2; the generated schema is unchanged.
    start: int = Field(json_schema_extra={"example": 14})
    end: int = Field(json_schema_extra={"example": 26})
    detection: str = Field(json_schema_extra={"example": "Net.EmailAddress"})
    detection_type: str = Field(json_schema_extra={"example": "pii"})
    score: float = Field(json_schema_extra={"example": 0.8})
32+
33+
34+
class GenerationDetectionHttpRequest(BaseModel):
    # Request body pairing the user's prompt with the LLM's generated response.
    # Examples go through json_schema_extra because extra Field keywords are
    # deprecated in pydantic v2; the generated schema is unchanged.
    user_input: str = Field(
        description="prompt is the user input to the LLM",
        json_schema_extra={"example": "This is my amazing prompt"},
    )
    generated_response: str = Field(
        description="Generated response from the LLM",
        json_schema_extra={"example": "Some text generated by an LLM"},
    )
43+
44+
45+
class GenerationDetectionResponse(BaseModel):
    # Detection result made of a detection type and a score.
    # Examples go through json_schema_extra because extra Field keywords are
    # deprecated in pydantic v2; the generated schema is unchanged.
    detection_type: str = Field(json_schema_extra={"example": "faithfulness"})
    score: float = Field(json_schema_extra={"example": 0.5})
48+
49+
50+
class ChatDetectionHttpRequest(BaseModel):
    # Request body carrying a chat history as a list of Message objects.
    chat_history: List[Message] = Field(
        # Extra Field keywords are deprecated in pydantic v2; the example is
        # passed via json_schema_extra, which yields the same schema entry.
        json_schema_extra={
            "example": [
                Message(role=RoleEnum.system, content="You are a helpful assistant."),
                Message(role="user", content="Hi, is this powered by siri or alexa?"),
                Message(role=RoleEnum.assistant, content="Better, it's watsonx"),
                Message(role="user", content="This is awesome!"),
            ]
        }
    )
59+
60+
61+
class ContextBasedDetectionHttpRequest(GenerationDetectionHttpRequest):
    # Generation request extended with context chunks and optional source URLs.
    # Examples go through json_schema_extra because extra Field keywords are
    # deprecated in pydantic v2; the generated schema entry is unchanged.
    context_chunks: List[str] = Field(
        json_schema_extra={"example": ["One paragraph", "Long text of content"]}
    )
    # In pydantic v2 an Optional annotation without a default is still a
    # required field; default=None makes it omittable, as the type suggests.
    documents_public_urls: Optional[List[str]] = Field(
        default=None,
        description="URLs of the content to be used",
        json_schema_extra={
            "example": ["https://en.wikipedia.org/wiki/IBM", "https://research.ibm.com/"]
        },
    )
67+
68+
69+
class AttributionBasedDetectionResponse(GenerationDetectionResponse):
    # Generation detection result extended with a list of evidence strings.
    evidences: List[str] = Field(
        # Extra Field keywords are deprecated in pydantic v2; the example is
        # passed via json_schema_extra, which yields the same schema entry.
        json_schema_extra={
            "example": ["Only the next instance of email will be processed"]
        },
    )
73+
74+
75+
# Former schemas, kept to be able to keep testing.
76+
class DetectionHttpRequest(TextDetectionHttpRequest):
    # Text request extended with a free-form "parameters" dictionary.
    # Extra Field keywords are deprecated in pydantic v2; the example is
    # passed via json_schema_extra, which yields the same schema entry.
    parameters: Dict = Field(json_schema_extra={"example": {"threshold": 0.8}})
78+
79+
80+
class DetectionResponseSpan(TextDetectionResponse):
    # Text detection result extended with a "text" field.
    # Extra Field keywords are deprecated in pydantic v2; the example is
    # passed via json_schema_extra, which yields the same schema entry.
    text: str = Field(json_schema_extra={"example": "my bad text"})
82+
83+
84+
class DetectionResponse(BaseModel):
    # Response wrapper holding a list of DetectionResponseSpan items.
    detections: List[DetectionResponseSpan]
86+
87+
88+
class Evidence(BaseModel):
    # Evidence record; currently it only carries its source.
    source: str = Field(
        title="Source",
        description="Source of the evidence, it can be url of the evidence etc",
        # Extra Field keywords are deprecated in pydantic v2; the example is
        # passed via json_schema_extra, which yields the same schema entry.
        json_schema_extra={"example": "https://en.wikipedia.org/wiki/IBM"},
    )
94+
95+
96+
class EvidenceType(str, Enum):
    # Closed set of evidence kinds; members are also plain strings (str mixin).
    # Documented with comments, not a docstring, so schema output is untouched.
    url = "url"
    title = "title"
99+
100+
101+
class EvidenceObj(BaseModel):
    # Pairs an evidence type with the evidence itself.
    type: EvidenceType = Field(
        title="EvidenceType",
        description="Type field signifying the type of evidence provided. Example url, title etc",
        # Extra Field keywords are deprecated in pydantic v2; the example is
        # passed via json_schema_extra, which yields the same schema entry.
        json_schema_extra={"example": "url"},
    )
    evidence: Evidence = Field(
        description="Evidence object, currently only containing source, but in future can contain other optional arguments like id, etc",
    )
110+
111+
112+
class ContentAnalysisHttpRequest(BaseModel):
    # Request body with a non-empty list of texts (min_length=1).
    # NOTE(review): "[email protected]" looks like an e-mail obfuscation
    # placeholder rather than a real sample address - confirm the literal.
    contents: List[str] = Field(
        min_length=1,
        title="Contents",
        description="Field allowing users to provide list of texts for analysis. Note, results of this endpoint will contain analysis / detection of each of the provided text in the order they are present in the contents object.",
        # Extra Field keywords are deprecated in pydantic v2; the example is
        # passed via json_schema_extra, which yields the same schema entry.
        json_schema_extra={
            "example": [
                "Your email is [email protected]! Only the next instance of email will be processed. [email protected]. Your SSN is 123-45-6789."
            ]
        },
    )
121+
122+
123+
class ContentAnalysisResponse(BaseModel):
    # One detection for an analysed text, with optional supporting evidences.
    # Examples go through json_schema_extra because extra Field keywords are
    # deprecated in pydantic v2; the generated schema is unchanged.
    start: int = Field(json_schema_extra={"example": 14})
    end: int = Field(json_schema_extra={"example": 26})
    detection: str = Field(json_schema_extra={"example": "Net.EmailAddress"})
    detection_type: str = Field(json_schema_extra={"example": "pii"})
    score: float = Field(json_schema_extra={"example": 0.8})
    evidences: Optional[List[EvidenceObj]] = Field(
        description="Optional field providing evidences for the provided detection",
        default=None,
    )
133+
134+
135+
class ContentsAnalysisResponse(RootModel):
    # Root model whose value is a list of lists of ContentAnalysisResponse.
    # presumably one inner list per input text - TODO confirm against the endpoint
    root: List[List[ContentAnalysisResponse]] = Field(
        title="Response Text Content Analysis Unary Handler Api V1 Text Content Post"
    )
139+
140+
141+
class Error(BaseModel):
    # Error payload made of an integer code and a message string.
    code: int
    message: str

0 commit comments

Comments
 (0)