Commit e4223c5

Merge pull request #1 from trustyai-explainability/text_generation_detection
Text generation detection
2 parents 81cda06 + 50540d1

File tree

15 files changed: +676 −8 lines

detectors/Dockerfile.hf

Lines changed: 31 additions & 0 deletions (new file)

```
FROM registry.access.redhat.com/ubi9/ubi-minimal as base
RUN microdnf update -y && \
    microdnf install -y --nodocs \
    python-pip python-devel && \
    pip install --upgrade --no-cache-dir pip wheel && \
    microdnf clean all
RUN pip install --no-cache-dir torch

# FROM icr.io/fm-stack/ubi9-minimal-py39-torch as builder
FROM base as builder

COPY ./common/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY ./huggingface/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

FROM builder

WORKDIR /app
ARG CACHEBUST=1
RUN echo "$CACHEBUST"
COPY ./common /common
COPY ./huggingface/app.py /app
COPY ./huggingface/detector.py /app
COPY ./huggingface/scheme.py /app

EXPOSE 8000
CMD ["uvicorn", "app:app", "--workers", "4", "--host", "0.0.0.0", "--port", "8000", "--log-config", "/common/log_conf.yaml"]

# gunicorn main:app --workers 4 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:8000
```
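For local testing, the image can be built and run directly. This is a minimal sketch assuming the build runs from the repository's `detectors/` directory (so the `./common` and `./huggingface` COPY paths resolve); the image tag is arbitrary:

```
# Build from detectors/ so the COPY paths in Dockerfile.hf resolve
docker build -f Dockerfile.hf -t guardrails-detector-hf:dev .

# Serve the detector API on localhost:8000 (the port EXPOSEd above)
docker run --rm -p 8000:8000 guardrails-detector-hf:dev
```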

detectors/common/app.py

Lines changed: 6 additions & 2 deletions

```
@@ -14,9 +14,11 @@

 from fastapi import FastAPI, status
 from starlette.exceptions import HTTPException as StarletteHTTPException
+from prometheus_fastapi_instrumentator import Instrumentator

 logger = logging.getLogger(__name__)
-
+uvicorn_error_logger = logging.getLogger("uvicorn.error")
+uvicorn_error_logger.name = "uvicorn"

 app = FastAPI(
     title="WxPE Detectors API",
@@ -38,6 +40,7 @@ def __init__(self, *args, **kwargs):
         self.add_exception_handler(StarletteHTTPException, self.http_exception_handler)
         self.add_api_route("/health", health, description="Check if server is alive")

+
     async def validation_exception_handler(self, request, exc):
         errors = exc.errors()
         if len(errors) > 0 and errors[0]["type"] == "missing":
@@ -95,7 +98,6 @@ async def http_exception_handler(self, request, exc):
 async def health():
     return "ok"

-
 def main(app):
     # "loop": "uvloop", (that's default in our setting)
     # "backlog": 10000
@@ -112,6 +114,8 @@ def main(app):
         }
     }

+    logger.info("config: %s", os.getenv("CONFIG_FILE_PATH"))
+
     try:
         with open(os.getenv("CONFIG_FILE_PATH", "config.yaml")) as stream:
             config = yaml.safe_load(stream)
```

detectors/common/requirements.txt

Lines changed: 3 additions & 1 deletion

```
@@ -1,3 +1,5 @@
 fastapi==0.112.0
 uvicorn==0.30.5
-httpx==0.27.0
+httpx==0.27.0
+prometheus_client >= 0.18.0
+prometheus-fastapi-instrumentator >= 7.0.0
```

detectors/huggingface/README.md

Lines changed: 5 additions & 0 deletions (new file)

```
oc apply -f deployment/model_container.yaml
oc apply -f deployment/servingruntime.yaml
oc apply -f deployment/isvc.yaml
```
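After the three `oc apply` calls, a quick sanity check is to watch the InferenceService become Ready — a sketch using the resource and container names from the manifests below (the pod name is a placeholder):

```
# READY should become True once the runtime pod is up and the model is loaded
oc get inferenceservice guardrails-detector-ibm-guardian

# Follow the runtime container logs
oc logs -f pod/<predictor-pod> -c kserve-container
```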

detectors/huggingface/__init__.py

Whitespace-only changes.

detectors/huggingface/app.py

Lines changed: 47 additions & 0 deletions (new file)

```
import os
import sys
from contextlib import asynccontextmanager
from typing import Annotated

from fastapi import Header
from prometheus_fastapi_instrumentator import Instrumentator
sys.path.insert(0, os.path.abspath(".."))

from common.app import DetectorBaseAPI as FastAPI
from detector import Detector
from scheme import (
    ContentAnalysisHttpRequest,
    ContentsAnalysisResponse,
    Error,
)

detector_objects = {}


@asynccontextmanager
async def lifespan(app: FastAPI):
    detector_objects["detector"] = Detector()
    yield
    # Clean up the ML models and release the resources
    detector_objects.clear()


app = FastAPI(lifespan=lifespan, dependencies=[])
Instrumentator().instrument(app).expose(app)


@app.post(
    "/api/v1/text/contents",
    response_model=ContentsAnalysisResponse,
    description="""Detectors that work on content text, be it user prompt or generated text. \
        Generally classification type detectors qualify for this. <br>""",
    responses={
        404: {"model": Error, "description": "Resource Not Found"},
        422: {"model": Error, "description": "Validation Error"},
    },
)
async def detector_unary_handler(
    request: ContentAnalysisHttpRequest,
    detector_id: Annotated[str, Header(example="en_syntax_slate.38m.hap")],
):
    return ContentsAnalysisResponse(root=detector_objects["detector"].run(request))
```
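With the service running, the unary endpoint can be exercised with curl. The exact request body is defined by `ContentAnalysisHttpRequest` in `scheme.py`, which is not part of this diff excerpt; the sketch below assumes the common detector-API shape with a `contents` list:

```
# detector-id is the header declared by detector_unary_handler;
# the value here is the example from the code
curl -s http://localhost:8000/api/v1/text/contents \
  -H "Content-Type: application/json" \
  -H "detector-id: en_syntax_slate.38m.hap" \
  -d '{"contents": ["text to analyze"]}'
```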
detectors/huggingface/deployment/isvc.yaml

Lines changed: 25 additions & 0 deletions (new file)

```
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: guardrails-detector-ibm-guardian
  labels:
    opendatahub.io/dashboard: 'true'
  annotations:
    openshift.io/display-name: guardrails-detector-ibm-guardian
    security.opendatahub.io/enable-auth: 'true'
    serving.knative.openshift.io/enablePassthrough: 'true'
    sidecar.istio.io/inject: 'true'
    sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    serving.kserve.io/deploymentMode: RawDeployment
spec:
  predictor:
    maxReplicas: 1
    minReplicas: 1
    model:
      modelFormat:
        name: guardrails-detector-huggingface
      name: ''
      runtime: guardrails-detector-runtime-guardian
      storage:
        key: aws-connection-minio-data-connection-guardrails-guardian
        path: granite-guardian-3.0-2b
```
detectors/huggingface/deployment/model_container.yaml

Lines changed: 127 additions & 0 deletions (new file)

```
apiVersion: v1
kind: Service
metadata:
  name: minio-guardrails-guardian
spec:
  ports:
    - name: minio-client-port
      port: 9000
      protocol: TCP
      targetPort: 9000
  selector:
    app: minio-guardrails-guardian
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: guardrails-models-claim-guardian
spec:
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  # storageClassName: gp3-csi
  resources:
    requests:
      storage: 100Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: guardrails-container-deployment-guardian # <--- change this
  labels:
    app: minio-guardrails-guardian # <--- change this to match label on the pod
spec:
  replicas: 1
  selector:
    matchLabels:
      app: minio-guardrails-guardian # <--- change this to match label on the pod
  template: # => from here down copy and paste the pod's metadata: and spec: sections
    metadata:
      labels:
        app: minio-guardrails-guardian
        maistra.io/expose-route: 'true'
      name: minio-guardrails-guardian
    spec:
      volumes:
        - name: model-volume
          persistentVolumeClaim:
            claimName: guardrails-models-claim-guardian
      initContainers:
        - name: download-model
          image: quay.io/rgeada/llm_downloader:latest
          securityContext:
            fsGroup: 1001
          command:
            - bash
            - -c
            - |
              # model="ibm-granite/granite-guardian-hap-38m"
              # model="h2oai/deberta_finetuned_pii"
              model="ibm-granite/granite-guardian-3.0-2b"
              # model="microsoft/Phi-3-mini-4k-instruct"
              echo "starting download"
              /tmp/venv/bin/huggingface-cli download $model --local-dir /mnt/models/huggingface/$(basename $model)
              echo "Done!"
          resources:
            limits:
              memory: "2Gi"
              cpu: "2"
          volumeMounts:
            - mountPath: "/mnt/models/"
              name: model-volume
      containers:
        - args:
            - server
            - /models
          env:
            - name: MINIO_ACCESS_KEY
              value: THEACCESSKEY
            - name: MINIO_SECRET_KEY
              value: THESECRETKEY
          image: quay.io/trustyai/modelmesh-minio-examples:latest
          name: minio
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            seccompProfile:
              type: RuntimeDefault
          volumeMounts:
            - mountPath: "/models/"
              name: model-volume
---
apiVersion: v1
kind: Secret
metadata:
  name: aws-connection-minio-data-connection-guardrails-guardian
  labels:
    opendatahub.io/dashboard: 'true'
    opendatahub.io/managed: 'true'
  annotations:
    opendatahub.io/connection-type: s3
    openshift.io/display-name: Minio Data Connection
data:
  AWS_ACCESS_KEY_ID: VEhFQUNDRVNTS0VZ
  AWS_DEFAULT_REGION: dXMtc291dGg=
  AWS_S3_BUCKET: aHVnZ2luZ2ZhY2U=
  AWS_S3_ENDPOINT: aHR0cDovL21pbmlvLWd1YXJkcmFpbHMtZ3VhcmRpYW46OTAwMA==
  AWS_SECRET_ACCESS_KEY: VEhFU0VDUkVUS0VZ
type: Opaque
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: user-one
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: user-one-view
subjects:
  - kind: ServiceAccount
    name: user-one
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: view
```
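The Secret's `data` values are just the base64-encoded MinIO credentials and endpoint wired into the minio container above, which can be verified directly:

```
echo 'VEhFQUNDRVNTS0VZ' | base64 -d
# THEACCESSKEY
echo 'aHR0cDovL21pbmlvLWd1YXJkcmFpbHMtZ3VhcmRpYW46OTAwMA==' | base64 -d
# http://minio-guardrails-guardian:9000
```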
detectors/huggingface/deployment/servingruntime.yaml

Lines changed: 47 additions & 0 deletions (new file)

```
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: guardrails-detector-runtime-guardian
  annotations:
    openshift.io/display-name: Guardrails Detector ServingRuntime for KServe
    opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
  labels:
    opendatahub.io/dashboard: 'true'
spec:
  annotations:
    prometheus.io/port: '8080'
    prometheus.io/path: '/metrics'
  multiModel: false
  supportedModelFormats:
    - autoSelect: true
      name: guardrails-detector-huggingface
  containers:
    - name: kserve-container
      image: quay.io/rh-ee-mmisiura/guardrails-detector-huggingface:3d51741
      command:
        - uvicorn
        - app:app
      args:
        - "--workers"
        - "1"
        - "--host"
        - "0.0.0.0"
        - "--port"
        - "8000"
        - "--log-config"
        - "/common/log_conf.yaml"
      env:
        - name: MODEL_DIR
          value: /mnt/models
        - name: HF_HOME
          value: /tmp/hf_home
      ports:
        - containerPort: 8000
          protocol: TCP
      resources:
        requests:
          memory: "18Gi" # pre-allocate 18Gi of memory -- might be needed for larger models
          cpu: "1" # reserve 1 CPU core
        limits:
          memory: "20Gi" # limit to 20Gi of memory
          cpu: "2" # limit to 2 CPU cores
```
