
Commit 30fd0bb

🚧 extended the Detector class to enable some compatibility with the granite-guardian CausalLM model
1 parent d1aeb2c commit 30fd0bb

5 files changed: +193 -63 lines changed
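
According to the commit message, the compatibility work itself lives in the detector's Python code, which is not among the diffs shown below. As a rough illustration only (class and method names are hypothetical, not the repository's code), supporting both an encoder classifier such as granite-guardian-hap-38m and a CausalLM guardian such as granite-guardian-3.0-2b usually means branching on the model architecture: a classifier returns per-label logits, while a guardian CausalLM answers a chat-templated safety prompt with "Yes"/"No".

# Illustrative sketch only -- hypothetical names, not the actual Detector code,
# which is not part of the diffs shown on this page.
import torch
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

class Detector:
    def __init__(self, model_path: str):
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        config = AutoConfig.from_pretrained(model_path)
        # hap-38m style detectors are sequence classifiers; granite-guardian-3.0-2b is a CausalLM.
        self.is_causal_lm = any("ForCausalLM" in a for a in (config.architectures or []))
        if self.is_causal_lm:
            self.model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16)
        else:
            self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
        self.model.eval()

    @torch.no_grad()
    def detect(self, text: str) -> dict:
        if not self.is_causal_lm:
            # Classifier path: softmax over the label logits.
            inputs = self.tokenizer(text, return_tensors="pt", truncation=True)
            probs = torch.softmax(self.model(**inputs).logits, dim=-1)[0]
            idx = int(probs.argmax())
            return {"label": self.model.config.id2label[idx], "score": float(probs[idx])}
        # Guardian-style CausalLM path: build the chat prompt, generate a short answer,
        # and map the generated "Yes"/"No" onto a detection result.
        prompt_ids = self.tokenizer.apply_chat_template(
            [{"role": "user", "content": text}],
            add_generation_prompt=True,
            return_tensors="pt",
        )
        out = self.model.generate(prompt_ids, max_new_tokens=4, do_sample=False)
        answer = self.tokenizer.decode(out[0][prompt_ids.shape[-1]:], skip_special_tokens=True)
        flagged = answer.strip().lower().startswith("yes")
        return {"label": "Yes" if flagged else "No", "score": 1.0 if flagged else 0.0}

The granite-guardian model card additionally describes a guardian_config option passed through the chat template to select the risk definition; a full integration would need to surface that, but it is omitted above for brevity.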
Lines changed: 6 additions & 5 deletions

@@ -1,15 +1,16 @@
 apiVersion: serving.kserve.io/v1beta1
 kind: InferenceService
 metadata:
-  name: guardrails-detector-ibm-hap
+  name: guardrails-detector-ibm-guardian
   labels:
     opendatahub.io/dashboard: 'true'
   annotations:
-    openshift.io/display-name: guardrails-detector-ibm-hap
+    openshift.io/display-name: guardrails-detector-ibm-guardian
     security.opendatahub.io/enable-auth: 'true'
     serving.knative.openshift.io/enablePassthrough: 'true'
     sidecar.istio.io/inject: 'true'
     sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    serving.kserve.io/deploymentMode: RawDeployment
 spec:
   predictor:
     maxReplicas: 1
@@ -18,7 +19,7 @@ spec:
       modelFormat:
         name: guardrails-detector-huggingface
       name: ''
-      runtime: guardrails-detector-runtime
+      runtime: guardrails-detector-runtime-guardian
       storage:
-        key: aws-connection-minio-data-connection
-        path: granite-guardian-hap-38m
+        key: aws-connection-minio-data-connection-guardrails-guardian
+        path: granite-guardian-3.0-2b
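
Two wiring details in this InferenceService are easy to get wrong: storage.key must name the renamed data-connection Secret defined in model_container.yaml, and storage.path must match the prefix under which the init container places the model inside the MinIO bucket. A quick sanity check is sketched below; it assumes the MinIO Service is port-forwarded to localhost and that the bucket serves the downloaded /mnt/models/huggingface directory, and it uses only the (dummy) credential and bucket values already visible in the Secret.

# Sketch: list the model files that storage.path points at.
# Assumes e.g. `oc port-forward svc/minio-guardrails-guardian 9000:9000` is running.
import boto3

s3 = boto3.client(
    "s3",
    endpoint_url="http://localhost:9000",
    aws_access_key_id="THEACCESSKEY",      # decoded AWS_ACCESS_KEY_ID from the Secret
    aws_secret_access_key="THESECRETKEY",  # decoded AWS_SECRET_ACCESS_KEY from the Secret
)
resp = s3.list_objects_v2(Bucket="huggingface", Prefix="granite-guardian-3.0-2b/")
for obj in resp.get("Contents", []):
    print(obj["Key"], obj["Size"])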

detectors/huggingface/deploy/model_container.yaml

Lines changed: 15 additions & 13 deletions

@@ -1,51 +1,51 @@
 apiVersion: v1
 kind: Service
 metadata:
-  name: minio
+  name: minio-guardrails-guardian
 spec:
   ports:
     - name: minio-client-port
       port: 9000
       protocol: TCP
       targetPort: 9000
   selector:
-    app: minio
+    app: minio-guardrails-guardian
 ---
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: vllm-models-claim
+  name: guardrails-models-claim-guardian
 spec:
   accessModes:
     - ReadWriteOnce
   volumeMode: Filesystem
   # storageClassName: gp3-csi
   resources:
     requests:
-      storage: 300Gi
+      storage: 100Gi
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: llm-container-deployment # <--- change this
+  name: guardrails-container-deployment-guardian # <--- change this
   labels:
-    app: minio # <--- change this to match label on the pod
+    app: minio-guardrails-guardian # <--- change this to match label on the pod
 spec:
   replicas: 1
   selector:
     matchLabels:
-      app: minio # <--- change this to match label on the pod
+      app: minio-guardrails-guardian # <--- change this to match label on the pod
   template: # => from here down copy and paste the pods metadata: and spec: sections
     metadata:
       labels:
-        app: minio
+        app: minio-guardrails-guardian
         maistra.io/expose-route: 'true'
-      name: minio
+      name: minio-guardrails-guardian
     spec:
       volumes:
         - name: model-volume
           persistentVolumeClaim:
-            claimName: vllm-models-claim
+            claimName: guardrails-models-claim-guardian
       initContainers:
         - name: download-model
           image: quay.io/rgeada/llm_downloader:latest
@@ -55,7 +55,9 @@ spec:
             - bash
             - -c
             - |
-              model="ibm-granite/granite-guardian-hap-38m"
+              # model="ibm-granite/granite-guardian-hap-38m"
+              # model="h2oai/deberta_finetuned_pii"
+              model="ibm-granite/granite-guardian-3.0-2b"
               # model="microsoft/Phi-3-mini-4k-instruct"
               echo "starting download"
               /tmp/venv/bin/huggingface-cli download $model --local-dir /mnt/models/huggingface/$(basename $model)
@@ -92,7 +94,7 @@ spec:
 apiVersion: v1
 kind: Secret
 metadata:
-  name: aws-connection-minio-data-connection
+  name: aws-connection-minio-data-connection-guardrails-guardian
   labels:
     opendatahub.io/dashboard: 'true'
     opendatahub.io/managed: 'true'
@@ -103,7 +105,7 @@ data:
   AWS_ACCESS_KEY_ID: VEhFQUNDRVNTS0VZ
   AWS_DEFAULT_REGION: dXMtc291dGg=
   AWS_S3_BUCKET: aHVnZ2luZ2ZhY2U=
-  AWS_S3_ENDPOINT: aHR0cDovL21pbmlvOjkwMDA=
+  AWS_S3_ENDPOINT: aHR0cDovL21pbmlvLWd1YXJkcmFpbHMtZ3VhcmRpYW46OTAwMA==
   AWS_SECRET_ACCESS_KEY: VEhFU0VDUkVUS0VZ
 type: Opaque
 ---
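
Renaming the MinIO Service means the base64-encoded AWS_S3_ENDPOINT in the Secret has to be re-encoded to point at the new hostname; the access key, region, bucket, and secret key values are unchanged. A minimal check of the new encoding, using only values visible in this diff:

# Verify the re-encoded endpoint matches the renamed MinIO Service.
import base64

new_endpoint = base64.b64encode(b"http://minio-guardrails-guardian:9000").decode()
assert new_endpoint == "aHR0cDovL21pbmlvLWd1YXJkcmFpbHMtZ3VhcmRpYW46OTAwMA=="
# The old value decodes to the previous in-cluster hostname:
print(base64.b64decode("aHR0cDovL21pbmlvOjkwMDA=").decode())  # http://minio:9000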

detectors/huggingface/deploy/servingruntime.yaml

Lines changed: 11 additions & 4 deletions

@@ -1,7 +1,7 @@
 apiVersion: serving.kserve.io/v1alpha1
 kind: ServingRuntime
 metadata:
-  name: guardrails-detector-runtime
+  name: guardrails-detector-runtime-guardian
   annotations:
     openshift.io/display-name: Guardrails Detector ServingRuntime for KServe
     opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
@@ -17,13 +17,13 @@ spec:
       name: guardrails-detector-huggingface
   containers:
     - name: kserve-container
-      image: quay.io/rgeada/guardrails-detector-huggingface
+      image: quay.io/rh-ee-mmisiura/guardrails-detector-huggingface:c7598db
      command:
        - uvicorn
        - app:app
      args:
        - "--workers"
-        - "4"
+        - "1"
        - "--host"
        - "0.0.0.0"
        - "--port"
@@ -37,4 +37,11 @@ spec:
           value: /tmp/hf_home
       ports:
         - containerPort: 8000
-          protocol: TCP
+          protocol: TCP
+      resources:
+        requests:
+          memory: "18Gi" # pre-allocate 18Gi of memory -- might be needed for larger models
+          cpu: "1" # reserve 1 CPU core
+        limits:
+          memory: "20Gi" # limit to 20Gi of memory
+          cpu: "2" # limit to 2 CPU cores
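
Dropping uvicorn to a single worker presumably avoids loading the roughly 2B-parameter guardian model once per worker process, which is also what the 18Gi request / 20Gi limit are sized for. Once the InferenceService is ready, a request along the following lines should exercise the detector; the /api/v1/text/contents route, the detector-id header, and the payload shape follow the detections API this detector is typically paired with and are assumptions here, not something the diff confirms.

# Smoke-test sketch; route and payload shape are assumptions (see note above).
import requests

DETECTOR_URL = "http://localhost:8000"  # e.g. after port-forwarding the predictor pod

resp = requests.post(
    f"{DETECTOR_URL}/api/v1/text/contents",
    headers={"detector-id": "guardrails-detector-ibm-guardian",
             "Content-Type": "application/json"},
    json={"contents": ["You are a useless pile of scrap metal."], "detector_params": {}},
    timeout=120,
)
resp.raise_for_status()
print(resp.json())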
