
Commit 30fd0bb

🚧 extended the Detector class to enable some compatibility with the granite-guardian CausalLM model
1 parent d1aeb2c commit 30fd0bb

5 files changed: +193 -63 lines changed
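
According to the commit message, the compatibility work itself lives in the detector's Python code, which is not among the diffs shown below. As a rough illustration only (class and method names are hypothetical, not the repository's code), supporting both an encoder classifier such as granite-guardian-hap-38m and a CausalLM guardian such as granite-guardian-3.0-2b usually means branching on the model architecture: a classifier returns per-label logits, while a guardian CausalLM answers a chat-templated safety prompt with "Yes"/"No".

# Illustrative sketch only -- hypothetical names, not the actual Detector code,
# which is not part of the diffs shown on this page.
import torch
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

class Detector:
    def __init__(self, model_path: str):
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        config = AutoConfig.from_pretrained(model_path)
        # hap-38m style detectors are sequence classifiers; granite-guardian-3.0-2b is a CausalLM.
        self.is_causal_lm = any("ForCausalLM" in a for a in (config.architectures or []))
        if self.is_causal_lm:
            self.model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16)
        else:
            self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
        self.model.eval()

    @torch.no_grad()
    def detect(self, text: str) -> dict:
        if not self.is_causal_lm:
            # Classifier path: softmax over the label logits.
            inputs = self.tokenizer(text, return_tensors="pt", truncation=True)
            probs = torch.softmax(self.model(**inputs).logits, dim=-1)[0]
            idx = int(probs.argmax())
            return {"label": self.model.config.id2label[idx], "score": float(probs[idx])}
        # Guardian-style CausalLM path: build the chat prompt, generate a short answer,
        # and map the generated "Yes"/"No" onto a detection result.
        prompt_ids = self.tokenizer.apply_chat_template(
            [{"role": "user", "content": text}],
            add_generation_prompt=True,
            return_tensors="pt",
        )
        out = self.model.generate(prompt_ids, max_new_tokens=4, do_sample=False)
        answer = self.tokenizer.decode(out[0][prompt_ids.shape[-1]:], skip_special_tokens=True)
        flagged = answer.strip().lower().startswith("yes")
        return {"label": "Yes" if flagged else "No", "score": 1.0 if flagged else 0.0}

The granite-guardian model card additionally describes a guardian_config option passed through the chat template to select the risk definition; a full integration would need to surface that, but it is omitted above for brevity.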
Lines changed: 6 additions & 5 deletions

@@ -1,15 +1,16 @@
 apiVersion: serving.kserve.io/v1beta1
 kind: InferenceService
 metadata:
-  name: guardrails-detector-ibm-hap
+  name: guardrails-detector-ibm-guardian
   labels:
     opendatahub.io/dashboard: 'true'
   annotations:
-    openshift.io/display-name: guardrails-detector-ibm-hap
+    openshift.io/display-name: guardrails-detector-ibm-guardian
     security.opendatahub.io/enable-auth: 'true'
     serving.knative.openshift.io/enablePassthrough: 'true'
     sidecar.istio.io/inject: 'true'
     sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    serving.kserve.io/deploymentMode: RawDeployment
 spec:
   predictor:
     maxReplicas: 1
@@ -18,7 +19,7 @@ spec:
       modelFormat:
         name: guardrails-detector-huggingface
       name: ''
-      runtime: guardrails-detector-runtime
+      runtime: guardrails-detector-runtime-guardian
       storage:
-        key: aws-connection-minio-data-connection
-        path: granite-guardian-hap-38m
+        key: aws-connection-minio-data-connection-guardrails-guardian
+        path: granite-guardian-3.0-2b
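
Two wiring details in this InferenceService are easy to get wrong: storage.key must name the renamed data-connection Secret defined in model_container.yaml, and storage.path must match the prefix under which the init container places the model inside the MinIO bucket. A quick sanity check is sketched below; it assumes the MinIO Service is port-forwarded to localhost and that the bucket serves the downloaded /mnt/models/huggingface directory, and it uses only the (dummy) credential and bucket values already visible in the Secret.

# Sketch: list the model files that storage.path points at.
# Assumes e.g. `oc port-forward svc/minio-guardrails-guardian 9000:9000` is running.
import boto3

s3 = boto3.client(
    "s3",
    endpoint_url="http://localhost:9000",
    aws_access_key_id="THEACCESSKEY",      # decoded AWS_ACCESS_KEY_ID from the Secret
    aws_secret_access_key="THESECRETKEY",  # decoded AWS_SECRET_ACCESS_KEY from the Secret
)
resp = s3.list_objects_v2(Bucket="huggingface", Prefix="granite-guardian-3.0-2b/")
for obj in resp.get("Contents", []):
    print(obj["Key"], obj["Size"])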

detectors/huggingface/deploy/model_container.yaml

Lines changed: 15 additions & 13 deletions

@@ -1,51 +1,51 @@
 apiVersion: v1
 kind: Service
 metadata:
-  name: minio
+  name: minio-guardrails-guardian
 spec:
   ports:
     - name: minio-client-port
       port: 9000
       protocol: TCP
       targetPort: 9000
   selector:
-    app: minio
+    app: minio-guardrails-guardian
 ---
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: vllm-models-claim
+  name: guardrails-models-claim-guardian
 spec:
   accessModes:
     - ReadWriteOnce
   volumeMode: Filesystem
   # storageClassName: gp3-csi
   resources:
     requests:
-      storage: 300Gi
+      storage: 100Gi
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: llm-container-deployment # <--- change this
+  name: guardrails-container-deployment-guardian # <--- change this
   labels:
-    app: minio # <--- change this to match label on the pod
+    app: minio-guardrails-guardian # <--- change this to match label on the pod
 spec:
   replicas: 1
   selector:
     matchLabels:
-      app: minio # <--- change this to match label on the pod
+      app: minio-guardrails-guardian # <--- change this to match label on the pod
   template: # => from here down copy and paste the pods metadata: and spec: sections
     metadata:
       labels:
-        app: minio
+        app: minio-guardrails-guardian
         maistra.io/expose-route: 'true'
-      name: minio
+      name: minio-guardrails-guardian
     spec:
       volumes:
         - name: model-volume
           persistentVolumeClaim:
-            claimName: vllm-models-claim
+            claimName: guardrails-models-claim-guardian
       initContainers:
         - name: download-model
           image: quay.io/rgeada/llm_downloader:latest
@@ -55,7 +55,9 @@ spec:
             - bash
             - -c
             - |
-              model="ibm-granite/granite-guardian-hap-38m"
+              # model="ibm-granite/granite-guardian-hap-38m"
+              # model="h2oai/deberta_finetuned_pii"
+              model="ibm-granite/granite-guardian-3.0-2b"
               # model="microsoft/Phi-3-mini-4k-instruct"
               echo "starting download"
               /tmp/venv/bin/huggingface-cli download $model --local-dir /mnt/models/huggingface/$(basename $model)
@@ -92,7 +94,7 @@ spec:
 apiVersion: v1
 kind: Secret
 metadata:
-  name: aws-connection-minio-data-connection
+  name: aws-connection-minio-data-connection-guardrails-guardian
   labels:
     opendatahub.io/dashboard: 'true'
     opendatahub.io/managed: 'true'
@@ -103,7 +105,7 @@ data:
   AWS_ACCESS_KEY_ID: VEhFQUNDRVNTS0VZ
   AWS_DEFAULT_REGION: dXMtc291dGg=
   AWS_S3_BUCKET: aHVnZ2luZ2ZhY2U=
-  AWS_S3_ENDPOINT: aHR0cDovL21pbmlvOjkwMDA=
+  AWS_S3_ENDPOINT: aHR0cDovL21pbmlvLWd1YXJkcmFpbHMtZ3VhcmRpYW46OTAwMA==
   AWS_SECRET_ACCESS_KEY: VEhFU0VDUkVUS0VZ
 type: Opaque
 ---
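
Renaming the MinIO Service means the base64-encoded AWS_S3_ENDPOINT in the Secret has to be re-encoded to point at the new hostname; the access key, region, bucket, and secret key values are unchanged. A minimal check of the new encoding, using only values visible in this diff:

# Verify the re-encoded endpoint matches the renamed MinIO Service.
import base64

new_endpoint = base64.b64encode(b"http://minio-guardrails-guardian:9000").decode()
assert new_endpoint == "aHR0cDovL21pbmlvLWd1YXJkcmFpbHMtZ3VhcmRpYW46OTAwMA=="
# The old value decodes to the previous in-cluster hostname:
print(base64.b64decode("aHR0cDovL21pbmlvOjkwMDA=").decode())  # http://minio:9000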

detectors/huggingface/deploy/servingruntime.yaml

Lines changed: 11 additions & 4 deletions

@@ -1,7 +1,7 @@
 apiVersion: serving.kserve.io/v1alpha1
 kind: ServingRuntime
 metadata:
-  name: guardrails-detector-runtime
+  name: guardrails-detector-runtime-guardian
   annotations:
     openshift.io/display-name: Guardrails Detector ServingRuntime for KServe
     opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
@@ -17,13 +17,13 @@ spec:
       name: guardrails-detector-huggingface
   containers:
     - name: kserve-container
-      image: quay.io/rgeada/guardrails-detector-huggingface
+      image: quay.io/rh-ee-mmisiura/guardrails-detector-huggingface:c7598db
      command:
        - uvicorn
        - app:app
      args:
        - "--workers"
-        - "4"
+        - "1"
        - "--host"
        - "0.0.0.0"
        - "--port"
@@ -37,4 +37,11 @@ spec:
           value: /tmp/hf_home
       ports:
         - containerPort: 8000
-          protocol: TCP
+          protocol: TCP
+      resources:
+        requests:
+          memory: "18Gi" # pre-allocate 18Gi of memory -- might be needed for larger models
+          cpu: "1" # reserve 1 CPU core
+        limits:
+          memory: "20Gi" # limit to 20Gi of memory
+          cpu: "2" # limit to 2 CPU cores
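
Dropping uvicorn to a single worker presumably avoids loading the roughly 2B-parameter guardian model once per worker process, which is also what the 18Gi request / 20Gi limit are sized for. Once the InferenceService is ready, a request along the following lines should exercise the detector; the /api/v1/text/contents route, the detector-id header, and the payload shape follow the detections API this detector is typically paired with and are assumptions here, not something the diff confirms.

# Smoke-test sketch; route and payload shape are assumptions (see note above).
import requests

DETECTOR_URL = "http://localhost:8000"  # e.g. after port-forwarding the predictor pod

resp = requests.post(
    f"{DETECTOR_URL}/api/v1/text/contents",
    headers={"detector-id": "guardrails-detector-ibm-guardian",
             "Content-Type": "application/json"},
    json={"contents": ["You are a useless pile of scrap metal."], "detector_params": {}},
    timeout=120,
)
resp.raise_for_status()
print(resp.json())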
