Commit 5ddea63

Merge pull request #4 from trustyai-explainability/main
[pull] main from trustyai-explainability:main
2 parents ce1679b + 0b1cbf3 commit 5ddea63

26 files changed: +981 additions, −1435 deletions

.vscode/launch.json

Lines changed: 2 additions & 13 deletions
@@ -4,23 +4,12 @@
     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
     "version": "0.2.0",
     "configurations": [
-
         {
-            "name": "Debug Ragas Distribution -- Remote",
+            "name": "Debug Ragas Distribution",
             "type": "debugpy",
             "request": "launch",
             "module": "llama_stack.cli.llama",
-            "args": ["stack", "run", "distribution/run-remote.yaml"],
-            "cwd": "${workspaceFolder}",
-            "envFile": "${workspaceFolder}/.env",
-            "justMyCode": false
-        },
-        {
-            "name": "Debug Ragas Distribution -- Inline",
-            "type": "debugpy",
-            "request": "launch",
-            "module": "llama_stack.cli.llama",
-            "args": ["stack", "run", "distribution/run-inline.yaml"],
+            "args": ["stack", "run", "distribution/run.yaml"],
             "cwd": "${workspaceFolder}",
             "envFile": "${workspaceFolder}/.env",
             "justMyCode": false

README.md

Lines changed: 22 additions & 18 deletions
@@ -14,8 +14,8 @@ This repository implements [Ragas](https://github.com/explodinggradients/ragas)
 The goal is to provide all of Ragas' evaluation functionality over Llama Stack's eval API, while leveraging Llama Stack's built-in APIs for inference (LLMs and embeddings), datasets, and benchmarks.
 
 There are two versions of the provider:
-- `inline`: runs the Ragas evaluation in the same process as the Llama Stack server.
-- `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines.
+- `inline`: runs the Ragas evaluation in the same process as the Llama Stack server. This is always available with the base installation.
+- `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. Only available when the remote dependencies are installed with `pip install llama-stack-provider-ragas[remote]`.
 
 ## Prerequisites
 - Python 3.12
@@ -41,12 +41,29 @@ There are two versions of the provider:
 ```
 - The sample LS distributions (one for the inline and one for the remote provider) are simple LS distributions that use Ollama for inference and embeddings. See the provider-specific sections below for setup and run commands.
 
-### Remote provider (default)
+### Inline provider (default with base installation)
+
+Create a `.env` file with the required environment variable:
+```bash
+EMBEDDING_MODEL=ollama/all-minilm:l6-v2
+```
+
+Run the server:
+```bash
+dotenv run uv run llama stack run distribution/run.yaml
+```
+
+### Remote provider (requires optional dependencies)
+
+First install the remote dependencies:
+```bash
+uv pip install -e ".[remote]"
+```
 
 Create a `.env` file with the following:
 ```bash
 # Required for both inline and remote
-EMBEDDING_MODEL=all-MiniLM-L6-v2
+EMBEDDING_MODEL=ollama/all-minilm:l6-v2
 
 # Required for remote provider
 KUBEFLOW_LLAMA_STACK_URL=<your-llama-stack-url>
@@ -75,22 +92,9 @@ Where:
 
 Run the server:
 ```bash
-dotenv run uv run llama stack run distribution/run-remote.yaml
-```
-
-### Inline provider (need to specify `.inline` in the module name)
-
-Create a `.env` file with the required environment variable:
-```bash
-EMBEDDING_MODEL=all-MiniLM-L6-v2
-```
-
-Run the server:
-```bash
-dotenv run uv run llama stack run distribution/run-inline.yaml
+dotenv run uv run llama stack run distribution/run.yaml
 ```
 
-You will notice that the `run-inline.yaml` file has the module name as `llama_stack_provider_ragas.inline`, in order to specify the inline provider.
 
 ## Usage
 See the demos in the `demos` directory.
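
For orientation, the following is a minimal Python sketch of what an evaluation call against the running server might look like. It assumes the standard `llama_stack_client` eval flow; the dataset rows, dataset and benchmark ids, metric name, and model id are placeholders, and the exact `benchmark_config` shape varies between Llama Stack versions, so treat the notebooks in `demos/` as the authoritative reference.

```python
# Illustrative only: placeholder ids and rows; exact request shapes depend on the
# installed llama-stack / llama-stack-client versions (see demos/ for the real flow).
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Register a small evaluation dataset (hypothetical id and rows).
client.datasets.register(
    dataset_id="ragas-demo-dataset",
    purpose="eval/question-answer",
    source={
        "type": "rows",
        "rows": [
            {
                "user_input": "What is Ragas?",
                "response": "Ragas is an evaluation toolkit for RAG pipelines.",
                "retrieved_contexts": ["Ragas provides metrics for RAG evaluation."],
            }
        ],
    },
)

# Register a benchmark backed by the Ragas eval provider (metric name is a placeholder).
client.benchmarks.register(
    benchmark_id="ragas-demo-benchmark",
    dataset_id="ragas-demo-dataset",
    scoring_functions=["answer_relevancy"],
)

# Kick off the evaluation; whether it runs inline or on Kubeflow is decided by run.yaml.
job = client.eval.run_eval(
    benchmark_id="ragas-demo-benchmark",
    benchmark_config={
        "eval_candidate": {
            "type": "model",
            "model": "ollama/llama3.2:3b",  # placeholder inference model
            "sampling_params": {"max_tokens": 512},
        }
    },
)
print(job)
```
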
Lines changed: 550 additions & 276 deletions
Large diffs are not rendered by default.

demos/inline_demo.ipynb

Lines changed: 0 additions & 880 deletions
This file was deleted.
Lines changed: 55 additions & 0 deletions
# Deploying Llama Stack on OpenShift AI with the remote Ragas eval provider

## Prerequisites
* OpenShift AI or Open Data Hub installed on your OpenShift cluster
* Data Science Pipeline Server configured
* Llama Stack Operator installed
* A vLLM-hosted model, served either through KServe or MaaS. You can follow these [docs](https://docs.redhat.com/en/documentation/red_hat_openshift_ai_cloud_service/1/html/working_with_rag/deploying-a-rag-stack-in-a-data-science-project_rag#Deploying-a-llama-model-with-kserve_rag) up to step 3.4.

## Setup
Create a secret to store your model's information.
```
export INFERENCE_MODEL="llama-3-2-3b"
export VLLM_URL="https://llama-32-3b-instruct-predictor:8443/v1"
export VLLM_TLS_VERIFY="false" # Use "true" in production!
export VLLM_API_TOKEN="<token identifier>"

oc create secret generic llama-stack-inference-model-secret \
  --from-literal INFERENCE_MODEL="$INFERENCE_MODEL" \
  --from-literal VLLM_URL="$VLLM_URL" \
  --from-literal VLLM_TLS_VERIFY="$VLLM_TLS_VERIFY" \
  --from-literal VLLM_API_TOKEN="$VLLM_API_TOKEN"
```

## Set up the deployment files
### Configuring the `kubeflow-ragas-config` ConfigMap
Update the [kubeflow-ragas-config](deployment/kubeflow-ragas-config.yaml) ConfigMap with the following data:
```bash
# See the project README for more details
EMBEDDING_MODEL=all-MiniLM-L6-v2
KUBEFLOW_LLAMA_STACK_URL=<your-llama-stack-url>
KUBEFLOW_PIPELINES_ENDPOINT=<your-kfp-endpoint>
KUBEFLOW_NAMESPACE=<your-namespace>
KUBEFLOW_BASE_IMAGE=quay.io/diegosquayorg/my-ragas-provider-image:latest
KUBEFLOW_RESULTS_S3_PREFIX=s3://my-bucket/ragas-results
KUBEFLOW_S3_CREDENTIALS_SECRET_NAME=<secret-name>
```

> [!NOTE]
> The `KUBEFLOW_LLAMA_STACK_URL` must be an external route.

### Configuring the `pipelines_token` Secret
Unfortunately, the Llama Stack distribution's service account does not have privileges to create pipeline runs. To work around this, we must provide a user token as a secret to the Llama Stack distribution.

Create the secret with:
```bash
# Gather your token with `oc whoami -t`
kubectl create secret generic kubeflow-pipelines-token \
  --from-literal=KUBEFLOW_PIPELINES_TOKEN=<your-pipelines-token>
```

## Deploy Llama Stack on OpenShift
You can now deploy the configuration files and the Llama Stack distribution with `oc apply -f deployment/kubeflow-ragas-config.yaml` and `oc apply -f deployment/llama-stack-distribution.yaml`.

You should now have a Llama Stack server on OpenShift with the remote Ragas eval provider configured.
You can now follow the [remote_demo.ipynb](../../demos/remote_demo.ipynb) demo; make sure you run it in a Data Science workbench and use the `LLAMA_STACK_URL` defined earlier. Alternatively, you can run it locally if you create a Route.
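
As a quick sanity check before opening the notebook, the sketch below connects to the deployed distribution from a workbench. The base URL is a placeholder (use the Route or the in-cluster service address for `lsd-ragas-example`), and the `models.list()` / `providers.list()` calls are used only as smoke tests.

```python
# Hedged sketch: verify the deployed Llama Stack server is reachable from a workbench.
# The base_url is a placeholder; substitute the Route URL or in-cluster service address.
import os

from llama_stack_client import LlamaStackClient

base_url = os.environ.get("LLAMA_STACK_URL", "http://lsd-ragas-example-service:8321")
client = LlamaStackClient(base_url=base_url)

# The vLLM-served model registered via the inference secret should show up here.
print([m.identifier for m in client.models.list()])

# A trustyai_ragas remote eval provider should be listed if the Kubeflow env vars resolved.
print([(p.api, p.provider_id) for p in client.providers.list()])
```
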
Lines changed: 12 additions & 0 deletions
apiVersion: v1
kind: ConfigMap
metadata:
  name: kubeflow-ragas-config
data:
  EMBEDDING_MODEL: "all-MiniLM-L6-v2"
  KUBEFLOW_LLAMA_STACK_URL: "<your-llama-stack-url>"
  KUBEFLOW_PIPELINES_ENDPOINT: "<your-kfp-endpoint>"
  KUBEFLOW_NAMESPACE: "<your-namespace>"
  KUBEFLOW_BASE_IMAGE: "quay.io/diegosquayorg/my-ragas-provider-image:latest"
  KUBEFLOW_RESULTS_S3_PREFIX: "s3://my-bucket/ragas-results"
  KUBEFLOW_S3_CREDENTIALS_SECRET_NAME: "<secret-name>"
Lines changed: 116 additions & 0 deletions
apiVersion: llamastack.io/v1alpha1
kind: LlamaStackDistribution
metadata:
  name: lsd-ragas-example
spec:
  replicas: 1
  server:
    containerSpec:
      resources:
        requests:
          cpu: 4
          memory: "12Gi"
        limits:
          cpu: 6
          memory: "14Gi"
      env:
        - name: INFERENCE_MODEL
          valueFrom:
            secretKeyRef:
              key: INFERENCE_MODEL
              name: llama-stack-inference-model-secret
              optional: true
        - name: VLLM_MAX_TOKENS
          value: "4096"
        - name: VLLM_URL
          valueFrom:
            secretKeyRef:
              key: VLLM_URL
              name: llama-stack-inference-model-secret
              optional: true
        - name: VLLM_TLS_VERIFY
          valueFrom:
            secretKeyRef:
              key: VLLM_TLS_VERIFY
              name: llama-stack-inference-model-secret
              optional: true
        - name: VLLM_API_TOKEN
          valueFrom:
            secretKeyRef:
              key: VLLM_API_TOKEN
              name: llama-stack-inference-model-secret
              optional: true
        - name: MILVUS_DB_PATH
          value: ~/milvus.db
        - name: FMS_ORCHESTRATOR_URL
          value: "http://localhost"
        - name: KUBEFLOW_PIPELINES_ENDPOINT
          valueFrom:
            configMapKeyRef:
              key: KUBEFLOW_PIPELINES_ENDPOINT
              name: kubeflow-ragas-config
              optional: true
        - name: KUBEFLOW_NAMESPACE
          valueFrom:
            configMapKeyRef:
              key: KUBEFLOW_NAMESPACE
              name: kubeflow-ragas-config
              optional: true
        - name: KUBEFLOW_BASE_IMAGE
          valueFrom:
            configMapKeyRef:
              key: KUBEFLOW_BASE_IMAGE
              name: kubeflow-ragas-config
              optional: true
        - name: KUBEFLOW_LLAMA_STACK_URL
          valueFrom:
            configMapKeyRef:
              key: KUBEFLOW_LLAMA_STACK_URL
              name: kubeflow-ragas-config
              optional: true
        - name: KUBEFLOW_RESULTS_S3_PREFIX
          valueFrom:
            configMapKeyRef:
              key: KUBEFLOW_RESULTS_S3_PREFIX
              name: kubeflow-ragas-config
              optional: true
        - name: KUBEFLOW_S3_CREDENTIALS_SECRET_NAME
          valueFrom:
            configMapKeyRef:
              key: KUBEFLOW_S3_CREDENTIALS_SECRET_NAME
              name: kubeflow-ragas-config
              optional: true
        - name: EMBEDDING_MODEL
          valueFrom:
            configMapKeyRef:
              key: EMBEDDING_MODEL
              name: kubeflow-ragas-config
              optional: true
        - name: KUBEFLOW_PIPELINES_TOKEN
          valueFrom:
            secretKeyRef:
              key: KUBEFLOW_PIPELINES_TOKEN
              name: kubeflow-pipelines-token
              optional: true
        - name: AWS_ACCESS_KEY_ID
          valueFrom:
            secretKeyRef:
              key: AWS_ACCESS_KEY_ID
              name: aws-credentials
              optional: true
        - name: AWS_SECRET_ACCESS_KEY
          valueFrom:
            secretKeyRef:
              key: AWS_SECRET_ACCESS_KEY
              name: aws-credentials
              optional: true
        - name: AWS_DEFAULT_REGION
          valueFrom:
            secretKeyRef:
              key: AWS_DEFAULT_REGION
              name: aws-credentials
              optional: true
      name: llama-stack
      port: 8321
  distribution:
    name: rh-dev
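
Since the distribution only wires configuration in as environment variables, a quick way to debug a misconfigured ConfigMap or secret is to check what actually reached the container. The sketch below is a hedged debugging aid to run inside the `llama-stack` container (for example via `oc exec`); the variable list simply mirrors the spec above.

```python
# Hedged debugging aid: report which expected variables are set inside the pod.
# Values are deliberately not printed so tokens and keys do not leak into logs.
import os

EXPECTED = [
    "INFERENCE_MODEL", "VLLM_URL", "VLLM_TLS_VERIFY", "VLLM_API_TOKEN",
    "EMBEDDING_MODEL", "KUBEFLOW_PIPELINES_ENDPOINT", "KUBEFLOW_NAMESPACE",
    "KUBEFLOW_BASE_IMAGE", "KUBEFLOW_LLAMA_STACK_URL", "KUBEFLOW_RESULTS_S3_PREFIX",
    "KUBEFLOW_S3_CREDENTIALS_SECRET_NAME", "KUBEFLOW_PIPELINES_TOKEN",
    "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION",
]

for name in EXPECTED:
    status = "set" if os.environ.get(name) else "MISSING"
    print(f"{name:40s} {status}")
```
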

distribution/run-inline.yaml

Lines changed: 0 additions & 61 deletions
This file was deleted.
Lines changed: 8 additions & 3 deletions
@@ -9,9 +9,9 @@ apis:
 - datasetio
 providers:
   eval:
-  - provider_id: trustyai_ragas
+  - provider_id: ${env.KUBEFLOW_LLAMA_STACK_URL:+trustyai_ragas_remote}
     provider_type: remote::trustyai_ragas
-    module: llama_stack_provider_ragas
+    module: llama_stack_provider_ragas.remote
     config:
       embedding_model: ${env.EMBEDDING_MODEL}
       kubeflow_config:
@@ -21,7 +21,12 @@ providers:
         namespace: ${env.KUBEFLOW_NAMESPACE}
         llama_stack_url: ${env.KUBEFLOW_LLAMA_STACK_URL}
         base_image: ${env.KUBEFLOW_BASE_IMAGE}
-        pipelines_token: ${env.KUBEFLOW_PIPELINES_TOKEN:=}
+        pipelines_api_token: ${env.KUBEFLOW_PIPELINES_TOKEN:=}
+  - provider_id: ${env.EMBEDDING_MODEL:+trustyai_ragas_inline}
+    provider_type: inline::trustyai_ragas
+    module: llama_stack_provider_ragas.inline
+    config:
+      embedding_model: ${env.EMBEDDING_MODEL}
   datasetio:
   - provider_id: localfs
     provider_type: inline::localfs
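
The provider ids above lean on Llama Stack's environment-variable substitution: `${env.VAR:=default}` falls back to a default, while `${env.VAR:+value}` expands to `value` only when `VAR` is set, so a provider whose id collapses to an empty string is effectively skipped — which appears to be the intent here. The sketch below emulates that bash-style behaviour purely for illustration (only the two operator forms used in this file), assuming the substitution semantics match.

```python
# Illustration only: emulate the assumed bash-style semantics of the two operators
# used in run.yaml (":=" default, ":+" conditional). Not Llama Stack's actual parser.
import re


def substitute(template: str, env: dict[str, str]) -> str:
    def repl(match: re.Match) -> str:
        var, op, word = match.group(1), match.group(2), match.group(3)
        if op == ":+":                 # expands to `word` only when the variable is set
            return word if env.get(var) else ""
        return env.get(var) or word    # ":=" -> the variable's value, else the default

    return re.sub(r"\$\{env\.(\w+)(:[+=])([^}]*)\}", repl, template)


# With only EMBEDDING_MODEL set, the remote provider id collapses to an empty string
# (so that provider is skipped), while the inline provider id survives.
env = {"EMBEDDING_MODEL": "ollama/all-minilm:l6-v2"}
print(substitute("${env.KUBEFLOW_LLAMA_STACK_URL:+trustyai_ragas_remote}", env))  # ""
print(substitute("${env.EMBEDDING_MODEL:+trustyai_ragas_inline}", env))           # "trustyai_ragas_inline"
```
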

docs/modules/ROOT/pages/index.adoc

Lines changed: 2 additions & 2 deletions
@@ -15,8 +15,8 @@ The goal is to provide all of Ragas' evaluation functionality over Llama Stack's
 
 There are two versions of the provider:
 
-* `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. This is the *default* when using the module-based import.
-* `inline`: runs the Ragas evaluation in the same process as the Llama Stack server.
+* `inline`: runs the Ragas evaluation in the same process as the Llama Stack server. This is always available with the base installation.
+* `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. Only available when remote dependencies are installed with `pip install llama-stack-provider-ragas[remote]`.
 
 == Getting Started
 