Merge pull request #304 from sohanmaheshwar/jupyter-component

sohanmaheshwar · web-flow · commit 2c82d5d20245 · 2025-01-31T13:33:37.000+01:00
Jupyter Notebook component &amp; RAG pipeline guide
diff --git a/components/JupyterNotebookViewer.tsx b/components/JupyterNotebookViewer.tsx
@@ -0,0 +1,22 @@
+import React from "react";
+
+interface JupyterNotebookViewerProps {
+  fileUrl: string;
+}
+
+const JupyterNotebookViewer: React.FC<JupyterNotebookViewerProps> = ({ fileUrl }) => {
+  const nbviewerUrl = `https://nbviewer.org/github/${encodeURIComponent(fileUrl)}`;
+
+  return (
+    <div className="p-4">
+      <iframe
+        src={nbviewerUrl}
+        width="100%"
+        height="800px"
+        style={{ border: "none" }}
+      />
+    </div>
+  );
+};
+
+export default JupyterNotebookViewer;
diff --git a/pages/spicedb/ops/_meta.json b/pages/spicedb/ops/_meta.json
@@ -1,6 +1,7 @@
 {
   "observability": "Observability Tooling",
   "deploying-spicedb-operator": "Deploying the SpiceDB Operator",
-  "deploying-spicedb-on-eks": "Deploying SpiceDB on AWS EKS",
-  "bulk-operations": "Bulk Importing Relationships"
+  "deploying-spicedb-on-eks": "Deploying SpiceDB on Amazon EKS",
+  "bulk-operations": "Bulk Importing Relationships",
+  "secure-rag-pipelines": "Secure Your RAG Pipelines with Fine Grained Authorization"
 }
diff --git a/pages/spicedb/ops/secure-rag-pipelines.mdx b/pages/spicedb/ops/secure-rag-pipelines.mdx
@@ -0,0 +1,77 @@
+import JupyterNotebookViewer from "@/components/JupyterNotebookViewer";
+
+# Secure Your RAG Pipelines With Fine Grained Authorization
+
+Here's how you can use SpiceDB to safeguard sensitive data in RAG pipelines.
+You will learn how to pre-filter and post-filter vector database queries with a list of authorized object IDs to improve security and efficiency.
+
+This guide uses OpenAI, Pinecone, Langchain, Jupyter Notebook and SpiceDB
+
+## Why is this important?
+
+Building enterprise-ready AI poses challenges around data security, accuracy, scalability, and integration, especially in compliance-regulated industries like healthcare and finance.
+Firms are increasing efforts to mitigate risks associated with LLMs, particularly regarding sensitive data exfiltration of personally identifiable information and/or sensitive company data.
+The primary mitigation strategy is to build guardrails around Retrieval-Augmented Generation (RAG) to safeguard data while also optimizing query response quality and efficiency.
+
+To enable precise guardrails, one must implement permissions systems with advanced fine grained authorization capabilities such as returning lists of authorized subjects and accessible resources.
+Such systems ensure timely access to authorized data while preventing exfiltration of sensitive information, making RAGs more efficient and improving performance at scale.
+
+## Setup and Prerequisites
+
+- Access to a [SpiceDB](https://authzed.com/spicedb) instance.
+You can find instructions for installing SpiceDB [here](https://authzed.com/docs/spicedb/getting-started/install/macos)
+- A [Pinecone account](https://www.pinecone.io/) and API key
+- An [OpenAI Platform account](https://platform.openai.com/docs/overview) and API key
+- [Jupyter Notebook](https://jupyter.org/) running locally
+
+### Running SpiceDB
+
+Once you've installed SpiceDB, run a local instance with this command in your terminal:
+
+`spicedb serve --grpc-preshared-key rag-rebac-walkthrough`
+
+and you should see something like this that indicates an instance of SpiceDB is running locally:
+
+```
+8:28PM INF configured logging async=false format=auto log_level=inf
+o provider=zerolog
+8:28PM INF GOMEMLIMIT is updated GOMEMLIMIT=25769803776 package=git
+hub.com/KimMachineGun/automemlimit/memlimit
+8:28PM INF configured opentelemetry tracing endpoint= insecure=fals
+e provider=none sampleRatio=0.01 service=spicedb v=0
+8:28PM WRN this version of SpiceDB is out of date. See: https://git
+hub.com/authzed/spicedb/releases/tag/v1.39.1 latest-released-versio
+n=v1.39.1 this-version=v1.37.2
+8:28PM INF configuration ClusterDispatchCacheConfig.CacheKindForTes
+ting=(empty) ClusterDispatchCacheConfig.Enabled=true ClusterDispatc
+8:28PM INF using memory datastore engine
+8:28PM WRN in-memory datastore is not persistent and not feasible t
+8:28PM INF configured namespace cache defaultTTL=0 maxCost="32 MiB"
+8:28PM INF schema watch explicitly disabled
+8:28PM INF configured dispatch cache defaultTTL=20600 maxCost="164
+8:28PM INF configured dispatcher balancerconfig={"loadBalancingConfig":[{"consistent-hashring":{"replicationFactor":100,"spread":1}}]} concurrency-limit-check-permission=50 concurrency-limit-lookup-resources=50 concurrency-limit-lookup-subjects=50 concurrency-limit-reachable-resources=50
+8:28PM INF grpc server started serving addr=:50051 insecure=true network=tcp service=grpc workers=0
+8:28PM INF running server datastore=*schemacaching.definitionCachingProxy
+8:28PM INF http server started serving addr=:9090 insecure=true service=metrics
+8:28PM INF telemetry reporter scheduled endpoint=https://telemetry.authzed.com interval=1h0m0s next=5m14s
+```
+
+#### Download the Jupyter Notebook
+
+Clone the `workshops` [repository](https://github.com/authzed/workshops/) to your system and type `cd secure-rag-pipelines` to enter the working directory.
+
+Start the `01-rag.ipynb` Notebook locally by typing `jupyter 01-rag.ipynb` (or `python3 -m notebook`) in your terminal.
+
+## Add Fine Grained Authorization
+
+Here's the Jupyter Notebook with step-by-step instructions
+
+<JupyterNotebookViewer fileUrl="authzed/workshops/blob/main/secure-rag-pipelines/01-rag.ipynb" />
+
+## Using DeepSeek or Google Colab
+
+If you want to replace the OpenAI LLM with the DeepSeek (or any other) LLM, [check out this branch](https://github.com/authzed/workshops/tree/deepseek).
+It follows similar steps as the above guide, but uses the DeepSeek LLM via [OpenRouter](https://openrouter.ai/)
+
+To run through this workshop on a cloud notebook, [here's a branch](https://github.com/authzed/workshops/tree/google-colab) that uses Google Colab.
+Note that this guide requires an instance of SpiceDB running on [AuthZed Serverless](https://app.authzed.com/) for which you can create a free account.

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,7 @@`
`1`	`1`	`{`
`2`	`2`	`"observability": "Observability Tooling",`
`3`	`3`	`"deploying-spicedb-operator": "Deploying the SpiceDB Operator",`
`4`		`- "deploying-spicedb-on-eks": "Deploying SpiceDB on AWS EKS",`
`5`		`- "bulk-operations": "Bulk Importing Relationships"`
	`4`	`+ "deploying-spicedb-on-eks": "Deploying SpiceDB on Amazon EKS",`
	`5`	`+ "bulk-operations": "Bulk Importing Relationships",`
	`6`	`+ "secure-rag-pipelines": "Secure Your RAG Pipelines with Fine Grained Authorization"`
`6`	`7`	`}`