|
This is a demo showing how you can use Feast to do Retrieval Augmented Generation (RAG).

## Installation via pyenv and Poetry

This demo assumes you have pyenv (2.3.10) and Poetry (1.4.1) installed on your machine, as well as Python 3.9.

```bash
pyenv local 3.9
poetry shell
poetry install
```

## Setting up the data and Feast

To fetch the data, run:
```bash
python pull_states.py
```
This will output a file called `city_wikipedia_summaries.csv`.

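If you want a quick look at what was scraped before moving on, an optional check like the one below works; the column names depend on what `pull_states.py` writes, so treat them as unknown here.

```python
# Optional: preview the scraped summaries
# (column names depend on pull_states.py)
import pandas as pd

df = pd.read_csv("city_wikipedia_summaries.csv")
print(df.columns.tolist())
print(df.head())
```
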
Then run:
```bash
python batch_score_documents.py
```
This will output data to `data/city_wikipedia_summaries_with_embeddings.parquet`.

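As a quick sanity check before wiring up Feast, you can confirm the embeddings landed in the parquet file. The `Embeddings` column name matches the feature used later in this README; the rest of the schema is an assumption.

```python
# Optional sanity check on the batch-scored output
import pandas as pd

df = pd.read_parquet("data/city_wikipedia_summaries_with_embeddings.parquet")
print(df.columns.tolist())
# Assumes an "Embeddings" column with one vector per row, matching the
# "city_embeddings:Embeddings" feature retrieved in the Results section
print(len(df["Embeddings"].iloc[0]))
```
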
Next, we'll need to do some Feast setup and move the data into the repo created by
Feast.

## Feast

To get started, make sure you have Feast and PostgreSQL installed (Feast's PostgreSQL extras can be pulled in with `pip install 'feast[postgres]'`).

First, run:
```bash
cp -r ./data feature_repo/
```

Then open the `module_4.ipynb` notebook and follow the instructions there.

It will walk you through a short tutorial on retrieving the top `k` most similar
documents using PGVector.

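For orientation before opening the notebook: the retrieval call in the Results section references a `city_embeddings:Embeddings` feature, so the feature repo defines a feature view along these lines. This is a minimal sketch rather than the notebook's exact code; the entity name, join key, and timestamp field are assumptions.

```python
# Minimal sketch of a feature view over the embedded documents.
# "city_embeddings" and "Embeddings" match the retrieval call below;
# everything else here is an illustrative assumption.
from feast import Entity, FeatureView, Field, FileSource
from feast.types import Array, Float32

item = Entity(name="item_id", join_keys=["item_id"])

city_embeddings = FeatureView(
    name="city_embeddings",
    entities=[item],
    schema=[Field(name="Embeddings", dtype=Array(Float32))],
    source=FileSource(
        path="data/city_wikipedia_summaries_with_embeddings.parquet",
        timestamp_field="event_timestamp",
    ),
)
```
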
# Overview

The overview is relatively simple: the goal is to define an architecture that
supports the following flow:

```mermaid
flowchart TD;
    A[Pull Data] --> B[Batch Score Embeddings];
    B[Batch Score Embeddings] --> C[Materialize Online];
    C[Materialize Online] --> D[Retrieval Augmented Generation];
```

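The "Materialize Online" step corresponds to applying the repo definitions and loading the latest feature values into the online store. The notebook covers this, but as a rough Python sketch (repo path assumed):

```python
# Rough sketch of the "Materialize Online" step (the notebook covers this)
from datetime import datetime
from feast import FeatureStore

store = FeatureStore(repo_path="feature_repo")
# Load feature values up to now into the online (PostgreSQL/PGVector) store
store.materialize_incremental(end_date=datetime.utcnow())
```
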
# Results

The demo runs the code below; the retrieved data is shown after it.

```python
import pandas as pd

from feast import FeatureStore
from batch_score_documents import run_model, TOKENIZER, MODEL
from transformers import AutoTokenizer, AutoModel

# Optional: inspect the embedded documents
df = pd.read_parquet("./feature_repo/data/city_wikipedia_summaries_with_embeddings.parquet")

# Run from inside the feature repo so Feast can find feature_store.yaml
store = FeatureStore(repo_path=".")

# Prepare a query vector
question = "the most populous city in the U.S. state of Texas?"

tokenizer = AutoTokenizer.from_pretrained(TOKENIZER)
model = AutoModel.from_pretrained(MODEL)
query_embedding = run_model(question, tokenizer, model)
query = query_embedding.detach().cpu().numpy().tolist()[0]

# Retrieve the top k most similar documents
features = store.retrieve_online_documents(
    feature="city_embeddings:Embeddings",
    query=query,
    top_k=3,
)
# Convert the online response to a DataFrame for inspection
features_df = features.to_df()
```
Running `features_df.head()` will then show:

```
features_df.head()
                                          Embeddings  distance
0  [0.11749928444623947, -0.04684492573142052, 0....  0.935567
1  [0.10329511761665344, -0.07897591590881348, 0....  0.939936
2  [0.11634305864572525, -0.10321836173534393, -0...  0.983343
```