Skip to content

Commit 839e07b

Browse files
updated
Signed-off-by: Francisco Javier Arceo <[email protected]>
1 parent 7422a9c commit 839e07b

File tree

2 files changed

+42
-11
lines changed

2 files changed

+42
-11
lines changed

module_4_rag/README.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,43 @@ flowchart TD;
5252
C[Materialize Online] --> D[Retrieval Augmented Generation];
5353
```
5454

55+
# Results
56+
57+
The simple demo below runs a query against the feature store and shows the retrieved data.
58+
59+
```python
60+
import pandas as pd
61+
62+
from feast import FeatureStore
63+
from batch_score_documents import run_model, TOKENIZER, MODEL
64+
from transformers import AutoTokenizer, AutoModel
65+
66+
df = pd.read_parquet("./feature_repo/data/city_wikipedia_summaries_with_embeddings.parquet")
67+
68+
store = FeatureStore(repo_path=".")
69+
70+
# Prepare a query vector
71+
question = "the most populous city in the U.S. state of Texas?"
72+
73+
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER)
74+
model = AutoModel.from_pretrained(MODEL)
75+
query_embedding = run_model(question, tokenizer, model)
76+
query = query_embedding.detach().cpu().numpy().tolist()[0]
77+
78+
# Retrieve top k documents
79+
features = store.retrieve_online_documents(
80+
feature="city_embeddings:Embeddings",
81+
query=query,
82+
top_k=3
83+
)
84+
```
85+
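Displaying the retrieved documents as a DataFrame, `features_df` (constructed from `features`; the conversion step is not shown above), will show: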
86+
87+
```
88+
$features_df
89+
90+
Embeddings distance
91+
0 [0.11749928444623947, -0.04684492573142052, 0.... 0.935567
92+
1 [0.10329511761665344, -0.07897591590881348, 0.... 0.939936
93+
2 [0.11634305864572525, -0.10321836173534393, -0... 0.983343
94+
```

module_4_rag/module_4.ipynb

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,9 @@
3838
"metadata": {},
3939
"outputs": [],
4040
"source": [
41+
"import os\n",
4142
"import pandas as pd\n",
42-
"import warnings\n",
43+
"from feast import FeatureStore\n",
4344
"\n",
4445
"from batch_score_documents import run_model, TOKENIZER, MODEL\n",
4546
"from transformers import AutoTokenizer, AutoModel"
@@ -164,15 +165,6 @@
164165
"df.head()"
165166
]
166167
},
167-
{
168-
"cell_type": "code",
169-
"execution_count": 4,
170-
"metadata": {},
171-
"outputs": [],
172-
"source": [
173-
"import os"
174-
]
175-
},
176168
{
177169
"cell_type": "code",
178170
"execution_count": 5,
@@ -380,7 +372,6 @@
380372
}
381373
],
382374
"source": [
383-
"from feast import FeatureStore\n",
384375
"store = FeatureStore(repo_path=\".\")"
385376
]
386377
},

0 commit comments

Comments
 (0)