Skip to content

Commit ab5f6ac

Browse files
authored
docs: add openlayer integration (#827)
1 parent f22e903 commit ab5f6ac

File tree

2 files changed

+307
-0
lines changed

2 files changed

+307
-0
lines changed

docs/howtos/integrations/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ langsmith.ipynb
1212
ragas-arize.ipynb
1313
langfuse.ipynb
1414
athina.ipynb
15+
openlayer.ipynb
1516
zeno.ipynb
1617
tonic-validate.ipynb
1718
ragas_haystack.ipynb
Lines changed: 306 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,306 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "860c9e4b-dc7c-4f2e-8f60-96cccf61d43c",
6+
"metadata": {},
7+
"source": [
8+
"# Evaluating RAG pipelines with Openlayer and Ragas\n",
9+
"\n",
10+
"[Openlayer](https://www.openlayer.com/) is an evaluation tool that fits into your development and production pipelines to help you ship high-quality models with confidence.\n",
11+
"\n",
12+
"This notebook should be used together with [this blog post](https://www.openlayer.com/blog/post/evaluating-rag-pipelines-with-ragas-and-openlayer)."
13+
]
14+
},
15+
{
16+
"cell_type": "markdown",
17+
"id": "3ad3ed0c-e495-4078-ab95-a70fa6322ab1",
18+
"metadata": {},
19+
"source": [
20+
"## Pre-requisites"
21+
]
22+
},
23+
{
24+
"cell_type": "code",
25+
"execution_count": null,
26+
"id": "7ded5103-b6ac-482e-9217-347f701333b4",
27+
"metadata": {},
28+
"outputs": [],
29+
"source": [
30+
"%%bash\n",
31+
"git clone https://huggingface.co/datasets/explodinggradients/prompt-engineering-papers"
32+
]
33+
},
34+
{
35+
"cell_type": "code",
36+
"execution_count": null,
37+
"id": "58f0951f-5de9-4eca-8b0c-e77d5ac99bad",
38+
"metadata": {},
39+
"outputs": [],
40+
"source": [
41+
"import os\n",
42+
"\n",
43+
"os.environ[\"OPENAI_API_KEY\"] = \"YOUR_OPENAI_API_KEY_HERE\""
44+
]
45+
},
46+
{
47+
"cell_type": "markdown",
48+
"id": "93b95703-0826-47b2-8b0b-e0f982b1e170",
49+
"metadata": {},
50+
"source": [
51+
"## Synthetic test data generation"
52+
]
53+
},
54+
{
55+
"cell_type": "code",
56+
"execution_count": null,
57+
"id": "69cfc916-148a-4608-8eac-b75cc988b228",
58+
"metadata": {},
59+
"outputs": [],
60+
"source": [
61+
"from llama_index import SimpleDirectoryReader\n",
62+
"from ragas.testset.generator import TestsetGenerator\n",
63+
"from ragas.testset.evolutions import simple, reasoning, multi_context\n",
64+
"\n",
65+
"# load documents \n",
66+
"dir_path = \"./prompt-engineering-papers\"\n",
67+
"reader = SimpleDirectoryReader(dir_path,num_files_limit=2)\n",
68+
"documents = reader.load_data()\n",
69+
"\n",
70+
"# generator with openai models\n",
71+
"generator = TestsetGenerator.with_openai()\n",
72+
"\n",
73+
"# set question type distribution\n",
74+
"distribution = {simple: 0.5, reasoning: 0.25, multi_context: 0.25}\n",
75+
"\n",
76+
"# generate testset\n",
77+
"testset = generator.generate_with_llamaindex_docs(documents, test_size=10, distributions=distribution)\n",
78+
"test_df = testset.to_pandas()\n",
79+
"test_df.head()"
80+
]
81+
},
82+
{
83+
"cell_type": "markdown",
84+
"id": "9c802981-892e-4fed-bb73-dede5540fc6c",
85+
"metadata": {},
86+
"source": [
87+
"## Building RAG"
88+
]
89+
},
90+
{
91+
"cell_type": "code",
92+
"execution_count": null,
93+
"id": "72167cb6-bd8a-4d8b-a14c-142235f2ebe0",
94+
"metadata": {},
95+
"outputs": [],
96+
"source": [
97+
"import nest_asyncio\n",
98+
"from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n",
99+
"from llama_index.embeddings import OpenAIEmbedding\n",
100+
"\n",
101+
"\n",
102+
"nest_asyncio.apply()\n",
103+
"\n",
104+
"\n",
105+
"def build_query_engine(documents):\n",
106+
" vector_index = VectorStoreIndex.from_documents(\n",
107+
" documents,\n",
108+
" service_context=ServiceContext.from_defaults(chunk_size=512, embed_model=OpenAIEmbedding()),\n",
109+
" )\n",
110+
"\n",
111+
" query_engine = vector_index.as_query_engine(similarity_top_k=2)\n",
112+
" return query_engine\n"
113+
]
114+
},
115+
{
116+
"cell_type": "code",
117+
"execution_count": null,
118+
"id": "a5e47e5b-fa1a-4f07-b4a4-7493b1d58cc7",
119+
"metadata": {},
120+
"outputs": [],
121+
"source": [
122+
"query_engine = build_query_engine(documents)"
123+
]
124+
},
125+
{
126+
"cell_type": "code",
127+
"execution_count": null,
128+
"id": "6469b8ef-f9a3-4fb0-887a-0b70bce59dc0",
129+
"metadata": {},
130+
"outputs": [],
131+
"source": [
132+
"def generate_single_response(query_engine, question):\n",
133+
" \n",
134+
" response = query_engine.query(question)\n",
135+
" return {\n",
136+
" \"answer\":response.response,\n",
137+
" \"contexts\":[c.node.get_content() for c in response.source_nodes]\n",
138+
" }"
139+
]
140+
},
141+
{
142+
"cell_type": "code",
143+
"execution_count": null,
144+
"id": "2123caed-a573-4e4e-bb60-41c15de6705f",
145+
"metadata": {},
146+
"outputs": [],
147+
"source": [
148+
"question = \"What are some strategies proposed to enhance the in-context learning capability of language models?\"\n",
149+
"generate_single_response(query_engine, question)"
150+
]
151+
},
152+
{
153+
"cell_type": "code",
154+
"execution_count": null,
155+
"id": "3c88035b-3383-44a6-bd8a-08a172f11a36",
156+
"metadata": {},
157+
"outputs": [],
158+
"source": [
159+
"from datasets import Dataset\n",
160+
"\n",
161+
"def generate_ragas_dataset(query_engine, test_df):\n",
162+
"\n",
163+
" test_questions = test_df[\"question\"].values\n",
164+
" responses = [generate_single_response(query_engine,q) for q in test_questions]\n",
165+
"\n",
166+
"\n",
167+
" dataset_dict = {\n",
168+
" \"question\": test_questions,\n",
169+
" \"answer\": [response[\"answer\"] for response in responses],\n",
170+
" \"contexts\":[response[\"contexts\"] for response in responses],\n",
171+
" \"ground_truth\":test_df[\"ground_truth\"].values.tolist()\n",
172+
" \n",
173+
" }\n",
174+
" ds = Dataset.from_dict(dataset_dict)\n",
175+
" return ds"
176+
]
177+
},
178+
{
179+
"cell_type": "code",
180+
"execution_count": null,
181+
"id": "437368a5-3819-4ae1-b825-ad95664206ae",
182+
"metadata": {},
183+
"outputs": [],
184+
"source": [
185+
"ragas_dataset = generate_ragas_dataset(query_engine, test_df)\n",
186+
"ragas_df = ragas_dataset.to_pandas()"
187+
]
188+
},
189+
{
190+
"cell_type": "markdown",
191+
"id": "10702a1e-276d-45f9-9d81-2be1bd98ce3d",
192+
"metadata": {},
193+
"source": [
194+
"## Commit to Openlayer"
195+
]
196+
},
197+
{
198+
"cell_type": "code",
199+
"execution_count": null,
200+
"id": "ced5f583-b849-4aae-8397-2bd9006bb69f",
201+
"metadata": {},
202+
"outputs": [],
203+
"source": [
204+
"import openlayer\n",
205+
"from openlayer.tasks import TaskType\n",
206+
"client = openlayer.OpenlayerClient(\"YOUR_OPENLAYER_API_KEY_HERE\")"
207+
]
208+
},
209+
{
210+
"cell_type": "code",
211+
"execution_count": null,
212+
"id": "15c6af02-c9bc-4368-82a1-43cf849446d3",
213+
"metadata": {},
214+
"outputs": [],
215+
"source": [
216+
"project = client.create_project(\n",
217+
" name=\"My-Rag-Project\",\n",
218+
" task_type=TaskType.LLM,\n",
219+
" description=\"Evaluating an LLM used for product development.\"\n",
220+
")"
221+
]
222+
},
223+
{
224+
"cell_type": "code",
225+
"execution_count": null,
226+
"id": "419f1392-4c44-4856-af5f-1bd04de1de7c",
227+
"metadata": {},
228+
"outputs": [],
229+
"source": [
230+
"validation_dataset_config = {\n",
231+
" \"contextColumnName\": \"contexts\",\n",
232+
" \"questionColumnName\":\"question\",\n",
233+
" \"inputVariableNames\": [\"question\"],\n",
234+
" \"label\": \"validation\",\n",
235+
" \"outputColumnName\": \"answer\",\n",
236+
" \"groundTruthColumnName\":\"ground_truth\"\n",
237+
"}\n",
238+
"project.add_dataframe(\n",
239+
" dataset_df=ragas_df,\n",
240+
" dataset_config=validation_dataset_config,\n",
241+
")"
242+
]
243+
},
244+
{
245+
"cell_type": "code",
246+
"execution_count": null,
247+
"id": "31c51305-2808-4cae-85c2-b261ca0d98c1",
248+
"metadata": {},
249+
"outputs": [],
250+
"source": [
251+
"model_config = {\n",
252+
" \"inputVariableNames\": [\"question\"],\n",
253+
" \"modelType\": \"shell\",\n",
254+
"\t\"metadata\": {\n",
255+
" \"top_k\": 2,\n",
256+
" \"chunk_size\": 512,\n",
257+
" \"embeddings\": \"OpenAI\"\n",
258+
"\t},\n",
259+
"}\n",
260+
"project.add_model(\n",
261+
"\tmodel_config=model_config\n",
262+
")"
263+
]
264+
},
265+
{
266+
"cell_type": "code",
267+
"execution_count": null,
268+
"id": "471643ba-5e5d-4500-9745-f0c355f744a1",
269+
"metadata": {},
270+
"outputs": [],
271+
"source": [
272+
"project.commit(\"Initial commit!\")\n",
273+
"project.push()"
274+
]
275+
},
276+
{
277+
"cell_type": "code",
278+
"execution_count": null,
279+
"id": "b602dbbc-cc60-48b5-9bab-ae684c61cbff",
280+
"metadata": {},
281+
"outputs": [],
282+
"source": []
283+
}
284+
],
285+
"metadata": {
286+
"kernelspec": {
287+
"display_name": "Python 3 (ipykernel)",
288+
"language": "python",
289+
"name": "python3"
290+
},
291+
"language_info": {
292+
"codemirror_mode": {
293+
"name": "ipython",
294+
"version": 3
295+
},
296+
"file_extension": ".py",
297+
"mimetype": "text/x-python",
298+
"name": "python",
299+
"nbconvert_exporter": "python",
300+
"pygments_lexer": "ipython3",
301+
"version": "3.9.18"
302+
}
303+
},
304+
"nbformat": 4,
305+
"nbformat_minor": 5
306+
}

0 commit comments

Comments
 (0)