Skip to content

Commit fce8266

Browse files
thekaranacharya and zsimjee
authored and committed
Add LlamaIndex example with GuardrailsOutputParser
1 parent 59057f0 commit fce8266

File tree

2 files changed

+378
-0
lines changed

2 files changed

+378
-0
lines changed
Lines changed: 377 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,377 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "9c48213d-6e6a-4c10-838a-2a7c710c3a05",
6+
"metadata": {},
7+
"source": [
8+
"# Using `GuardrailsOutputParser` in `LlamaIndex`\n"
9+
]
10+
},
11+
{
12+
"attachments": {},
13+
"cell_type": "markdown",
14+
"id": "cf54a5a8",
15+
"metadata": {},
16+
"source": [
17+
"If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙.\n"
18+
]
19+
},
20+
{
21+
"attachments": {},
22+
"cell_type": "markdown",
23+
"id": "39bc790a",
24+
"metadata": {},
25+
"source": [
26+
"#### Download Data\n"
27+
]
28+
},
29+
{
30+
"cell_type": "code",
31+
"execution_count": null,
32+
"id": "649bea0c",
33+
"metadata": {},
34+
"outputs": [],
35+
"source": [
36+
"!mkdir -p 'data/paul_graham/'\n",
37+
"!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'"
38+
]
39+
},
40+
{
41+
"cell_type": "markdown",
42+
"id": "50d3b817-b70e-4667-be4f-d3a0fe4bd119",
43+
"metadata": {},
44+
"source": [
45+
"#### Load documents, build the VectorStoreIndex\n"
46+
]
47+
},
48+
{
49+
"cell_type": "code",
50+
"execution_count": null,
51+
"id": "690a6918-7c75-4f95-9ccc-d2c4a1fe00d7",
52+
"metadata": {},
53+
"outputs": [],
54+
"source": [
55+
"import logging\n",
56+
"import sys\n",
57+
"\n",
58+
"logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
59+
"logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
60+
"\n",
61+
"from llama_index import VectorStoreIndex, SimpleDirectoryReader\n",
62+
"from IPython.display import Markdown, display\n",
63+
"\n",
64+
"import openai\n",
65+
"\n",
66+
"openai.api_key = \"<YOUR_OPENAI_API_KEY>\""
67+
]
68+
},
69+
{
70+
"cell_type": "code",
71+
"execution_count": null,
72+
"id": "03d1691e-544b-454f-825b-5ee12f7faa8a",
73+
"metadata": {},
74+
"outputs": [],
75+
"source": [
76+
"# load documents\n",
77+
"documents = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()"
78+
]
79+
},
80+
{
81+
"cell_type": "code",
82+
"execution_count": null,
83+
"id": "ad144ee7-96da-4dd6-be00-fd6cf0c78e58",
84+
"metadata": {},
85+
"outputs": [
86+
{
87+
"name": "stdout",
88+
"output_type": "stream",
89+
"text": [
90+
"INFO:llama_index.token_counter.token_counter:> [build_index_from_documents] Total LLM token usage: 0 tokens\n",
91+
"> [build_index_from_documents] Total LLM token usage: 0 tokens\n",
92+
"INFO:llama_index.token_counter.token_counter:> [build_index_from_documents] Total embedding token usage: 18579 tokens\n",
93+
"> [build_index_from_documents] Total embedding token usage: 18579 tokens\n"
94+
]
95+
}
96+
],
97+
"source": [
98+
"index = VectorStoreIndex.from_documents(documents, chunk_size=512)"
99+
]
100+
},
101+
{
102+
"cell_type": "markdown",
103+
"id": "8b7d7c61-b5d7-4b8f-b90b-3ebee1103f27",
104+
"metadata": {},
105+
"source": [
106+
"#### Define Query + Guardrails Spec\n"
107+
]
108+
},
109+
{
110+
"cell_type": "code",
111+
"execution_count": null,
112+
"id": "6fb88295-0840-4e2d-b79b-def0b0a63a7f",
113+
"metadata": {},
114+
"outputs": [],
115+
"source": [
116+
"from llama_index.output_parsers import GuardrailsOutputParser\n",
117+
"from llama_index.llm_predictor import StructuredLLMPredictor"
118+
]
119+
},
120+
{
121+
"cell_type": "code",
122+
"execution_count": null,
123+
"id": "057139d2-09e8-4b8d-83a1-a2356a1475a8",
124+
"metadata": {},
125+
"outputs": [],
126+
"source": [
127+
"llm_predictor = StructuredLLMPredictor()"
128+
]
129+
},
130+
{
131+
"cell_type": "markdown",
132+
"id": "bc25edf7-9343-4e82-a3f1-eec4281a9371",
133+
"metadata": {},
134+
"source": [
135+
"**Define custom QA and Refine Prompts**\n"
136+
]
137+
},
138+
{
139+
"cell_type": "code",
140+
"execution_count": null,
141+
"id": "2833d086-d240-4798-b3c5-a83ac4593b0e",
142+
"metadata": {},
143+
"outputs": [],
144+
"source": [
145+
"from llama_index.prompts import PromptTemplate\n",
146+
"from llama_index.prompts.default_prompts import (\n",
147+
" DEFAULT_TEXT_QA_PROMPT_TMPL,\n",
148+
" DEFAULT_REFINE_PROMPT_TMPL,\n",
149+
")"
150+
]
151+
},
152+
{
153+
"cell_type": "markdown",
154+
"id": "dba8513e",
155+
"metadata": {},
156+
"source": [
157+
"**Define Guardrails Spec**\n"
158+
]
159+
},
160+
{
161+
"cell_type": "code",
162+
"execution_count": null,
163+
"id": "a4b9201d-fe16-4cc0-8135-a08d9928625d",
164+
"metadata": {},
165+
"outputs": [],
166+
"source": [
167+
"# You can either define a RailSpec and initialise a Guard object from_rail_string()\n",
168+
"# OR define Pydantic classes and initialise a Guard object from_pydantic()\n",
169+
"# For more info: https://docs.guardrailsai.com/defining_guards/pydantic/\n",
170+
"# Guardrails recommends Pydantic\n",
171+
"\n",
172+
"from pydantic import BaseModel, Field\n",
173+
"from typing import List\n",
174+
"import guardrails as gd\n",
175+
"\n",
176+
"\n",
177+
"class Point(BaseModel):\n",
178+
" # In all the fields below, you can define validators as well\n",
179+
" # Left out for brevity\n",
180+
" explanation: str = Field()\n",
181+
" explanation2: str = Field()\n",
182+
" explanation3: str = Field()\n",
183+
"\n",
184+
"\n",
185+
"class BulletPoints(BaseModel):\n",
186+
" points: List[Point] = Field(\n",
187+
" description=\"Bullet points regarding events in the author's life.\"\n",
188+
" )\n",
189+
"\n",
190+
"\n",
191+
"# Define the prompt\n",
192+
"prompt = \"\"\"\n",
193+
"Query string here.\n",
194+
"\n",
195+
"${gr.xml_prefix_prompt}\n",
196+
"\n",
197+
"${output_schema}\n",
198+
"\n",
199+
"${gr.json_suffix_prompt_v2_wo_none}\n",
200+
"\"\"\""
201+
]
202+
},
203+
{
204+
"cell_type": "code",
205+
"execution_count": null,
206+
"id": "f7af4ebf-1dff-48ec-9fb7-8926af45b6a0",
207+
"metadata": {},
208+
"outputs": [],
209+
"source": [
210+
"# Create a guard object\n",
211+
"guard = gd.Guard.from_pydantic(output_class=BulletPoints, prompt=prompt)\n",
212+
"\n",
213+
"# Create output parse object\n",
214+
"output_parser = GuardrailsOutputParser(guard, llm=llm_predictor.llm)"
215+
]
216+
},
217+
{
218+
"cell_type": "code",
219+
"execution_count": null,
220+
"id": "a9b440d4-6fb4-46e6-973f-44207b432d3f",
221+
"metadata": {},
222+
"outputs": [],
223+
"source": [
224+
"# NOTE: we use the same output parser for both prompts, though you can choose to use different parsers\n",
225+
"# NOTE: here we add formatting instructions to the prompts.\n",
226+
"\n",
227+
"fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL)\n",
228+
"fmt_refine_tmpl = output_parser.format(DEFAULT_REFINE_PROMPT_TMPL)\n",
229+
"\n",
230+
"qa_prompt = PromptTemplate(fmt_qa_tmpl, output_parser=output_parser)\n",
231+
"refine_prompt = PromptTemplate(fmt_refine_tmpl, output_parser=output_parser)"
232+
]
233+
},
234+
{
235+
"cell_type": "code",
236+
"execution_count": null,
237+
"id": "1ba18a80-35f4-4fd4-9b13-9f13f84db4fe",
238+
"metadata": {},
239+
"outputs": [
240+
{
241+
"name": "stdout",
242+
"output_type": "stream",
243+
"text": [
244+
"Context information is below.\n",
245+
"---------------------\n",
246+
"{context_str}\n",
247+
"---------------------\n",
248+
"Given the context information and not prior knowledge, answer the query.\n",
249+
"Query: {query_str}\n",
250+
"Answer: \n",
251+
"\n",
252+
"\n",
253+
"Given below is XML that describes the information to extract from this document and the tags to extract it into.\n",
254+
"\n",
255+
"\n",
256+
"<output>\n",
257+
" <list name=\"points\" description=\"Bullet points regarding events in the author's life.\">\n",
258+
" <object>\n",
259+
" <string name=\"explanation\"/>\n",
260+
" <string name=\"explanation2\"/>\n",
261+
" <string name=\"explanation3\"/>\n",
262+
" </object>\n",
263+
" </list>\n",
264+
"</output>\n",
265+
"\n",
266+
"\n",
267+
"\n",
268+
"ONLY return a valid JSON object (no other text is necessary). The JSON MUST conform to the XML format, including any types and format requests e.g. requests for lists, objects and specific types. Be correct and concise.\n",
269+
"\n",
270+
"\n"
271+
]
272+
}
273+
],
274+
"source": [
275+
"# take a look at the new QA template!\n",
276+
"print(fmt_qa_tmpl)"
277+
]
278+
},
279+
{
280+
"cell_type": "markdown",
281+
"id": "b6caf93b-6345-4c65-a346-a95b0f1746c4",
282+
"metadata": {},
283+
"source": [
284+
"#### Query Index\n"
285+
]
286+
},
287+
{
288+
"cell_type": "code",
289+
"execution_count": null,
290+
"id": "fb9cdf43-0f31-4c36-869b-df9fa50aebdb",
291+
"metadata": {},
292+
"outputs": [
293+
{
294+
"name": "stdout",
295+
"output_type": "stream",
296+
"text": [
297+
"INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 754 tokens\n",
298+
"> [query] Total LLM token usage: 754 tokens\n",
299+
"INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 11 tokens\n",
300+
"> [query] Total embedding token usage: 11 tokens\n"
301+
]
302+
}
303+
],
304+
"source": [
305+
"query_engine = index.as_query_engine(\n",
306+
" text_qa_template=qa_prompt,\n",
307+
" refine_template=refine_prompt,\n",
308+
" llm_predictor=llm_predictor,\n",
309+
")\n",
310+
"response = query_engine.query(\n",
311+
" \"What are the three items the author did growing up?\",\n",
312+
")"
313+
]
314+
},
315+
{
316+
"cell_type": "code",
317+
"execution_count": null,
318+
"id": "bc7760b6-5be3-4303-b97e-3f5edacf674b",
319+
"metadata": {},
320+
"outputs": [
321+
{
322+
"name": "stdout",
323+
"output_type": "stream",
324+
"text": [
325+
"{\n",
326+
" \"output\": {\n",
327+
" \"list\": {\n",
328+
" \"name\": \"points\",\n",
329+
" \"description\": \"Bullet points regarding events in the author's life.\",\n",
330+
" \"object\": {\n",
331+
" \"string\": [\n",
332+
" {\n",
333+
" \"name\": \"explanation\",\n",
334+
" \"content\": \"Writing short stories\"\n",
335+
" },\n",
336+
" {\n",
337+
" \"name\": \"explanation2\",\n",
338+
" \"content\": \"Programming on the IBM 1401\"\n",
339+
" },\n",
340+
" {\n",
341+
" \"name\": \"explanation3\",\n",
342+
" \"content\": \"Building a microcomputer\"\n",
343+
" }\n",
344+
" ]\n",
345+
" }\n",
346+
" }\n",
347+
" }\n",
348+
"}\n"
349+
]
350+
}
351+
],
352+
"source": [
353+
"print(response)"
354+
]
355+
}
356+
],
357+
"metadata": {
358+
"kernelspec": {
359+
"display_name": "Python 3 (ipykernel)",
360+
"language": "python",
361+
"name": "python3"
362+
},
363+
"language_info": {
364+
"codemirror_mode": {
365+
"name": "ipython",
366+
"version": 3
367+
},
368+
"file_extension": ".py",
369+
"mimetype": "text/x-python",
370+
"name": "python",
371+
"nbconvert_exporter": "python",
372+
"pygments_lexer": "ipython3"
373+
}
374+
},
375+
"nbformat": 4,
376+
"nbformat_minor": 5
377+
}

mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ nav:
5959
- 'Check key info present in generated summary': examples/text_summarization_quality.ipynb
6060
- 'Detect and limit hallucinations in generated text': examples/provenance.ipynb
6161
- 'Check whether a value is similar to a set of other values': examples/value_within_distribution.ipynb
62+
- 'Using GuardrailsOutputParser in LlamaIndex': examples/llamaindex-output-parsing.ipynb
6263
- 'Integrations':
6364
- 'Azure OpenAI': integrations/azure_openai.ipynb
6465
- 'OpenAI Functions': integrations/openai_functions.ipynb

0 commit comments

Comments
 (0)