
Commit 407c2e0

feat: more features to prompt object (#1418)
- support for `generate_multiple()`
- prompt supports name and language
- callback handlers
- output parser
1 parent 96cff7a commit 407c2e0

26 files changed: +618 -336 lines changed
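Pieced together from the commit message and the API visible in the diffs below, the expanded prompt object might be exercised like this. The `SummaryInput`/`SummaryOutput` models and the `llm` handle are illustrative stand-ins, the generic `PydanticPrompt[Input, Output]` form follows the notebooks, and the exact `generate_multiple()` signature is an assumption:

```python
from pydantic import BaseModel

from ragas.prompt import PydanticPrompt


class SummaryInput(BaseModel):  # illustrative input model, not from this commit
    text: str


class SummaryOutput(BaseModel):  # illustrative output model
    summary: str


class SummaryPrompt(PydanticPrompt[SummaryInput, SummaryOutput]):
    name = "summarize"  # prompts now carry a name (per the commit message)
    language = "english"  # ... and a language
    instruction = "Summarize the given text."
    input_model = SummaryInput
    output_model = SummaryOutput


# With a configured BaseRagasLLM instance `llm` (not shown here):
# one = await SummaryPrompt().generate(data=SummaryInput(text="..."), llm=llm)
# many = await SummaryPrompt().generate_multiple(
#     data=SummaryInput(text="..."), n=3, llm=llm  # signature assumed
# )
```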

Makefile

Lines changed: 2 additions & 2 deletions

```diff
@@ -24,14 +24,14 @@ clean: ## Clean all generated files
 	@cd $(GIT_ROOT)/docs && make clean
 	@cd $(GIT_ROOT) || exit 1
 	@find . -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete
-run-ci: format lint type ## Running all CI checks
 test: ## Run tests
 	@echo "Running tests..."
 	@pytest --nbmake tests/unit $(shell if [ -n "$(k)" ]; then echo "-k $(k)"; fi)
 test-e2e: ## Run end2end tests
 	echo "running end2end tests..."
 	@pytest --nbmake tests/e2e -s
-
+
+run-ci: format lint type test ## Running all CI checks
+
 # Docs
 docsite: ## Build and serve documentation
 	@echo "Generating reference pages..."
```

docs/howtos/customizations/metrics/modifying-prompts-metrics.ipynb

Lines changed: 21 additions & 21 deletions

```diff
@@ -42,7 +42,8 @@
 ],
 "source": [
 "from ragas.metrics._simple_criteria import SimpleCriteriaScoreWithReference\n",
-"scorer = SimpleCriteriaScoreWithReference(name='random',definition=\"some definition\")\n",
+"\n",
+"scorer = SimpleCriteriaScoreWithReference(name=\"random\", definition=\"some definition\")\n",
 "scorer.get_prompts()"
 ]
 },
@@ -62,7 +63,7 @@
 ],
 "source": [
 "prompts = scorer.get_prompts()\n",
-"print(prompts['single_turn_prompt'].to_string())"
+"print(prompts[\"single_turn_prompt\"].to_string())"
 ]
 },
 {
@@ -81,7 +82,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"prompt = scorer.get_prompts()['single_turn_prompt']\n",
+"prompt = scorer.get_prompts()[\"single_turn_prompt\"]\n",
 "prompt.instruction += \"\\nOnly output valid JSON.\""
 ]
 },
@@ -92,9 +93,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"scorer.set_prompts(**{\n",
-"    'single_turn_prompt':prompt\n",
-"})"
+"scorer.set_prompts(**{\"single_turn_prompt\": prompt})"
 ]
 },
 {
@@ -122,7 +121,7 @@
 }
 ],
 "source": [
-"print(scorer.get_prompts()['single_turn_prompt'].instruction)"
+"print(scorer.get_prompts()[\"single_turn_prompt\"].instruction)"
 ]
 },
 {
@@ -153,7 +152,7 @@
 }
 ],
 "source": [
-"prompt = scorer.get_prompts()['single_turn_prompt']\n",
+"prompt = scorer.get_prompts()[\"single_turn_prompt\"]\n",
 "\n",
 "prompt.examples"
 ]
@@ -165,7 +164,10 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from ragas.metrics._simple_criteria import SingleTurnSimpleCriteriaWithReferenceInput, SimpleCriteriaOutput"
+"from ragas.metrics._simple_criteria import (\n",
+"    SingleTurnSimpleCriteriaWithReferenceInput,\n",
+"    SimpleCriteriaOutput,\n",
+")"
 ]
 },
 {
@@ -178,15 +180,15 @@
 "new_example = [\n",
 "    (\n",
 "        SingleTurnSimpleCriteriaWithReferenceInput(\n",
-"            user_input='Who was the first president of the United States?',\n",
-"            response='Thomas Jefferson was the first president of the United States.',\n",
-"            criteria='Score responses in range of 0 (low) to 5 (high) based similarity with reference.',\n",
-"            reference='George Washington was the first president of the United States.'\n",
+"            user_input=\"Who was the first president of the United States?\",\n",
+"            response=\"Thomas Jefferson was the first president of the United States.\",\n",
+"            criteria=\"Score responses in range of 0 (low) to 5 (high) based similarity with reference.\",\n",
+"            reference=\"George Washington was the first president of the United States.\",\n",
 "        ),\n",
 "        SimpleCriteriaOutput(\n",
-"            reason='The response incorrectly states Thomas Jefferson instead of George Washington. While both are significant historical figures, the answer does not match the reference.',\n",
-"            score=2\n",
-"        )\n",
+"            reason=\"The response incorrectly states Thomas Jefferson instead of George Washington. While both are significant historical figures, the answer does not match the reference.\",\n",
+"            score=2,\n",
+"        ),\n",
 "    )\n",
 "]"
 ]
@@ -208,9 +210,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"scorer.set_prompts(**{\n",
-"    'single_turn_prompt':prompt\n",
-"})"
+"scorer.set_prompts(**{\"single_turn_prompt\": prompt})"
 ]
 },
 {
@@ -228,7 +228,7 @@
 }
 ],
 "source": [
-"print(scorer.get_prompts()['single_turn_prompt'].examples)"
+"print(scorer.get_prompts()[\"single_turn_prompt\"].examples)"
 ]
 },
 {
@@ -246,7 +246,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"scorer.get_prompts()['single_turn_prompt'].to_string()"
+"scorer.get_prompts()[\"single_turn_prompt\"].to_string()"
 ]
 }
 ],
```
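Condensed from the cells above, the round trip for editing a metric's prompt is: fetch by key, mutate, write back, verify. A minimal sketch using only the API shown in this notebook:

```python
from ragas.metrics._simple_criteria import SimpleCriteriaScoreWithReference

scorer = SimpleCriteriaScoreWithReference(name="random", definition="some definition")

# fetch the prompt by key, edit it in place, and write it back
prompt = scorer.get_prompts()["single_turn_prompt"]
prompt.instruction += "\nOnly output valid JSON."
scorer.set_prompts(**{"single_turn_prompt": prompt})

# verify the instruction was updated
print(scorer.get_prompts()["single_turn_prompt"].instruction)
```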

docs/howtos/customizations/metrics/write_your_own_metric.ipynb

Lines changed: 68 additions & 41 deletions

```diff
@@ -73,10 +73,12 @@
 "from ragas.prompt import PydanticPrompt\n",
 "from pydantic import BaseModel, Field\n",
 "\n",
+"\n",
 "class RefusalInput(BaseModel):\n",
-"    user_input:str = Field(description=\"the user request\")\n",
+"    user_input: str = Field(description=\"the user request\")\n",
 "    response: str = Field(description=\"response from AI\")\n",
 "\n",
+"\n",
 "class RefusalOutput(BaseModel):\n",
 "    refusal: bool = Field(description=\"boolean indicating if request was refused\")\n",
 "\n",
@@ -87,18 +89,20 @@
 "    output_model = RefusalOutput\n",
 "    examples = [\n",
 "        (\n",
-"            RefusalInput(user_input=\"Can you tell me how to hack a system?\", \n",
-"                         response=\"I'm sorry, I can't assist with that.\"),\n",
-"            RefusalOutput(refusal=True)\n",
+"            RefusalInput(\n",
+"                user_input=\"Can you tell me how to hack a system?\",\n",
+"                response=\"I'm sorry, I can't assist with that.\",\n",
+"            ),\n",
+"            RefusalOutput(refusal=True),\n",
 "        ),\n",
 "        (\n",
-"            RefusalInput(user_input=\"What's the weather like today?\", \n",
-"                         response=\"The weather is sunny with a high of 25°C.\"),\n",
-"            RefusalOutput(refusal=False)\n",
-"        )\n",
-"    ]\n",
-"\n",
-"    \n"
+"            RefusalInput(\n",
+"                user_input=\"What's the weather like today?\",\n",
+"                response=\"The weather is sunny with a high of 25°C.\",\n",
+"            ),\n",
+"            RefusalOutput(refusal=False),\n",
+"        ),\n",
+"    ]"
 ]
 },
 {
@@ -144,14 +148,22 @@
 "\n",
 "    async def _single_turn_ascore(self, sample, callbacks):\n",
 "\n",
-"        prompt_input = RefusalInput(user_input=sample.user_input, response=sample.response)\n",
-"        prompt_response = await self.refusal_prompt.generate(data=prompt_input,llm=self.llm)\n",
+"        prompt_input = RefusalInput(\n",
+"            user_input=sample.user_input, response=sample.response\n",
+"        )\n",
+"        prompt_response = await self.refusal_prompt.generate(\n",
+"            data=prompt_input, llm=self.llm\n",
+"        )\n",
 "        return int(prompt_response.refusal)\n",
 "\n",
 "    async def _multi_turn_ascore(self, sample, callbacks):\n",
 "\n",
 "        conversations = sample.user_input\n",
-"        conversations = [message for message in conversations if isinstance(message, AIMessage) or isinstance(message, HumanMessage)]\n",
+"        conversations = [\n",
+"            message\n",
+"            for message in conversations\n",
+"            if isinstance(message, AIMessage) or isinstance(message, HumanMessage)\n",
+"        ]\n",
 "\n",
 "        grouped_messages = []\n",
 "        for msg in conversations:\n",
@@ -160,24 +172,19 @@
 "            elif isinstance(msg, AIMessage) and human_msg:\n",
 "                grouped_messages.append((human_msg, msg))\n",
 "                human_msg = None\n",
-"    \n",
 "\n",
 "        grouped_messages = [item for item in grouped_messages if item[0]]\n",
 "        scores = []\n",
 "        for turn in grouped_messages:\n",
-"            prompt_input = RefusalInput(user_input=turn[0].content, response=turn[1].content)\n",
-"            prompt_response = await self.refusal_prompt.generate(data=prompt_input,llm=self.llm)\n",
+"            prompt_input = RefusalInput(\n",
+"                user_input=turn[0].content, response=turn[1].content\n",
+"            )\n",
+"            prompt_response = await self.refusal_prompt.generate(\n",
+"                data=prompt_input, llm=self.llm\n",
+"            )\n",
 "            scores.append(prompt_response.refusal)\n",
 "\n",
-"        return sum(scores)\n",
-"    \n",
-"    \n",
-"    \n",
-"\n",
-"    \n",
-"    \n",
-"    \n",
-"    "
+"        return sum(scores)"
 ]
 },
 {
@@ -255,21 +262,41 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"sample = MultiTurnSample(user_input=[\n",
-"    HumanMessage(content=\"Hey, book a table at the nearest best Chinese restaurant for 8:00pm\"),\n",
-"    AIMessage(content=\"Sure, let me find the best options for you.\", tool_calls=[\n",
-"        ToolCall(name=\"restaurant_search\", args={\"cuisine\": \"Chinese\", \"time\": \"8:00pm\"})\n",
-"    ]),\n",
-"    ToolMessage(content=\"Found a few options: 1. Golden Dragon, 2. Jade Palace\"),\n",
-"    AIMessage(content=\"I found some great options: Golden Dragon and Jade Palace. Which one would you prefer?\"),\n",
-"    HumanMessage(content=\"Let's go with Golden Dragon.\"),\n",
-"    AIMessage(content=\"Great choice! I'll book a table for 8:00pm at Golden Dragon.\", tool_calls=[\n",
-"        ToolCall(name=\"restaurant_book\", args={\"name\": \"Golden Dragon\", \"time\": \"8:00pm\"})\n",
-"    ]),\n",
-"    ToolMessage(content=\"Table booked at Golden Dragon for 8:00pm.\"),\n",
-"    AIMessage(content=\"Your table at Golden Dragon is booked for 8:00pm. Enjoy your meal!\"),\n",
-"    HumanMessage(content=\"thanks\"),\n",
-"])"
+"sample = MultiTurnSample(\n",
+"    user_input=[\n",
+"        HumanMessage(\n",
+"            content=\"Hey, book a table at the nearest best Chinese restaurant for 8:00pm\"\n",
+"        ),\n",
+"        AIMessage(\n",
+"            content=\"Sure, let me find the best options for you.\",\n",
+"            tool_calls=[\n",
+"                ToolCall(\n",
+"                    name=\"restaurant_search\",\n",
+"                    args={\"cuisine\": \"Chinese\", \"time\": \"8:00pm\"},\n",
+"                )\n",
+"            ],\n",
+"        ),\n",
+"        ToolMessage(content=\"Found a few options: 1. Golden Dragon, 2. Jade Palace\"),\n",
+"        AIMessage(\n",
+"            content=\"I found some great options: Golden Dragon and Jade Palace. Which one would you prefer?\"\n",
+"        ),\n",
+"        HumanMessage(content=\"Let's go with Golden Dragon.\"),\n",
+"        AIMessage(\n",
+"            content=\"Great choice! I'll book a table for 8:00pm at Golden Dragon.\",\n",
+"            tool_calls=[\n",
+"                ToolCall(\n",
+"                    name=\"restaurant_book\",\n",
+"                    args={\"name\": \"Golden Dragon\", \"time\": \"8:00pm\"},\n",
+"                )\n",
+"            ],\n",
+"        ),\n",
+"        ToolMessage(content=\"Table booked at Golden Dragon for 8:00pm.\"),\n",
+"        AIMessage(\n",
+"            content=\"Your table at Golden Dragon is booked for 8:00pm. Enjoy your meal!\"\n",
+"        ),\n",
+"        HumanMessage(content=\"thanks\"),\n",
+"    ]\n",
+")"
 ]
 },
 {
```
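For reference, scoring a single turn with the custom metric defined above would look roughly like this. The metric's class name is defined in a cell outside these hunks, so `RefusalMetric` below is a stand-in, and the call is left commented since it needs a configured evaluator LLM:

```python
import asyncio

from ragas.dataset_schema import SingleTurnSample

sample = SingleTurnSample(
    user_input="Can you tell me how to hack a system?",
    response="I'm sorry, I can't assist with that.",
)

# `RefusalMetric` is a hypothetical name for the metric class above;
# single_turn_ascore() is the public wrapper around _single_turn_ascore().
# score = asyncio.run(RefusalMetric(llm=evaluator_llm).single_turn_ascore(sample))
```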

docs/howtos/customizations/run_config.ipynb

Lines changed: 7 additions & 9 deletions

```diff
@@ -29,13 +29,11 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"\n",
-"\n",
 "from ragas.run_config import RunConfig\n",
 "\n",
 "# increasing max_workers to 64 and timeout to 60 seconds\n",
 "\n",
-"my_run_config=RunConfig(max_workers=64, timeout=60) "
+"my_run_config = RunConfig(max_workers=64, timeout=60)"
 ]
 },
 {
@@ -56,15 +54,15 @@
 "from datasets import load_dataset\n",
 "from ragas import evaluate\n",
 "\n",
-"dataset = load_dataset(\"explodinggradients/amnesty_qa\",\"english_v3\")\n",
+"dataset = load_dataset(\"explodinggradients/amnesty_qa\", \"english_v3\")\n",
 "\n",
 "samples = []\n",
-"for row in dataset['eval']:\n",
+"for row in dataset[\"eval\"]:\n",
 "    sample = SingleTurnSample(\n",
-"        user_input=row['user_input'],\n",
-"        reference=row['reference'],\n",
-"        response=row['response'],\n",
-"        retrieved_contexts=row['retrieved_contexts']\n",
+"        user_input=row[\"user_input\"],\n",
+"        reference=row[\"reference\"],\n",
+"        response=row[\"response\"],\n",
+"        retrieved_contexts=row[\"retrieved_contexts\"],\n",
 "    )\n",
 "    samples.append(sample)\n",
 "\n",
```

pyproject.toml

Lines changed: 4 additions & 2 deletions

```diff
@@ -59,5 +59,7 @@ build-backend = "setuptools.build_meta"
 write_to = "src/ragas/_version.py"
 
 [tool.pytest.ini_options]
-addopts = "-n 4"
-asyncio_default_fixture_loop_scope = "function"
+addopts = "-n 0"
+asyncio_default_fixture_loop_scope = "function"
+[pytest]
+testpaths = ["tests"]
```

src/ragas/exceptions.py

Lines changed: 12 additions & 0 deletions

```diff
@@ -19,3 +19,15 @@ class ExceptionInRunner(RagasException):
     def __init__(self):
         msg = "The runner thread which was running the jobs raised an exeception. Read the traceback above to debug it. You can also pass `raise_exceptions=False` incase you want to show only a warning message instead."
         super().__init__(msg)
+
+
+class RagasOutputParserException(RagasException):
+    """
+    Exception raised when the output parser fails to parse the output.
+    """
+
+    def __init__(self, num_retries: int):
+        msg = (
+            f"The output parser failed to parse the output after {num_retries} retries."
+        )
+        super().__init__(msg)
```
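Callers can catch the new exception once parsing retries are exhausted. A small sketch; the raise site here is synthetic, standing in for whichever parser raises it in this commit:

```python
from ragas.exceptions import RagasOutputParserException

try:
    raise RagasOutputParserException(num_retries=1)  # stands in for a failed parse
except RagasOutputParserException as err:
    # the message embeds the retry count supplied by the caller
    print(err)
```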

src/ragas/integrations/langchain.py

Lines changed: 3 additions & 3 deletions

```diff
@@ -48,9 +48,9 @@ def __init__(self, metric: Metric, **kwargs: t.Any):
             t.cast(MetricWithLLM, self.metric).llm = LangchainLLMWrapper(llm)
         if isinstance(self.metric, MetricWithEmbeddings):
             embeddings = get_or_init(kwargs, "embeddings", OpenAIEmbeddings)
-            t.cast(
-                MetricWithEmbeddings, self.metric
-            ).embeddings = LangchainEmbeddingsWrapper(embeddings)
+            t.cast(MetricWithEmbeddings, self.metric).embeddings = (
+                LangchainEmbeddingsWrapper(embeddings)
+            )
         self.metric.init(run_config)
 
         assert isinstance(
```

src/ragas/llms/base.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -47,6 +47,7 @@ def is_multiple_completion_supported(llm: BaseLanguageModel) -> bool:
 @dataclass
 class BaseRagasLLM(ABC):
     run_config: RunConfig = field(default_factory=RunConfig)
+    multiple_completion_supported: bool = False
 
     def set_run_config(self, run_config: RunConfig):
         self.run_config = run_config
```
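The new field gives every LLM wrapper a declared capability that prompt code can branch on. How it is consumed is not shown in this hunk, so the dispatch below is an assumption:

```python
from ragas.llms.base import BaseRagasLLM


def completion_plan(llm: BaseRagasLLM, n: int) -> str:
    # wrappers that set multiple_completion_supported=True can serve n
    # completions from one request; the default (False) implies one
    # request per completion
    if llm.multiple_completion_supported:
        return f"one request with n={n}"
    return f"{n} separate requests"
```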

src/ragas/llms/output_parser.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -53,7 +53,7 @@ def get_json_format_instructions(pydantic_object: t.Type[TBaseModel]) -> str:
     return resp
 
 
-class RagasoutputParser(PydanticOutputParser):
+class RagasOutputParserOld(PydanticOutputParser):
     async def aparse(  # type: ignore
         self, result: str, prompt: PromptValue, llm: BaseRagasLLM, max_retries: int = 1
     ):
```
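The rename to `RagasOutputParserOld` implies a replacement parser elsewhere in this commit (not shown in this hunk). The retry-then-raise contract it pairs with the new exception can be sketched as follows; `parse_fn` is a hypothetical callable, not a ragas API:

```python
from ragas.exceptions import RagasOutputParserException


def parse_with_retries(parse_fn, raw: str, max_retries: int = 1):
    for _ in range(max_retries + 1):
        try:
            return parse_fn(raw)
        except ValueError:
            # in ragas the LLM would be re-prompted to fix its output here
            continue
    raise RagasOutputParserException(num_retries=max_retries)
```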
