Skip to content

Commit b990d68

Browse files
authored
feat: upload testset generation upload (#1647)
1 parent fd44b4c commit b990d68

File tree

7 files changed

+51
-40
lines changed

7 files changed

+51
-40
lines changed

docs/getstarted/rag_evaluation.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@ The dataset used here is from [Amnesty QA RAG](https://huggingface.co/datasets/e
88

99
```python
1010
from datasets import load_dataset
11-
dataset = load_dataset("explodinggradients/amnesty_qa","english_v3")
11+
dataset = load_dataset(
12+
"explodinggradients/amnesty_qa",
13+
"english_v3",
14+
trust_remote_code=True
15+
)
1216
```
1317

1418
Load the dataset into a Ragas `EvaluationDataset` object.

src/ragas/evaluation.py

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
from langchain_core.callbacks import BaseCallbackHandler, BaseCallbackManager
88
from langchain_core.embeddings import Embeddings as LangchainEmbeddings
99
from langchain_core.language_models import BaseLanguageModel as LangchainLLM
10-
from llama_index.core.base.embeddings.base import BaseEmbedding as LlamaIndexEmbedding
11-
from llama_index.core.base.llms.base import BaseLLM as LlamaIndexLLM
1210

1311
from ragas._analytics import EvaluationEvent, track, track_was_completed
1412
from ragas.callbacks import ChainType, RagasTracer, new_group
@@ -21,14 +19,13 @@
2119
from ragas.embeddings.base import (
2220
BaseRagasEmbeddings,
2321
LangchainEmbeddingsWrapper,
24-
LlamaIndexEmbeddingsWrapper,
2522
embedding_factory,
2623
)
2724
from ragas.exceptions import ExceptionInRunner
2825
from ragas.executor import Executor
2926
from ragas.integrations.helicone import helicone_config
3027
from ragas.llms import llm_factory
31-
from ragas.llms.base import BaseRagasLLM, LangchainLLMWrapper, LlamaIndexLLMWrapper
28+
from ragas.llms.base import BaseRagasLLM, LangchainLLMWrapper
3229
from ragas.metrics import AspectCritic
3330
from ragas.metrics._answer_correctness import AnswerCorrectness
3431
from ragas.metrics.base import (
@@ -59,10 +56,8 @@
5956
def evaluate(
6057
dataset: t.Union[Dataset, EvaluationDataset],
6158
metrics: t.Optional[t.Sequence[Metric]] = None,
62-
llm: t.Optional[BaseRagasLLM | LangchainLLM | LlamaIndexLLM] = None,
63-
embeddings: t.Optional[
64-
BaseRagasEmbeddings | LangchainEmbeddings | LlamaIndexEmbedding
65-
] = None,
59+
llm: t.Optional[BaseRagasLLM | LangchainLLM] = None,
60+
embeddings: t.Optional[BaseRagasEmbeddings | LangchainEmbeddings] = None,
6661
callbacks: Callbacks = None,
6762
in_ci: bool = False,
6863
run_config: RunConfig = RunConfig(),
@@ -187,12 +182,8 @@ def evaluate(
187182
# set the llm and embeddings
188183
if isinstance(llm, LangchainLLM):
189184
llm = LangchainLLMWrapper(llm, run_config=run_config)
190-
elif isinstance(llm, LlamaIndexLLM):
191-
llm = LlamaIndexLLMWrapper(llm, run_config=run_config)
192185
if isinstance(embeddings, LangchainEmbeddings):
193186
embeddings = LangchainEmbeddingsWrapper(embeddings)
194-
elif isinstance(embeddings, LlamaIndexEmbedding):
195-
embeddings = LlamaIndexEmbeddingsWrapper(embeddings)
196187

197188
# init llms and embeddings
198189
binary_metrics = []

src/ragas/metrics/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
"ContextRecall",
8080
"context_recall",
8181
"AspectCritic",
82+
"AspectCriticWithReference",
8283
"AnswerRelevancy",
8384
"answer_relevancy",
8485
"ContextEntityRecall",

src/ragas/testset/synthesizers/generate.py

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from ragas.cost import TokenUsageParser
1313
from ragas.embeddings.base import (
1414
BaseRagasEmbeddings,
15-
LangchainEmbeddingsWrapper,
1615
LlamaIndexEmbeddingsWrapper,
1716
)
1817
from ragas.executor import Executor
@@ -28,7 +27,6 @@
2827
if t.TYPE_CHECKING:
2928
from langchain_core.callbacks import Callbacks
3029
from langchain_core.documents import Document as LCDocument
31-
from langchain_core.embeddings.embeddings import Embeddings as LangchainEmbeddings
3230
from langchain_core.language_models import BaseLanguageModel as LangchainLLM
3331
from llama_index.core.base.embeddings.base import (
3432
BaseEmbedding as LlamaIndexEmbedding,
@@ -55,22 +53,18 @@ class TestsetGenerator:
5553
----------
5654
llm : BaseRagasLLM
5755
The language model to use for the generation process.
58-
embedding_model: BaseRagasEmbeddings
59-
Embedding model for generation process.
6056
knowledge_graph : KnowledgeGraph, default empty
6157
The knowledge graph to use for the generation process.
6258
"""
6359

6460
llm: BaseRagasLLM
65-
embedding_model: BaseRagasEmbeddings
6661
knowledge_graph: KnowledgeGraph = field(default_factory=KnowledgeGraph)
6762
persona_list: t.Optional[t.List[Persona]] = None
6863

6964
@classmethod
7065
def from_langchain(
7166
cls,
7267
llm: LangchainLLM,
73-
embedding_model: LangchainEmbeddings,
7468
knowledge_graph: t.Optional[KnowledgeGraph] = None,
7569
) -> TestsetGenerator:
7670
"""
@@ -79,15 +73,13 @@ def from_langchain(
7973
knowledge_graph = knowledge_graph or KnowledgeGraph()
8074
return cls(
8175
LangchainLLMWrapper(llm),
82-
LangchainEmbeddingsWrapper(embedding_model),
8376
knowledge_graph,
8477
)
8578

8679
@classmethod
8780
def from_llama_index(
8881
cls,
8982
llm: LlamaIndexLLM,
90-
embedding_model: LlamaIndexEmbedding,
9183
knowledge_graph: t.Optional[KnowledgeGraph] = None,
9284
) -> TestsetGenerator:
9385
"""
@@ -96,7 +88,6 @@ def from_llama_index(
9688
knowledge_graph = knowledge_graph or KnowledgeGraph()
9789
return cls(
9890
LlamaIndexLLMWrapper(llm),
99-
LlamaIndexEmbeddingsWrapper(embedding_model),
10091
knowledge_graph,
10192
)
10293

@@ -157,17 +148,15 @@ def generate_with_langchain_docs(
157148
Provide an LLM on TestsetGenerator instantiation or as an argument for transforms_llm parameter.
158149
Alternatively you can provide your own transforms through the `transforms` parameter."""
159150
)
160-
if not self.embedding_model and not transforms_embedding_model:
151+
if not transforms_embedding_model:
161152
raise ValueError(
162-
"""An embedding client was not provided.
163-
Provide an embedding model on TestsetGenerator instantiation or as an argument for transforms_llm parameter.
164-
Alternatively you can provide your own transforms through the `transforms` parameter."""
153+
"""An embedding client was not provided. Provide an embedding through the transforms_embedding_model parameter. Alternatively you can provide your own transforms through the `transforms` parameter."""
165154
)
166155

167156
if not transforms:
168157
transforms = default_transforms(
169158
llm=transforms_llm or self.llm,
170-
embedding_model=transforms_embedding_model or self.embedding_model,
159+
embedding_model=transforms_embedding_model,
171160
)
172161

173162
# convert the documents to Ragas nodes
@@ -221,22 +210,19 @@ def generate_with_llamaindex_docs(
221210
raise ValueError(
222211
"An llm client was not provided. Provide an LLM on TestsetGenerator instantiation or as an argument for transforms_llm parameter. Alternatively you can provide your own transforms through the `transforms` parameter."
223212
)
224-
if not self.embedding_model and not transforms_embedding_model:
213+
if not transforms_embedding_model:
225214
raise ValueError(
226-
"An embedding client was not provided. Provide an embedding model on TestsetGenerator instantiation or as an argument for transforms_llm parameter. Alternatively you can provide your own transforms through the `transforms` parameter."
215+
"An embedding client was not provided. Provide an embedding through the transforms_embedding_model parameter. Alternatively you can provide your own transforms through the `transforms` parameter."
227216
)
228217

229218
if not transforms:
230219
if transforms_llm is None:
231220
llm_for_transforms = self.llm
232221
else:
233222
llm_for_transforms = LlamaIndexLLMWrapper(transforms_llm)
234-
if transforms_embedding_model is None:
235-
embedding_model_for_transforms = self.embedding_model
236-
else:
237-
embedding_model_for_transforms = LlamaIndexEmbeddingsWrapper(
238-
transforms_embedding_model
239-
)
223+
embedding_model_for_transforms = LlamaIndexEmbeddingsWrapper(
224+
transforms_embedding_model
225+
)
240226
transforms = default_transforms(
241227
llm=llm_for_transforms,
242228
embedding_model=embedding_model_for_transforms,

src/ragas/testset/synthesizers/single_hop/specific.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ class SingleHopScenario(BaseScenario):
3838

3939
@dataclass
4040
class SingleHopSpecificQuerySynthesizer(SingleHopQuerySynthesizer):
41-
4241
name: str = "single_hop_specifc_query_synthesizer"
4342
theme_persona_matching_prompt: PydanticPrompt = ThemesPersonasMatchingPrompt()
4443

@@ -71,6 +70,8 @@ async def _generate_scenarios(
7170
):
7271
nodes.append(node)
7372

73+
if len(nodes) == 0:
74+
raise ValueError("No nodes found with the `entities` property.")
7475
samples_per_node = int(np.ceil(n / len(nodes)))
7576

7677
scenarios = []

src/ragas/testset/synthesizers/testset_schema.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
import typing as t
44
from dataclasses import dataclass, field
5+
from datetime import datetime
6+
from uuid import uuid4
7+
8+
from pydantic import BaseModel, Field
59

610
from ragas.cost import CostCallbackHandler, TokenUsage
711
from ragas.dataset_schema import (
@@ -11,6 +15,7 @@
1115
RagasDataset,
1216
SingleTurnSample,
1317
)
18+
from ragas.utils import RAGAS_API_URL
1419

1520

1621
class TestsetSample(BaseSample):
@@ -29,6 +34,16 @@ class TestsetSample(BaseSample):
2934
synthesizer_name: str
3035

3136

37+
class TestsetPacket(BaseModel):
38+
"""
39+
A packet of testset samples to be uploaded to the server.
40+
"""
41+
42+
samples: t.List[TestsetSample]
43+
run_id: str = Field(default_factory=lambda: str(uuid4()))
44+
created_at: str = Field(default_factory=lambda: datetime.now().isoformat())
45+
46+
3247
@dataclass
3348
class Testset(RagasDataset[TestsetSample]):
3449
"""
@@ -118,3 +133,18 @@ def total_cost(
118133
cost_per_input_token=cost_per_input_token,
119134
cost_per_output_token=cost_per_output_token,
120135
)
136+
137+
def upload(self, base_url: str = RAGAS_API_URL, verbose: bool = True) -> str:
138+
import requests
139+
140+
packet = TestsetPacket(samples=self.samples)
141+
response = requests.post(
142+
f"{base_url}/alignment/testset", json=packet.model_dump()
143+
)
144+
if response.status_code != 200:
145+
raise Exception(f"Failed to upload results: {response.text}")
146+
147+
testset_endpoint = f"https://app.ragas.io/alignment/testset/{packet.run_id}"
148+
if verbose:
149+
print(f"Testset uploaded! View at {testset_endpoint}")
150+
return testset_endpoint

src/ragas/testset/transforms/extractors/llm_based.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ class NERExtractor(LLMBasedExtractor):
263263
"""
264264

265265
property_name: str = "entities"
266-
prompt: NERPrompt = NERPrompt()
266+
prompt: PydanticPrompt[TextWithExtractionLimit, NEROutput] = NERPrompt()
267267
max_num_entities: int = 10
268268

269269
async def extract(self, node: Node) -> t.Tuple[str, t.List[str]]:
@@ -282,9 +282,7 @@ class TopicDescription(BaseModel):
282282

283283

284284
class TopicDescriptionPrompt(PydanticPrompt[StringIO, TopicDescription]):
285-
instruction: str = (
286-
"Provide a concise description of the main topic(s) discussed in the following text."
287-
)
285+
instruction: str = "Provide a concise description of the main topic(s) discussed in the following text."
288286
input_model: t.Type[StringIO] = StringIO
289287
output_model: t.Type[TopicDescription] = TopicDescription
290288
examples: t.List[t.Tuple[StringIO, TopicDescription]] = [

0 commit comments

Comments
 (0)