Skip to content

Commit 5e69e30

Browse files
committed
utils to test with fiqa
1 parent bc3b19f commit 5e69e30

File tree

3 files changed

+423
-0
lines changed

3 files changed

+423
-0
lines changed

tests/e2e/test_dataset_utils.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,39 @@
3030
},
3131
]
3232

33+
# Local sample rows mirroring the structure of the fiqa dataset. Each row
# carries a question ("user_input"), a ground-truth answer ("reference"),
# a generated answer ("response") and the passages it was based on
# ("retrieved_contexts"). Long strings are wrapped with implicit
# concatenation; the resulting values are single unbroken strings.
SAMPLE_FIQA_DATA = [
    {
        "user_input": (
            "How to deposit a cheque issued to an associate in my business account?"
        ),
        "reference": (
            "Have the check reissued to the proper payee. "
            "Just have the associate sign the back and then deposit it. "
            "It's called a third party cheque and is perfectly legal. "
            "I wouldn't be surprised if it has a longer hold period and, "
            "as always, you don't get the money if the cheque doesn't clear."
        ),
        "response": (
            "The best way to deposit a cheque issued to an associate in your "
            "business account is to have the associate sign the back of the "
            "cheque and deposit it as a third party cheque."
        ),
        "retrieved_contexts": [
            (
                "Just have the associate sign the back and then deposit it. "
                "It's called a third party cheque and is perfectly legal."
            ),
            (
                "I wouldn't be surprised if it has a longer hold period and, "
                "as always, you don't get the money if the cheque doesn't clear."
            ),
        ],
    },
    {
        "user_input": "What is the difference between a mutual fund and an ETF?",
        "reference": (
            "Mutual funds are actively managed investment vehicles that pool "
            "money from multiple investors. "
            "ETFs are passively managed and trade on exchanges like stocks. "
            "ETFs typically have lower fees and can be bought and sold "
            "throughout the trading day."
        ),
        "response": (
            "A mutual fund pools money from investors and is actively managed, "
            "while an ETF trades like a stock and typically tracks an index "
            "with lower fees."
        ),
        "retrieved_contexts": [
            (
                "Mutual funds pool money from multiple investors and are "
                "actively managed by professional fund managers."
            ),
            (
                "ETFs trade on exchanges like stocks and can be bought and "
                "sold throughout the trading day."
            ),
            "ETFs typically have lower expense ratios compared to mutual funds.",
        ],
    },
    {
        "user_input": "Should I pay off my mortgage early or invest the money?",
        "reference": (
            "It depends on your mortgage interest rate and expected "
            "investment returns. "
            "If your mortgage rate is low and you expect higher returns from "
            "investments, investing may be better. "
            "Consider your risk tolerance and financial goals."
        ),
        "response": (
            "The decision depends on comparing your mortgage interest rate to "
            "expected investment returns, along with your risk tolerance and "
            "financial security needs."
        ),
        "retrieved_contexts": [
            (
                "Compare your mortgage interest rate to expected investment "
                "returns to make an informed decision."
            ),
            (
                "Consider your risk tolerance and overall financial situation "
                "before making this decision."
            ),
            (
                "Having no mortgage provides peace of mind and guaranteed "
                "savings equal to the interest rate."
            ),
        ],
    },
]
65+
3366

3467
def load_amnesty_dataset_safe(config: str = "english_v3"):
3568
"""
@@ -54,3 +87,28 @@ def load_amnesty_dataset_safe(config: str = "english_v3"):
5487
local_dataset = Dataset.from_list(SAMPLE_AMNESTY_DATA)
5588
logger.info(f"Created local dataset with {len(local_dataset)} samples")
5689
return local_dataset
90+
91+
92+
def load_fiqa_dataset_safe(config: str = "ragas_eval_v3"):
    """
    Load the fiqa dataset, preferring the remote copy over local samples.

    The "baseline" split of ``explodinggradients/fiqa`` is requested first.
    If anything in that attempt raises, the failure is logged as a warning
    and a small dataset built from ``SAMPLE_FIQA_DATA`` is returned instead.

    Args:
        config: Dataset configuration name handed to ``load_dataset``
            (default: "ragas_eval_v3").

    Returns:
        Dataset: The remote "baseline" split, or the local sample dataset
        when the remote load fails.
    """
    try:
        logger.info(f"Attempting to load fiqa dataset with config '{config}'")
        splits = load_dataset("explodinggradients/fiqa", config)
        remote_split = splits["baseline"]
        logger.info(f"Successfully loaded dataset with {len(remote_split)} samples")
        return remote_split
    except Exception as exc:
        # Deliberately broad: any failure here should fall back to the
        # bundled samples rather than propagate to the caller.
        logger.warning(f"Failed to load remote dataset: {exc}")
        logger.info("Using local sample data as fallback")

        fallback = Dataset.from_list(SAMPLE_FIQA_DATA)
        logger.info(f"Created local dataset with {len(fallback)} samples")
        return fallback

tests/utils/__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,25 @@
1111
create_modern_embeddings,
1212
create_modern_llm,
1313
)
14+
from .metric_comparison import (
15+
MetricDiffResult,
16+
compare_metrics,
17+
export_comparison_results,
18+
run_metric_on_dataset,
19+
run_metric_on_dataset_with_batching,
20+
)
1421

1522
__all__ = [
23+
# LLM and embeddings setup
1624
"check_api_key",
1725
"create_legacy_llm",
1826
"create_modern_llm",
1927
"create_legacy_embeddings",
2028
"create_modern_embeddings",
29+
# Metric comparison utilities
30+
"MetricDiffResult",
31+
"compare_metrics",
32+
"export_comparison_results",
33+
"run_metric_on_dataset",
34+
"run_metric_on_dataset_with_batching",
2135
]

0 commit comments

Comments
 (0)