Skip to content

Commit d536c5d

Browse files
authored
feat: string metrics migrated to collections (#2356)
1 parent 7a0db3a commit d536c5d

File tree

3 files changed

+676
-0
lines changed

3 files changed

+676
-0
lines changed

src/ragas/metrics/collections/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,21 @@
33
from ragas.metrics.collections._answer_relevancy import AnswerRelevancy
44
from ragas.metrics.collections._bleu_score import BleuScore
55
from ragas.metrics.collections._rouge_score import RougeScore
6+
from ragas.metrics.collections._string import (
7+
DistanceMeasure,
8+
ExactMatch,
9+
NonLLMStringSimilarity,
10+
StringPresence,
11+
)
612
from ragas.metrics.collections.base import BaseMetric
713

814
__all__ = [
    # Explicit public API of the collections subpackage.
    "BaseMetric",  # Base class
    "AnswerRelevancy",
    "BleuScore",
    "DistanceMeasure",  # Enum selecting the rapidfuzz distance for NonLLMStringSimilarity
    "ExactMatch",
    "NonLLMStringSimilarity",
    "RougeScore",
    "StringPresence",
]
Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
"""String-based metrics v2 - Class-based implementations with automatic validation."""
2+
3+
from enum import Enum
4+
5+
from ragas.metrics.collections.base import BaseMetric
6+
from ragas.metrics.result import MetricResult
7+
8+
9+
class DistanceMeasure(Enum):
    """Edit-distance algorithms available to NonLLMStringSimilarity.

    Each member is mapped to the corresponding rapidfuzz distance class
    when a NonLLMStringSimilarity metric is constructed.
    """

    LEVENSHTEIN = "levenshtein"
    HAMMING = "hamming"
    JARO = "jaro"
    JARO_WINKLER = "jaro_winkler"
14+
15+
16+
class ExactMatch(BaseMetric):
    """
    Check if reference and response are exactly identical.

    This implementation provides automatic validation and pure async design
    without requiring LLM or embedding components.

    Usage:
        >>> from ragas.metrics.collections import ExactMatch
        >>>
        >>> metric = ExactMatch()
        >>>
        >>> result = await metric.ascore(
        ...     reference="Hello World",
        ...     response="Hello World"
        ... )
        >>> print(f"Score: {result.value}")  # 1.0
        >>>
        >>> results = await metric.abatch_score([
        ...     {"reference": "Text 1", "response": "Text 1"},
        ...     {"reference": "Text 2", "response": "Different"},
        ... ])

    Attributes:
        name: The metric name
        allowed_values: Score range (0.0 to 1.0)
    """

    def __init__(
        self,
        name: str = "exact_match",
        **base_kwargs,
    ):
        """Initialize ExactMatch metric."""
        super().__init__(name=name, **base_kwargs)

    async def ascore(
        self,
        reference: str,
        response: str,
    ) -> MetricResult:
        """
        Check if reference and response match exactly.

        Args:
            reference: The reference/ground truth text
            response: The response text to evaluate

        Returns:
            MetricResult with 1.0 if exact match, 0.0 otherwise
        """
        # Validate inputs up front, matching the sibling string metrics
        # (StringPresence, NonLLMStringSimilarity) in this module.
        assert isinstance(reference, str), (
            "ExactMatch expects a valid reference string"
        )
        assert isinstance(response, str), (
            "ExactMatch expects a valid response string"
        )

        # Strict string equality; no normalization (case, whitespace) is applied.
        score = float(reference == response)
        return MetricResult(value=score)
69+
70+
71+
class StringPresence(BaseMetric):
    """
    Check whether the reference string occurs anywhere inside the response.

    Pure async, validation-first metric that needs no LLM or embedding
    components — just a substring containment test.

    Usage:
        >>> from ragas.metrics.collections import StringPresence
        >>>
        >>> metric = StringPresence()
        >>> result = await metric.ascore(
        ...     reference="Paris",
        ...     response="The capital of France is Paris."
        ... )
        >>> print(f"Score: {result.value}")  # 1.0
        >>>
        >>> results = await metric.abatch_score([
        ...     {"reference": "cat", "response": "The cat sat on the mat"},
        ...     {"reference": "dog", "response": "The cat sat on the mat"},
        ... ])

    Attributes:
        name: The metric name
        allowed_values: Score range (0.0 to 1.0)
    """

    def __init__(self, name: str = "string_present", **base_kwargs):
        """Initialize StringPresence metric."""
        super().__init__(name=name, **base_kwargs)

    async def ascore(self, reference: str, response: str) -> MetricResult:
        """
        Check if reference is present in response.

        Args:
            reference: The reference string to search for
            response: The response text to search in

        Returns:
            MetricResult with 1.0 if reference is in response, 0.0 otherwise
        """
        assert isinstance(reference, str), (
            "StringPresence expects a valid reference string"
        )
        assert isinstance(response, str), (
            "StringPresence expects a valid response string"
        )

        # Case-sensitive substring containment.
        found = reference in response
        return MetricResult(value=1.0 if found else 0.0)
131+
132+
133+
class NonLLMStringSimilarity(BaseMetric):
    """
    Score how close the response text is to the reference using a classic
    edit-distance measure — no LLM or embedding components required.

    Backed by the rapidfuzz library; the algorithm is selected through the
    DistanceMeasure enum (Levenshtein by default). Scores are normalized
    similarities, so identical strings score 1.0.

    Usage:
        >>> from ragas.metrics.collections import NonLLMStringSimilarity, DistanceMeasure
        >>>
        >>> metric = NonLLMStringSimilarity(distance_measure=DistanceMeasure.LEVENSHTEIN)
        >>> result = await metric.ascore(
        ...     reference="The capital of France is Paris.",
        ...     response="Paris is the capital of France."
        ... )
        >>> print(f"Score: {result.value}")
        >>>
        >>> results = await metric.abatch_score([
        ...     {"reference": "Text 1", "response": "Response 1"},
        ...     {"reference": "Text 2", "response": "Response 2"},
        ... ])

    Attributes:
        name: The metric name
        distance_measure: The distance measure to use (default: LEVENSHTEIN)
        allowed_values: Score range (0.0 to 1.0)
    """

    def __init__(
        self,
        name: str = "non_llm_string_similarity",
        distance_measure: DistanceMeasure = DistanceMeasure.LEVENSHTEIN,
        **base_kwargs,
    ):
        """Initialize NonLLMStringSimilarity metric."""
        super().__init__(name=name, **base_kwargs)
        self.distance_measure = distance_measure

        # rapidfuzz is an optional dependency; fail with install guidance.
        try:
            from rapidfuzz import distance
        except ImportError:
            raise ImportError(
                "rapidfuzz is required for string distance. "
                "Please install it using `pip install rapidfuzz`"
            )

        # Resolve every enum member to its rapidfuzz implementation once,
        # so ascore only performs a dict lookup per call.
        self.distance_measure_map = {
            DistanceMeasure.LEVENSHTEIN: distance.Levenshtein,
            DistanceMeasure.HAMMING: distance.Hamming,
            DistanceMeasure.JARO: distance.Jaro,
            DistanceMeasure.JARO_WINKLER: distance.JaroWinkler,
        }

    async def ascore(self, reference: str, response: str) -> MetricResult:
        """
        Calculate string similarity score asynchronously.

        Args:
            reference: The reference/ground truth text
            response: The response text to evaluate

        Returns:
            MetricResult with similarity score (0.0-1.0)
        """
        assert isinstance(reference, str), (
            "NonLLMStringSimilarity expects a valid reference string"
        )
        assert isinstance(response, str), (
            "NonLLMStringSimilarity expects a valid response string"
        )

        # Similarity is the complement of the normalized distance in [0, 1].
        measure = self.distance_measure_map[self.distance_measure]
        score = 1 - measure.normalized_distance(reference, response)

        assert isinstance(score, float), "Expecting a float"
        return MetricResult(value=float(score))

0 commit comments

Comments
 (0)