Skip to content

Commit 3834fe5

Browse files
Chore: add language to event tracking (#592)
Co-authored-by: jjmachan <[email protected]>
1 parent b667668 commit 3834fe5

File tree

6 files changed

+36
-6
lines changed

6 files changed

+36
-6
lines changed

.github/workflows/ci.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,8 @@ jobs:
2323
ragas: ${{ steps.filter.outputs.ragas }}
2424
docs: ${{ steps.filter.outputs.docs }}
2525
steps:
26-
- uses: actions/checkout@v3
27-
- uses: dorny/paths-filter@v2
26+
- uses: actions/checkout@v4
27+
- uses: dorny/paths-filter@v3
2828
id: filter
2929
with:
3030
base: "main"
@@ -59,7 +59,7 @@ jobs:
5959
runs-on: ${{ matrix.os }}
6060

6161
steps:
62-
- uses: actions/checkout@v3
62+
- uses: actions/checkout@v4
6363
with:
6464
fetch-depth: 0 # fetch all tags and branches
6565

@@ -108,7 +108,7 @@ jobs:
108108
if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.ragas == 'true') || github.event_name == 'push' }}
109109

110110
steps:
111-
- uses: actions/checkout@v3
111+
- uses: actions/checkout@v4
112112

113113
- name: Setup python
114114
uses: actions/setup-python@v4

src/ragas/_analytics.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -87,12 +87,14 @@ class EvaluationEvent(BaseEvent):
8787
metrics: t.List[str]
8888
evaluation_mode: str
8989
num_rows: int
90+
language: str
9091

9192

9293
class TesetGenerationEvent(BaseEvent):
9394
evolution_names: t.List[str]
9495
evolution_percentages: t.List[float]
9596
num_rows: int
97+
language: str
9698

9799

98100
@silent

src/ragas/evaluation.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from ragas.metrics.base import Metric, MetricWithEmbeddings, MetricWithLLM
1919
from ragas.metrics.critique import AspectCritique
2020
from ragas.run_config import RunConfig
21+
from ragas.utils import get_feature_language
2122

2223
# from ragas.metrics.critique import AspectCritique
2324
from ragas.validation import (
@@ -249,12 +250,15 @@ def evaluate(
249250

250251
# log the evaluation event
251252
metrics_names = [m.name for m in metrics]
253+
metric_lang = [get_feature_language(m) for m in metrics]
254+
metric_lang = np.unique([m for m in metric_lang if m is not None])
252255
track(
253256
EvaluationEvent(
254257
event_type="evaluation",
255258
metrics=metrics_names,
256259
evaluation_mode="",
257260
num_rows=dataset.shape[0],
261+
language=metric_lang[0] if len(metric_lang) > 0 else "",
258262
)
259263
)
260264
return result

src/ragas/testset/generator.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@
2828
)
2929
from ragas.testset.extractor import KeyphraseExtractor
3030
from ragas.testset.filters import EvolutionFilter, NodeFilter, QuestionFilter
31-
from ragas.utils import check_if_sum_is_close, is_nan
31+
from ragas.utils import check_if_sum_is_close, get_feature_language, is_nan
3232

3333
if t.TYPE_CHECKING:
3434
from langchain_core.documents import Document as LCDocument
@@ -251,12 +251,15 @@ def generate(
251251
# due to failed evolutions. MaxRetriesExceeded is a common reason
252252
test_data_rows = [r for r in test_data_rows if not is_nan(r)]
253253
test_dataset = TestDataset(test_data=test_data_rows)
254+
evol_lang = [get_feature_language(e) for e in distributions]
255+
evol_lang = [e for e in evol_lang if e is not None]
254256
track(
255257
TesetGenerationEvent(
256258
event_type="testset_generation",
257259
evolution_names=[e.__class__.__name__.lower() for e in distributions],
258260
evolution_percentages=[distributions[e] for e in distributions],
259261
num_rows=len(test_dataset.test_data),
262+
language=evol_lang[0] if len(evol_lang) > 0 else "",
260263
)
261264
)
262265

src/ragas/utils.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,10 @@
77

88
import numpy as np
99

10+
if t.TYPE_CHECKING:
11+
from ragas.metrics.base import Metric
12+
from ragas.testset.evolutions import Evolution
13+
1014
DEBUG_ENV_VAR = "RAGAS_DEBUG"
1115

1216

@@ -57,3 +61,14 @@ def is_nan(x):
5761
return np.isnan(x)
5862
except TypeError:
5963
return False
64+
65+
66+
def get_feature_language(feature: t.Union[Metric, Evolution]) -> t.Optional[str]:
67+
from ragas.llms.prompt import Prompt
68+
69+
languags = [
70+
value.language
71+
for _, value in vars(feature).items()
72+
if isinstance(value, Prompt)
73+
]
74+
return languags[0] if len(languags) > 0 else None

tests/unit/test_analytics.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,13 +21,18 @@ def test_evaluation_event():
2121
from ragas._analytics import EvaluationEvent
2222

2323
evaluation_event = EvaluationEvent(
24-
event_type="evaluation", metrics=["harmfulness"], num_rows=1, evaluation_mode=""
24+
event_type="evaluation",
25+
metrics=["harmfulness"],
26+
num_rows=1,
27+
evaluation_mode="",
28+
language="english",
2529
)
2630

2731
payload = dict(evaluation_event)
2832
assert isinstance(payload.get("user_id"), str)
2933
assert isinstance(payload.get("evaluation_mode"), str)
3034
assert isinstance(payload.get("metrics"), list)
35+
assert isinstance(payload.get("language"), str)
3136

3237

3338
def setup_user_id_filepath(tmp_path, monkeypatch):
@@ -101,6 +106,7 @@ def test_testset_generation_tracking(monkeypatch):
101106
evolution_names=[e.__class__.__name__.lower() for e in distributions],
102107
evolution_percentages=[distributions[e] for e in distributions],
103108
num_rows=10,
109+
language="english",
104110
)
105111

106112
assert dict(testset_event_payload)["evolution_names"] == [

0 commit comments

Comments
 (0)