3535 MetricWithLLM ,
3636 MultiTurnMetric ,
3737 SingleTurnMetric ,
38- is_reproducable ,
3938)
4039from ragas .run_config import RunConfig
4140from ragas .utils import convert_v1_to_v2_dataset
@@ -60,7 +59,6 @@ def evaluate(
6059 llm : t .Optional [BaseRagasLLM | LangchainLLM ] = None ,
6160 embeddings : t .Optional [BaseRagasEmbeddings | LangchainEmbeddings ] = None ,
6261 callbacks : Callbacks = None ,
63- in_ci : bool = False ,
6462 run_config : t .Optional [RunConfig ] = None ,
6563 token_usage_parser : t .Optional [TokenUsageParser ] = None ,
6664 raise_exceptions : bool = False ,
@@ -93,10 +91,6 @@ def evaluate(
9391 Lifecycle Langchain Callbacks to run during evaluation. Check the
9492 [langchain documentation](https://python.langchain.com/docs/modules/callbacks/)
9593 for more information.
96- in_ci: bool
97- Whether the evaluation is running in CI or not. If set to True then some
98- metrics will be run to increase the reproducability of the evaluations. This
99- will increase the runtime and cost of evaluations. Default is False.
10094 run_config: RunConfig, optional
10195 Configuration for runtime settings like timeout and retries. If not provided,
10296 default values are used.
@@ -193,7 +187,6 @@ def evaluate(
193187 binary_metrics = []
194188 llm_changed : t .List [int ] = []
195189 embeddings_changed : t .List [int ] = []
196- reproducable_metrics : t .List [int ] = []
197190 answer_correctness_is_set = - 1
198191
199192 # loop through the metrics and perform initializations
@@ -214,12 +207,6 @@ def evaluate(
214207 if isinstance (metric , AnswerCorrectness ):
215208 if metric .answer_similarity is None :
216209 answer_correctness_is_set = i
217- # set reproducibility for metrics if in CI
218- if in_ci and is_reproducable (metric ):
219- if metric .reproducibility == 1 : # type: ignore
220- # only set a value if not already set
221- metric .reproducibility = 3 # type: ignore
222- reproducable_metrics .append (i )
223210
224211 # init all the models
225212 metric .init (run_config )
@@ -354,9 +341,6 @@ def evaluate(
354341 AnswerCorrectness , metrics [answer_correctness_is_set ]
355342 ).answer_similarity = None
356343
357- for i in reproducable_metrics :
358- metrics [i ].reproducibility = 1 # type: ignore
359-
360344 # flush the analytics batcher
361345 from ragas ._analytics import _analytics_batcher
362346
0 commit comments