|
1 |
| -# import os |
2 |
| -# import pytest |
3 |
| -# from unittest.mock import patch |
4 |
| - |
5 |
| -# from langchain.evaluation.hallucination.detector import HallucinationDetector |
6 |
| - |
7 |
| -# # ----------------------------- |
8 |
| -# # Integration Tests (Real HF model) |
9 |
| -# # ----------------------------- |
10 |
| -# skip_if_no_hf = pytest.mark.skipif( |
11 |
| -# "HF_TOKEN" not in os.environ, reason="Hugging Face token not available" |
12 |
| -# ) |
13 |
| - |
14 |
| -# @pytest.fixture(scope="module") |
15 |
| -# @skip_if_no_hf |
16 |
| -# @pytest.mark.requires("integration") |
17 |
| -# def detector_real(): |
18 |
| -# # Only runs locally if HF token is available |
19 |
| -# return HallucinationDetector(model_name="facebook/bart-large-mnli") |
20 |
| - |
21 |
| - |
22 |
| -# @skip_if_no_hf |
23 |
| -# @pytest.mark.requires("integration") |
24 |
| -# def test_extract_claims_integration(detector_real): |
25 |
| -# text = "Barack Obama was the 44th President of the United States. He was born in Kenya." |
26 |
| -# claims = detector_real.extract_claims(text) |
27 |
| -# assert isinstance(claims, list) |
28 |
| -# assert len(claims) == 2 |
29 |
| -# assert "Barack Obama was the 44th President of the United States" in claims |
30 |
| - |
31 |
| - |
32 |
| -# @skip_if_no_hf |
33 |
| -# @pytest.mark.requires("integration") |
34 |
| -# def test_compute_hallucination_rate_integration(detector_real): |
35 |
| -# text = "Barack Obama was the 44th President of the United States. He was born in Kenya." |
36 |
| -# evidence = [ |
37 |
| -# "Barack Obama served as the 44th President of the United States from 2009 to 2017.", |
38 |
| -# "Barack Obama was born in Hawaii, not Kenya." |
39 |
| -# ] |
40 |
| -# result = detector_real.compute_hallucination_rate(text, evidence) |
41 |
| -# unsupported = result["unsupported_claims"] |
42 |
| -# total = result["total_claims"] |
43 |
| -# hallucination_rate = result["hallucination_rate"] |
44 |
| - |
45 |
| -# assert "total_claims" in result |
46 |
| -# assert "unsupported_claims" in result |
47 |
| -# assert "hallucination_rate" in result |
48 |
| -# assert result["total_claims"] == 2 |
49 |
| -# assert unsupported in [1, 2] # Accepts both possible outputs |
50 |
| -# assert 0 <= hallucination_rate <= 1 # Just check it’s a valid rate |
51 |
| - |
52 |
| - |
53 |
| -# # ----------------------------- |
54 |
| -# # Unit Tests (Mocked) |
55 |
| -# # ----------------------------- |
56 |
| -# # Unit test fixture |
57 |
| -# @pytest.fixture(scope="module") |
58 |
| -# def detector_mock(): |
59 |
| -# with patch("langchain.evaluation.hallucination.detector.pipeline") as mock_pipeline: |
60 |
| -# # mock NLI results |
61 |
| -# mock_pipeline.return_value = lambda text: [ |
62 |
| -# {"label": "ENTAILMENT", "score": 0.9} if "President" in text else {"label": "CONTRADICTION", "score": 0.9} |
63 |
| -# ] |
64 |
| -# # Now constructor won't load HF model |
65 |
| -# detector = HallucinationDetector(model_name="any") |
66 |
| -# yield detector |
67 |
| - |
68 |
| - |
69 |
| -# def test_extract_claims_mock(detector_mock): |
70 |
| -# text = "Barack Obama was the 44th President of the United States. He was born in Kenya." |
71 |
| -# claims = detector_mock.extract_claims(text) |
72 |
| -# assert isinstance(claims, list) |
73 |
| -# assert len(claims) == 2 |
74 |
| - |
75 |
| - |
76 |
| -# def test_verify_claim_supported_mock(detector_mock): |
77 |
| -# claim = "Barack Obama was the 44th President of the United States" |
78 |
| -# evidence = "Barack Obama served as the 44th President of the United States from 2009 to 2017." |
79 |
| -# result = detector_mock.verify_claim(claim, evidence) |
80 |
| -# assert result is True |
81 |
| - |
82 |
| - |
83 |
| -# def test_verify_claim_unsupported_mock(detector_mock): |
84 |
| -# claim = "Barack Obama was born in Kenya" |
85 |
| -# evidence = "Barack Obama was born in Hawaii, not Kenya." |
86 |
| -# result = detector_mock.verify_claim(claim, evidence) |
87 |
| -# assert result is False |
88 |
| - |
89 |
| - |
90 |
| -# def test_compute_hallucination_rate_mock(detector_mock): |
91 |
| -# text = "Barack Obama was the 44th President of the United States. He was born in Kenya." |
92 |
| -# evidence = [ |
93 |
| -# "Barack Obama served as the 44th President of the United States from 2009 to 2017.", |
94 |
| -# "Barack Obama was born in Hawaii, not Kenya." |
95 |
| -# ] |
96 |
| -# result = detector_mock.compute_hallucination_rate(text, evidence) |
97 |
| -# assert "total_claims" in result |
98 |
| -# assert "unsupported_claims" in result |
99 |
| -# assert "hallucination_rate" in result |
100 |
| - |
101 |
| - |
102 |
| - |
103 | 1 | import os
|
104 | 2 | import pytest
|
105 | 3 | from unittest.mock import patch
|
|
0 commit comments