Skip to content

Commit 60e4369

Browse files
authored
[evaluation] ci,tests,fix: Improve reliability of nltk data download in CI (Azure#38059)
* refactor: Move meteor nltk data init to ensure_nltk_data_downloaded
* tests: Add a fixture that ensures that nltk data is downloaded. Allows us to fail fast if something goes wrong.
* style: Run isort
1 parent 605f72e commit 60e4369

File tree

15 files changed

+61
-43
lines changed

15 files changed

+61
-43
lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/utils.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,19 @@ def get_harm_severity_level(harm_score: int) -> Union[str, float]:
4848

4949
def ensure_nltk_data_downloaded():
5050
"""Download NLTK data packages if not already downloaded."""
51+
nltk_data = [
52+
("wordnet", "corpora/wordnet.zip"),
53+
("perluniprops", "misc/perluniprops.zip"),
54+
("punkt", "tokenizers/punkt.zip"),
55+
("punkt_tab", "tokenizers/punkt_tab.zip"),
56+
]
57+
5158
with _nltk_data_download_lock:
52-
try:
53-
from nltk.tokenize.nist import NISTTokenizer # pylint: disable=unused-import
54-
except LookupError:
55-
nltk.download("perluniprops")
56-
nltk.download("punkt")
57-
nltk.download("punkt_tab")
59+
for _id, resource_name in nltk_data:
60+
try:
61+
nltk.find(resource_name)
62+
except LookupError:
63+
nltk.download(_id)
5864

5965

6066
def nltk_tokenize(text: str) -> List[str]:

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_meteor/_meteor.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
# ---------------------------------------------------------
22
# Copyright (c) Microsoft Corporation. All rights reserved.
33
# ---------------------------------------------------------
4-
import nltk
54
from nltk.translate.meteor_score import meteor_score
65
from promptflow._utils.async_utils import async_run_allowing_running_loop
76

8-
from azure.ai.evaluation._common.utils import nltk_tokenize
7+
from azure.ai.evaluation._common.utils import nltk_tokenize, ensure_nltk_data_downloaded
98

109

1110
class _AsyncMeteorScoreEvaluator:
@@ -14,10 +13,7 @@ def __init__(self, alpha: float = 0.9, beta: float = 3.0, gamma: float = 0.5):
1413
self._beta = beta
1514
self._gamma = gamma
1615

17-
try:
18-
nltk.find("corpora/wordnet.zip")
19-
except LookupError:
20-
nltk.download("wordnet")
16+
ensure_nltk_data_downloaded()
2117

2218
async def __call__(self, *, ground_truth: str, response: str, **kwargs):
2319
reference_tokens = nltk_tokenize(ground_truth)

sdk/evaluation/azure-ai-evaluation/tests/conftest.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,27 @@
99
from unittest.mock import patch
1010

1111
import pytest
12-
from azure.core.credentials import TokenCredential
12+
from ci_tools.variables import in_ci
1313
from devtools_testutils import add_body_key_sanitizer, add_general_regex_sanitizer, add_header_regex_sanitizer, is_live
1414
from devtools_testutils.config import PROXY_URL
1515
from devtools_testutils.fake_credentials import FakeTokenCredential
1616
from devtools_testutils.helpers import get_recording_id
1717
from devtools_testutils.proxy_testcase import transform_request
18+
from filelock import FileLock
1819
from promptflow.client import PFClient
19-
from azure.ai.evaluation import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
2020
from promptflow.executor._line_execution_process_pool import _process_wrapper
2121
from promptflow.executor._process_manager import create_spawned_fork_process_manager
2222
from pytest_mock import MockerFixture
2323

24+
from azure.ai.evaluation import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
25+
from azure.ai.evaluation._common.utils import ensure_nltk_data_downloaded
26+
from azure.core.credentials import TokenCredential
27+
2428
# Import of optional packages
2529
AZURE_INSTALLED = True
2630
try:
2731
import jwt
32+
2833
from azure.ai.ml._ml_client import MLClient
2934
except ImportError:
3035
AZURE_INSTALLED = False
@@ -42,6 +47,21 @@ class SanitizedValues(str, Enum):
4247
USER_OBJECT_ID = "00000000-0000-0000-0000-000000000000"
4348

4449

50+
@pytest.fixture(scope="session", autouse=True)
51+
def ensure_nltk_data() -> None:
52+
"""Ensures that nltk data has been downloaded."""
53+
54+
def try_download_nltk():
55+
for _ in range(3):
56+
ensure_nltk_data_downloaded()
57+
58+
if in_ci():
59+
with FileLock(Path.home() / "azure_ai_evaluation_nltk_data.txt"):
60+
try_download_nltk()
61+
else:
62+
try_download_nltk()
63+
64+
4565
@pytest.fixture(scope="session", autouse=True)
4666
def add_sanitizers(
4767
test_proxy,

sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from ci_tools.variables import in_ci
1111

1212
from azure.ai.evaluation import (
13-
evaluate,
1413
ContentSafetyEvaluator,
1514
F1ScoreEvaluator,
1615
FluencyEvaluator,

sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_metrics_upload.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
from devtools_testutils import is_live
99
from promptflow.tracing import _start_trace
1010

11+
from azure.ai.evaluation import F1ScoreEvaluator
1112
from azure.ai.evaluation._evaluate import _utils as ev_utils
1213
from azure.ai.evaluation._evaluate._eval_run import EvalRun
1314
from azure.ai.evaluation._evaluate._evaluate import evaluate
14-
from azure.ai.evaluation import F1ScoreEvaluator
1515

1616

1717
@pytest.fixture

sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_sim_and_eval.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,19 @@
1+
import asyncio
12
import json
23
import os
34
import pathlib
45
import time
5-
from typing import Dict, List, Any
6-
import asyncio
6+
from typing import Any, Dict, List
7+
78
import pandas as pd
89
import pytest
910
import requests
1011
from ci_tools.variables import in_ci
1112
from devtools_testutils import is_live
12-
from azure.identity import DefaultAzureCredential
13-
14-
from azure.ai.evaluation import (
15-
evaluate,
16-
ProtectedMaterialEvaluator,
17-
ViolenceEvaluator,
18-
)
1913

14+
from azure.ai.evaluation import ProtectedMaterialEvaluator, ViolenceEvaluator, evaluate
2015
from azure.ai.evaluation.simulator import AdversarialScenario, AdversarialSimulator
16+
from azure.identity import DefaultAzureCredential
2117

2218

2319
@pytest.fixture

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_batch_run_context.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
from promptflow.client import PFClient
66

77
from azure.ai.evaluation._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT
8+
from azure.ai.evaluation._evaluate._batch_run import CodeClient, EvalRunContext, ProxyClient
89
from azure.ai.evaluation._user_agent import USER_AGENT
9-
from azure.ai.evaluation._evaluate._batch_run import EvalRunContext, CodeClient, ProxyClient
1010

1111

1212
@pytest.fixture

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_built_in_evaluator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
import pytest
44

5+
from azure.ai.evaluation import FluencyEvaluator, RetrievalEvaluator, SimilarityEvaluator
56
from azure.ai.evaluation._exceptions import EvaluationException
6-
from azure.ai.evaluation import FluencyEvaluator, SimilarityEvaluator, RetrievalEvaluator
77

88

99
async def quality_async_mock():

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_content_safety_defect_rate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
import pandas as pd
55
import pytest
66

7-
from azure.ai.evaluation._evaluate._evaluate import _aggregate_metrics
87
from azure.ai.evaluation import ContentSafetyEvaluator
8+
from azure.ai.evaluation._evaluate._evaluate import _aggregate_metrics
99

1010

1111
def _get_file(name):

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_content_safety_rai_script.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66
from unittest.mock import MagicMock, patch
77

88
import pytest
9-
from azure.core.exceptions import HttpResponseError
10-
from azure.core.rest import AsyncHttpResponse, HttpRequest
11-
from azure.identity import DefaultAzureCredential
129

1310
from azure.ai.evaluation._common.constants import EvaluationMetrics, HarmSeverityLevel, RAIService
1411
from azure.ai.evaluation._common.rai_service import (
@@ -21,6 +18,9 @@
2118
parse_response,
2219
submit_request,
2320
)
21+
from azure.core.exceptions import HttpResponseError
22+
from azure.core.rest import AsyncHttpResponse, HttpRequest
23+
from azure.identity import DefaultAzureCredential
2424

2525

2626
@pytest.fixture

0 commit comments

Comments
 (0)