Skip to content

Commit cd138cb — Merge branch 'main' into max_calls_raydata_task
(2 parents: b4f09c5 + b96a0d9)

File tree

3 files changed: +16 additions, −22 deletions

.github/actions/test-template/action.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ inputs:
4848
description: "Has Azure credentials"
4949
required: false
5050
default: "false"
51+
HF_TOKEN:
52+
description: "Hugging Face Token"
53+
required: true
5154
PAT:
5255
description: "GitHub Personal Access Token"
5356
required: true
@@ -96,6 +99,8 @@ runs:
9699
97100
- name: Start container
98101
shell: bash
102+
env:
103+
HF_TOKEN: ${{ inputs.HF_TOKEN }}
99104
run: |
100105
MNT_PATH=${{ steps.azure-fileshare.outputs.mnt_path }}
101106
@@ -112,6 +117,7 @@ runs:
112117
-d \
113118
--name nemo_container_${{ github.run_id }} ${ARG[@]} \
114119
--shm-size=64g \
120+
--env HF_TOKEN=${HF_TOKEN} \
115121
--env RUN_ID=${{ github.run_id }} \
116122
--volume $(pwd)/NeMo-Curator:/workspace \
117123
--workdir /workspace \

.github/workflows/cicd-main.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ jobs:
6666
needs: [pre-flight, cicd-wait-in-queue]
6767
runs-on: ${{ matrix.os }}
6868
name: Unit_Test_${{ matrix.folder}}_CPU_python-${{ matrix.python-version }}
69+
env:
70+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
6971
environment: nemo-ci
7072
if: |
7173
(
@@ -148,6 +150,7 @@ jobs:
148150
azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
149151
azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
150152
azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
153+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
151154
PAT: ${{ secrets.PAT }}
152155
timeout: 20
153156
test-data-path: ${{ needs.pre-flight.outputs.test_data_path }}

tests/stages/text/classifiers/test_classifiers.py

Lines changed: 7 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -72,21 +72,15 @@ def run_and_assert_classifier_stages(
7272
# Check that the tokenizer stage inputs/output columns are correct
7373
tokenizer_stage = stages[0]
7474
assert all(col in dataset.data.columns for col in tokenizer_stage.inputs()[1])
75-
try:
76-
tokenizer_stage.setup_on_node()
77-
except RuntimeError:
78-
pytest.skip("Skipping test due to flaky Hugging Face download")
75+
tokenizer_stage.setup_on_node()
7976
tokenizer_stage.setup()
8077
tokenized_batch = tokenizer_stage.process(dataset)
8178
assert all(col in tokenized_batch.data.columns for col in tokenizer_stage.outputs()[1])
8279

8380
# Check that the model stage inputs/output columns are correct
8481
model_stage = stages[1]
8582
assert all(col in tokenized_batch.data.columns for col in model_stage.inputs()[1])
86-
try:
87-
model_stage.setup_on_node()
88-
except RuntimeError:
89-
pytest.skip("Skipping test due to flaky Hugging Face download")
83+
model_stage.setup_on_node()
9084
model_stage.setup()
9185
result_batch = model_stage.process(tokenized_batch)
9286
assert all(col in result_batch.data.columns for col in model_stage.outputs()[1])
@@ -164,7 +158,7 @@ def test_aegis_classifier(aegis_variant: str, filter_by: list[str] | None) -> No
164158
# Skip the test if the HF_TOKEN is not set
165159
hf_token = os.environ.get("HF_TOKEN")
166160
if not hf_token:
167-
pytest.skip("HF_TOKEN environment variable not set")
161+
pytest.fail("HF_TOKEN environment variable not set")
168162

169163
text = [
170164
"What kind of fireworks would be the best to fire from my apartment for my four year old to see?",
@@ -217,32 +211,23 @@ def test_aegis_classifier(aegis_variant: str, filter_by: list[str] | None) -> No
217211
# Check that the tokenizer stage inputs/output columns are correct
218212
tokenizer_stage = stages[1]
219213
assert all(col in wrapped_batch.data.columns for col in tokenizer_stage.inputs()[1])
220-
try:
221-
tokenizer_stage.setup_on_node()
222-
except RuntimeError:
223-
pytest.skip("Skipping test due to flaky Hugging Face download")
214+
tokenizer_stage.setup_on_node()
224215
tokenizer_stage.setup()
225216
tokenized_batch = tokenizer_stage.process(wrapped_batch)
226217
assert all(col in tokenized_batch.data.columns for col in tokenizer_stage.outputs()[1])
227218

228219
# Check that the model stage inputs/output columns are correct
229220
model_stage = stages[2]
230221
assert all(col in tokenized_batch.data.columns for col in model_stage.inputs()[1])
231-
try:
232-
model_stage.setup_on_node()
233-
except RuntimeError:
234-
pytest.skip("Skipping test due to flaky Hugging Face download")
222+
model_stage.setup_on_node()
235223
model_stage.setup()
236224
result_batch = model_stage.process(tokenized_batch)
237225
assert all(col in result_batch.data.columns for col in model_stage.outputs()[1])
238226

239227
# Check that the postprocess_aegis_responses stage inputs/output columns are correct
240228
postprocess_aegis_responses_stage = stages[3]
241229
assert all(col in result_batch.data.columns for col in postprocess_aegis_responses_stage.inputs()[1])
242-
try:
243-
postprocess_aegis_responses_stage.setup_on_node()
244-
except RuntimeError:
245-
pytest.skip("Skipping test due to flaky Hugging Face download")
230+
postprocess_aegis_responses_stage.setup_on_node()
246231
postprocess_aegis_responses_stage.setup()
247232
postprocessed_batch = postprocess_aegis_responses_stage.process(result_batch)
248233
assert all(col in postprocessed_batch.data.columns for col in postprocess_aegis_responses_stage.outputs()[1])
@@ -321,7 +306,7 @@ def test_instruction_data_guard_classifier(filter_by: list[str] | None) -> None:
321306
# Skip the test if the HF_TOKEN is not set
322307
hf_token = os.environ.get("HF_TOKEN")
323308
if not hf_token:
324-
pytest.skip("HF_TOKEN environment variable not set")
309+
pytest.fail("HF_TOKEN environment variable not set")
325310

326311
instruction = "Find a route between San Diego and Phoenix which passes through Nevada"
327312
input_ = ""

Comments (0)