Skip to content

Commit f720b6e

Browse files
authored
repop cache (axolotl-ai-cloud#2639)
* repop cache
* pre-cache as a step
* fix the name
* add reason for pytest skipif
* restore pytorch matrix
* remove max-parallel now that we've optimized this a bit
1 parent a980618 commit f720b6e

File tree

2 files changed: 115 additions and 45 deletions.

.github/workflows/tests.yml

Lines changed: 85 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -44,17 +44,19 @@ jobs:
4444
env:
4545
SKIP: no-commit-to-branch
4646

47-
pytest:
48-
name: PyTest
47+
preload-cache:
48+
name: Preload HF cache
4949
runs-on: ubuntu-latest
5050
strategy:
5151
fail-fast: false
52-
max-parallel: 2
5352
matrix:
5453
python_version: ["3.11"]
55-
pytorch_version: ["2.5.1", "2.6.0", "2.7.0"]
54+
pytorch_version: ["2.6.0"]
5655
timeout-minutes: 20
5756

57+
env:
58+
AXOLOTL_IS_CI_CACHE_PRELOAD: "1"
59+
5860
steps:
5961
- name: Check out repository code
6062
uses: actions/checkout@v4
@@ -105,9 +107,7 @@ jobs:
105107
106108
- name: Run tests
107109
run: |
108-
pytest -v -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ tests/ --cov=axolotl --cov-report=xml
109-
pytest -v tests/patched/ --cov=axolotl --cov-append --cov-report=xml
110-
pytest -v tests/cli/ --cov=axolotl --cov-append --cov-report=xml
110+
pytest -v tests/conftest.py
111111
112112
- name: Upload coverage to Codecov
113113
uses: codecov/codecov-action@v5
@@ -130,12 +130,89 @@ jobs:
130130
/home/runner/.cache/huggingface/hub/models--*
131131
key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }}
132132

133+
pytest:
134+
name: PyTest
135+
runs-on: ubuntu-latest
136+
needs: [preload-cache]
137+
strategy:
138+
fail-fast: false
139+
matrix:
140+
python_version: ["3.11"]
141+
pytorch_version: ["2.5.1", "2.6.0", "2.7.0"]
142+
timeout-minutes: 20
143+
144+
steps:
145+
- name: Check out repository code
146+
uses: actions/checkout@v4
147+
148+
- name: Restore HF cache
149+
id: hf-cache-restore
150+
uses: actions/cache/restore@v4
151+
with:
152+
path: |
153+
/home/runner/.cache/huggingface/hub/datasets--*
154+
/home/runner/.cache/huggingface/hub/models--*
155+
key: ${{ runner.os }}-hf-hub-cache-v2
156+
157+
- name: Setup Python
158+
uses: actions/setup-python@v5
159+
with:
160+
python-version: ${{ matrix.python_version }}
161+
cache: 'pip' # caching pip dependencies
162+
163+
- name: upgrade pip
164+
run: |
165+
pip3 install --upgrade pip
166+
pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel
167+
168+
- name: Install PyTorch
169+
run: |
170+
pip3 install torch==${{ matrix.pytorch_version }}
171+
172+
- name: Install dependencies
173+
run: |
174+
pip3 show torch
175+
pip3 install --no-build-isolation -U -e .
176+
python scripts/unsloth_install.py | sh
177+
python scripts/cutcrossentropy_install.py | sh
178+
pip3 install -r requirements-dev.txt -r requirements-tests.txt
179+
180+
- name: Make sure PyTorch version wasn't clobbered
181+
run: |
182+
python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"
183+
184+
- name: Ensure axolotl CLI was installed
185+
run: |
186+
axolotl --help
187+
188+
- name: Pre-Download dataset fixture
189+
run: |
190+
huggingface-cli download --repo-type=dataset axolotl-ai-internal/axolotl-oss-dataset-fixtures
191+
192+
- name: Run tests
193+
run: |
194+
pytest -v -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ tests/ --cov=axolotl --cov-report=xml
195+
pytest -v tests/patched/ --cov=axolotl --cov-append --cov-report=xml
196+
pytest -v tests/cli/ --cov=axolotl --cov-append --cov-report=xml
197+
198+
- name: Upload coverage to Codecov
199+
uses: codecov/codecov-action@v5
200+
with:
201+
token: ${{ secrets.CODECOV_TOKEN }}
202+
files: ./coverage.xml
203+
flags: unittests,pytorch-${{ matrix.pytorch_version }}
204+
fail_ci_if_error: false
205+
206+
- name: cleanup pip cache
207+
run: |
208+
find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
209+
133210
pytest-sdist:
134211
name: PyTest from Source Dist
135212
runs-on: ubuntu-latest
213+
needs: [preload-cache]
136214
strategy:
137215
fail-fast: false
138-
max-parallel: 1
139216
matrix:
140217
python_version: ["3.11"]
141218
pytorch_version: ["2.5.1", "2.6.0", "2.7.0"]
@@ -199,15 +276,6 @@ jobs:
199276
run: |
200277
find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
201278
202-
- name: Save HF cache
203-
id: hf-cache
204-
uses: actions/cache/save@v4
205-
with:
206-
path: |
207-
/home/runner/.cache/huggingface/hub/datasets--*
208-
/home/runner/.cache/huggingface/hub/models--*
209-
key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }}
210-
211279
docker-e2e-tests-1st:
212280
if: ${{ ! contains(github.event.commits[0].message, '[skip e2e]') && github.repository_owner == 'axolotl-ai-cloud' }}
213281
# this job needs to be run on self-hosted GPU runners...

tests/conftest.py

Lines changed: 30 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
import functools
66
import importlib
7+
import os
78
import shutil
89
import sys
910
import tempfile
@@ -529,31 +530,32 @@ def dataset_fozziethebeat_alpaca_messages_2k_dpo_test_rev_ea82cff(
529530

530531

531532
# # pylint: disable=redefined-outer-name,unused-argument
532-
# def test_load_fixtures(
533-
# download_smollm2_135m_model,
534-
# download_llama_68m_random_model,
535-
# download_qwen_2_5_half_billion_model,
536-
# download_tatsu_lab_alpaca_dataset,
537-
# download_mhenrichsen_alpaca_2k_dataset,
538-
# download_mhenrichsen_alpaca_2k_w_revision_dataset,
539-
# download_mlabonne_finetome_100k_dataset,
540-
# download_argilla_distilabel_capybara_dpo_7k_binarized_dataset,
541-
# download_argilla_ultrafeedback_binarized_preferences_cleaned_dataset,
542-
# download_fozzie_alpaca_dpo_dataset,
543-
# download_arcee_ai_distilabel_intel_orca_dpo_pairs_dataset,
544-
# download_argilla_dpo_pairs_dataset,
545-
# download_tiny_shakespeare_dataset,
546-
# download_deepseek_model_fixture,
547-
# download_huggyllama_model_fixture,
548-
# download_llama_1b_model_fixture,
549-
# download_llama3_8b_model_fixture,
550-
# download_llama3_8b_instruct_model_fixture,
551-
# download_phi_35_mini_model_fixture,
552-
# download_phi_3_medium_model_fixture,
553-
# download_mistral_7b_model_fixture,
554-
# download_gemma_2b_model_fixture,
555-
# download_gemma2_9b_model_fixture,
556-
# download_mlx_mistral_7b_model_fixture,
557-
# download_llama2_model_fixture,
558-
# ):
559-
# pass
533+
@pytest.mark.skipif(
534+
os.environ.get("AXOLOTL_IS_CI_CACHE_PRELOAD", "-1") != "1",
535+
reason="Not running in CI cache preload",
536+
)
537+
def test_load_fixtures(
538+
download_smollm2_135m_model,
539+
download_qwen_2_5_half_billion_model,
540+
download_tatsu_lab_alpaca_dataset,
541+
download_mhenrichsen_alpaca_2k_dataset,
542+
download_mhenrichsen_alpaca_2k_w_revision_dataset,
543+
download_mlabonne_finetome_100k_dataset,
544+
download_argilla_distilabel_capybara_dpo_7k_binarized_dataset,
545+
download_arcee_ai_distilabel_intel_orca_dpo_pairs_dataset,
546+
download_argilla_dpo_pairs_dataset,
547+
download_tiny_shakespeare_dataset,
548+
download_deepseek_model_fixture,
549+
download_huggyllama_model_fixture,
550+
download_llama_1b_model_fixture,
551+
download_llama3_8b_model_fixture,
552+
download_llama3_8b_instruct_model_fixture,
553+
download_phi_35_mini_model_fixture,
554+
download_phi_3_medium_model_fixture,
555+
download_mistral_7b_model_fixture,
556+
download_gemma_2b_model_fixture,
557+
download_gemma2_9b_model_fixture,
558+
download_mlx_mistral_7b_model_fixture,
559+
download_llama2_model_fixture,
560+
):
561+
pass

0 commit comments

Comments
 (0)