|
1 | 1 | """Modal app to run axolotl GPU tests"""
|
2 | 2 |
|
3 |
| -# pylint: disable=duplicate-code |
4 |
| - |
5 |
| -import os |
6 |
| -import pathlib |
7 |
| -import tempfile |
8 |
| - |
9 |
| -import jinja2 |
10 |
| -import modal |
11 |
| -from jinja2 import select_autoescape |
12 |
| -from modal import App, Image |
13 |
| - |
14 |
# Directory containing this module; the Dockerfile template is loaded from it.
cicd_path = pathlib.Path(__file__).parent.resolve()

# Jinja environment rooted at that directory.
template_loader = jinja2.FileSystemLoader(searchpath=cicd_path)
template_env = jinja2.Environment(
    loader=template_loader,
    autoescape=select_autoescape(),
)
df_template = template_env.get_template("Dockerfile.jinja")

# Each setting is read from the environment variable of the same name and
# falls back to the default listed here when that variable is unset.
_DF_ENV_DEFAULTS = {
    "AXOLOTL_EXTRAS": "",
    "AXOLOTL_ARGS": "",
    "PYTORCH_VERSION": "2.4.1",
    "BASE_TAG": "main-base-py3.11-cu121-2.4.1",
    "CUDA": "121",
    "GITHUB_REF": "refs/heads/main",
    "GITHUB_SHA": "",
    "NIGHTLY_BUILD": "",
    "CODECOV_TOKEN": "",
}
df_args = {
    name: os.environ.get(name, default)
    for name, default in _DF_ENV_DEFAULTS.items()
}
# HF_HOME is fixed rather than read from the environment; it is the same path
# used as the volume mount point in VOLUME_CONFIG below.
df_args["HF_HOME"] = "/workspace/data/huggingface-cache/hub"

dockerfile_contents = df_template.render(**df_args)

# Write the rendered Dockerfile into a fresh temporary directory. Nothing in
# this module removes that directory afterwards.
temp_dir = tempfile.mkdtemp()
_dockerfile_path = pathlib.Path(temp_dir) / "Dockerfile"
with open(_dockerfile_path, "w", encoding="utf-8") as f:
    f.write(dockerfile_contents)

# Image built from the rendered Dockerfile; the same settings that were used
# to render the template are also set as environment variables on the image.
cicd_image = Image.from_dockerfile(
    _dockerfile_path,
    context_mount=None,
    force_build=True,
    gpu="A10G",
).env(df_args)

app = App("Axolotl CI/CD", secrets=[])

# Named volume (created on first use) mounted at the HF_HOME path.
hf_cache_volume = modal.Volume.from_name(
    "axolotl-ci-hf-hub-cache",
    create_if_missing=True,
)
VOLUME_CONFIG = {
    "/workspace/data/huggingface-cache/hub": hf_cache_volume,
}

# GPU count comes from the N_GPUS environment variable and defaults to 1.
N_GPUS = int(os.environ.get("N_GPUS", 1))
GPU_CONFIG = modal.gpu.L40S(count=N_GPUS)
59 |
| - |
60 |
| - |
61 |
def run_cmd(cmd: str, run_folder: str) -> None:
    """Run a command inside ``run_folder`` and exit the process if it fails.

    The command string is tokenized with shell-style quoting rules (no shell
    is spawned), so a quoted argument that contains spaces stays one argument.

    Args:
        cmd: Command line to execute, e.g. ``"pytest -v tests/"``.
        run_folder: Working directory for the child process.

    Raises:
        ValueError: If ``cmd`` contains no tokens or has unbalanced quotes.
        SystemExit: Carrying the child's return code when it is non-zero.
    """
    import shlex
    import subprocess  # nosec
    import sys

    argv = shlex.split(cmd)
    if not argv:
        # Fail with a clear message instead of an obscure error from Popen.
        raise ValueError("run_cmd() requires a non-empty command")

    # Propagate errors from subprocess.
    if exit_code := subprocess.call(argv, cwd=run_folder):  # nosec
        # sys.exit is always available; the bare exit() helper is injected by
        # the site module and is not guaranteed to exist.
        sys.exit(exit_code)
| 3 | +from .single_gpu import GPU_CONFIG, VOLUME_CONFIG, app, cicd_image, run_cmd |
67 | 4 |
|
68 | 5 |
|
69 | 6 | @app.function(
|
|
0 commit comments