Commit 46ed5d8

[ci] update ci (#6254)
* fix for async io
* test for upgrading transformers
* add ci machine
* fix
* fix
* fix
* fix
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* Update test_fp16_torch.py
* Update build_on_pr.yml
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* fix
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* fix
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* fix
* fix
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* fix
* fiux
* fix
* fix
* fix

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 7ecdf9a commit 46ed5d8

9 files changed (+20, -11 lines)

.github/workflows/build_on_pr.yml

Lines changed: 3 additions & 3 deletions
@@ -87,10 +87,10 @@ jobs:
     name: Build and Test Colossal-AI
     needs: detect
     if: needs.detect.outputs.anyLibraryFileChanged == 'true'
-    runs-on: [self-hosted, gpu]
+    runs-on: ubuntu-latest
     container:
-      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
-      options: --gpus all --rm -v /dev/shm -v /data/scratch:/data/scratch
+      image: image-cloud.luchentech.com/hpcaitech/pytorch-cuda:2.2.2-12.1.0
+      options: --gpus all --shm-size=2g --rm -v /dev/shm -v /data/scratch:/data/scratch
     timeout-minutes: 90
     defaults:
       run:
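The notable addition here is --shm-size=2g: PyTorch DataLoader workers and NCCL both stage tensors in the container's shared-memory mount, and Docker's small default is easily exhausted by multi-GPU tests. A quick way to confirm the effective size from inside the container (an illustrative check, not part of this commit):

    # Illustrative only: report the size of the shared-memory mount that
    # the --shm-size=2g container option is expected to provide.
    import shutil

    total, used, free = shutil.disk_usage("/dev/shm")
    print(f"/dev/shm: {total / 2**30:.1f} GiB total, {free / 2**30:.1f} GiB free")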

tests/test_fp8/test_all_to_all_single.py

Lines changed: 3 additions & 1 deletion
@@ -6,9 +6,10 @@
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import all_to_all_single_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(4,), (1, 8, 16), (4, 8, 16)])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
 @parameterize("async_op", [True, False])
@@ -24,6 +25,7 @@ def check_all2all(shape, dtype, async_op):
     assert_close(output, output_fp8, rtol=0.1, atol=0.1)
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(8, 8, 16)])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
 @parameterize("async_op", [True, False])

tests/test_fp8/test_fp8_all_to_all.py

Lines changed: 2 additions & 1 deletion
@@ -6,9 +6,10 @@
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import _all_to_all_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(16, 8, 4)])
 @parameterize("scatter_dim", [0, 1, 2])
 @parameterize("dtype", [torch.bfloat16, torch.float16])

tests/test_fp8/test_fp8_all_to_all_single.py

Lines changed: 2 additions & 1 deletion
@@ -6,11 +6,12 @@
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import all_to_all_single_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 dist.all_to_all_single
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(4), (8, 7), (4, 8, 16)])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
 @parameterize("fp8_format", ["e4m3", "e5m2"])

tests/test_fp8/test_fp8_allgather.py

Lines changed: 2 additions & 1 deletion
@@ -6,9 +6,10 @@
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import _all_gather_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 
+@clear_cache_before_run()
 @parameterize(
     "shape",
     [(3, 7, 16)],

tests/test_fp8/test_fp8_allreduce.py

Lines changed: 2 additions & 1 deletion
@@ -5,7 +5,7 @@
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import all_reduce_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 
 @parameterize(
@@ -20,6 +20,7 @@
         (8,),
     ],
 )
+@clear_cache_before_run()
 @parameterize("dtype", [torch.float16, torch.bfloat16])
 @parameterize("fp8_format", ["e4m3", "e5m2"])
 @parameterize("async_op", [True, False])

tests/test_fp8/test_fp8_cast.py

Lines changed: 2 additions & 1 deletion
@@ -3,9 +3,10 @@
 
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import cast_from_fp8, cast_from_fp8_pipeline, cast_to_fp8, cast_to_fp8_pipeline
-from colossalai.testing import parameterize
+from colossalai.testing import clear_cache_before_run, parameterize
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(100, 10), (10, 100), (3, 7), (2, 1), (1, 2), (2, 2), (4, 2), (5,), (4,), (2,)])
 @parameterize("dtype", [torch.bfloat16, torch.float16, torch.float32])
 @parameterize("fp8_format", ["e4m3", "e5m2"])

tests/test_fp8/test_fp8_fsdp_comm_hook.py

Lines changed: 2 additions & 1 deletion
@@ -8,7 +8,7 @@
 from torch.testing import assert_close
 
 from colossalai import launch
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 # example modified from https://pytorch.org/tutorials/intermediate/ddp_tutorial.html
 
@@ -28,6 +28,7 @@ def forward(self, x):
         return self.net2(self.relu(self.net1(x)))
 
 
+@clear_cache_before_run()
 @parameterize("mode", ["grad", "params"])
 def run_model(mode):
     rank = dist.get_rank()
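For readers without the file open: the model around this hunk is the small two-layer network from the linked DDP tutorial, roughly the following (reconstructed from the tutorial; the test file notes it is a modified version):

    import torch.nn as nn


    class ToyModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.net1 = nn.Linear(10, 10)
            self.relu = nn.ReLU()
            self.net2 = nn.Linear(10, 5)

        def forward(self, x):
            return self.net2(self.relu(self.net1(x)))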

tests/test_fp8/test_fp8_reduce_scatter.py

Lines changed: 2 additions & 1 deletion
@@ -6,9 +6,10 @@
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import reduce_scatter_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(16, 8, 4)])
 @parameterize("scatter_dim", [0, 1, 2])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
