+import os
+import random
from functools import partial

+import numpy as np
import pytest
import torch
import torch.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel as DDP
-from utils.util import set_seed, tensor_equal, tensor_shard_equal
from vit import get_training_components

import colossalai
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.nn.parallel.data_parallel import ColoDDP
-from colossalai.tensor import ColoParameter, ComputePattern, ComputeSpec, DistSpecManager, ProcessGroup, ShardSpec
+from colossalai.tensor import ComputePattern, ComputeSpec, DistSpecManager, ProcessGroup, ShardSpec
from colossalai.testing import rerun_if_address_is_in_use
from colossalai.utils import free_port
from colossalai.utils.cuda import get_current_device
from colossalai.utils.model.colo_init_context import ColoInitContext


+def set_seed(seed):
+    random.seed(seed)
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.backends.cudnn.deterministic = True
+
+
+def tensor_equal(A, B):
+    return torch.allclose(A, B, rtol=1e-3, atol=1e-1)
+
+
+def tensor_shard_equal(tensor: torch.Tensor, shard: torch.Tensor):
+    """Check that `shard` matches the local chunk of the full `tensor` on this rank."""
+    assert tensor.ndim == shard.ndim
+    if tensor.shape == shard.shape:
+        return tensor_equal(tensor, shard)
+    else:
+        dims_not_eq = torch.nonzero(torch.tensor(tensor.shape) != torch.tensor(shard.shape))
+        if dims_not_eq.numel() == 1:
+            # 1D shard: chunk the full tensor along the mismatching dim and compare the local chunk
+            dim = dims_not_eq.item()
+            world_size = gpc.get_world_size(ParallelMode.PARALLEL_1D)
+            rank = gpc.get_local_rank(ParallelMode.PARALLEL_1D)
+            return tensor_equal(tensor.chunk(world_size, dim)[rank], shard)
+        else:
+            raise NotImplementedError("only 1D sharding is supported")
+
+
# Only Linear layers use a 1d_row split, because the Linear weight is transposed during computation.
# All other layers use a 1d_col split.
# Layernorm is not supported for now.
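As a rough, standalone illustration of the split described in the comments above (a minimal torch-only sketch, not part of the diff; the 2-way world size and the tensor shapes are assumptions): under this reading, a 1d_row split of a Linear partitions the weight along its input dimension, each rank computes a partial output from its slice, and the partials sum to the full result. The final loop mirrors what tensor_shard_equal does once it has located the sharded dimension.

import torch

# Assumed 2-way tensor parallelism, for illustration only.
world_size = 2
x = torch.randn(3, 8)    # (batch, in_features)
w = torch.randn(4, 8)    # (out_features, in_features), the layout nn.Linear stores

full_out = x @ w.t()

# Split activations and weight along in_features (the "row" dim of the transposed weight).
x_shards = x.chunk(world_size, dim=1)
w_shards = w.chunk(world_size, dim=1)
partial_outs = [xs @ ws.t() for xs, ws in zip(x_shards, w_shards)]
assert torch.allclose(full_out, sum(partial_outs), atol=1e-5)

# Per-rank check in the spirit of tensor_shard_equal: the full tensor's local chunk equals the shard.
for rank in range(world_size):
    assert torch.allclose(w.chunk(world_size, dim=1)[rank], w_shards[rank])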