Commit ab91a06

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent: d96db80 · commit: ab91a06
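This is the kind of commit that pre-commit.ci pushes automatically: it runs the hooks declared in the repository's .pre-commit-config.yaml and commits whatever fixes they produce, in this case the removal of unused imports. As a rough illustration only, a hook configuration along the following lines could yield such fixes; the autoflake hook, tag, and arguments below are assumptions for the sketch, not this repository's actual configuration.

# Hypothetical .pre-commit-config.yaml sketch (illustrative, not taken from this repository)
repos:
  - repo: https://github.com/PyCQA/autoflake
    rev: v2.3.1  # pin to a released tag; the version shown is only an example
    hooks:
      - id: autoflake
        args: ["--remove-all-unused-imports", "--in-place"]

With a setup like this, pre-commit.ci runs the configured hooks on each pull request and, when a hook rewrites files, pushes an auto-fix commit such as this one.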

File tree: 3 files changed (+2 lines, −39 lines)

colossalai/shardformer/modeling/deepseek_v3.py

Lines changed: 1 addition & 24 deletions
@@ -1,20 +1,7 @@
-import warnings
-from typing import List, Optional, Tuple, Union
-
 import numpy as np
 import torch
 import torch.distributed as dist
-import torch.functional as F
 from torch.distributed import ProcessGroup
-from torch.nn import CrossEntropyLoss
-from transformers.cache_utils import Cache, DynamicCache
-from transformers.modeling_attn_mask_utils import (
-    _prepare_4d_causal_attention_mask,
-    _prepare_4d_causal_attention_mask_for_sdpa,
-)
-from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
-from transformers.models.llama.modeling_llama import apply_rotary_pos_emb
-from transformers.utils import is_flash_attn_2_available, logging
 
 from colossalai.lazy import LazyInitContext
 from colossalai.moe._operation import (
@@ -24,18 +11,8 @@
     EPGradScalerOut,
     all_to_all_uneven,
 )
-from colossalai.pipeline.stage_manager import PipelineStageManager
-from colossalai.quantization.fp8 import all_reduce_fp8
-from colossalai.shardformer.layer._operation import (
-    all_to_all_comm,
-    gather_forward_split_backward,
-    linear_with_async_comm,
-    split_forward_gather_backward,
-)
-from colossalai.shardformer.layer.linear import Linear1D_Col, Linear1D_Row, ParallelModule
-from colossalai.shardformer.shard import ShardConfig
+from colossalai.shardformer.layer.linear import ParallelModule
 from colossalai.shardformer.shard.utils import set_tensors_to_none
-from colossalai.tensor.d_tensor.api import shard_rowwise, sharded_tensor_to_existing_param
 from colossalai.tensor.moe_tensor.api import set_moe_tensor_ep_group
 
 
colossalai/shardformer/policies/deepseek_v3.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-from typing import Callable, Dict, List, Union
+from typing import Dict, Union
 
 import torch.nn as nn
 
tests/test_shardformer/test_model/test_shard_deepseek_v3.py

Lines changed: 0 additions & 14 deletions
@@ -1,34 +1,20 @@
-import os
-import shutil
-from copy import deepcopy
 from typing import Tuple
 
 import pytest
 import torch
 import torch.distributed
 import torch.distributed as dist
 from torch.testing import assert_close
-from transformers import AutoConfig, AutoModel
 
 import colossalai
-from colossalai.booster.booster import Booster
 from colossalai.booster.plugin import MoeHybridParallelPlugin
 from colossalai.booster.plugin.moe_hybrid_parallel_plugin import MoeHybridParallelPlugin
-from colossalai.shardformer.layer.utils import Randomizer
-from colossalai.tensor.moe_tensor.api import is_moe_tensor
 from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
 from colossalai.testing.random import seed_all
 from tests.kit.model_zoo import model_zoo
-from tests.test_moe.moe_utils import assert_loose_close, check_model_equal
 from tests.test_shardformer.test_model._utils import (
     build_model_from_hybrid_plugin,
-    check_all_grad_tensors,
-    check_loss,
-    check_output_hidden_state,
-    check_weight,
-    get_grad_tensors_for_check,
     run_forward_backward_with_hybrid_plugin,
-    unwrap_model,
 )
 
 