Skip to content

Commit 6c1f449

Browse files
authored
[BugFix] Try except sequence parallel utils (#8189) (#8274)
* try except sp
* fix sp import
1 parent 4749af3 commit 6c1f449

File tree

9 files changed: 84 additions (+84) and 52 deletions (-52)

9 files changed

+84
-52
lines changed

model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/hybrid_model.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,16 @@
4848
MinLengthLogitsProcessor,
4949
RepetitionPenaltyLogitsProcessor,
5050
)
51-
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
52-
ColumnSequenceParallelLinear,
53-
GatherOp,
54-
RowSequenceParallelLinear,
55-
ScatterOp,
56-
mark_as_sequence_parallel_parameter,
57-
)
51+
try:
52+
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
53+
ColumnSequenceParallelLinear,
54+
GatherOp,
55+
RowSequenceParallelLinear,
56+
ScatterOp,
57+
mark_as_sequence_parallel_parameter,
58+
)
59+
except:
60+
pass
5861

5962
from paddlenlp.transformers.segment_parallel_utils import ReshardLayer
6063

model_zoo/gpt-3/ppfleetx/models/language_model/language_module.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,12 @@
2424
from ppfleetx.core.module.basic_module import BasicModule
2525
from ppfleetx.data.tokenizers import GPTTokenizer
2626
from ppfleetx.distributed.apis import env
27-
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
28-
register_sequence_parallel_allreduce_hooks,
29-
)
27+
try:
28+
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
29+
register_sequence_parallel_allreduce_hooks,
30+
)
31+
except:
32+
pass
3033
from ppfleetx.utils.log import logger
3134

3235
# TODO(haohongxiang): to solve the problem of cross-reference

paddlenlp/transformers/__init__.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,20 @@
2929
from .feature_extraction_utils import BatchFeature, FeatureExtractionMixin
3030
from .image_processing_utils import ImageProcessingMixin
3131
from .attention_utils import create_bigbird_rand_mask_idx_list
32-
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
33-
GatherOp,
34-
ScatterOp,
35-
AllGatherOp,
36-
ReduceScatterOp,
37-
ColumnSequenceParallelLinear,
38-
RowSequenceParallelLinear,
39-
mark_as_sequence_parallel_parameter,
40-
register_sequence_parallel_allreduce_hooks,
41-
)
32+
33+
try:
34+
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
35+
GatherOp,
36+
ScatterOp,
37+
AllGatherOp,
38+
ReduceScatterOp,
39+
ColumnSequenceParallelLinear,
40+
RowSequenceParallelLinear,
41+
mark_as_sequence_parallel_parameter,
42+
register_sequence_parallel_allreduce_hooks,
43+
)
44+
except:
45+
pass
4246
from .export import export_model
4347

4448
# isort: split

paddlenlp/transformers/gpt/modeling.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,17 @@
2929
from paddle.distributed import fleet
3030
from paddle.distributed.fleet.meta_parallel import get_rng_state_tracker
3131
from paddle.distributed.fleet.utils import recompute
32-
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
33-
ColumnSequenceParallelLinear,
34-
GatherOp,
35-
RowSequenceParallelLinear,
36-
ScatterOp,
37-
mark_as_sequence_parallel_parameter,
38-
)
32+
33+
try:
34+
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
35+
ColumnSequenceParallelLinear,
36+
GatherOp,
37+
RowSequenceParallelLinear,
38+
ScatterOp,
39+
mark_as_sequence_parallel_parameter,
40+
)
41+
except:
42+
pass
3943
from paddle.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
4044
from paddle.utils import try_import
4145

paddlenlp/transformers/gpt/modeling_auto.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,14 @@
3030
from paddle.distributed import fleet
3131
from paddle.distributed.fleet.meta_parallel import get_rng_state_tracker
3232
from paddle.distributed.fleet.utils import recompute
33-
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
34-
ScatterOp,
35-
mark_as_sequence_parallel_parameter,
36-
)
33+
34+
try:
35+
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
36+
ScatterOp,
37+
mark_as_sequence_parallel_parameter,
38+
)
39+
except:
40+
pass
3741

3842
from ...utils.converter import StateDictNameMapping
3943
from .. import PretrainedModel, register_base_model

paddlenlp/transformers/gpt/modeling_pp.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,13 @@
1919
SharedLayerDesc,
2020
)
2121
from paddle.distributed.fleet.utils import recompute
22-
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
23-
mark_as_sequence_parallel_parameter,
24-
)
22+
23+
try:
24+
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
25+
mark_as_sequence_parallel_parameter,
26+
)
27+
except:
28+
pass
2529

2630
from paddlenlp.transformers.model_utils import PipelinePretrainedModel
2731

paddlenlp/transformers/llama/modeling.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,16 @@ def swiglu(x, y=None):
4545
return F.silu(x) * y
4646

4747

48-
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
49-
ColumnSequenceParallelLinear,
50-
GatherOp,
51-
RowSequenceParallelLinear,
52-
ScatterOp,
53-
mark_as_sequence_parallel_parameter,
54-
)
48+
try:
49+
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
50+
ColumnSequenceParallelLinear,
51+
GatherOp,
52+
RowSequenceParallelLinear,
53+
ScatterOp,
54+
mark_as_sequence_parallel_parameter,
55+
)
56+
except:
57+
pass
5558
from paddle.utils import try_import
5659

5760
from paddlenlp.transformers.conversion_utils import (

paddlenlp/transformers/mc2_seqence_parallel_linear.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,14 @@
2323

2424
from paddle import distributed as dist
2525
from paddle.autograd import PyLayer
26-
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
27-
ColumnSequenceParallelLinear,
28-
RowSequenceParallelLinear,
29-
)
26+
27+
try:
28+
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
29+
ColumnSequenceParallelLinear,
30+
RowSequenceParallelLinear,
31+
)
32+
except:
33+
pass
3034

3135
__all_gather_recomputation__ = False
3236
if int(os.getenv("MC2_Recompute", 0)):

paddlenlp/transformers/mixtral/modeling.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,16 @@
3333
except ImportError:
3434
fused_rotary_position_embedding = None
3535

36-
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
37-
ColumnSequenceParallelLinear,
38-
GatherOp,
39-
RowSequenceParallelLinear,
40-
ScatterOp,
41-
mark_as_sequence_parallel_parameter,
42-
)
36+
try:
37+
from paddle.distributed.fleet.utils.sequence_parallel_utils import (
38+
ColumnSequenceParallelLinear,
39+
GatherOp,
40+
RowSequenceParallelLinear,
41+
ScatterOp,
42+
mark_as_sequence_parallel_parameter,
43+
)
44+
except:
45+
pass
4346

4447
from paddlenlp.transformers.conversion_utils import (
4548
StateDictNameMapping,

0 commit comments

Comments
 (0)