 23 |  23 | from paddle.autograd import PyLayer
 24 |  24 | from paddle.distributed.fleet.utils import recompute
 25 |  25 |
 26 |     | -from fastdeploy.model_executor.layers.utils import _set_var_distributed, get_tensor
    |  26 | +from fastdeploy.model_executor.layers.utils import get_tensor
 27 |  27 | from fastdeploy.model_executor.models.ernie4_5_vl.dist_utils import (
 28 |  28 |     RowSequenceParallelLinear,
 29 |  29 |     all_gather_group,

@@ -207,19 +207,6 @@ def __init__(

207 | 207 |         self.after_norm = RMSNorm(out_config)
208 | 208 |
209 | 209 |         if self.tensor_parallel_degree > 1:
210 |     | -            for idx in [2, 3]:
211 |     | -                mark_as_sequence_parallel_parameter(self.spatial_linear[idx].weight)
212 |     | -                mark_as_sequence_parallel_parameter(self.spatial_linear[idx].bias)
213 |     | -                _set_var_distributed(self.spatial_linear[idx].weight, split_axis=0)
214 |     | -                _set_var_distributed(self.spatial_linear[idx].bias, split_axis=0)
215 |     | -            if self.use_temporal_conv:
216 |     | -                for idx in [0, 2, 3]:
217 |     | -                    mark_as_sequence_parallel_parameter(self.temporal_linear[idx].weight)
218 |     | -                    mark_as_sequence_parallel_parameter(self.temporal_linear[idx].bias)
219 |     | -
220 |     | -            mark_as_sequence_parallel_parameter(self.mlp.weight)
221 |     | -            mark_as_sequence_parallel_parameter(self.mlp.bias)
222 |     | -            mark_as_sequence_parallel_parameter(self.after_norm.weight)
223 | 210 |             set_weight_attrs(self.spatial_linear[0].weight, {"output_dim": False})
224 | 211 |
225 | 212 |     def spatial_conv_reshape(self, x, spatial_conv_size):
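For context on the removed helpers: `mark_as_sequence_parallel_parameter` (from PaddleNLP's sequence-parallel utilities) and FastDeploy's `_set_var_distributed` attach metadata that training-time gradient hooks and tensor-parallel checkpoint handling read; they do not alter the forward pass. A minimal sketch of that tagging behavior, assuming the conventional attribute-flag implementation (the attribute names below are illustrative assumptions, not taken from this PR):

```python
# Hedged sketch: what "marking" a parameter typically amounts to.
# Assumption: both helpers only set attributes consumed later by
# training-time hooks / distributed checkpoint logic.

def mark_as_sequence_parallel_parameter(parameter):
    # Tag the parameter so a sequence-parallel gradient hook knows to
    # all-reduce its gradient across the tensor-parallel group.
    parameter.sequence_parallel = True


def _set_var_distributed(var, split_axis):
    # Record which axis the variable is split along under tensor parallelism,
    # so checkpoint save/merge logic can treat it as a sharded weight.
    var.is_distributed = True
    var.split_axis = split_axis
```

If that assumption holds, dropping the block (and the now-unused `_set_var_distributed` import) leaves inference behavior unchanged.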