 23 |  23 | from paddle.autograd import PyLayer
 24 |  24 | from paddle.distributed.fleet.utils import recompute
 25 |  25 |
 26 |     | -from fastdeploy.model_executor.layers.utils import _set_var_distributed, get_tensor
    |  26 | +from fastdeploy.model_executor.layers.utils import get_tensor
 27 |  27 | from fastdeploy.model_executor.models.ernie4_5_vl.dist_utils import (
 28 |  28 |     RowSequenceParallelLinear,
 29 |  29 |     all_gather_group,
@@ -197,19 +197,6 @@ def __init__(
197 | 197 |         self.after_norm = RMSNorm(out_config)
198 | 198 |
199 | 199 |         if self.tensor_parallel_degree > 1:
200 |     | -            for idx in [2, 3]:
201 |     | -                mark_as_sequence_parallel_parameter(self.spatial_linear[idx].weight)
202 |     | -                mark_as_sequence_parallel_parameter(self.spatial_linear[idx].bias)
203 |     | -                _set_var_distributed(self.spatial_linear[idx].weight, split_axis=0)
204 |     | -                _set_var_distributed(self.spatial_linear[idx].bias, split_axis=0)
205 |     | -            if self.use_temporal_conv:
206 |     | -                for idx in [0, 2, 3]:
207 |     | -                    mark_as_sequence_parallel_parameter(self.temporal_linear[idx].weight)
208 |     | -                    mark_as_sequence_parallel_parameter(self.temporal_linear[idx].bias)
209 |     | -
210 |     | -            mark_as_sequence_parallel_parameter(self.mlp.weight)
211 |     | -            mark_as_sequence_parallel_parameter(self.mlp.bias)
212 |     | -            mark_as_sequence_parallel_parameter(self.after_norm.weight)
213 | 200 |             set_weight_attrs(self.spatial_linear[0].weight, {"output_dim": False})
214 | 201 |
215 | 202 |     def spatial_conv_reshape(self, x, spatial_conv_size):