
Commit 40417bf

[CodeStyle] Remove some code with paddle.fluid (#2633)
* remove fluid
1 parent 08f0633 commit 40417bf

14 files changed, +28 -37 lines changed

examples/language_model/gpt-3/dygraph/modeling.py

Lines changed: 0 additions & 1 deletion
@@ -22,7 +22,6 @@
 import paddle.tensor as tensor
 from paddle.fluid import layers
 from paddle.nn.layer.transformer import _convert_param_attr_to_list
-from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer

 from paddlenlp.transformers import PretrainedModel, register_base_model

examples/language_model/gpt-3/static/dataset.py

Lines changed: 1 addition & 1 deletion
@@ -363,7 +363,7 @@ def data_gen():
 yield tuple(
 [np.expand_dims(np.array(x), axis=0) for x in data])

-data_loader = paddle.fluid.io.DataLoader.from_generator(
+data_loader = paddle.io.DataLoader.from_generator(
 feed_list=data_holders, capacity=70, iterable=False)
 data_loader.set_sample_generator(data_gen,
 batch_size=args.micro_batch_size,
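Note: paddle.io.DataLoader.from_generator keeps the calling pattern of the removed paddle.fluid.io entry point. Below is a minimal sketch under static graph mode; the feed variable, shapes, and generator are illustrative stand-ins, not code from this repo:

import numpy as np
import paddle

paddle.enable_static()

# One feed variable standing in for the real data_holders list (illustrative).
x = paddle.static.data(name="x", shape=[-1, 16], dtype="float32")

def sample_gen():
    # Yield one sample per feed variable, as a tuple.
    for _ in range(8):
        yield (np.random.rand(16).astype("float32"),)

loader = paddle.io.DataLoader.from_generator(
    feed_list=[x], capacity=70, iterable=False)
loader.set_sample_generator(sample_gen, batch_size=2)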

examples/language_model/gpt-3/static/run_pretrain_static.py

Lines changed: 2 additions & 3 deletions
@@ -77,7 +77,7 @@ def dist_optimizer(args, topo):
 args.global_batch_size, micro_batch_size)
 acc_steps = bsz_per_dp // micro_batch_size

-exec_strategy = paddle.fluid.ExecutionStrategy()
+exec_strategy = paddle.static.ExecutionStrategy()
 exec_strategy.num_threads = 2
 exec_strategy.num_iteration_per_drop_scope = 1

@@ -320,8 +320,7 @@ def do_train(args):

 clip = None
 if args.grad_clip > 0:
-clip = paddle.fluid.clip.GradientClipByGlobalNorm(
-clip_norm=args.grad_clip)
+clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=args.grad_clip)

 decay_param = [
 p.name for n, p in model.named_parameters()
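Note: both replacements above move to public paddle 2.x namespaces with the same attributes and arguments. A minimal sketch; the linear layer, learning rate, and clip_norm value below are illustrative:

import paddle

# Execution configuration now lives under paddle.static.
exec_strategy = paddle.static.ExecutionStrategy()
exec_strategy.num_threads = 2
exec_strategy.num_iteration_per_drop_scope = 1

# Global-norm gradient clipping is attached to the optimizer via grad_clip.
linear = paddle.nn.Linear(16, 16)
clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
opt = paddle.optimizer.AdamW(learning_rate=1e-4,
                             parameters=linear.parameters(),
                             grad_clip=clip)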

examples/language_model/moe/dygraph/dataset.py

Lines changed: 1 addition & 1 deletion
@@ -106,7 +106,7 @@ def construct_samples_and_shuffle_data(name, data_prefix, documents, sizes,
 np_rng.set_state(savedState)

 if paddle.distributed.get_world_size() > 1:
-if paddle.fluid.framework.in_dygraph_mode():
+if paddle.in_dynamic_mode():
 paddle.distributed.barrier()

 # Load mappings.
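Note: paddle.in_dynamic_mode() is the public replacement for the fluid-internal in_dygraph_mode() check. A minimal sketch of the guard pattern used above; the barrier only fires in a multi-process run after paddle.distributed has been initialized:

import paddle
import paddle.distributed as dist

if dist.get_world_size() > 1:
    if paddle.in_dynamic_mode():
        dist.barrier()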

examples/language_model/moe/dygraph/framework/adamw.py

Lines changed: 2 additions & 2 deletions
@@ -14,9 +14,9 @@

 from paddle.optimizer import Optimizer
 from paddle.optimizer.adam import Adam
-from paddle.fluid import core
+from paddle.framework import core
 from paddle.fluid import framework
-from paddle.fluid.framework import Variable
+from paddle.static import Variable
 from paddle.fluid.dygraph import base as imperative_base
 from collections.abc import Callable
 import paddle

examples/language_model/moe/dygraph/framework/storage_process.py

Lines changed: 2 additions & 2 deletions
@@ -12,11 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from paddle.fluid import core
+from paddle.framework import core
 import numpy as np
 from collections import OrderedDict

-from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph
+from paddle.framework import in_dygraph_mode, _in_legacy_dygraph

 if in_dygraph_mode():
 from paddle.distributed.fleet.meta_parallel.sharding.group_sharded_storage import ParamStorage, GradStorage

examples/language_model/moe/dygraph/modeling.py

Lines changed: 0 additions & 2 deletions
@@ -21,9 +21,7 @@
 import paddle.nn.functional as F
 import paddle.tensor as tensor
 from paddle.fluid import layers
-from paddle.fluid.framework import in_dygraph_mode
 from paddle.nn.layer.transformer import _convert_param_attr_to_list
-from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer

 from paddlenlp.transformers import PretrainedModel, register_base_model

examples/language_model/moe/dygraph/run_moe_pretrain.py

Lines changed: 4 additions & 6 deletions
@@ -35,12 +35,10 @@
 from utils import get_timers, set_timers
 from types import MethodType
 from paddle import _C_ops
-from paddle.fluid import core
-from paddle.fluid.dygraph import to_variable
+from paddle.framework import core
 import paddle.distributed as dist
 from framework import assign_group_by_size, flatten_dense_tensors, obtain_storage, AdamW, group_sharded_parallel
 from paddle.incubate.distributed.models import moe
-from paddle.fluid.framework import in_dygraph_mode
 from paddle.distributed.fleet.meta_parallel.sharding.sharding_utils import ShardingScaler
 from paddle.distributed.fleet.meta_parallel.sharding.group_sharded_utils import GroupShardedScaler

@@ -179,8 +177,8 @@ def unscale_method(self, optimizer):
 if (param._grad_ivar() is not None) and (
 param._grad_ivar().dtype == core.VarDesc.VarType.FP32)
 ]
-temp_found_inf_fp16 = to_variable(np.array([0]).astype(np.bool))
-temp_found_inf_fp32 = to_variable(np.array([0]).astype(np.bool))
+temp_found_inf_fp16 = paddle.to_tensor(np.array([0]).astype(np.bool))
+temp_found_inf_fp32 = paddle.to_tensor(np.array([0]).astype(np.bool))

 if len(param_grads_fp16):
 _C_ops.check_finite_and_unscale(param_grads_fp16, self._scale,
@@ -443,7 +441,7 @@ def do_train(args):
 scaler = fleet.distributed_scaler(scaler)
 scaler._unscale = MethodType(unscale_method, scaler)
 else:
-wrap_scale_func = GroupShardedScaler if in_dygraph_mode(
+wrap_scale_func = GroupShardedScaler if paddle.in_dynamic_mode(
 ) else ShardingScaler
 scaler = wrap_scale_func(scaler)
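Note: paddle.to_tensor replaces the old fluid.dygraph.to_variable helper and accepts NumPy arrays or plain Python values directly. A small sketch; np.bool mirrors the diff above, while plain bool (used below) sidesteps the NumPy alias deprecation:

import numpy as np
import paddle

# Two ways to build a boolean "found_inf" flag tensor in dynamic mode.
temp_found_inf_fp16 = paddle.to_tensor(np.array([0]).astype(bool))
temp_found_inf_fp32 = paddle.to_tensor(False)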

examples/language_model/moe/dygraph/userdefine_optimizer.py

Lines changed: 6 additions & 7 deletions
@@ -16,11 +16,10 @@
 import sys
 import paddle
 from paddle.optimizer import Optimizer
-from paddle.fluid.clip import ClipGradByGlobalNorm
-from paddle.fluid.dygraph import base as imperative_base
-from paddle.fluid import framework
-from paddle.fluid.framework import Variable
-from paddle.fluid import core
+from paddle.nn import ClipGradByGlobalNorm
+from paddle import framework
+from paddle.static import Variable
+from paddle.framework import core
 from paddle.fluid import layers
 from paddle.distributed.fleet.utils.hybrid_parallel_util import fused_allreduce_gradients

@@ -46,7 +45,7 @@ def __init__(self, clip, hcg):
 self._clip = clip
 self._hcg = hcg

-@imperative_base.no_grad
+@paddle.no_grad
 def _dygraph_clip(self, params_grads):
 params_and_grads = []
 sum_square_list_dist = []
@@ -141,7 +140,7 @@ def __init__(self, optimizer, hcg):
 self._inner_opt._grad_clip = HybridParallelClipGrad(
 self._inner_opt._grad_clip, hcg)

-@imperative_base.no_grad
+@paddle.no_grad
 @framework.dygraph_only
 def step(self):
 parameters_list = _obtain_optimizer_parameters_list(self._inner_opt)
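Note: paddle.no_grad replaces imperative_base.no_grad and works both as a decorator and as a context manager. A minimal sketch with a placeholder layer; the documented decorator form uses parentheses:

import paddle

linear = paddle.nn.Linear(4, 4)

# Decorator form: no gradient is recorded inside the call.
@paddle.no_grad()
def run_without_grad(x):
    return linear(x)

out = run_without_grad(paddle.rand([2, 4]))

# Context-manager form is equivalent.
with paddle.no_grad():
    out = linear(paddle.rand([2, 4]))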

model_zoo/ernie-1.0/run_pretrain_static.py

Lines changed: 2 additions & 3 deletions
@@ -182,7 +182,7 @@ def dist_optimizer(args, topo):
 args.global_batch_size, micro_batch_size)
 accumulate_steps = bsz_per_dp // micro_batch_size

-exec_strategy = paddle.fluid.ExecutionStrategy()
+exec_strategy = paddle.static.ExecutionStrategy()
 exec_strategy.num_threads = 1
 exec_strategy.num_iteration_per_drop_scope = 10000

@@ -466,8 +466,7 @@ def do_train(args):

 clip = None
 if args.grad_clip > 0:
-clip = paddle.fluid.clip.GradientClipByGlobalNorm(
-clip_norm=args.grad_clip)
+clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=args.grad_clip)

 decay_param = [
 p.name for n, p in model.named_parameters()
