You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
eval_steps: 10  # Number of steps between evaluations

# device memory optimization args
use_flash_attention: true  # Whether to use fused attention operations
# Whether to use fused RMS norm operations; requires installing fused_ln
# from slm/model_zoo/gpt-3/external_ops
use_fused_rms_norm: true
use_fused_rope: false  # Whether to use fused rope operations
use_fused_head_and_loss_fn: true  # Whether to use fused head and loss function
# Whether to use fused linear operations.
# NOTE: this appears to be an unused parameter.
use_fused_linear: true
recompute: false  # Whether to enable gradient checkpointing for memory optimization
recompute_use_reentrant: false  # Whether to use reentrant recompute
recompute_granularity: "full"  # Granularity of recompute
bf16: true  # Whether to use mixed precision with bfloat16
fp16_opt_level: "O2"  # Optimization level for fp16 and bf16 training
# Whether to use float32 weight gradients for master weights in amp opt level='O2'
amp_master_grad: false
# Custom black list for amp
amp_custom_black_list:
  - "reduce_sum"
  - "softmax_with_cross_entropy"
  - "c_softmax_with_cross_entropy"
  - "elementwise_div"
  - "sin"
  - "cos"
# Custom white list for amp
amp_custom_white_list:
  - "lookup_table"
  - "lookup_table_v2"
  - "flash_attn"
  - "matmul"
  - "matmul_v2"
  - "fused_gemm_epilogue"
# Level of model offloading to pinned memory.
# Supported values: freeze_model, train_model, optimizer
offload_level: "freeze_model"
release_grads: true  # Whether to release gradients
offload_optim: false  # Whether to offload optimizer to pinned memory

# benchmark args
skip_profile_timer: false  # Whether to skip profiling time
0 commit comments