Commit b310142

rename loss_subbatch_seqlen to loss_subbatch_sequence_length (#2581)
1 parent d8765f2

File tree: 6 files changed, +18 −18 lines


paddleformers/nn/criterion/dpo_loss.py

Lines changed: 4 additions & 4 deletions
@@ -83,7 +83,7 @@ def dpo_logps(
         # bsz,seq_len,hidden_size or seq_len,hidden_size
         seq_len = labels.shape[1] if labels.ndim == 2 else labels.shape[0]
 
-        if self.use_fused_head_and_loss_fn and self.use_subbatch and seq_len > self.loss_subbatch_seqlen:
+        if self.use_fused_head_and_loss_fn and self.use_subbatch and seq_len > self.loss_subbatch_sequence_length:
             per_token_logps = -fused_head_and_loss_fn(
                 hidden_states,
                 weight,
@@ -95,7 +95,7 @@ def dpo_logps(
                 self.config.tensor_parallel_degree,
                 self.config.tensor_parallel_output,
                 False,  # fused_linear
-                self.loss_subbatch_seqlen,
+                self.loss_subbatch_sequence_length,
                 return_token_loss=True,
                 ignore_index=0,
             )
@@ -120,12 +120,12 @@ def dpo_logps(
             logits = logits.unsqueeze(0)
         elif logits.dim() == 3 and labels.dim() == 1:
             labels = labels.unsqueeze(0)
-        if self.use_subbatch and seq_len > self.loss_subbatch_seqlen:
+        if self.use_subbatch and seq_len > self.loss_subbatch_sequence_length:
             sb_loss_func = subbatch(
                 self.loss_func,
                 [0, 1],
                 [1, 1],
-                self.loss_subbatch_seqlen,
+                self.loss_subbatch_sequence_length,
                 1,
             )
 
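For context on what the renamed option controls: `subbatch` wraps a loss function so that sequences longer than the configured length are processed in fixed-size chunks along the sequence axis, bounding peak activation memory. The following is a minimal sketch of that idea in plain numpy; it assumes logits of shape [bsz, seq_len, vocab] and a per-token loss of shape [bsz, seq_len], and is an illustration only, not PaddleFormers' actual `subbatch` utility (whose argument list, visible above, differs).

import numpy as np

def subbatch_sketch(fn, max_sequence_length):
    # Illustrative stand-in for a subbatch wrapper (NOT the PaddleFormers API):
    # apply `fn` to chunks of at most `max_sequence_length` tokens along axis 1,
    # then concatenate the per-token results back in order.
    def wrapped(logits, labels):
        seq_len = logits.shape[1]
        if seq_len <= max_sequence_length:
            return fn(logits, labels)
        chunks = [
            fn(logits[:, s : s + max_sequence_length],
               labels[:, s : s + max_sequence_length])
            for s in range(0, seq_len, max_sequence_length)
        ]
        return np.concatenate(chunks, axis=1)  # [bsz, seq_len] per-token losses
    return wrapped

Because a per-token loss is elementwise over positions, chunking does not change the result; with max_sequence_length=2048, for example, an 8192-token sequence is simply evaluated as four chunks whose per-token losses are re-joined in order.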

paddleformers/nn/criterion/interface.py

Lines changed: 3 additions & 3 deletions
@@ -45,14 +45,14 @@ def __init__(self, config, return_tuple=True, ignore_eos_token=False, use_infohu
         self.kto_config = copy.deepcopy(config.get("kto_config", None))
         self.ignored_index = getattr(config, "ignored_index", -100)
         self.use_filtered_label_loss = config.get("use_filtered_label_loss", False)
-        self.loss_subbatch_seqlen = config.get("loss_subbatch_seqlen", -1)
-        self.use_subbatch = self.loss_subbatch_seqlen > 0
+        self.loss_subbatch_sequence_length = config.get("loss_subbatch_sequence_length", -1)
+        self.use_subbatch = self.loss_subbatch_sequence_length > 0
         self.sequence_parallel = config.get("sequence_parallel", False)
         self.tensor_parallel = config.tensor_parallel_degree > 1
         self.use_fused_head_and_loss_fn = config.get("use_fused_head_and_loss_fn", False)
         self.enable_parallel_cross_entropy = config.tensor_parallel_degree > 1 and config.tensor_parallel_output
         logger.info(
-            f"loss_subbatch_seqlen: {self.loss_subbatch_seqlen} , use_fused_head_and_loss_fn: {self.use_fused_head_and_loss_fn}, use_filtered_label_loss: {self.use_filtered_label_loss}"
+            f"loss_subbatch_sequence_length: {self.loss_subbatch_sequence_length} , use_fused_head_and_loss_fn: {self.use_fused_head_and_loss_fn}, use_filtered_label_loss: {self.use_filtered_label_loss}"
         )
 
         self.return_tuple = return_tuple
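Note from this diff that subbatching is strictly opt-in: the derived `use_subbatch` flag is true only for positive values, so the default of -1 keeps loss computation over the full sequence. A trivial sketch of that gating:

def resolve_use_subbatch(loss_subbatch_sequence_length: int = -1) -> bool:
    # Mirrors the flag derivation above: only positive lengths enable subbatching.
    return loss_subbatch_sequence_length > 0

assert resolve_use_subbatch() is False      # default -1: full-sequence loss
assert resolve_use_subbatch(4096) is True   # sequences longer than 4096 are chunked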

paddleformers/nn/criterion/kto_loss.py

Lines changed: 4 additions & 4 deletions
@@ -96,7 +96,7 @@ def kto_logps(
 
         # bsz,seq_len,hidden_size or seq_len,hidden_size
         seq_len = labels.shape[1] if labels.ndim == 2 else labels.shape[0]
-        if self.use_fused_head_and_loss_fn and self.use_subbatch and seq_len > self.loss_subbatch_seqlen:
+        if self.use_fused_head_and_loss_fn and self.use_subbatch and seq_len > self.loss_subbatch_sequence_length:
             per_token_logps = -fused_head_and_loss_fn(
                 hidden_states,
                 weight,
@@ -108,7 +108,7 @@ def kto_logps(
                 self.config.tensor_parallel_degree,
                 self.config.tensor_parallel_output,
                 self.config.fused_linear,
-                self.loss_subbatch_seqlen,
+                self.loss_subbatch_sequence_length,
                 return_token_loss=True,
                 ignore_index=self.ignored_index,
             )
@@ -133,12 +133,12 @@ def kto_logps(
         elif logits.dim() == 3 and labels.dim() == 1:
             labels = labels.unsqueeze(0)
 
-        if self.use_subbatch and seq_len > self.loss_subbatch_seqlen:
+        if self.use_subbatch and seq_len > self.loss_subbatch_sequence_length:
             sb_loss_func = subbatch(
                 self.loss_func,
                 [0, 1],
                 [1, 1],
-                self.loss_subbatch_seqlen,
+                self.loss_subbatch_sequence_length,
                 1,
             )
             per_token_logps = sb_loss_func(logits, labels.unsqueeze(-1))

paddleformers/nn/criterion/sft_loss.py

Lines changed: 4 additions & 4 deletions
@@ -86,7 +86,7 @@ def sft_loss_forward(
         masked_lm_labels = labels
         # bsz,seq_len,hidden_size or seq_len,hidden_size
         seq_len = masked_lm_labels.shape[1] if masked_lm_labels.ndim == 2 else masked_lm_labels.shape[0]
-        if self.use_fused_head_and_loss_fn and self.use_subbatch and seq_len > self.loss_subbatch_seqlen:
+        if self.use_fused_head_and_loss_fn and self.use_subbatch and seq_len > self.loss_subbatch_sequence_length:
             masked_lm_loss = fused_head_and_loss_fn(
                 hidden_states,
                 lm_head_weight,
@@ -98,7 +98,7 @@ def sft_loss_forward(
                 self.config.tensor_parallel_degree,
                 self.config.tensor_parallel_output,
                 False,
-                self.loss_subbatch_seqlen,
+                self.loss_subbatch_sequence_length,
                 return_token_loss=True,
                 ignore_index=self.ignored_index,
             )
@@ -131,12 +131,12 @@ def sft_loss_forward(
 
         # logits: bsz seq_len
         # labels: bsz seq_len vocab_size
-        if self.use_subbatch and seq_len > self.loss_subbatch_seqlen:
+        if self.use_subbatch and seq_len > self.loss_subbatch_sequence_length:
            sb_loss_func = subbatch(
                 self.loss_func,
                 [0, 1],
                 [1, 1],
-                self.loss_subbatch_seqlen,
+                self.loss_subbatch_sequence_length,
                 1,
             )
             masked_lm_loss = sb_loss_func(logits, labels.unsqueeze(-1))

paddleformers/transformers/configuration_utils.py

Lines changed: 2 additions & 2 deletions
@@ -287,10 +287,10 @@ class LlmMetaConfig:
         ("use_fused_head_loss_fn", bool, False, "Whether to use fused head and loss function."),
         ("use_filtered_label_loss", bool, False, "Whether to use filtered label loss."),
         (
-            "loss_subbatch_seqlen",
+            "loss_subbatch_sequence_length",
             int,
             -1,
-            "Sequence length larger than loss_subbatch_seqlen will be divided into multiple subbatches during loss computation (-1 means disable subbatch).",
+            "Sequence length larger than loss_subbatch_sequence_length will be divided into multiple subbatches during loss computation (-1 means disable subbatch).",
         ),
     ]
 
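This rename is breaking for downstream configs: the old key is no longer registered, so callers must set the new attribute. A minimal, hypothetical example (SimpleNamespace stands in for a real model config object; the attribute-style assignment mirrors the test change below):

from types import SimpleNamespace

config = SimpleNamespace()  # hypothetical stand-in for a PaddleFormers model config
# config.loss_subbatch_seqlen = 4096         # old name, no longer recognized
config.loss_subbatch_sequence_length = 4096  # new name: chunk loss computation above 4096 tokens
# Setting it to -1 (the default) disables subbatching entirely.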

tests/nn/test_criterion.py

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ def test_forward_default_sft(self):
 
     def test_forward_non_fuse_subbatch_sft(self):
         config = copy.deepcopy(self.config)
-        config.loss_subbatch_seqlen = 2
+        config.loss_subbatch_sequence_length = 2
         config.use_fused_head_and_loss_fn = False
         layer = CriterionLayer(config=config)
         layer(self.logits, self.labels)
