Skip to content

Commit 314910e

Browse files
authored
bugfix: ernie dataset tests (#2632)
1 parent 570a004 commit 314910e

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

tests/dataset/test_ernie_datasets.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@ def test_random_dataset_len(self):
41 41
"greedy_intokens": True,
42 42
"packing": False,
43 43
"mix_strategy": "random",
44 +
"encode_one_turn": True,
44 45
}
45 46

46 47
train_dataset = create_dataset_sft(
@@ -69,6 +70,7 @@ def test_concat_dataset_len(self):
69 70
"greedy_intokens": True,
70 71
"packing": False,
71 72
"mix_strategy": "concat",
73 +
"encode_one_turn": True,
72 74
}
73 75

74 76
train_dataset = create_dataset_sft(
@@ -97,6 +99,7 @@ def test_interleave_under_dataset_len(self):
97 99
"greedy_intokens": True,
98 100
"packing": False,
99 101
"mix_strategy": "interleave_under",
102 +
"encode_one_turn": True,
100 103
}
101 104

102 105
train_dataset = create_dataset_sft(
@@ -125,6 +128,7 @@ def test_interleave_over_dataset_len(self):
125 128
"greedy_intokens": True,
126 129
"packing": False,
127 130
"mix_strategy": "interleave_over",
131 +
"encode_one_turn": True,
128 132
}
129 133

130 134
train_dataset = create_dataset_sft(
@@ -162,6 +166,7 @@ def test_random_dataset_len(self):
162 166
"mask_out_eos_token": True,
163 167
"packing": False,
164 168
"mix_strategy": "random",
169 +
"encode_one_turn": True,
165 170
}
166 171

167 172
train_dataset = create_dataset_dpo(
@@ -197,6 +202,7 @@ def test_concat_dataset_len(self):
197 202
"mask_out_eos_token": True,
198 203
"packing": False,
199 204
"mix_strategy": "concat",
205 +
"encode_one_turn": True,
200 206
}
201 207

202 208
train_dataset = create_dataset_dpo(
@@ -232,6 +238,7 @@ def test_interleave_under_dataset_len(self):
232 238
"mask_out_eos_token": True,
233 239
"packing": False,
234 240
"mix_strategy": "interleave_under",
241 +
"encode_one_turn": True,
235 242
}
236 243

237 244
train_dataset = create_dataset_dpo(
@@ -267,6 +274,7 @@ def test_interleave_over_dataset_len(self):
267 274
"mask_out_eos_token": True,
268 275
"packing": False,
269 276
"mix_strategy": "interleave_over",
277 +
"encode_one_turn": True,
270 278
}
271 279

272 280
train_dataset = create_dataset_dpo(

0 commit comments

Comments
 (0)