Skip to content

Commit cfc4f54

Browse files
committed
Bug fix (examples_offline_packing): correctly process samples whose names contain ':'
1 parent b1b519f commit cfc4f54

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

examples_offline_packing/bmr_packing/s1_get_tokenlens_v4-sft.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ def merge_files_by_token(input_files, output_file, max_token=MAX_TOKEN_LEN):
263 263

264 264
# 定义排序键(按token_len整数排序)
265 265
def sort_key(line):
266-
_, token_str = line.strip().split(':', 1)
266+
token_str = line.strip().split(':')[-1]
267 267
return int(token_str)
268 268

269 269
try:

examples_offline_packing/captions-packing/s1_get_tokenlens_v2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ def merge_files_by_token(input_files, output_file, max_token=MAX_TOKEN_LEN):
218 218

219 219
# 定义排序键(按token_len整数排序)
220 220
def sort_key(line):
221-
_, token_str = line.strip().split(':', 1)
221+
token_str = line.strip().split(':')[-1]
222 222
return int(token_str)
223 223

224 224
try:

0 commit comments

Comments
 (0)