Skip to content

Commit 34c18db

Browse files
authored
Merge pull request #49 from fdcp/main
2 parents ed9dc6a + aaa8c88 commit 34c18db

File tree

5 files changed

+8 additions, -5 deletions (lines changed)

5 files changed

+8 additions, -5 deletions (lines changed)

examples/llava_ov_1_5/sample_packing/1_s1_get_tokenlens_v3-sft.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -207,7 +207,8 @@ def merge_files_by_token(input_files, output_file, max_token=MAX_TOKEN_LEN):
207207
return (None, 0)
208208

209209
def sort_key(line):
210-
_, token_str = line.strip().split(':', 1)
210+
# _, token_str = line.strip().split(':', 1)
211+
token_str = line.strip().split(':')[-1]
211212
return int(token_str)
212213

213214
try:

examples_offline_packing/bmr_packing/s1_get_tokenlens_v4-sft.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -263,7 +263,7 @@ def merge_files_by_token(input_files, output_file, max_token=MAX_TOKEN_LEN):
263263

264264
# 定义排序键(按token_len整数排序)
265265
def sort_key(line):
266-
_, token_str = line.strip().split(':', 1)
266+
token_str = line.strip().split(':')[-1]
267267
return int(token_str)
268268

269269
try:

examples_offline_packing/captions-packing/s1_get_tokenlens_v2.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -218,7 +218,7 @@ def merge_files_by_token(input_files, output_file, max_token=MAX_TOKEN_LEN):
218218

219219
# 定义排序键(按token_len整数排序)
220220
def sort_key(line):
221-
_, token_str = line.strip().split(':', 1)
221+
token_str = line.strip().split(':')[-1]
222222
return int(token_str)
223223

224224
try:

tools/data_preprocess/offline_packing/s1_get_tokenlens_v2.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -311,7 +311,8 @@ def merge_files_by_token(input_files, output_file, max_token=MAX_TOKEN_LEN):
311311

312312
# 定义排序键(按token_len整数排序)
313313
def sort_key(line):
314-
_, token_str = line.strip().split(':', 1)
314+
# _, token_str = line.strip().split(':', 1)
315+
token_str = line.strip().split(':')[-1]
315316
return int(token_str)
316317

317318
try:

tools/data_preprocess/offline_packing/s1_get_tokenlens_v3-sft.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -321,7 +321,8 @@ def merge_files_by_token(input_files, output_file, max_token=MAX_TOKEN_LEN):
321321

322322
# 定义排序键(按token_len整数排序)
323323
def sort_key(line):
324-
_, token_str = line.strip().split(':', 1)
324+
# _, token_str = line.strip().split(':', 1)
325+
token_str = line.strip().split(':')[-1]
325326
return int(token_str)
326327

327328
try:

0 commit comments

Comments
 (0)