
Commit ecbbf64

[bugfix] fix streaming & packing (#5403)
1 parent 0f72fdd commit ecbbf64

3 files changed: +12, -9 lines

requirements/install_all.sh

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@ pip install autoawq -U --no-deps
 pip install auto_gptq optimum bitsandbytes "gradio<5.33" -U
 pip install git+https://github.com/modelscope/ms-swift.git
 pip install timm -U
-pip install "deepspeed<0.17" -U
+pip install "deepspeed" -U
 pip install qwen_vl_utils qwen_omni_utils decord librosa icecream soundfile -U
 pip install liger_kernel nvitop pre-commit math_verify py-spy -U
 # flash-attn: https://github.com/Dao-AILab/flash-attention/releases

swift/llm/train/sft.py

Lines changed: 8 additions & 8 deletions
@@ -127,14 +127,7 @@ def _prepare_dataset(self):
             if i == 1 and predict_with_generate:
                 # val_dataset
                 continue
-            if args.streaming:
-                preprocessor = EncodePreprocessor(template=template)
-                dataset = preprocessor(
-                    dataset,
-                    num_proc=args.dataset_num_proc,
-                    load_from_cache_file=args.load_from_cache_file,
-                    strict=args.strict)
-            elif (args.model_meta.is_multimodal or args.lazy_tokenize):
+            if (args.model_meta.is_multimodal or args.lazy_tokenize) and not args.streaming:
                 dataset = LazyLLMDataset(dataset, template.encode, strict=args.strict, random_state=args.data_seed)
             if args.packing:
                 packing_dataset_cls = IterablePackingDataset if args.streaming else PackingDataset
@@ -144,6 +137,13 @@ def _prepare_dataset(self):
                     num_proc=args.dataset_num_proc,
                     strict=args.strict,
                     load_from_cache_file=args.load_from_cache_file)
+            elif args.streaming:
+                preprocessor = EncodePreprocessor(template=template)
+                dataset = preprocessor(
+                    dataset,
+                    num_proc=args.dataset_num_proc,
+                    load_from_cache_file=args.load_from_cache_file,
+                    strict=args.strict)
             datasets[i] = dataset
         self._show_dataset(*datasets)
         return datasets
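
Taken together, the two hunks reorder the branches so that packing takes precedence over the eager streaming path: a streaming dataset now goes straight to IterablePackingDataset, and EncodePreprocessor is only used for streaming without packing (previously a streaming dataset was encoded by EncodePreprocessor first and then packed). Below is a minimal, self-contained Python toy that mirrors this branch ordering; Flags, lazy_path, and choose_path are illustrative stand-ins, not ms-swift API.

from dataclasses import dataclass

@dataclass
class Flags:
    # Stand-ins for the training arguments used in the hunks above;
    # `lazy_path` stands in for `args.model_meta.is_multimodal or args.lazy_tokenize`.
    streaming: bool = False
    packing: bool = False
    lazy_path: bool = False

def choose_path(f: Flags) -> list:
    # Mirrors the post-fix branch order in `_prepare_dataset`.
    steps = []
    if f.lazy_path and not f.streaming:
        steps.append('LazyLLMDataset')
    if f.packing:
        steps.append('IterablePackingDataset' if f.streaming else 'PackingDataset')
    elif f.streaming:
        steps.append('EncodePreprocessor')
    return steps

assert choose_path(Flags(streaming=True, packing=True)) == ['IterablePackingDataset']
assert choose_path(Flags(streaming=True)) == ['EncodePreprocessor']
assert choose_path(Flags(packing=True)) == ['PackingDataset']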

swift/megatron/argument/megatron_args.py

Lines changed: 3 additions & 0 deletions
@@ -378,6 +378,9 @@ def __post_init__(self):
         self.tensorboard_dir = to_abspath(self.tensorboard_dir)
         self.extra_megatron_kwargs = json_parse_to_dict(self.extra_megatron_kwargs)
         self._init_no_rope_fusion()
+        if self.load is None and self.no_initialization:
+            raise ValueError('You did not pass `--load`, so you need to set `--no_initialization false` '
+                             'to allow the model to initialize weights properly.')
 
     def _init_no_rope_fusion(self):
         if self.no_rope_fusion is not None:
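
The added check fails fast in __post_init__ when no checkpoint is loaded and weight initialization is disabled. A minimal sketch of this behaviour, using a hypothetical dataclass (field names mirror the diff for illustration only, not the real MegatronArguments):

from dataclasses import dataclass
from typing import Optional

@dataclass
class _Args:
    # Hypothetical stand-in; `no_initialization` defaults to True here purely
    # so the example exercises the guard.
    load: Optional[str] = None
    no_initialization: bool = True

    def __post_init__(self):
        if self.load is None and self.no_initialization:
            raise ValueError('You did not pass `--load`, so you need to set `--no_initialization false` '
                             'to allow the model to initialize weights properly.')

_Args(load='/path/to/checkpoint')   # ok: weights come from the checkpoint
_Args(no_initialization=False)      # ok: weights will be initialized
# _Args()                           # raises ValueError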
