
Commit e650d8b

update perf (#264)

1 parent 8a275ff commit e650d8b

File tree

5 files changed: +24 -17 lines changed

scripts/benchmark/test_memory_time/run_single.py

Lines changed: 3 additions & 6 deletions

@@ -37,24 +37,21 @@ def test_memory_time(train_args: TrainArguments) -> Dict[str, Dict[str, Any]]:
     args_kwargs = get_non_default_args(train_args)
     print(f'args_kwargs: {args_kwargs}')
     for i in range(train_args.run_time):
-        start_t = time.time()
         sft_args = SftArguments(
             dataset_test_ratio=0,
             dataset=DatasetName.cls_fudan_news_zh,
-            train_dataset_sample=1000,
+            train_dataset_sample=-1,
             save_strategy='no',
             check_dataset_strategy='warning',
             truncation_strategy='truncation_left',
             seed=get_seed(random_state),
             preprocess_num_proc=4,
             **args_kwargs)
         output = sft_main(sft_args)
-        t = (time.time() - start_t) / 60  # min
-        max_memory = torch.cuda.max_memory_reserved() / 1024**2
         torch.cuda.empty_cache()
         output = {
-            'time': f'{t}min',
-            'memory': f'{max_memory}MiB',
+            'samples/s': f"{output['train_info']['samples/s']:.2f}",
+            'memory': output['memory'],
             'train_args': check_json_format(args_kwargs),
             'model_info': output['model_info'],
         }
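With this change the benchmark script no longer times the run or queries CUDA memory itself; both figures come from the dict that sft_main returns. A minimal sketch of the shape that return value takes after this commit (field names match the swift/llm/sft.py hunk below; the numbers are made up for illustration):

    # Illustrative shape of sft_main's return value after this commit.
    output = {
        'memory': {'cuda:0': '11.22GiB'},  # peak reserved memory per device
        'train_info': {
            'time': 250.0,                 # seconds accumulated in training_step
            'num_samples': 4000,           # len(train_dataset)
            'samples/s': 4000 / 250.0,     # 16.0
        },
    }
    print(f"{output['train_info']['samples/s']:.2f}")  # prints '16.00'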

swift/llm/sft.py

Lines changed: 6 additions & 0 deletions

@@ -331,6 +331,12 @@ def llm_sft(args: SftArguments) -> Dict[str, Union[str, Any]]:
         trainer._add_patterns_to_gitignores(['images/'])
         trainer.push_to_hub()
     return {
+        'memory': trainer.perf['memory'],
+        'train_info': {
+            'time': trainer.perf['train_time'],
+            'num_samples': len(train_dataset),
+            'samples/s': len(train_dataset) / trainer.perf['train_time']
+        },
         'last_model_checkpoint': last_model_checkpoint,
         'best_model_checkpoint': trainer.state.best_model_checkpoint,
         'best_metric': trainer.state.best_metric,
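The throughput reported here is average end-to-end training speed: the number of training samples divided by the cumulative wall-clock time spent in training_step (accumulated by the trainer, see swift/trainers/trainers.py below). A worked example with illustrative numbers:

    # samples/s = num_samples / train_time
    train_time = 400.0   # seconds summed over all training_step calls
    num_samples = 5000   # len(train_dataset)
    print(f'{num_samples / train_time:.2f} samples/s')  # '12.50 samples/s'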

swift/trainers/trainers.py

Lines changed: 6 additions & 7 deletions

@@ -41,21 +41,20 @@ def __init__(self, *args, **kwargs):
             self.model.get_trainable_parameters() if hasattr(
                 self.model, 'get_trainable_parameters') else None,
         }
-        self._iter_perf = 0

     def training_step(self, *args, **kwargs) -> torch.Tensor:
         train_time = time.time()
         training_output = super().training_step(*args, **kwargs)
         train_time = time.time() - train_time
         self.perf['train_time'] = self.perf['train_time'] + train_time
-        self._iter_perf += 1
-        if self._iter_perf > 20 and not self.perf[
-                'memory'] and torch.cuda.device_count() > 0:
-            for i in range(torch.cuda.device_count()):
-                self.perf['memory'][
-                    f'device:{i}'] = f'{torch.cuda.memory_reserved(i)/1024/1024/1024:.2f}GB'
         return training_output

+    def train(self, *args, **kwargs) -> torch.Tensor:
+        super().train(*args, **kwargs)
+        for i in range(torch.cuda.device_count()):
+            self.perf['memory'][
+                f'cuda:{i}'] = f'{torch.cuda.max_memory_reserved(i)/1024/1024/1024:.2f}GiB'
+
     def prediction_step(
         self,
         model: nn.Module,
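The memory snapshot moves from "reserved after 20 steps" inside training_step to "peak reserved per device once train() returns", which drops the per-step bookkeeping and records the true high-water mark. A self-contained sketch of the same measurement (the helper name is mine, not part of the trainer's API; assumes a CUDA build of PyTorch):

    import torch

    def peak_memory_per_device() -> dict:
        # torch.cuda.max_memory_reserved(i) returns the peak number of bytes
        # the caching allocator has reserved on device i since the last reset.
        return {
            f'cuda:{i}': f'{torch.cuda.max_memory_reserved(i) / 1024**3:.2f}GiB'
            for i in range(torch.cuda.device_count())
        }

    # After a training run:
    # trainer.train()
    # print(peak_memory_per_device())  # e.g. {'cuda:0': '11.22GiB'}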

swift/tuners/base.py

Lines changed: 1 addition & 1 deletion

@@ -485,7 +485,7 @@ def get_trainable_parameters(self):
             f'|| trainable%: {100 * trainable_params / all_param:.4f}' \
             '|| cuda memory: ' \
             f'{sum([torch.cuda.memory_allocated(i) for i in range(torch.cuda.device_count())])/1024/1024/1024:.2f}' \
-            'GB.'
+            'GiB.'


 class Swift:
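The unit rename is correct: dividing bytes by 1024**3 yields gibibytes (GiB), not SI gigabytes (GB). A quick check of the difference:

    n_bytes = 8 * 1024**3                 # 8 GiB worth of bytes
    print(f'{n_bytes / 1024**3:.2f}GiB')  # '8.00GiB' (binary unit, what the code computes)
    print(f'{n_bytes / 1000**3:.2f}GB')   # '8.59GB'  (SI unit)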

tests/llm/test_run.py

Lines changed: 8 additions & 3 deletions

@@ -91,13 +91,18 @@ def test_loss_matching(self):
         best_model_checkpoint = output['best_model_checkpoint']
         print(f'best_model_checkpoint: {best_model_checkpoint}')
         torch.cuda.empty_cache()
+        load_dataset_config = str(bool_var or NO_EVAL_HUMAN)
+        if load_dataset_config:
+            show_dataset_sample = 2
+        else:
+            show_dataset_sample = -1
         infer_main([
             '--ckpt_dir', best_model_checkpoint, '--show_dataset_sample',
-            '-1', '--max_new_tokens', '100', '--use_flash_attn', 'true',
-            '--verbose',
+            str(show_dataset_sample), '--max_new_tokens', '100',
+            '--use_flash_attn', 'true', '--verbose',
             str(not bool_var), '--merge_lora_and_save',
             str(bool_var), '--load_dataset_config',
-            str(bool_var or NO_EVAL_HUMAN)
+            str(load_dataset_config)
         ])
         loss = output['log_history'][-1]['train_loss']
         losses.append(loss)
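One Python subtlety in the hunk above: load_dataset_config is the string 'True' or 'False', and any non-empty string is truthy, so the if branch is taken in both cases and show_dataset_sample is always 2. A standalone demonstration (NO_EVAL_HUMAN stubbed in):

    NO_EVAL_HUMAN = False  # stub for the test module's constant
    for bool_var in (True, False):
        load_dataset_config = str(bool_var or NO_EVAL_HUMAN)
        print(load_dataset_config, bool(load_dataset_config))
    # True True
    # False True  <- 'False' is non-empty, hence truthy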
