@@ -61,7 +61,7 @@ Experimental environment: A10, 3090, V100, A100, ...
pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
git clone https://github.com/modelscope/swift.git
cd swift
-pip install .
+pip install -e .
# The following script needs to be executed in this directory.
cd examples/pytorch/llm

@@ -104,19 +104,74 @@ sft_args = SftArguments(
    dataset=[DatasetName.blossom_math_zh],
    output_dir='output',
    gradient_checkpointing=True)
-best_ckpt_dir = sft_main(sft_args)['best_model_checkpoint']
-print(f'best_ckpt_dir: {best_ckpt_dir}')
+result = sft_main(sft_args)
+best_model_checkpoint = result['best_model_checkpoint']
+print(f'best_model_checkpoint: {best_model_checkpoint}')
torch.cuda.empty_cache()
+
infer_args = InferArguments(
-    ckpt_dir=best_ckpt_dir,
+    ckpt_dir=best_model_checkpoint,
    load_args_from_ckpt_dir=True,
    stream=True,
-    val_dataset_sample=5)
-infer_main(infer_args)
+    show_dataset_sample=5)
+result = infer_main(infer_args)
+print(f'result: {result}')
torch.cuda.empty_cache()
+
web_ui_main(infer_args)
```

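+A note on the snippet above: this hunk begins mid-example, so the imports it relies on are not visible in the diff. Assuming the names are exported from `swift.llm`, as the imports in the snippets below suggest, the preamble would look roughly like this sketch:
+```python
+# Assumed preamble for the snippet above (not shown in this hunk).
+import torch
+from swift.llm import (
+    DatasetName, InferArguments, ModelType, SftArguments,
+    infer_main, sft_main, web_ui_main
+)
+```
+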
+**Single-Sample Inference**:
+
+Inference using LoRA **incremental** weights:
+```python
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from swift.llm import (
+    get_model_tokenizer, get_template, inference, ModelType, get_default_template_type
+)
+from swift.tuners import Swift
+import torch
+
+model_dir = 'vx_xxx/checkpoint-100'
+model_type = ModelType.qwen_7b_chat
+template_type = get_default_template_type(model_type)
+
+model, tokenizer = get_model_tokenizer(model_type, torch.bfloat16, {'device_map': 'auto'})
+
+model = Swift.from_pretrained(model, model_dir, inference_mode=True)
+template = get_template(template_type, tokenizer)
+query = 'xxxxxx'
+response, history = inference(model, template, query, verbose=False)
+print(f'response: {response}')
+print(f'history: {history}')
+```
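+
+Here `Swift.from_pretrained` loads only the LoRA adapter weights from `model_dir` on top of the freshly loaded base model, which is what "incremental" refers to; the variant below instead loads a checkpoint whose adapter weights have already been merged into the base weights.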
+
+Inference using LoRA **merged** complete weights:
+```python
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from swift.llm import (
+    get_model_tokenizer, get_template, inference, ModelType, get_default_template_type
+)
+import torch
+
+model_dir = 'vx_xxx/checkpoint-100-merged'
+model_type = ModelType.qwen_7b_chat
+template_type = get_default_template_type(model_type)
+
+model, tokenizer = get_model_tokenizer(model_type, torch.bfloat16, {'device_map': 'auto'},
+                                       model_dir=model_dir)
+
+template = get_template(template_type, tokenizer)
+query = 'xxxxxx'
+response, history = inference(model, template, query, verbose=False)
+print(f'response: {response}')
+print(f'history: {history}')
+```
+
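+Both snippets return the conversation `history` alongside the response. As a minimal sketch of a follow-up turn (assuming `inference` accepts a `history` keyword argument, which this diff does not show):
+```python
+# Hypothetical multi-turn continuation: pass the returned history back in.
+follow_up = 'xxxxxx'
+response, history = inference(model, template, follow_up, history=history, verbose=False)
+print(f'response: {response}')
+```
+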
### Run using Swift CLI
**SFT**:
```bash