Skip to content

Commit 4dac876

Browse files
committed
Merge branch 'main' into release/3.0
2 parents 11d4180 + 263fc1c commit 4dac876

File tree

9 files changed

+62
-20
lines changed

9 files changed

+62
-20
lines changed

docs/source/Instruction/支持的模型和数据集.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,8 @@
344344
|[deepseek-ai/DeepSeek-V2-Chat](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Chat)|deepseek_v2|deepseek|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2-Chat](https://huggingface.co/deepseek-ai/DeepSeek-V2-Chat)|
345345
|[deepseek-ai/DeepSeek-V2.5](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5](https://huggingface.co/deepseek-ai/DeepSeek-V2.5)|
346346
|[deepseek-ai/DeepSeek-V2.5-1210](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5-1210)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5-1210](https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210)|
347+
|[deepseek-ai/DeepSeek-V3-Base](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3-Base)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3-Base](https://huggingface.co/deepseek-ai/DeepSeek-V3-Base)|
348+
|[deepseek-ai/DeepSeek-V3](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)|
347349
|[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama-65b-v8-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama-65b-v8-bf16)|
348350
|[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://huggingface.co/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)|
349351
|[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)|

docs/source_en/Instruction/Supported-models-and-datasets.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,8 @@ The table below introduces the models integrated with ms-swift:
344344
|[deepseek-ai/DeepSeek-V2-Chat](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Chat)|deepseek_v2|deepseek|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2-Chat](https://huggingface.co/deepseek-ai/DeepSeek-V2-Chat)|
345345
|[deepseek-ai/DeepSeek-V2.5](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5](https://huggingface.co/deepseek-ai/DeepSeek-V2.5)|
346346
|[deepseek-ai/DeepSeek-V2.5-1210](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2.5-1210)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2.5-1210](https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210)|
347+
|[deepseek-ai/DeepSeek-V3-Base](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3-Base)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3-Base](https://huggingface.co/deepseek-ai/DeepSeek-V3-Base)|
348+
|[deepseek-ai/DeepSeek-V3](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)|
347349
|[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama-65b-v8-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama-65b-v8-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama-65b-v8-bf16)|
348350
|[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-13b-v8.1-fp16](https://huggingface.co/OpenBuddy/openbuddy-llama2-13b-v8.1-fp16)|
349351
|[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://modelscope.cn/models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)|openbuddy_llama|openbuddy|-|-|[OpenBuddy/openbuddy-llama2-70b-v10.1-bf16](https://huggingface.co/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16)|

swift/llm/infer/infer_engine/infer_engine.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -244,12 +244,4 @@ def func(target, queue, args, kwargs):
244244

245245
@staticmethod
246246
def safe_asyncio_run(coro):
247-
try:
248-
loop = asyncio.get_running_loop()
249-
except RuntimeError:
250-
loop = None
251-
if loop:
252-
result = InferEngine.thread_run(asyncio.run, args=(coro, ))
253-
else:
254-
result = asyncio.run(coro)
255-
return result
247+
return InferEngine.thread_run(asyncio.run, args=(coro, ))

swift/llm/model/model/deepseek.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ def _dtype_hook(module, input, output):
108108
ModelGroup([
109109
Model('deepseek-ai/DeepSeek-V2.5', 'deepseek-ai/DeepSeek-V2.5'),
110110
Model('deepseek-ai/DeepSeek-V2.5-1210', 'deepseek-ai/DeepSeek-V2.5-1210'),
111+
Model('deepseek-ai/DeepSeek-V3-Base', 'deepseek-ai/DeepSeek-V3-Base'),
112+
Model('deepseek-ai/DeepSeek-V3', 'deepseek-ai/DeepSeek-V3'),
111113
]),
112114
],
113115
TemplateType.deepseek_v2_5,

swift/llm/template/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,7 @@ def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]:
610610
if (self.template_meta.template_type == 'dummy' and self.use_chat_template and not self.is_training
611611
and self.mode != 'seq_cls'):
612612
template_backend = 'jinja'
613+
logger.info_once(f'Setting template_backend: {template_backend}')
613614
res_context_list, loss_scale_list, answer_len = (
614615
self._swift_encode(inputs) if template_backend == 'swift' else self._jinja_encode(inputs))
615616
encoded = {}

swift/ui/app.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Copyright (c) Alibaba, Inc. and its affiliates.
22
import os
3+
from copy import copy
34
from dataclasses import fields
45
from functools import partial
56
from typing import List, Union
@@ -72,6 +73,7 @@ def run(self):
7273
for f in fields(self.args):
7374
if getattr(self.args, f.name):
7475
LLMInfer.default_dict[f.name] = getattr(self.args, f.name)
76+
7577
LLMInfer.is_gradio_app = True
7678
LLMInfer.is_multimodal = self.args.model_meta.is_multimodal
7779
LLMInfer.build_ui(LLMInfer)
@@ -93,10 +95,20 @@ def run(self):
9395
value = getattr(self.args, f.name)
9496
if isinstance(value, list):
9597
value = ' '.join([v or '' for v in value])
96-
LLMInfer.elements()[f.name].value = value
97-
app.load(LLMInfer.deploy_model, list(LLMInfer.valid_elements().values()),
98-
[LLMInfer.element('runtime_tab'),
99-
LLMInfer.element('running_tasks')])
98+
LLMInfer.elements()[f.name].value = str(value)
99+
100+
args = copy(self.args)
101+
args.port = find_free_port()
102+
103+
values = []
104+
for key in LLMInfer.valid_elements():
105+
if key in args.__dict__:
106+
value = getattr(args, key)
107+
else:
108+
value = LLMInfer.element(key).value
109+
values.append(value)
110+
_, running_task = LLMInfer.deploy_model(*values)
111+
LLMInfer.element('running_tasks').value = running_task['value']
100112
else:
101113
app.load(
102114
partial(LLMTrain.update_input_model, arg_cls=RLHFArguments),

swift/ui/base.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44
import sys
55
import time
66
import typing
7+
from collections import OrderedDict
78
from dataclasses import fields
89
from datetime import datetime
910
from functools import wraps
10-
from typing import Any, Dict, List, OrderedDict, Type
11+
from typing import Any, Dict, List, Type
1112

1213
import gradio as gr
1314
import json
@@ -220,12 +221,12 @@ def elements(cls):
220221

221222
@classmethod
222223
def valid_elements(cls):
224+
valid_elements = OrderedDict()
223225
elements = cls.elements()
224-
return {
225-
key: value
226-
for key, value in elements.items()
227-
if isinstance(value, (Textbox, Dropdown, Slider, Checkbox)) and key != 'train_record'
228-
}
226+
for key, value in elements.items():
227+
if isinstance(value, (Textbox, Dropdown, Slider, Checkbox)) and key != 'train_record':
228+
valid_elements[key] = value
229+
return valid_elements
229230

230231
@classmethod
231232
def element_keys(cls):

swift/ui/llm_infer/llm_infer.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Copyright (c) Alibaba, Inc. and its affiliates.
2+
import atexit
23
import os
34
import re
5+
import signal
46
import sys
57
import time
68
from copy import deepcopy
@@ -298,13 +300,37 @@ def deploy_model(cls, *args):
298300
cnt += 1
299301
if cnt >= 60:
300302
logger.warning_once(f'Deploy costing too much time, please check log file: {log_file}')
303+
if cls.is_gradio_app:
304+
cls.register_clean_hook()
301305
logger.info('Deploy done.')
302306
cls.deployed = True
303307
running_task = Runtime.refresh_tasks(log_file)
304308
if cls.is_gradio_app:
305309
cls.running_task = running_task['value']
306310
return gr.update(open=True), running_task
307311

312+
@classmethod
def clean_deployment(cls):
    """Kill the deployment process that was started by the Gradio app.

    Does nothing unless ``cls.is_gradio_app`` is truthy and a running task
    has been recorded in ``cls.running_task``. The task string is parsed
    with ``Runtime.parse_info_from_cmdline`` and every process whose command
    line matches the task's ``log_file`` is sent SIGKILL through ``pkill``.

    The method is safe to call more than once: after a successful pass the
    recorded task is cleared, so a later call (for example the ``atexit``
    hook firing after the signal handler already ran) is a no-op.
    """
    if not cls.is_gradio_app:
        return

    # The clean hook is registered before deploy_model stores the task, so
    # the attribute may still be unset/empty when a signal arrives early.
    running_task = getattr(cls, 'running_task', None)
    if not running_task:
        return

    # Local import keeps this block self-contained; the module header is
    # not visible from here.
    import subprocess

    logger.info('Killing deployment')
    _, args = Runtime.parse_info_from_cmdline(running_task)
    try:
        # Pass the pattern as a separate argv entry instead of splicing it
        # into a shell string, so spaces or shell metacharacters in the log
        # path cannot break or hijack the command.
        subprocess.run(['pkill', '-9', '-f', str(args['log_file'])], check=False)
    except OSError as e:
        # pkill is not available on this platform (os.system would have
        # failed silently here); report it instead of crashing at exit.
        logger.warning(f'Failed to run pkill: {e}')
    # Forget the task so a repeated call does not try to kill it again.
    cls.running_task = None
    logger.info('Done.')
321+
322+
@classmethod
def register_clean_hook(cls):
    """Arrange for ``LLMInfer.clean_deployment`` to run when the process ends.

    Registers the cleanup with ``atexit`` (once per process) and installs
    ``LLMInfer.signal_handler`` for SIGINT, plus SIGTERM on non-Windows
    platforms. Signal handlers are only installed when called from the main
    thread; from any other thread only the ``atexit`` hook is set up.
    """
    import threading

    # deploy_model calls this on every deployment; without the flag atexit
    # would invoke the cleanup once per registration.
    if not getattr(LLMInfer, '_clean_hook_registered', False):
        atexit.register(LLMInfer.clean_deployment)
        LLMInfer._clean_hook_registered = True

    # signal.signal() raises ValueError outside the main thread of the main
    # interpreter, so skip it there and rely on the atexit hook alone.
    if threading.current_thread() is not threading.main_thread():
        return

    signal.signal(signal.SIGINT, LLMInfer.signal_handler)
    if os.name != 'nt':
        signal.signal(signal.SIGTERM, LLMInfer.signal_handler)
328+
329+
@staticmethod
def signal_handler(*args, **kwargs):
    """Signal callback: tear down the deployment, then exit with status 0.

    Whatever positional or keyword arguments the caller supplies are
    accepted and ignored.
    """
    LLMInfer.clean_deployment()
    # Equivalent to sys.exit(0): both raise SystemExit with code 0.
    raise SystemExit(0)
333+
308334
@classmethod
309335
def clear_session(cls):
310336
return '', [], gr.update(value=None), gr.update(value=None), gr.update(value=None), []

tests/test_align/test_template/test_template.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ def test_deepseek_v2_5():
55
tokenizer = get_model_tokenizer('deepseek-ai/DeepSeek-V2.5-1210', load_model=False)[1]
66
template = get_template(tokenizer.model_meta.template, tokenizer)
77
inputs = TemplateInputs(messages=[{
8+
'role': 'system',
9+
'content': '000'
10+
}, {
811
'role': 'user',
912
'content': 'aaa'
1013
}, {
@@ -17,8 +20,9 @@ def test_deepseek_v2_5():
1720
res = template.encode(inputs)
1821
template.print_inputs(res)
1922
template.template_backend = 'jinja'
20-
res = template.encode(inputs)
23+
res2 = template.encode(inputs)
2124
template.print_inputs(res)
25+
assert res['input_ids'] == res2['input_ids']
2226

2327

2428
if __name__ == '__main__':

0 commit comments

Comments
 (0)