Commit 5b94f4f

Support deepseekv3 (#3449) (#3455)
1 parent 012cb96 commit 5b94f4f

File tree

19 files changed: +2257 −17 lines changed

examples/cpu/llm/inference/distributed/run_generation_with_deepspeed.py

Lines changed: 3 additions & 1 deletion
@@ -294,7 +294,9 @@ def get_repo_root(model_name_or_path):
 def get_checkpoint_files(model_name_or_path):
     cached_repo_dir = get_repo_root(model_name_or_path)
     glob_pattern = "*.[bp][it][n]"
-    if re.search("deepseek-v2", model_name_or_path, re.IGNORECASE):
+    if re.search("deepseek-v2", model_name_or_path, re.IGNORECASE) or re.search(
+        "deepseek-v3", model_name_or_path, re.IGNORECASE
+    ):
         glob_pattern = "*.[sbp][ait][fn][e][t][e][n][s][o][r][s]"
     # extensions: .bin | .pt
     # creates a list of paths from all downloaded files in cache dir
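
A quick way to read those glob patterns (a standalone sketch, not part of the commit): each bracketed group matches exactly one character, so the default pattern covers short extensions such as ".bin", while the deepseek branch spells out ".safetensors" one position at a time.

import fnmatch

old_pattern = "*.[bp][it][n]"  # matches 3-char extensions such as ".bin"
new_pattern = "*.[sbp][ait][fn][e][t][e][n][s][o][r][s]"  # matches ".safetensors"

print(fnmatch.fnmatch("pytorch_model.bin", old_pattern))                 # True
print(fnmatch.fnmatch("model-00001-of-00163.safetensors", old_pattern))  # False
print(fnmatch.fnmatch("model-00001-of-00163.safetensors", new_pattern))  # True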

examples/cpu/llm/inference/run.py

Lines changed: 1 addition & 0 deletions
@@ -594,6 +594,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
         "maira": ("/maira2_local_shard"),
         "jamba": ("/jamba_local_shard"),
         "deepseek-v2": ("/deepseekv2_local_shard"),
+        "deepseek-v3": ("/deepseekv3_local_shard"),
     }
     model_type = next(
         (
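
The dict maps a model-family keyword to the directory suffix holding its pre-sharded checkpoint, and the truncated next(...) call below it presumably scans for the first key contained in the model id. A minimal sketch of that idiom, with illustrative names rather than the script's actual variables:

shard_map = {
    "deepseek-v2": "/deepseekv2_local_shard",
    "deepseek-v3": "/deepseekv3_local_shard",
}

model_id = "deepseek-ai/DeepSeek-V3"
model_type = next(
    (key for key in shard_map if key in model_id.lower()),  # first matching family
    None,  # fallback when no keyword matches
)
print(model_type)  # deepseek-v3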

examples/cpu/llm/inference/single_instance/run_quantization.py

Lines changed: 3 additions & 1 deletion
@@ -45,7 +45,7 @@
 from llm.inference.utils.model_class.whisper import WhisperConfig
 from llm.inference.utils.model_class.maira2 import MAIRA2Config
 from llm.inference.utils.model_class.jamba import JambaConfig
-from llm.inference.utils.model_class.deepseek import DeepseekV2Config
+from llm.inference.utils.model_class.deepseek import DeepseekV2Config, DeepseekV3Config

 parser = argparse.ArgumentParser("LLM generation script (int8 path)", add_help=False)
 parser.add_argument(
@@ -437,6 +437,8 @@ def download_and_open(url: str) -> Image.Image:
         model = JambaConfig(args.model_id)
     elif re.search("deepseekv2", config.architectures[0], re.IGNORECASE):
         model = DeepseekV2Config(args.model_id)
+    elif re.search("deepseekv3", config.architectures[0], re.IGNORECASE):
+        model = DeepseekV3Config(args.model_id)
     else:
         raise AssertionError("Not support %s." % (args.model_id))
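
Note that the dispatch keys off config.architectures[0] (the class name recorded in the checkpoint's config.json) rather than the model id string. A hedged illustration of what that value typically looks like for DeepSeek-V3:

import re
from transformers import AutoConfig

config = AutoConfig.from_pretrained("deepseek-ai/DeepSeek-V3", trust_remote_code=True)
print(config.architectures)  # typically ["DeepseekV3ForCausalLM"]

if re.search("deepseekv3", config.architectures[0], re.IGNORECASE):
    print("routing to DeepseekV3Config")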

examples/cpu/llm/inference/utils/model_class/deepseek.py

Lines changed: 40 additions & 0 deletions
@@ -44,3 +44,43 @@ def get_user_model(self, config, benchmark):
             trust_remote_code=True,
         )
         return self.model
+
+
+class DeepseekV3Config(LLMConfig):
+    def __init__(self, model_id):
+        self.name = "deepseekv3"
+        self.model_id = model_id
+        self.to_channels_last = False
+        self.example_inputs_mode = EXAMPLE_INPUTS_MODE.MASK_POS_KV
+
+        self.use_global_past_key_value = True
+        self.use_ipex_autotune = True
+
+    def get_user_model(self, config, benchmark):
+        if benchmark:
+            try:
+                with ipex.OnDevice(dtype=torch.float, device="meta"):
+                    self.model = AutoModelForCausalLM.from_config(
+                        config, trust_remote_code=True
+                    )
+            except (RuntimeError, AttributeError):
+                self.model = AutoModelForCausalLM.from_config(
+                    config, trust_remote_code=True
+                )
+            except Exception:
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    self.model_id,
+                    torch_dtype=torch.bfloat16,
+                    config=config,
+                    low_cpu_mem_usage=True,
+                    trust_remote_code=True,
+                )
+        else:
+            self.model = AutoModelForCausalLM.from_pretrained(
+                self.model_id,
+                torch_dtype=torch.bfloat16,
+                config=config,
+                low_cpu_mem_usage=True,
+                trust_remote_code=True,
+            )
+        return self.model
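
The benchmark path first instantiates the model skeleton under ipex.OnDevice on the meta device, which records parameter shapes without allocating real weight storage, and only falls back to a full from_pretrained load if that fails. A plain-PyTorch sketch of the same meta-device trick, independent of this repo (requires PyTorch 2.0+):

import torch

# Tensors created on the meta device carry shape/dtype metadata but no storage,
# so even a huge layer is "created" instantly with no memory cost.
with torch.device("meta"):
    layer = torch.nn.Linear(16384, 16384)

print(layer.weight.device)  # meta
print(layer.weight.shape)   # torch.Size([16384, 16384])

# to_empty() later materializes real (uninitialized) storage on a concrete device.
layer = layer.to_empty(device="cpu")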

examples/cpu/llm/inference/utils/supported_models.py

Lines changed: 1 addition & 0 deletions
@@ -36,6 +36,7 @@
     "maira-2": (AutoModelForCausalLM, AutoProcessor),
     "jamba": (AutoModelForCausalLM, AutoTokenizer),
     "deepseek-v2": (AutoModelForCausalLM, AutoTokenizer),
+    "deepseek-v3": (AutoModelForCausalLM, AutoTokenizer),
     "auto": (AutoModelForCausalLM, AutoTokenizer),
 }
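
Each entry pairs a family keyword with the Auto classes used to load its weights and tokenizer. A usage sketch (the table name MODEL_CLASSES is an assumption for illustration, not the file's actual variable):

from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_CLASSES = {  # hypothetical name for the mapping shown above
    "deepseek-v3": (AutoModelForCausalLM, AutoTokenizer),
}

model_cls, tok_cls = MODEL_CLASSES["deepseek-v3"]
tokenizer = tok_cls.from_pretrained("deepseek-ai/DeepSeek-V3", trust_remote_code=True)
model = model_cls.from_pretrained(
    "deepseek-ai/DeepSeek-V3", torch_dtype="auto", trust_remote_code=True
)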

intel_extension_for_pytorch/transformers/generation/beam_sample.py

Lines changed: 1 addition & 0 deletions
@@ -178,6 +178,7 @@ def _beam_sample(
             "Maira2ForConditionalGeneration",
             "JambaForCausalLM",
             "DeepseekV2ForCausalLM",
+            "DeepseekV3ForCausalLM",
         ]:
             first_token = False
         if hasattr(self.config, "kv_cache_dtype"):

intel_extension_for_pytorch/transformers/generation/beam_search.py

Lines changed: 1 addition & 0 deletions
@@ -205,6 +205,7 @@ def _beam_search(
             "Maira2ForConditionalGeneration",
             "JambaForCausalLM",
             "DeepseekV2ForCausalLM",
+            "DeepseekV3ForCausalLM",
         ]:
             first_token = False
         has_position_id = model_inputs.get("position_ids", None) is not None

intel_extension_for_pytorch/transformers/generation/greedy_search.py

Lines changed: 1 addition & 0 deletions
@@ -170,6 +170,7 @@ def _greedy_search(
             "Maira2ForConditionalGeneration",
             "JambaForCausalLM",
             "DeepseekV2ForCausalLM",
+            "DeepseekV3ForCausalLM",
         ]:
             first_token = False
         if hasattr(self.config, "kv_cache_dtype"):

intel_extension_for_pytorch/transformers/generation/sample.py

Lines changed: 1 addition & 0 deletions
@@ -197,6 +197,7 @@ def _sample(
             "Maira2ForConditionalGeneration",
             "JambaForCausalLM",
             "DeepseekV2ForCausalLM",
+            "DeepseekV3ForCausalLM",
         ]:
             first_token = False
         if hasattr(self.config, "kv_cache_dtype"):
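
All four decoding paths (greedy search, sample, beam search, beam sample) gate the same first-token special-casing on a shared list of backbone class names, which is why DeepseekV3ForCausalLM is appended in each file. A reduced sketch of that check (the list name here is illustrative; the real code inlines the list):

SUPPORTED_BACKBONES = [
    "JambaForCausalLM",
    "DeepseekV2ForCausalLM",
    "DeepseekV3ForCausalLM",
]

model_backbone = "DeepseekV3ForCausalLM"  # derived from config.architectures[0]
first_token = True
if model_backbone in SUPPORTED_BACKBONES:
    first_token = False  # matches the branch shown in the diffs above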

intel_extension_for_pytorch/transformers/models/cpu/modules/decoder.py

Lines changed: 5 additions & 1 deletion
@@ -62,6 +62,7 @@ def __init__(self, module, config, tpp=False, woq=False):
             "Maira2ForConditionalGeneration",
             "JambaForCausalLM",
             "DeepseekV2ForCausalLM",
+            "DeepseekV3ForCausalLM",
         ]:
             if not self.distributed:
                 if hasattr(module, "linear_add"):
@@ -143,7 +144,10 @@ def __init__(self, module, config, tpp=False, woq=False):
             tpp=tpp,
             woq=woq,
         )
-        if self.model_backbone == "DeepseekV2ForCausalLM":
+        if self.model_backbone in [
+            "DeepseekV2ForCausalLM",
+            "DeepseekV3ForCausalLM",
+        ]:
             if hasattr(self.mlp, "experts"):
                 # 0: Default, 1: TPP, 2: DNNL, 3: MKL, 4: WOQ
                 self.moe_linear_type = 0
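
The second hunk widens a single equality test into a list-membership check so both DeepSeek generations take the MoE branch, where moe_linear_type records which linear backend the experts will use. A schematic of that encoding, based only on the inline comment (not the full IPEX dispatch logic):

# Backend codes from the inline comment in decoder.py:
MOE_LINEAR_BACKENDS = {0: "Default", 1: "TPP", 2: "DNNL", 3: "MKL", 4: "WOQ"}

moe_linear_type = 0  # default until a specific backend is selected
print(MOE_LINEAR_BACKENDS[moe_linear_type])  # Default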
