
Commit afff4d3

[Feature] support seed parameter (#3161)
* support seed
* fix
* add SamplingMetadata seed test
* The next_tokens values are inconsistent!
* add air and rejection seed test
* fix
* add SamplingParams seed test
* fix seed=0
* Default to default
* fix
* fix args_utils
* fix review
* fix review
* fix
* fix
* add xpu,gcu,iluvatar support seed
* fix
1 parent 20839ab commit afff4d3

15 files changed: +386 −5 lines changed


fastdeploy/config.py

Lines changed: 1 addition & 0 deletions
@@ -122,6 +122,7 @@ def __init__(
         self.enable_mm = False
         self.enable_redundant_experts = False
         self.redundant_experts_num = 0
+        self.seed = 0
         self.quantization = None
         for key, value in args.items():
             if hasattr(self, key):
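
For context, a minimal sketch of the default-then-override pattern this hunk extends: the new `seed` attribute gets a default of 0 and, like the other fields, can be overridden through the `args` dict. `DemoConfig` and the assumption that the loop body applies `setattr` are illustrative; this is not FastDeploy's actual config class.

```python
# Hedged sketch of the default-then-override pattern; DemoConfig is
# a stand-in, not FastDeploy's real config class.
class DemoConfig:
    def __init__(self, args: dict):
        self.seed = 0            # default added by this commit
        self.quantization = None
        for key, value in args.items():
            if hasattr(self, key):        # only known attributes are overridden
                setattr(self, key, value)


cfg = DemoConfig({"seed": 42, "not_a_field": True})
print(cfg.seed)  # 42; unknown keys are silently ignored
```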

fastdeploy/engine/args_utils.py

Lines changed: 11 additions & 0 deletions
@@ -316,6 +316,11 @@ class EngineArgs:
     Must be explicitly enabled via the `--enable-logprob` startup parameter to output logprob values.
     """
 
+    seed: int = 0
+    """
+    Random seed to use for initialization. If not set, defaults to 0.
+    """
+
     enable_early_stop: bool = False
     """
     Flag to enable early stop. Default is False (disabled).
@@ -484,6 +489,12 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             default=EngineArgs.enable_logprob,
             help="Enable output of token-level log probabilities.",
         )
+        model_group.add_argument(
+            "--seed",
+            type=int,
+            default=EngineArgs.seed,
+            help="Random seed for initialization. If not specified, defaults to 0.",
+        )
        model_group.add_argument(
            "--enable-early-stop",
            action="store_true",
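
The flag's default is read off the dataclass field, so the CLI and programmatic paths stay in sync. A small sketch of that pattern using plain argparse; `DemoArgs` stands in for `EngineArgs`, and treating FlexibleArgumentParser's `add_argument` as argparse-compatible is an assumption based on the hunk above.

```python
import argparse
from dataclasses import dataclass


@dataclass
class DemoArgs:
    seed: int = 0  # stands in for EngineArgs.seed


parser = argparse.ArgumentParser()
parser.add_argument(
    "--seed",
    type=int,
    default=DemoArgs.seed,  # the dataclass default doubles as the CLI default
    help="Random seed for initialization. If not specified, defaults to 0.",
)
print(parser.parse_args([]).seed)                # 0
print(parser.parse_args(["--seed", "42"]).seed)  # 42
```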

fastdeploy/model_executor/layers/sample/meta_data.py

Lines changed: 1 addition & 0 deletions
@@ -43,6 +43,7 @@ class SamplingMetadata:
     top_p: paddle.Tensor
     top_k: Optional[paddle.Tensor] = None
     min_p: Optional[paddle.Tensor] = None
+    seed: Optional[paddle.Tensor] = None
     max_num_logprobs: Optional[int] = None
     enable_early_stop: Optional[int] = False
     stop_flags: Optional[paddle.Tensor] = None
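
Making the field `Optional` with a `None` default keeps existing construction sites working until a runner passes a seed tensor. A minimal stand-in sketch (`DemoMetadata` is not the real SamplingMetadata):

```python
from dataclasses import dataclass
from typing import Optional

import paddle


@dataclass
class DemoMetadata:
    top_p: paddle.Tensor
    seed: Optional[paddle.Tensor] = None  # None preserves unseeded behavior


meta = DemoMetadata(top_p=paddle.full([4, 1], 0.8))
print(meta.seed)  # None until a runner supplies a seed buffer
```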

fastdeploy/model_executor/layers/sample/sampler.py

Lines changed: 3 additions & 1 deletion
@@ -282,7 +282,9 @@ def forward_cuda(
 
         probs = min_p_sampling(probs, sampling_metadata.min_p)
 
-        _, next_tokens = top_k_top_p_sampling(probs, sampling_metadata.top_p, sampling_metadata.top_k)
+        _, next_tokens = top_k_top_p_sampling(
+            probs, sampling_metadata.top_p, sampling_metadata.top_k, seed=sampling_metadata.seed[0, 0]
+        )
 
         logprobs_tensors = (
             None if num_logprobs is None else self.gather_logprobs(raw_logprobs, num_logprobs, token_ids=next_tokens)
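
The runners pass the whole per-request seed buffer, and the sampler indexes `[0, 0]` to hand the kernel a single scalar. A hedged sketch of that indexing, assuming the buffer is a `[max_num_seqs, 1]` int64 tensor (a shape inferred from the other `share_inputs` buffers, not confirmed by this diff):

```python
import paddle

# Assumed shape: one seed slot per sequence, [max_num_seqs, 1].
infer_seed = paddle.full(shape=[4, 1], fill_value=42, dtype="int64")

scalar_seed = infer_seed[0, 0]  # 0-d tensor; this is what seed= receives
print(scalar_seed.item())       # 42
```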

fastdeploy/utils.py

Lines changed: 9 additions & 0 deletions
@@ -29,6 +29,8 @@
 from pathlib import Path
 from typing import Literal, TypeVar, Union
 
+import numpy as np
+import paddle
 import requests
 import yaml
 from aistudio_sdk.snapshot_download import snapshot_download as aistudio_download
@@ -295,6 +297,13 @@ def extract_tar(tar_path, output_dir):
         raise RuntimeError(f"Extraction failed: {e!s}")
 
 
+def set_random_seed(seed: int) -> None:
+    if seed is not None:
+        random.seed(seed)
+        np.random.seed(seed)
+        paddle.seed(seed)
+
+
 def download_model(url, output_dir, temp_tar):
     """
     Download the model and extract it to the specified directory.
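
`set_random_seed` seeds Python's `random`, NumPy, and Paddle together, so a single call pins every RNG the stack touches. A quick reproducibility check (assuming a working paddle install; on the same device, seeded draws are deterministic):

```python
import random

import numpy as np
import paddle

from fastdeploy.utils import set_random_seed

set_random_seed(42)
first = (random.random(), np.random.rand(), paddle.rand([1]).item())

set_random_seed(42)
second = (random.random(), np.random.rand(), paddle.rand([1]).item())

assert first == second  # one call pins all three RNG streams
```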

fastdeploy/worker/gcu_model_runner.py

Lines changed: 1 addition & 0 deletions
@@ -540,6 +540,7 @@ def _prepare_inputs(self) -> None:
             top_p=self.share_inputs["top_p"],
             top_k=self.share_inputs["top_k"],
             min_p=self.share_inputs["min_p"],
+            seed=self.share_inputs["infer_seed"],
             step_idx=self.share_inputs["step_idx"],
             pre_token_ids=self.share_inputs["pre_ids"],
             prompt_ids=self.share_inputs["prompt_ids"],

fastdeploy/worker/gcu_worker.py

Lines changed: 3 additions & 1 deletion
@@ -22,7 +22,7 @@
 
 from fastdeploy.config import FDConfig
 from fastdeploy.engine.request import Request
-from fastdeploy.utils import get_logger
+from fastdeploy.utils import get_logger, set_random_seed
 from fastdeploy.worker.gcu_model_runner import GCUModelRunner
 from fastdeploy.worker.output import ModelRunnerOutput
 from fastdeploy.worker.worker_base import WorkerBase
@@ -60,6 +60,7 @@ def init_device(self):
         else:
             raise RuntimeError(f"Not support device type: {self.device_config.device}")
 
+        set_random_seed(self.fd_config.model_config.seed)
         # Construct model runner
         self.model_runner: GCUModelRunner = GCUModelRunner(
             fd_config=self.fd_config,
@@ -128,6 +129,7 @@ def graph_optimize_and_warm_up_model(self) -> None:
         self.model_runner.sot_warmup()
         # 2. Trigger cuda graph capture
         self.model_runner.capture_model()
+        set_random_seed(self.fd_config.model_config.seed)
 
     def check_health(self) -> bool:
         """ """

fastdeploy/worker/gpu_model_runner.py

Lines changed: 2 additions & 0 deletions
@@ -131,6 +131,7 @@ def __init__(
             fill_value=4,
             dtype="int64",
         )
+
         self.restore_chunked_prefill_request = dict()
 
         # Initialize attention Backend
@@ -813,6 +814,7 @@ def _prepare_inputs(self) -> None:
             top_p=self.share_inputs["top_p"],
             top_k=self.share_inputs["top_k"],
             min_p=self.share_inputs["min_p"],
+            seed=self.share_inputs["infer_seed"],
             step_idx=self.share_inputs["step_idx"],
             pre_token_ids=self.share_inputs["pre_ids"],
             prompt_ids=self.share_inputs["prompt_ids"],

fastdeploy/worker/gpu_worker.py

Lines changed: 3 additions & 1 deletion
@@ -27,7 +27,7 @@
 from fastdeploy.engine.request import Request
 from fastdeploy.platforms import current_platform
 from fastdeploy.plugins.model_runner import load_model_runner_plugins
-from fastdeploy.utils import get_logger
+from fastdeploy.utils import get_logger, set_random_seed
 from fastdeploy.worker.model_runner_base import ModelRunnerBase
 from fastdeploy.worker.output import ModelRunnerOutput
 from fastdeploy.worker.worker_base import WorkerBase
@@ -75,6 +75,7 @@ def init_device(self):
         else:
             raise RuntimeError(f"Not support device type: {self.device_config.device}")
 
+        set_random_seed(self.fd_config.model_config.seed)
         # Construct model runner
         self.model_runner: ModelRunnerBase = ModelRunner(
             fd_config=self.fd_config,
@@ -129,6 +130,7 @@ def determine_available_memory(self) -> int:
 
         # 2. Profile run
         self.model_runner.profile_run()
+        set_random_seed(self.fd_config.model_config.seed)
 
         # 3. Statistical memory information
         paddle_reserved_mem_after_run = paddle.device.cuda.max_memory_reserved(local_rank)
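
Note the second `set_random_seed` call after `profile_run()`: profiling consumes an unpredictable number of RNG draws, so re-seeding restores a known generator state before real requests arrive (the same pattern appears after warmup in gcu_worker.py above). A hedged sketch of why this matters; `warmup` below is illustrative, not FastDeploy code:

```python
import paddle

from fastdeploy.utils import set_random_seed


def warmup():
    paddle.rand([8])  # stand-in for profiling work that advances the RNG


set_random_seed(0)
warmup()                          # RNG state now depends on warmup internals
set_random_seed(0)                # reset to a known state before serving
first = paddle.rand([1]).item()

set_random_seed(0)
second = paddle.rand([1]).item()
assert first == second            # identical no matter what warmup consumed
```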

fastdeploy/worker/iluvatar_model_runner.py

Lines changed: 1 addition & 0 deletions
@@ -509,6 +509,7 @@ def _prepare_inputs(self) -> None:
             temperature=self.share_inputs["temperature"],
             top_p=self.share_inputs["top_p"],
             top_k=self.share_inputs["top_k"],
+            seed=self.share_inputs["seed"],
             step_idx=self.share_inputs["step_idx"],
             pre_token_ids=self.share_inputs["pre_ids"],
             prompt_ids=self.share_inputs["prompt_ids"],
