Skip to content

Commit f672a34

Browse files
authored
[FIX 2.1] Fix bad_words when sending requests consecutively (#3199)
* fix bad_words
* fix log
* fix log
1 parent bc0b92b commit f672a34

File tree

5 files changed

+45
-35
lines changed

5 files changed

+45
-35
lines changed

fastdeploy/engine/sampling_params.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -218,20 +218,22 @@ def update_from_tokenizer(self, tokenizer):
218218
prompt_token_ids = tokenizer.encode(text=prompt, add_special_tokens=False)["input_ids"]
219219

220220
if len(prompt_token_ids) != 1:
221-
logger.warning(
222-
f"Skip bad_words: {prompt}."
223-
f"Bad words should be a single token."
224-
f"Got tokens: {prompt_token_ids}."
225-
)
221+
if not add_prefix_space:
222+
logger.warning(
223+
f"Skip bad_words: <{prompt}>."
224+
f"Bad words should be a single token."
225+
f"Got tokens: {prompt_token_ids}."
226+
)
226227
continue
227228

228229
if prompt_token_ids[0] > tokenizer.vocab_size:
229-
logger.warning(
230-
f"Skip bad_words: {prompt}."
231-
f"All token id values should be satisfying:"
232-
f" 0 <= token_id < {tokenizer.vocab_size}."
233-
f"Got token: {prompt_token_ids}."
234-
)
230+
if not add_prefix_space:
231+
logger.warning(
232+
f"Skip bad_words: <{prompt}>."
233+
f"All token id values should be satisfying:"
234+
f" 0 <= token_id < {tokenizer.vocab_size}."
235+
f"Got token: {prompt_token_ids}."
236+
)
235237
continue
236238

237239
if prompt_token_ids not in self._bad_words_token_ids:

fastdeploy/worker/gcu_model_runner.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -272,13 +272,15 @@ def get_attr_from_request(request, attr, default_value=None):
272272
request.block_tables, dtype="int32"
273273
)
274274

275-
if request.get("bad_words_token_ids") is not None:
275+
if request.get("bad_words_token_ids") is not None and len(request.get("bad_words_token_ids")) > 0:
276276
bad_words_len = len(request.get("bad_words_token_ids"))
277-
if bad_words_len > 0:
278-
self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
279-
self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
280-
request.get("bad_words_token_ids"), dtype="int64"
281-
)
277+
self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
278+
self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
279+
request.get("bad_words_token_ids"), dtype="int64"
280+
)
281+
else:
282+
self.share_inputs["bad_tokens_len"][idx : idx + 1] = 1
283+
self.share_inputs["bad_tokens"][idx : idx + 1, :] = np.array([-1], dtype="int64")
282284

283285
if request.get("stop_token_ids") is not None and request.get("stop_seqs_len") is not None:
284286
stop_seqs_num = len(request.get("stop_seqs_len"))

fastdeploy/worker/gpu_model_runner.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -495,13 +495,15 @@ def get_attr_from_request(request, attr, default_value=None):
495495
request.block_tables, dtype="int32"
496496
)
497497

498-
if request.get("bad_words_token_ids") is not None:
498+
if request.get("bad_words_token_ids") is not None and len(request.get("bad_words_token_ids")) > 0:
499499
bad_words_len = len(request.get("bad_words_token_ids"))
500-
if bad_words_len > 0:
501-
self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
502-
self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
503-
request.get("bad_words_token_ids"), dtype="int64"
504-
)
500+
self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
501+
self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
502+
request.get("bad_words_token_ids"), dtype="int64"
503+
)
504+
else:
505+
self.share_inputs["bad_tokens_len"][idx : idx + 1] = 1
506+
self.share_inputs["bad_tokens"][idx : idx + 1, :] = np.array([-1], dtype="int64")
505507

506508
if request.get("stop_token_ids") is not None and request.get("stop_seqs_len") is not None:
507509
stop_seqs_num = len(request.get("stop_seqs_len"))

fastdeploy/worker/iluvatar_model_runner.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -243,13 +243,15 @@ def insert_prefill_inputs(self, req_dicts: List[Request], num_running_requests:
243243
request.block_tables, dtype="int32"
244244
)
245245

246-
if request.get("bad_words_token_ids") is not None:
246+
if request.get("bad_words_token_ids") is not None and len(request.get("bad_words_token_ids")) > 0:
247247
bad_words_len = len(request.get("bad_words_token_ids"))
248-
if bad_words_len > 0:
249-
self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
250-
self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
251-
request.get("bad_words_token_ids"), dtype="int64"
252-
)
248+
self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
249+
self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
250+
request.get("bad_words_token_ids"), dtype="int64"
251+
)
252+
else:
253+
self.share_inputs["bad_tokens_len"][idx : idx + 1] = 1
254+
self.share_inputs["bad_tokens"][idx : idx + 1, :] = np.array([-1], dtype="int64")
253255

254256
if request.get("stop_token_ids") is not None and request.get("stop_seqs_len") is not None:
255257
stop_seqs_num = len(request.get("stop_seqs_len"))

fastdeploy/worker/xpu_model_runner.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -507,13 +507,15 @@ def process_prefill_inputs(self, req_dicts: List[Request], num_running_requests:
507507
request.block_tables, dtype="int32"
508508
)
509509

510-
if request.get("bad_words_token_ids") is not None:
510+
if request.get("bad_words_token_ids") is not None and len(request.get("bad_words_token_ids")) > 0:
511511
bad_words_len = len(request.get("bad_words_token_ids"))
512-
if bad_words_len > 0:
513-
self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
514-
self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
515-
request.get("bad_words_token_ids"), dtype="int64"
516-
)
512+
self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
513+
self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
514+
request.get("bad_words_token_ids"), dtype="int64"
515+
)
516+
else:
517+
self.share_inputs["bad_tokens_len"][idx : idx + 1] = 1
518+
self.share_inputs["bad_tokens"][idx : idx + 1, :] = np.array([-1], dtype="int64")
517519

518520
if request.get("stop_token_ids") is not None and request.get("stop_seqs_len") is not None:
519521
stop_seqs_num = len(request.get("stop_seqs_len"))

0 commit comments

Comments
 (0)