
Commit 72ef5a9

[FIX]fix bad_words when sending requests consecutively (#3197)
* fix bad_words
* fix log
* fix log
1 parent 1f8289e commit 72ef5a9

File tree

5 files changed: +45 -35 lines changed


fastdeploy/engine/sampling_params.py

Lines changed: 13 additions & 11 deletions
@@ -218,20 +218,22 @@ def update_from_tokenizer(self, tokenizer):
             prompt_token_ids = tokenizer.encode(text=prompt, add_special_tokens=False)["input_ids"]

             if len(prompt_token_ids) != 1:
-                logger.warning(
-                    f"Skip bad_words: {prompt}."
-                    f"Bad words should be a single token."
-                    f"Got tokens: {prompt_token_ids}."
-                )
+                if not add_prefix_space:
+                    logger.warning(
+                        f"Skip bad_words: <{prompt}>."
+                        f"Bad words should be a single token."
+                        f"Got tokens: {prompt_token_ids}."
+                    )
                 continue

             if prompt_token_ids[0] > tokenizer.vocab_size:
-                logger.warning(
-                    f"Skip bad_words: {prompt}."
-                    f"All token id values should be satisfying:"
-                    f" 0 <= token_id < {tokenizer.vocab_size}."
-                    f"Got token: {prompt_token_ids}."
-                )
+                if not add_prefix_space:
+                    logger.warning(
+                        f"Skip bad_words: <{prompt}>."
+                        f"All token id values should be satisfying:"
+                        f" 0 <= token_id < {tokenizer.vocab_size}."
+                        f"Got token: {prompt_token_ids}."
+                    )
                 continue

             if prompt_token_ids not in self._bad_words_token_ids:
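The warnings in this hunk are now gated on add_prefix_space. The surrounding loop is not shown in the diff, but the gating only makes sense if each bad word is encoded more than once (verbatim and with a leading space) and the warning should fire for only one of the variants. Below is a minimal sketch of that pattern; collect_bad_word_ids is a hypothetical helper, and the loop over prefix variants is an assumption, not the FastDeploy source.

# Sketch (assumption, not the FastDeploy source): each bad word is encoded twice,
# once verbatim and once with a leading space; warnings are emitted only for the
# no-prefix variant so each skipped word is reported once instead of twice.
def collect_bad_word_ids(bad_words, tokenizer, logger):
    bad_words_token_ids = []
    for word in bad_words:
        for add_prefix_space in (False, True):
            prompt = " " + word if add_prefix_space else word
            prompt_token_ids = tokenizer.encode(text=prompt, add_special_tokens=False)["input_ids"]

            if len(prompt_token_ids) != 1:
                if not add_prefix_space:  # log once per word, not once per variant
                    logger.warning(
                        f"Skip bad_words: <{prompt}>. "
                        f"Bad words should be a single token. "
                        f"Got tokens: {prompt_token_ids}."
                    )
                continue

            if prompt_token_ids[0] > tokenizer.vocab_size:
                if not add_prefix_space:
                    logger.warning(
                        f"Skip bad_words: <{prompt}>. "
                        f"All token id values should satisfy 0 <= token_id < {tokenizer.vocab_size}. "
                        f"Got token: {prompt_token_ids}."
                    )
                continue

            if prompt_token_ids not in bad_words_token_ids:
                bad_words_token_ids.append(prompt_token_ids)
    return bad_words_token_ids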

fastdeploy/worker/gcu_model_runner.py

Lines changed: 8 additions & 6 deletions
@@ -270,13 +270,15 @@ def get_attr_from_request(request, attr, default_value=None):
                 request.block_tables, dtype="int32"
             )

-            if request.get("bad_words_token_ids") is not None:
+            if request.get("bad_words_token_ids") is not None and len(request.get("bad_words_token_ids")) > 0:
                 bad_words_len = len(request.get("bad_words_token_ids"))
-                if bad_words_len > 0:
-                    self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
-                    self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
-                        request.get("bad_words_token_ids"), dtype="int64"
-                    )
+                self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
+                self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
+                    request.get("bad_words_token_ids"), dtype="int64"
+                )
+            else:
+                self.share_inputs["bad_tokens_len"][idx : idx + 1] = 1
+                self.share_inputs["bad_tokens"][idx : idx + 1, :] = np.array([-1], dtype="int64")

             if request.get("stop_token_ids") is not None and request.get("stop_seqs_len") is not None:
                 stop_seqs_num = len(request.get("stop_seqs_len"))
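The behavioral change, repeated identically in the GPU, Iluvatar, and XPU model runners below, is the new else branch: the share_inputs slot at idx is reused across consecutive requests, so a request that carries no bad_words must actively reset bad_tokens_len and bad_tokens or it inherits the previous request's banned tokens. A minimal, self-contained sketch of that buffer-reuse behavior follows; the shapes, constants, and set_bad_words helper are illustrative assumptions, not the runner's actual implementation.

# Illustrative sketch (assumed shapes/names): per-slot buffers persist across
# consecutive requests, so a request without bad_words must clear its slot.
import numpy as np

MAX_NUM_SEQS = 4
MAX_BAD_WORDS = 8
share_inputs = {
    "bad_tokens_len": np.zeros([MAX_NUM_SEQS, 1], dtype="int64"),
    "bad_tokens": np.full([MAX_NUM_SEQS, MAX_BAD_WORDS], -1, dtype="int64"),
}

def set_bad_words(idx, bad_words_token_ids):
    if bad_words_token_ids is not None and len(bad_words_token_ids) > 0:
        bad_words_len = len(bad_words_token_ids)
        share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
        share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(bad_words_token_ids, dtype="int64")
    else:
        # reset the reused slot: a neutral length of 1 and a -1 sentinel token,
        # so bad words set by the previous request at this slot no longer apply
        share_inputs["bad_tokens_len"][idx : idx + 1] = 1
        share_inputs["bad_tokens"][idx : idx + 1, :] = np.array([-1], dtype="int64")

set_bad_words(0, [1234, 5678])  # first request bans two tokens at slot 0
set_bad_words(0, None)          # next request at slot 0: without the else branch,
                                # the two banned tokens would silently carry over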

fastdeploy/worker/gpu_model_runner.py

Lines changed: 8 additions & 6 deletions
@@ -489,13 +489,15 @@ def get_attr_from_request(request, attr, default_value=None):
                 request.block_tables, dtype="int32"
             )

-            if request.get("bad_words_token_ids") is not None:
+            if request.get("bad_words_token_ids") is not None and len(request.get("bad_words_token_ids")) > 0:
                 bad_words_len = len(request.get("bad_words_token_ids"))
-                if bad_words_len > 0:
-                    self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
-                    self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
-                        request.get("bad_words_token_ids"), dtype="int64"
-                    )
+                self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
+                self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
+                    request.get("bad_words_token_ids"), dtype="int64"
+                )
+            else:
+                self.share_inputs["bad_tokens_len"][idx : idx + 1] = 1
+                self.share_inputs["bad_tokens"][idx : idx + 1, :] = np.array([-1], dtype="int64")

             if request.get("stop_token_ids") is not None and request.get("stop_seqs_len") is not None:
                 stop_seqs_num = len(request.get("stop_seqs_len"))

fastdeploy/worker/iluvatar_model_runner.py

Lines changed: 8 additions & 6 deletions
@@ -242,13 +242,15 @@ def insert_prefill_inputs(self, req_dicts: List[Request]):
                 request.block_tables, dtype="int32"
             )

-            if request.get("bad_words_token_ids") is not None:
+            if request.get("bad_words_token_ids") is not None and len(request.get("bad_words_token_ids")) > 0:
                 bad_words_len = len(request.get("bad_words_token_ids"))
-                if bad_words_len > 0:
-                    self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
-                    self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
-                        request.get("bad_words_token_ids"), dtype="int64"
-                    )
+                self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
+                self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
+                    request.get("bad_words_token_ids"), dtype="int64"
+                )
+            else:
+                self.share_inputs["bad_tokens_len"][idx : idx + 1] = 1
+                self.share_inputs["bad_tokens"][idx : idx + 1, :] = np.array([-1], dtype="int64")

             if request.get("stop_token_ids") is not None and request.get("stop_seqs_len") is not None:
                 stop_seqs_num = len(request.get("stop_seqs_len"))

fastdeploy/worker/xpu_model_runner.py

Lines changed: 8 additions & 6 deletions
@@ -506,13 +506,15 @@ def process_prefill_inputs(self, req_dicts: List[Request]):
                 request.block_tables, dtype="int32"
             )

-            if request.get("bad_words_token_ids") is not None:
+            if request.get("bad_words_token_ids") is not None and len(request.get("bad_words_token_ids")) > 0:
                 bad_words_len = len(request.get("bad_words_token_ids"))
-                if bad_words_len > 0:
-                    self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
-                    self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
-                        request.get("bad_words_token_ids"), dtype="int64"
-                    )
+                self.share_inputs["bad_tokens_len"][idx : idx + 1] = bad_words_len
+                self.share_inputs["bad_tokens"][idx : idx + 1, :bad_words_len] = np.array(
+                    request.get("bad_words_token_ids"), dtype="int64"
+                )
+            else:
+                self.share_inputs["bad_tokens_len"][idx : idx + 1] = 1
+                self.share_inputs["bad_tokens"][idx : idx + 1, :] = np.array([-1], dtype="int64")

             if request.get("stop_token_ids") is not None and request.get("stop_seqs_len") is not None:
                 stop_seqs_num = len(request.get("stop_seqs_len"))
