def test_non_streaming_chat_with_bad_words(openai_client, capsys):
    """
    Test bad_words option in non-streaming chat functionality with the local service

    Flow: request a baseline chat completion with token ids returned, decode
    each id back to text, resubmit a slice of those texts as ``bad_words``,
    and assert that none of the corresponding ids show up in the new output.
    """
    model_dir_name = "ernie-4_5-21b-a3b-bf16-paddle"
    model_root = os.getenv("MODEL_PATH")
    # Tokenizer files live under $MODEL_PATH when it is set, else next to the CWD.
    model_path = os.path.join(model_root, model_dir_name) if model_root else "./ernie-4_5-21b-a3b-bf16-paddle"

    response_0 = openai_client.chat.completions.create(
        model="default",
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        temperature=1,
        top_p=0.0,
        max_tokens=20,
        stream=False,
        extra_body={"return_token_ids": True},
    )

    # Validate the shape of the baseline response before reading token ids.
    assert hasattr(response_0, "choices")
    assert len(response_0.choices) > 0
    baseline_choice = response_0.choices[0]
    assert hasattr(baseline_choice, "message")
    assert hasattr(baseline_choice.message, "completion_token_ids")
    assert isinstance(baseline_choice.message.completion_token_ids, list)

    from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer

    tokenizer = ErnieBotTokenizer.from_pretrained(model_path, trust_remote_code=True)
    # Keep ids and their decoded text index-aligned so the same slice selects both.
    output_ids_0 = list(baseline_choice.message.completion_token_ids)
    output_tokens_0 = [tokenizer.decode(token_id) for token_id in output_ids_0]

    # add bad words
    banned_slice = slice(6, 10)
    bad_tokens = output_tokens_0[banned_slice]
    bad_token_ids = output_ids_0[banned_slice]
    response_1 = openai_client.chat.completions.create(
        model="default",
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        temperature=1,
        top_p=0.0,
        max_tokens=20,
        extra_body={"bad_words": bad_tokens, "return_token_ids": True},
        stream=False,
    )
    assert hasattr(response_1, "choices")
    assert len(response_1.choices) > 0
    filtered_choice = response_1.choices[0]
    assert hasattr(filtered_choice, "message")
    assert hasattr(filtered_choice.message, "completion_token_ids")
    assert isinstance(filtered_choice.message.completion_token_ids, list)
    # Every banned id must be absent from the second generation.
    assert all(
        banned_id not in response_1.choices[0].message.completion_token_ids for banned_id in bad_token_ids
    )
824
835
825
836
826
837
def test_streaming_chat_with_bad_words (openai_client , capsys ):
@@ -832,75 +843,106 @@ def test_streaming_chat_with_bad_words(openai_client, capsys):
832
843
messages = [{"role" : "user" , "content" : "Hello, how are you?" }],
833
844
temperature = 1 ,
834
845
top_p = 0.0 ,
835
- max_tokens = 10 ,
846
+ max_tokens = 20 ,
836
847
stream = True ,
848
+ extra_body = {"return_token_ids" : True },
837
849
)
838
- output_0 = []
850
+ output_tokens_0 = []
851
+ output_ids_0 = []
852
+ is_first_chunk = True
839
853
for chunk in response_0 :
840
854
assert hasattr (chunk , "choices" )
841
855
assert len (chunk .choices ) > 0
842
856
assert hasattr (chunk .choices [0 ], "delta" )
843
857
assert hasattr (chunk .choices [0 ].delta , "content" )
844
- output_0 .append (chunk .choices [0 ].delta .content )
858
+ assert hasattr (chunk .choices [0 ].delta , "completion_token_ids" )
859
+ if is_first_chunk :
860
+ is_first_chunk = False
861
+ else :
862
+ assert isinstance (chunk .choices [0 ].delta .completion_token_ids , list )
863
+ output_tokens_0 .append (chunk .choices [0 ].delta .content )
864
+ output_ids_0 .extend (chunk .choices [0 ].delta .completion_token_ids )
845
865
846
866
# add bad words
867
+ bad_tokens = output_tokens_0 [6 :10 ]
868
+ bad_token_ids = output_ids_0 [6 :10 ]
847
869
response_1 = openai_client .chat .completions .create (
848
870
model = "default" ,
849
871
messages = [{"role" : "user" , "content" : "Hello, how are you?" }],
850
872
temperature = 1 ,
851
873
top_p = 0.0 ,
852
- max_tokens = 10 ,
853
- extra_body = {"bad_words" : output_0 [ - 5 :] },
874
+ max_tokens = 20 ,
875
+ extra_body = {"bad_words" : bad_tokens , "return_token_ids" : True },
854
876
stream = True ,
855
877
)
856
- output_1 = []
878
+ output_tokens_1 = []
879
+ output_ids_1 = []
880
+ is_first_chunk = True
857
881
for chunk in response_1 :
858
882
assert hasattr (chunk , "choices" )
859
883
assert len (chunk .choices ) > 0
860
884
assert hasattr (chunk .choices [0 ], "delta" )
861
885
assert hasattr (chunk .choices [0 ].delta , "content" )
862
- output_1 .append (chunk .choices [0 ].delta .content )
863
- assert output_0 not in output_1
886
+ assert hasattr (chunk .choices [0 ].delta , "completion_token_ids" )
887
+ if is_first_chunk :
888
+ is_first_chunk = False
889
+ else :
890
+ assert isinstance (chunk .choices [0 ].delta .completion_token_ids , list )
891
+ output_tokens_1 .append (chunk .choices [0 ].delta .content )
892
+ output_ids_1 .extend (chunk .choices [0 ].delta .completion_token_ids )
893
+ assert not any (ids in output_ids_1 for ids in bad_token_ids )
864
894
865
895
866
896
def test_non_streaming_completion_with_bad_words(openai_client, capsys):
    """
    Test bad_words option in non-streaming completion functionality with the local service

    A baseline completion is requested with ``return_token_ids`` enabled, each
    returned id is decoded to text, and a slice of those texts is resubmitted
    as ``bad_words``. The second completion must not contain any of the
    corresponding token ids.
    """
    # Tokenizer files live under $MODEL_PATH when it is set, else next to the CWD.
    base_path = os.getenv("MODEL_PATH")
    if base_path:
        model_path = os.path.join(base_path, "ernie-4_5-21b-a3b-bf16-paddle")
    else:
        model_path = "./ernie-4_5-21b-a3b-bf16-paddle"

    response_0 = openai_client.completions.create(
        model="default",
        prompt="Hello, how are you?",
        temperature=1,
        top_p=0.0,
        max_tokens=20,
        stream=False,
        extra_body={"return_token_ids": True},
    )
    assert hasattr(response_0, "choices")
    assert len(response_0.choices) > 0
    assert hasattr(response_0.choices[0], "completion_token_ids")
    assert isinstance(response_0.choices[0].completion_token_ids, list)

    from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer

    tokenizer = ErnieBotTokenizer.from_pretrained(model_path, trust_remote_code=True)
    # Keep ids and their decoded text index-aligned so the same slice selects both.
    output_ids_0 = list(response_0.choices[0].completion_token_ids)
    output_tokens_0 = [tokenizer.decode(token_id) for token_id in output_ids_0]

    # add bad words
    bad_tokens = output_tokens_0[6:10]
    bad_token_ids = output_ids_0[6:10]
    response_1 = openai_client.completions.create(
        model="default",
        prompt="Hello, how are you?",
        temperature=1,
        top_p=0.0,
        max_tokens=20,
        extra_body={"bad_words": bad_tokens, "return_token_ids": True},
        stream=False,
    )
    assert hasattr(response_1, "choices")
    assert len(response_1.choices) > 0
    # Validate the *second* response here; the earlier version re-checked
    # response_0, leaving response_1's token ids unverified before use.
    assert hasattr(response_1.choices[0], "completion_token_ids")
    assert isinstance(response_1.choices[0].completion_token_ids, list)
    # Every banned id must be absent from the second generation.
    assert not any(ids in response_1.choices[0].completion_token_ids for ids in bad_token_ids)
904
946
905
947
906
948
def test_streaming_completion_with_bad_words (openai_client , capsys ):
@@ -912,30 +954,47 @@ def test_streaming_completion_with_bad_words(openai_client, capsys):
912
954
prompt = "Hello, how are you?" ,
913
955
temperature = 1 ,
914
956
top_p = 0.0 ,
915
- max_tokens = 10 ,
957
+ max_tokens = 20 ,
916
958
stream = True ,
959
+ extra_body = {"return_token_ids" : True },
917
960
)
918
- output_0 = []
961
+ output_tokens_0 = []
962
+ output_ids_0 = []
963
+ is_first_chunk = True
919
964
for chunk in response_0 :
920
- assert hasattr (chunk , "choices" )
921
- assert len (chunk .choices ) > 0
922
- assert hasattr (chunk .choices [0 ], "text" )
923
- output_0 .append (chunk .choices [0 ].text )
965
+ if is_first_chunk :
966
+ is_first_chunk = False
967
+ else :
968
+ assert hasattr (chunk , "choices" )
969
+ assert len (chunk .choices ) > 0
970
+ assert hasattr (chunk .choices [0 ], "text" )
971
+ assert hasattr (chunk .choices [0 ], "completion_token_ids" )
972
+ output_tokens_0 .append (chunk .choices [0 ].text )
973
+ output_ids_0 .extend (chunk .choices [0 ].completion_token_ids )
924
974
925
975
# add bad words
976
+ bad_token_ids = output_ids_0 [6 :10 ]
977
+ bad_tokens = output_tokens_0 [6 :10 ]
926
978
response_1 = openai_client .completions .create (
927
979
model = "default" ,
928
980
prompt = "Hello, how are you?" ,
929
981
temperature = 1 ,
930
982
top_p = 0.0 ,
931
- max_tokens = 10 ,
932
- extra_body = {"bad_words" : output_0 [ - 5 :] },
983
+ max_tokens = 20 ,
984
+ extra_body = {"bad_words" : bad_tokens , "return_token_ids" : True },
933
985
stream = True ,
934
986
)
935
- output_1 = []
987
+ output_tokens_1 = []
988
+ output_ids_1 = []
989
+ is_first_chunk = True
936
990
for chunk in response_1 :
937
- assert hasattr (chunk , "choices" )
938
- assert len (chunk .choices ) > 0
939
- assert hasattr (chunk .choices [0 ], "text" )
940
- output_1 .append (chunk .choices [0 ].text )
941
- assert output_0 not in output_1
991
+ if is_first_chunk :
992
+ is_first_chunk = False
993
+ else :
994
+ assert hasattr (chunk , "choices" )
995
+ assert len (chunk .choices ) > 0
996
+ assert hasattr (chunk .choices [0 ], "text" )
997
+ assert hasattr (chunk .choices [0 ], "completion_token_ids" )
998
+ output_tokens_1 .append (chunk .choices [0 ].text )
999
+ output_ids_1 .extend (chunk .choices [0 ].completion_token_ids )
1000
+ assert not any (ids in output_ids_1 for ids in bad_token_ids )
0 commit comments