@@ -64,43 +64,6 @@ def test_batching(
6464 )
6565
6666
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [15])
def test_n_lt_1(
    vllm_runner,
    example_prompts,
    model: str,
    dtype: str,
    max_tokens: int,
) -> None:
    """Compare one ``n=10`` sampling request against ten greedy requests.

    The second example prompt is first generated ten times with
    ``generate_greedy``; it is then submitted once with
    ``SamplingParams(n=10, temperature=0.001, seed=0)``. The two lists of
    ``(token_ids, text)`` pairs are handed to ``check_outputs_equal``.

    NOTE(review): an earlier comment said the small-model tests need full
    precision, and a ``dtype == "float"`` assertion was disabled; the test
    is parametrized with ``bfloat16`` only -- confirm this is intended.
    """
    num_samples = 10

    with vllm_runner(model, dtype=dtype) as vllm_model:
        prompt = example_prompts[1]

        # Reference: independent greedy runs, keeping the single result
        # returned for the single prompt each time.
        greedy_reference = [
            vllm_model.generate_greedy([prompt], max_tokens)[0]
            for _ in range(num_samples)
        ]

        # One request asking for all samples at once.
        multi_sample_params = SamplingParams(
            n=num_samples,
            temperature=0.001,
            seed=0,
            max_tokens=max_tokens,
        )
        raw_outputs = vllm_model.generate([prompt], multi_sample_params)

        # The first (only) entry holds parallel sequences of token ids and
        # texts; pair them up to match the shape of the greedy reference.
        sampled_token_ids, sampled_texts = raw_outputs[0]
        paired_samples = list(zip(sampled_token_ids, sampled_texts))

    check_outputs_equal(
        outputs_0_lst=paired_samples,
        outputs_1_lst=greedy_reference,
        name_0="vllm_n_lt_1_outputs",
        name_1="vllm",
    )
102-
103-
10467@pytest .mark .parametrize ("model" , MODELS )
10568@pytest .mark .parametrize ("dtype" , ["bfloat16" ])
10669@pytest .mark .parametrize ("max_tokens" , [20 ])
0 commit comments