@@ -161,6 +161,7 @@ impl Infer {
161161 speculate : u32 ,
162162 preloaded_adapters : Vec < PreloadedAdapter > ,
163163 prefix_caching : bool ,
164+ is_causal_lm : bool ,
164165 ) -> Self {
165166 let adapter_event = Arc :: new ( AdapterEvent {
166167 batching_task : Notify :: new ( ) ,
@@ -178,6 +179,7 @@ impl Infer {
178179 speculate,
179180 max_batch_total_tokens,
180181 prefix_caching,
182+ is_causal_lm,
181183 ) ;
182184
183185 // Initialize with base model adapter (empty) mapping to index 0
@@ -729,13 +731,19 @@ impl Infer {
729731 . map ( |( id, input) | ( id as u64 , input. clone ( ) ) )
730732 . collect ( ) ;
731733
732- for ( id , r_inputs ) in request. inputs . iter ( ) . enumerate ( ) {
733- let inputs = r_inputs . to_string ( ) . clone ( ) ;
734- let ( tokenized_inputs , input_length ) = self
735- . validation
736- . validate_input ( r_inputs . to_string ( ) , None , Some ( 1 ) )
737- . await ? ;
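734+ // Start validate_input for every input and await them together; the zip below relies on results coming back in input order (assumes futures' try_join_all — confirm import), and `?` propagates the first validation error.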
735+ let futures : Vec < _ > = request
736+ . inputs
737+ . iter ( )
738+ . map ( |input| self . validation . validate_input ( input . clone ( ) , None , Some ( 1 ) ) )
739+ . collect ( ) ;
738740
741+ let all_tokenized_inputs = try_join_all ( futures) . await ?;
742+
743+ for ( ( id, r_inputs) , ( tokenized_inputs, input_length) ) in
744+ request. inputs . iter ( ) . enumerate ( ) . zip ( all_tokenized_inputs)
745+ {
746+ let inputs = r_inputs. to_string ( ) . clone ( ) ;
739747 let valid_request = ValidClassifyRequest {
740748 inputs,
741749 tokenized_inputs,
0 commit comments