88from transformers import AutoTokenizer
99from litellm import completion
1010
11+
1112def litellm_completion (args , tokenizer , image_url = None ):
1213 try :
1314 if image_url :
1415 messages = [
1516 {
16- "role" : "user" ,
17+ "role" : "user" ,
1718 "content" : [
1819 {"type" : "image_url" , "image_url" : {"url" : image_url }},
1920 {"type" : "text" , "text" : "Tell me a story about this image." },
@@ -26,7 +27,7 @@ def litellm_completion(args, tokenizer, image_url=None):
2627 ]
2728
2829 start = time .time ()
29-
30+
3031 response = completion (
3132 model = args .model ,
3233 api_base = args .api_base ,
@@ -55,35 +56,38 @@ def litellm_completion(args, tokenizer, image_url=None):
5556 error_log .write (f"Error during completion: { str (e )} \n " )
5657 return str (e )
5758
59+
5860def main (args ):
5961 n = args .num_total_responses
6062 batch_size = args .req_per_sec # Requests per second
6163 start = time .time ()
6264
6365 all_results = []
6466 tokenizer = AutoTokenizer .from_pretrained ("gpt2" )
65-
67+
6668 with ThreadPoolExecutor (max_workers = batch_size ) as executor :
6769 for i in range (0 , n , batch_size ):
6870 batch_futures = []
6971 batch = range (i , min (i + batch_size , n ))
70-
72+
7173 for _ in batch :
7274 if args .include_image :
7375 if args .randomize_image_dimensions :
7476 y_dimension = np .random .randint (100 , 1025 )
7577 else :
7678 y_dimension = 512
7779 image_url = f"https://placehold.co/1024x{ y_dimension } /png"
78- future = executor .submit (litellm_completion , args , tokenizer , image_url )
80+ future = executor .submit (
81+ litellm_completion , args , tokenizer , image_url
82+ )
7983 else :
8084 future = executor .submit (litellm_completion , args , tokenizer )
8185 batch_futures .append (future )
82-
86+
8387 # Wait for batch to complete
8488 for future in batch_futures :
8589 all_results .append (future .result ())
86-
90+
8791 if i + batch_size < n :
8892 time .sleep (1 ) # Wait 1 second before next batch
8993
@@ -111,6 +115,7 @@ def main(args):
111115 print (f"P99 ITL: { np .percentile (itl_list_flattened , 99 )} " )
112116 print (f"Mean ITL: { np .mean (itl_list_flattened )} " )
113117
118+
114119if __name__ == "__main__" :
115120 parser = argparse .ArgumentParser ()
116121 parser .add_argument ("--model" , type = str , default = "azure-gpt-3.5" )
0 commit comments