@@ -238,6 +238,7 @@ def completion():
238
238
latency = 0.0
239
239
if simulator is not None :
240
240
latency = simulator .execute (Request (arrived_at , input_tokens , output_tokens , arrived_next = arrived_next ))
241
+ logger.debug("input_tokens %s model metadata %s", input_tokens, simulator.model_metadata)  # debug-only trace; goes through the file's logger instead of stdout so the request path stays quiet by default
241
242
242
243
# Simulated response
243
244
response = {
@@ -698,7 +699,9 @@ def metrics():
698
699
if gpu_device != "disabled" :
699
700
# Load the tokenizer for your model
700
701
from transformers import AutoTokenizer
701
-
702
+ from transformers import AutoConfig
703
+
704
+ num_layers , num_heads , hidden_size = None
702
705
default_model = 'bert-base-uncased'
703
706
try :
704
707
# can we make this as an application argument.
@@ -709,15 +712,30 @@ def metrics():
709
712
token = HUGGINGFACE_TOKEN ,
710
713
model_max_length = 16384 , # Suppress warning
711
714
clean_up_tokenization_spaces = True )
715
+ config = AutoConfig .from_pretrained (token_model )
716
+ # Extract required details
717
+ num_layers = config .get ("num_hidden_layers" , config .get ("n_layers" ))
718
+ num_heads = config .get ("num_attention_heads" , config .get ("n_heads" ))
719
+ hidden_size = config .get ("hidden_size" , config .get ("dim" ))
712
720
except Exception as e :
713
721
logger .error (f"Failed to initialize tokenizer, will use default tokenizer model: { e } " )
714
722
tokenizer = AutoTokenizer .from_pretrained (
715
723
default_model ,
716
724
model_max_length = 16384 , # Suppress warning
717
725
clean_up_tokenization_spaces = True )
726
+ config = AutoConfig .from_pretrained (default_model )
727
+ # Extract required details
728
+ num_layers = config .get ("num_hidden_layers" , config .get ("n_layers" ))
729
+ num_heads = config .get ("num_attention_heads" , config .get ("n_heads" ))
730
+ hidden_size = config .get ("hidden_size" , config .get ("dim" ))
718
731
719
732
# TODO: check whether able to use argparse to build SimulationConfig
720
733
simulator = Simulator (SimulationConfig .create_from_cli_args ())
734
+
735
+ simulator .model_metadata ['num_layers' ] = num_layers
736
+ simulator .model_metadata ['num_heads' ] = num_heads
737
+ simulator .model_metadata ['hidden_size' ] = hidden_size
738
+
721
739
overrides = {
722
740
"total" : 100.0 ,
723
741
"running" : 0 ,
0 commit comments