
Commit 0ea08a6

expose model info

Committed by: Le Xu
Signed-off-by: Le Xu <[email protected]>
1 parent: 45f267f

2 files changed: +26 −2 lines changed

development/app/app.py

Lines changed: 19 additions & 1 deletion
@@ -238,6 +238,7 @@ def completion():
     latency = 0.0
     if simulator is not None:
         latency = simulator.execute(Request(arrived_at, input_tokens, output_tokens, arrived_next=arrived_next))
+        print(f"input_tokens {input_tokens} model metadata {simulator.model_metadata}")

     # Simulated response
     response = {
@@ -698,7 +699,9 @@ def metrics():
     if gpu_device != "disabled":
         # Load the tokenizer for your model
         from transformers import AutoTokenizer
-
+        from transformers import AutoConfig
+
+        num_layers = num_heads = hidden_size = None
         default_model = 'bert-base-uncased'
         try:
             # can we make this as an application argument.
@@ -709,15 +712,30 @@ def metrics():
                 token=HUGGINGFACE_TOKEN,
                 model_max_length=16384,  # Suppress warning
                 clean_up_tokenization_spaces=True)
+            config = AutoConfig.from_pretrained(token_model)
+            # Extract required details
+            num_layers = getattr(config, "num_hidden_layers", getattr(config, "n_layers", None))
+            num_heads = getattr(config, "num_attention_heads", getattr(config, "n_heads", None))
+            hidden_size = getattr(config, "hidden_size", getattr(config, "dim", None))
         except Exception as e:
             logger.error(f"Failed to initialize tokenizer, will use default tokenizer model: {e}")
             tokenizer = AutoTokenizer.from_pretrained(
                 default_model,
                 model_max_length=16384,  # Suppress warning
                 clean_up_tokenization_spaces=True)
+            config = AutoConfig.from_pretrained(default_model)
+            # Extract required details
+            num_layers = getattr(config, "num_hidden_layers", getattr(config, "n_layers", None))
+            num_heads = getattr(config, "num_attention_heads", getattr(config, "n_heads", None))
+            hidden_size = getattr(config, "hidden_size", getattr(config, "dim", None))

         # TODO: check whether able to use argparse to build SimulationConfig
         simulator = Simulator(SimulationConfig.create_from_cli_args())
+
+        simulator.model_metadata['num_layers'] = num_layers
+        simulator.model_metadata['num_heads'] = num_heads
+        simulator.model_metadata['hidden_size'] = hidden_size
+
         overrides = {
             "total": 100.0,
             "running": 0,

development/app/simulator.py

Lines changed: 7 additions & 1 deletion
@@ -4,7 +4,7 @@
 import json
 import sys
 import threading
-from typing import List
+from typing import List, Dict

 from vidur.config import SimulationConfig
 from vidur.entities import Cluster, Request
@@ -56,6 +56,8 @@ def __init__(self, config: SimulationConfig) -> None:
         self._expect_next_tick = sys.float_info.max
         self._queue_buffer: List[Request] = []
         self._queue = None
+
+        self._model_metadata = {}

         # self._init_event_queue()
         atexit.register(self._write_output)
@@ -68,6 +70,10 @@ def scheduler(self) -> BaseGlobalScheduler:
     def metric_store(self) -> MetricsStore:
         return self._metric_store

+    @property
+    def model_metadata(self) -> Dict:
+        return self._model_metadata
+
     def start(self):
         logger.info(
             f"Starting simulation with cluster: {self._cluster}, model: {self._config.cluster_config.replica_config.model_name}, seed: {self._config.seed}"
