Skip to content

Commit 682e0b6

Browse files
authored
Log how much time loading a compiled artifact takes (#16848)
Signed-off-by: rzou <[email protected]>
1 parent d6195a7 commit 682e0b6

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

vllm/compilation/backends.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,14 @@ def compile(self,
110110
compiled_graph = self.load(graph, example_inputs, graph_index,
111111
runtime_shape)
112112
if compiled_graph is not None:
113-
if graph_index == 0:
114-
# adds some info logging for the first graph
115-
logger.info("Directly load the compiled graph for shape %s "
116-
"from the cache", str(runtime_shape)) # noqa
113+
if graph_index == num_graphs - 1:
114+
# after loading the last graph for this shape, record the time.
115+
# there can be multiple graphs due to piecewise compilation.
116+
now = time.time()
117+
elapsed = now - compilation_start_time
118+
logger.info(
119+
"Directly load the compiled graph(s) for shape %s "
120+
"from the cache, took %.3f s", str(runtime_shape), elapsed)
117121
return compiled_graph
118122

119123
# no compiler cached the graph, or the cache is disabled,

0 commit comments

Comments
 (0)