Skip to content

Commit 447cebb

Browse files
committed
feat: interpreter should report (stderr) call start/end and timing
Signed-off-by: Nick Mitchell <[email protected]>
1 parent e23283c commit 447cebb

File tree

1 file changed

+21
-0
lines changed

1 file changed

+21
-0
lines changed

src/pdl/pdl_llms.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import asyncio
33
import threading
44
from concurrent.futures import Future
5+
from sys import stderr
56
from typing import Any, Callable, Generator, TypeVar
67

78
import httpx
@@ -86,6 +87,7 @@ def generate_text(
8687
messages: ModelInput,
8788
parameters: dict[str, Any],
8889
) -> tuple[LazyMessage, PdlLazy[Any]]:
90+
print("Asynchronous model call started", file=stderr)
8991
# global _BACKGROUND_TASKS
9092
future = asyncio.run_coroutine_threadsafe(
9193
LitellmModel.async_generate_text(
@@ -108,6 +110,25 @@ def update_end_nanos(future):
108110
if block.pdl__timing is not None:
109111
block.pdl__timing.end_nanos = time.time_ns()
110112

113+
# report call completion and its duration (and if available queueing time)
114+
res = future.result()[1]
115+
start = (
116+
block.pdl__timing.start_nanos
117+
if block.pdl__timing.start_nanos is not None
118+
else 0
119+
)
120+
exec_nanos = block.pdl__timing.end_nanos - start
121+
queue_nanos = 0
122+
if "created" in res:
123+
queue_nanos = (
124+
res["created"] * 1000000000 - block.pdl__timing.start_nanos
125+
)
126+
exec_nanos = exec_nanos - queue_nanos
127+
print(
128+
f"Asynchronous model call to {block.model} completed in {(exec_nanos)/1000000}ms queued for {queue_nanos/1000000}ms",
129+
file=stderr,
130+
)
131+
111132
future.add_done_callback(update_end_nanos)
112133

113134
return message, response

0 commit comments

Comments
 (0)