Skip to content

Commit ddc7387

Browse files
authored
rechunk data for smoother runtimes of vLLM batch inference (#1518)
1 parent 6fec6cd commit ddc7387

File tree

1 file changed

+18
-3
lines changed

1 file changed

+18
-3
lines changed

06_gpu_and_ml/llm-serving/vllm_throughput.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,22 @@ def main(lookback: int = 5, wait_for_results: bool = True):
108108
# For both extraction and transformation, we use
109109
# [`.map`](https://modal.com/docs/guide/scale),
110110
# which fans out inputs over containers in parallel.
111-
# Each invocation handles one day's worth of data --
112-
# the same granularity offered by the data source.
111+
# Each invocation handles at most 1,500 rows,
112+
# which leads to runtimes of about five minutes per call.
113+
# By parallelizing the calls, we finish processing everything in about five minutes.
114+
115+
# "Rechunking" our data from a list of filings by day
116+
# into batches of a fixed maximum size requires a little
117+
# helper function:
118+
119+
120+
def rechunk(lists, size: int = 1_500):
    """Regroup an iterable of lists into chunks of at most ``size`` items.

    The input lists are flattened in order and re-split, so every chunk
    except possibly the last holds exactly ``size`` items. Empty input
    lists contribute nothing, and no empty chunk is ever produced.

    Args:
        lists: An iterable of iterables (e.g. one list of filings per day).
        size: Maximum number of items per chunk; must be at least 1.

    Returns:
        A generator yielding lists of up to ``size`` items each.

    Raises:
        ValueError: If ``size`` is less than 1. This is checked when
            ``rechunk`` is called, not when the result is first iterated.
    """
    from itertools import chain, islice

    # A size of 0 would make the very first chunk empty, ending the loop
    # immediately and silently discarding every item; fail loudly instead.
    if size < 1:
        raise ValueError(f"size must be a positive integer, got {size!r}")

    def _chunks():
        # A single shared iterator lets each islice call resume where the
        # previous chunk stopped.
        flat = chain.from_iterable(lists)
        # An empty list is falsy, so the loop ends once the input is exhausted.
        while chunk := list(islice(flat, size)):
            yield chunk

    return _chunks()
126+
113127

114128
# For the LLM call, we use
115129
# [`.spawn`](https://modal.com/docs/guide/job-queue),
@@ -143,9 +157,10 @@ def orchestrate(lookback: int) -> list[modal.FunctionCall]:
143157
print("Transforming raw SEC filings for these dates:", *folders)
144158
filing_batches = list(transform.map(folders))
145159
n_filings = sum(map(len, filing_batches))
160+
submission_batches_gen = rechunk(filing_batches)
146161

147162
print(f"Submitting {n_filings} SEC filings to LLM for summarization")
148-
jobs = list(llm.process.spawn(batch) for batch in filing_batches)
163+
jobs = list(llm.process.spawn(batch) for batch in submission_batches_gen)
149164
if jobs:
150165
print("FunctionCall IDs:", *[job.object_id for job in jobs], sep="\n\t")
151166

0 commit comments

Comments
 (0)