Skip to content

Commit c047f20

Browse files
authored
Fix fetching dataset rows (#1091)
1 parent 076156e commit c047f20

File tree

1 file changed

+8
-6
lines changed

1 file changed

+8
-6
lines changed

src/datachain/query/dispatch.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -203,9 +203,10 @@ def run_udf_single(
203203
def get_inputs() -> Iterable["RowsOutput"]:
204204
warehouse = self.catalog.warehouse.clone()
205205
if ids_only:
206-
yield from warehouse.dataset_rows_select_from_ids(
207-
self.query, input_rows, self.is_batching
208-
)
206+
for ids in batched(input_rows, DEFAULT_BATCH_SIZE):
207+
yield from warehouse.dataset_rows_select_from_ids(
208+
self.query, ids, self.is_batching
209+
)
209210
else:
210211
yield from input_rows
211212

@@ -425,8 +426,9 @@ def get_inputs(self, ids_only: bool) -> Iterable["RowsOutput"]:
425426
warehouse = self.catalog.warehouse.clone()
426427
while (batch := get_from_queue(self.task_queue)) != STOP_SIGNAL:
427428
if ids_only:
428-
yield from warehouse.dataset_rows_select_from_ids(
429-
self.query, batch, self.is_batching
430-
)
429+
for ids in batched(batch, DEFAULT_BATCH_SIZE):
430+
yield from warehouse.dataset_rows_select_from_ids(
431+
self.query, ids, self.is_batching
432+
)
431433
else:
432434
yield from batch

0 commit comments

Comments
 (0)