
Commit ec49ec5

samples: reformat append_rows_with_arrow.py
1 parent 501e993 commit ec49ec5

1 file changed

packages/google-cloud-bigquery-storage/samples/pyarrow/append_rows_with_arrow.py

Lines changed: 12 additions & 18 deletions
@@ -176,50 +176,44 @@ def _create_request(batches):
         )
         return request

-    # 1. use pyarrow_table.to_batches() to get batches as a stack.
-    batches_as_stack = list(pyarrow_table.to_batches())
-    batches_as_stack.reverse()
+    batches = pyarrow_table.to_batches()

-    # current_size is initially 0
-    # current_batches is initilly empty list
     current_batches = []
     current_size = 0

-    # 2. repeat below until stack is empty:
-    while batches_as_stack:
-        batch = batches_as_stack.pop()
+    while batches:
+        batch = batches.pop()
         batch_size = batch.nbytes

         if current_size + batch_size > max_request_bytes:
             if batch.num_rows > 1:
-                # split the batch into 2 sub batches with identical chunksizes
+                # Split the batch into 2 sub batches with identical chunksizes
                 mid = batch.num_rows // 2
                 batch_left = batch.slice(offset=0, length=mid)
                 batch_right = batch.slice(offset=mid)

-                # append the new batches into the stack.
-                batches_as_stack.append(batch_right)
-                batches_as_stack.append(batch_left)
-                # Repeat the poping
+                # Append the new batches into the stack and continue poping.
+                batches.append(batch_right)
+                batches.append(batch_left)
                 continue

-            # if the batch is single row and still larger than max_request_size
+            # If the batch is single row and still larger than max_request_size
             else:
-                # if current batches is empty, throw error
+                # If current batches is empty, throw error
                 if len(current_batches) == 0:
                     raise ValueError(
                         f"A single PyArrow batch of one row is larger than the maximum request size "
                         f"(batch size: {batch_size} > max request size: {max_request_bytes}). Cannot proceed."
                     )
-                # otherwise, generate the request, reset current_size and current_batches
+                # Otherwise, generate the request, reset current_size and current_batches
                 else:
                     yield _create_request(current_batches)

                     current_batches = []
                     current_size = 0
-                    batches_as_stack.append(batch)
+                    batches.append(batch)

-        # otherwise, add the batch into current_batches
+        # Otherwise, add the batch into current_batches
         else:
             current_batches.append(batch)
             current_size += batch_size
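
For reference, the snippet below is a minimal, self-contained sketch of the batch-splitting loop as it reads after this change, with the BigQuery Storage Write API plumbing stripped out. The function name split_into_requests, the 16 KiB cap in the demo, and the final flush after the loop are illustrative assumptions rather than part of the sample; requests are modeled as plain lists of record batches instead of _create_request() protos.

# Minimal sketch (not the sample itself): the loop from the diff above,
# isolated so it can be run against any in-memory pyarrow.Table.
import pyarrow as pa


def split_into_requests(pyarrow_table, max_request_bytes):
    """Yield lists of record batches whose combined nbytes stay under the cap."""
    batches = pyarrow_table.to_batches()

    current_batches = []
    current_size = 0

    while batches:
        batch = batches.pop()
        batch_size = batch.nbytes

        if current_size + batch_size > max_request_bytes:
            if batch.num_rows > 1:
                # Split the oversized batch in half and push both halves back.
                mid = batch.num_rows // 2
                batches.append(batch.slice(offset=mid))
                batches.append(batch.slice(offset=0, length=mid))
                continue
            # A single-row batch that still does not fit alongside the others.
            if len(current_batches) == 0:
                raise ValueError(
                    f"A single one-row batch ({batch_size} bytes) exceeds the "
                    f"maximum request size ({max_request_bytes} bytes)."
                )
            # Flush the accumulated batches, then retry this batch on its own.
            yield current_batches
            current_batches = []
            current_size = 0
            batches.append(batch)
        else:
            current_batches.append(batch)
            current_size += batch_size

    # Assumed final flush; the hunk above does not show how the sample ends.
    if current_batches:
        yield current_batches


if __name__ == "__main__":
    table = pa.table({"n": list(range(10_000))})  # one ~80 KB int64 column
    for i, group in enumerate(split_into_requests(table, max_request_bytes=16_384)):
        print(f"request {i}: {sum(b.nbytes for b in group)} bytes")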
