Skip to content

Commit 2239db8

Browse files
committed
Fix: Update verify_result in pyarrow sample
- Changes the verification query to `SELECT DISTINCT int64_col` so the check counts unique rows rather than total rows.
- Asserts the count is exactly TABLE_LENGTH, removing the previous allowance (`>=`) for extra rows caused by retries.
- NOTE(review): `query_result.iloc[0, 0]` reads the first distinct *value*, not the row count; if the intent is to count unique rows, `len(query_result)` (or `SELECT COUNT(DISTINCT int64_col)`) would express that directly — confirm against the sample's data where values 0..TABLE_LENGTH-1 make the two coincide only by construction.
1 parent 1eb8266 commit 2239db8

File tree

1 file changed

+3
-4
lines changed

1 file changed

+3
-4
lines changed

packages/google-cloud-bigquery-storage/samples/pyarrow/append_rows_with_arrow.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def _create_request(batches):
179179
batches_in_request = []
180180
current_size = 0
181181

182-
# Split table into batches of one row.
182+
# Split table into batches with one row.
183183
for row_batch in pyarrow_table.to_batches(max_chunksize=1):
184184
serialized_batch = row_batch.serialize().to_pybytes()
185185
batch_size = len(serialized_batch)
@@ -216,11 +216,10 @@ def verify_result(client, table, futures):
216216
assert bq_table.schema == BQ_SCHEMA
217217

218218
# Verify table size.
219-
query = client.query(f"SELECT COUNT(1) FROM `{bq_table}`;")
219+
query = client.query(f"SELECT DISTINCT int64_col FROM `{bq_table}`;")
220220
query_result = query.result().to_dataframe()
221221

222-
# There might be extra rows due to retries.
223-
assert query_result.iloc[0, 0] >= TABLE_LENGTH
222+
assert query_result.iloc[0, 0] == TABLE_LENGTH
224223

225224
# Verify that table was split into multiple requests.
226225
assert len(futures) == 21

0 commit comments

Comments
 (0)