@@ -238,11 +238,6 @@ void BigqueryProtoWriter::WriteChunk(DataChunk &chunk, const std::map<std::strin
         throw BinderException("Cannot get message prototype from message descriptor");
     }
 
-    // Create the append request
-    google::cloud::bigquery::storage::v1::AppendRowsRequest request;
-    request.set_write_stream(write_stream.name());
-    msg_descriptor->CopyTo(request.mutable_proto_rows()->mutable_writer_schema()->mutable_proto_descriptor());
-
     vector<idx_t> column_indexes;
     if (column_idxs.empty()) {
         column_indexes.resize(chunk.ColumnCount());
@@ -255,7 +250,18 @@ void BigqueryProtoWriter::WriteChunk(DataChunk &chunk, const std::map<std::strin
         }
     }
 
+    auto create_request = [this]() {
+        google::cloud::bigquery::storage::v1::AppendRowsRequest new_request;
+        new_request.set_write_stream(write_stream.name());
+        msg_descriptor->CopyTo(new_request.mutable_proto_rows()->mutable_writer_schema()->mutable_proto_descriptor());
+        return new_request;
+    };
+
+    auto request = create_request();
     auto *rows = request.mutable_proto_rows()->mutable_rows();
+    idx_t rows_in_batch = 0;
+    size_t current_request_bytes = request.ByteSizeLong();
+
     for (idx_t i = 0; i < chunk.size(); i++) {
         google::protobuf::Message *msg = msg_prototype->New();
         const google::protobuf::Reflection *reflection = msg->GetReflection();
@@ -290,10 +296,40 @@ void BigqueryProtoWriter::WriteChunk(DataChunk &chunk, const std::map<std::strin
         if (!msg->SerializeToString(&serialized_msg)) {
             throw std::runtime_error("Failed to serialize message");
         }
+        auto estimated_size_increase = serialized_msg.size() + APPEND_ROWS_ROW_OVERHEAD;
+
+        if (rows_in_batch > 0 && current_request_bytes + estimated_size_increase > DEFAULT_APPEND_ROWS_SOFT_LIMIT) {
+            SendAppendRequest(request);
+            request = create_request();
+            rows = request.mutable_proto_rows()->mutable_rows();
+            rows_in_batch = 0;
+            current_request_bytes = request.ByteSizeLong();
+        }
+
         rows->add_serialized_rows(serialized_msg);
+        rows_in_batch++;
+        current_request_bytes += estimated_size_increase;
+
+        if (current_request_bytes >= DEFAULT_APPEND_ROWS_SOFT_LIMIT) {
+            SendAppendRequest(request);
+            request = create_request();
+            rows = request.mutable_proto_rows()->mutable_rows();
+            rows_in_batch = 0;
+            current_request_bytes = request.ByteSizeLong();
+        }
         delete msg;
     }
 
+    if (rows_in_batch > 0) {
+        SendAppendRequest(request);
+    }
+}
+
+void BigqueryProtoWriter::SendAppendRequest(const google::cloud::bigquery::storage::v1::AppendRowsRequest &request) {
+    if (!request.has_proto_rows() || request.proto_rows().rows().serialized_rows_size() == 0) {
+        return;
+    }
+
     int max_retries = 100;
     for (int attempt = 0; attempt < max_retries; attempt++) {
         auto handle_broken_stream = [this](char const *where) {
@@ -322,7 +358,6 @@ void BigqueryProtoWriter::WriteChunk(DataChunk &chunk, const std::map<std::strin
             }
         }
 
-        // GET THE RESPONSE AND ERROR HANDLING
         auto response = grpc_stream->Read().get();
         if (!response) {
             if (attempt < max_retries - 1) {
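
Note: the loop above batches rows by estimated request size rather than sending a single AppendRows request per chunk. The standalone sketch below restates that flush pattern with plain standard-library types so it can be read and compiled in isolation; the constant values, the AppendInBatches/send_batch names, and the soft-limit figure are illustrative stand-ins, not the extension's actual APPEND_ROWS_ROW_OVERHEAD, DEFAULT_APPEND_ROWS_SOFT_LIMIT, or SendAppendRequest definitions.

// batching_sketch.cpp -- illustrative only; constants and helper names are stand-ins.
#include <cstddef>
#include <functional>
#include <string>
#include <vector>

// Hypothetical values: the real constants live elsewhere in the extension.
// The soft limit is chosen to stay under the AppendRows request size cap
// (10 MB at the time of writing); the overhead is a rough per-row framing cost.
static constexpr std::size_t kSoftLimitBytes = 9 * 1024 * 1024;
static constexpr std::size_t kPerRowOverhead = 16;

// Accumulates serialized rows and flushes them whenever the estimated
// batch size would cross the soft limit, mirroring the WriteChunk loop above.
void AppendInBatches(const std::vector<std::string> &serialized_rows,
                     const std::function<void(const std::vector<std::string> &)> &send_batch) {
    std::vector<std::string> batch;
    std::size_t batch_bytes = 0;

    for (const auto &row : serialized_rows) {
        const std::size_t row_cost = row.size() + kPerRowOverhead;

        // Flush first if adding this row would push the current batch over the limit.
        if (!batch.empty() && batch_bytes + row_cost > kSoftLimitBytes) {
            send_batch(batch);
            batch.clear();
            batch_bytes = 0;
        }

        batch.push_back(row);
        batch_bytes += row_cost;

        // Flush immediately once the batch reaches the limit.
        if (batch_bytes >= kSoftLimitBytes) {
            send_batch(batch);
            batch.clear();
            batch_bytes = 0;
        }
    }

    // Send whatever remains after the loop.
    if (!batch.empty()) {
        send_batch(batch);
    }
}

Flushing both before and after the append keeps a single oversized row from overflowing an already-populated batch while still bounding the steady-state request size; the diff above applies the same two checks to the protobuf request it builds.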