@@ -402,17 +402,14 @@ def standard_to_legacy(table):
         _, iterator = self.raw_execute(sql, use_legacy_sql=True)
         return self.get_table_from_response(iterator)

-    def copy_bq_table(self, source, destination, write_disposition) -> None:
+    def copy_bq_table(self, source, destination, write_disposition, partition_ids=None) -> None:
         conn = self.get_thread_connection()
         client: Client = conn.handle

         # -------------------------------------------------------------------------------
-        # BigQuery allows to use copy API using two different formats:
-        # 1. client.copy_table(source_table_id, destination_table_id)
-        #    where source_table_id = "your-project.source_dataset.source_table"
-        # 2. client.copy_table(source_table_ids, destination_table_id)
-        #    where source_table_ids = ["your-project.your_dataset.your_table_name", ...]
-        # Let's use uniform function call and always pass list there
+        # BigQuery allows copy jobs to run against the same table in parallel,
+        # so each source (and each partition of each source, if partition ids
+        # are given) is copied into the destination table concurrently.
         # -------------------------------------------------------------------------------
         if type(source) is not list:
             source = [source]
@@ -436,14 +433,32 @@ def copy_bq_table(self, source, destination, write_disposition) -> None:
436433 ", " .join (source_ref .path for source_ref in source_ref_array ),
437434 destination_ref .path ,
438435 )
436+
439437 with self .exception_handler (msg ):
440- copy_job = client .copy_table (
441- source_ref_array ,
442- destination_ref ,
443- job_config = CopyJobConfig (write_disposition = write_disposition ),
444- retry = self ._retry .create_reopen_with_deadline (conn ),
445- )
446- copy_job .result (timeout = self ._retry .create_job_execution_timeout (fallback = 300 ))
+
+            copy_jobs = []
+
+            # Runs all the copy jobs in parallel
+            for source_ref in source_ref_array:
+
+                for partition_id in partition_ids or [None]:
+                    source_ref_partition = (
+                        f"{source_ref}${partition_id}" if partition_id else source_ref
+                    )
+                    destination_ref_partition = (
+                        f"{destination_ref}${partition_id}" if partition_id else destination_ref
+                    )
+                    copy_job = client.copy_table(
+                        source_ref_partition,
+                        destination_ref_partition,
+                        job_config=CopyJobConfig(write_disposition=write_disposition),
+                        retry=self._retry.create_reopen_with_deadline(conn),
+                    )
+                    copy_jobs.append(copy_job)
+
+            # Waits for the jobs to finish
+            for copy_job in copy_jobs:
+                copy_job.result(timeout=self._retry.create_job_execution_timeout(fallback=300))

     def write_dataframe_to_table(
         self,
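
For context, a minimal standalone sketch of the pattern the new code implements: fan out one copy job per source/partition pair using BigQuery's partition decorator syntax (`table$YYYYMMDD`), then block on all jobs at the end. It calls the public `google-cloud-bigquery` client directly rather than the adapter's connection and retry plumbing, and the project/dataset/table names and partition ids are hypothetical placeholders.

```python
# Minimal sketch (assumes google-cloud-bigquery is installed and ambient
# credentials are available). Table ids and partition ids are hypothetical.
from google.cloud import bigquery

client = bigquery.Client()

sources = ["my-project.staging.events"]      # hypothetical source table(s)
destination = "my-project.analytics.events"  # hypothetical destination table
partition_ids = ["20240101", "20240102"]     # daily partition decorators

job_config = bigquery.CopyJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE
)

jobs = []
for source in sources:
    # With no partition ids, fall back to a single whole-table copy.
    for partition_id in partition_ids or [None]:
        src = f"{source}${partition_id}" if partition_id else source
        dst = f"{destination}${partition_id}" if partition_id else destination
        # copy_table() submits the job and returns a CopyJob without blocking,
        # so all copies run concurrently on the BigQuery side.
        jobs.append(client.copy_table(src, dst, job_config=job_config))

# Join point: wait for every job; result() re-raises the first failure.
for job in jobs:
    job.result(timeout=300)
```

The design choice mirrors the diff: submitting every job before calling `result()` turns N sequential copies into one fan-out/join, which matters most when `partition_ids` is long.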