predicates where it is safe to generate a new query ID.
"""

+ from __future__ import annotations
+
import copy
+ import dataclasses
import functools
import uuid
import textwrap
- from typing import Any, Dict, Optional, TYPE_CHECKING, Union
+ from typing import Any, Callable, Dict, Optional, TYPE_CHECKING, Union
import warnings

import google.api_core.exceptions as core_exceptions
from google.api_core import retry as retries

from google.cloud.bigquery import job
+ import google.cloud.bigquery.job.query
import google.cloud.bigquery.query
from google.cloud.bigquery import table
import google.cloud.bigquery.retry
@@ -116,14 +120,20 @@ def query_jobs_insert(
    retry: Optional[retries.Retry],
    timeout: Optional[float],
    job_retry: Optional[retries.Retry],
+     callback: Callable,
) -> job.QueryJob:
    """Initiate a query using jobs.insert.

    See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert
+
+     Args:
+         callback (Callable):
+             A callback function used by bigframes to report query progress.
    """
    job_id_given = job_id is not None
    job_id_save = job_id
    job_config_save = job_config
+     query_sent_factory = QuerySentEventFactory()

    def do_query():
        # Make a copy now, so that original doesn't get changed by the process
@@ -136,6 +146,15 @@ def do_query():

        try:
            query_job._begin(retry=retry, timeout=timeout)
+             callback(
+                 query_sent_factory(
+                     query=query,
+                     billing_project=query_job.project,
+                     location=query_job.location,
+                     job_id=query_job.job_id,
+                     request_id=None,
+                 )
+             )
        except core_exceptions.Conflict as create_exc:
            # The thought is if someone is providing their own job IDs and they get
            # their job ID generation wrong, this could end up returning results for
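For context, the callback invoked above receives the event dataclasses defined at the end of this diff. A minimal sketch of a callback that a caller such as bigframes might supply; the function name, import path, and print-based reporting are illustrative assumptions, not part of this change:

from google.cloud.bigquery import _job_helpers  # assumed location of the new event classes


def report_progress(event) -> None:
    """Print a one-line status update for each query lifecycle event."""
    # Check the retry subclass first, since QueryRetryEvent inherits from QuerySentEvent.
    if isinstance(event, _job_helpers.QueryRetryEvent):
        print(f"Retrying query (job_id={event.job_id}, request_id={event.request_id})")
    elif isinstance(event, _job_helpers.QuerySentEvent):
        print(f"Query sent (job_id={event.job_id}, request_id={event.request_id})")
    elif isinstance(event, _job_helpers.QueryReceivedEvent):
        print(f"Query acknowledged, state={event.state}")
    elif isinstance(event, _job_helpers.QueryFinishedEvent):
        print(
            f"Query finished: {event.total_rows} rows, "
            f"{event.total_bytes_processed} bytes processed"
        )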
@@ -396,6 +415,7 @@ def query_and_wait(
    job_retry: Optional[retries.Retry],
    page_size: Optional[int] = None,
    max_results: Optional[int] = None,
+     callback: Callable = lambda _: None,
) -> table.RowIterator:
    """Run the query, wait for it to finish, and return the results.

@@ -415,9 +435,8 @@ def query_and_wait(
        location (Optional[str]):
            Location where to run the job. Must match the location of the
            table used in the query as well as the destination table.
-         project (Optional[str]):
-             Project ID of the project of where to run the job. Defaults
-             to the client's project.
+         project (str):
+             Project ID of the project of where to run the job.
        api_timeout (Optional[float]):
            The number of seconds to wait for the underlying HTTP transport
            before using ``retry``.
@@ -441,6 +460,8 @@ def query_and_wait(
            request. Non-positive values are ignored.
        max_results (Optional[int]):
            The maximum total number of rows from this request.
+         callback (Callable):
+             A callback function used by bigframes to report query progress.

    Returns:
        google.cloud.bigquery.table.RowIterator:
@@ -479,12 +500,14 @@ def query_and_wait(
                retry=retry,
                timeout=api_timeout,
                job_retry=job_retry,
+                 callback=callback,
            ),
            api_timeout=api_timeout,
            wait_timeout=wait_timeout,
            retry=retry,
            page_size=page_size,
            max_results=max_results,
+             callback=callback,
        )

    path = _to_query_path(project)
@@ -496,10 +519,23 @@ def do_query():
    if client.default_job_creation_mode:
        request_body["jobCreationMode"] = client.default_job_creation_mode

+     query_sent_factory = QuerySentEventFactory()
+
    def do_query():
-         request_body["requestId"] = make_job_id()
+         request_id = make_job_id()
+         request_body["requestId"] = request_id
        span_attributes = {"path": path}

+         callback(
+             query_sent_factory(
+                 query=query,
+                 billing_project=project,
+                 location=location,
+                 job_id=None,
+                 request_id=request_id,
+             )
+         )
+
        # For easier testing, handle the retries ourselves.
        if retry is not None:
            response = retry(client._call_api)(
@@ -542,8 +578,21 @@ def do_query():
                retry=retry,
                page_size=page_size,
                max_results=max_results,
+                 callback=callback,
            )

+         callback(
+             QueryFinishedEvent(
+                 billing_project=project,
+                 location=query_results.location,
+                 query_id=query_results.query_id,
+                 job_id=query_results.job_id,
+                 total_rows=query_results.total_rows,
+                 total_bytes_processed=query_results.total_bytes_processed,
+                 slot_millis=query_results.slot_millis,
+                 destination=None,
+             )
+         )
        return table.RowIterator(
            client=client,
            api_request=functools.partial(client._call_api, retry, timeout=api_timeout),
@@ -611,19 +660,43 @@ def _wait_or_cancel(
    retry: Optional[retries.Retry],
    page_size: Optional[int],
    max_results: Optional[int],
+     callback: Callable,
) -> table.RowIterator:
    """Wait for a job to complete and return the results.

    If we can't return the results within the ``wait_timeout``, try to cancel
    the job.
    """
    try:
-         return job.result(
+         callback(
+             QueryReceivedEvent(
+                 billing_project=job.project,
+                 location=job.location,
+                 job_id=job.job_id,
+                 statement_type=job.statement_type,
+                 state=job.state,
+                 query_plan=job.query_plan,
+             )
+         )
+         query_results = job.result(
            page_size=page_size,
            max_results=max_results,
            retry=retry,
            timeout=wait_timeout,
        )
+         callback(
+             QueryFinishedEvent(
+                 billing_project=job.project,
+                 location=query_results.location,
+                 query_id=query_results.query_id,
+                 job_id=query_results.job_id,
+                 total_rows=query_results.total_rows,
+                 total_bytes_processed=query_results.total_bytes_processed,
+                 slot_millis=query_results.slot_millis,
+                 destination=job.destination,
+             )
+         )
+         return query_results
    except Exception:
        # Attempt to cancel the job since we can't return the results.
        try:
@@ -632,3 +705,56 @@ def _wait_or_cancel(
            # Don't eat the original exception if cancel fails.
            pass
        raise
+
+
+ @dataclasses.dataclass(frozen=True)
+ class QueryFinishedEvent:
+     """Query finished successfully."""
+
+     billing_project: Optional[str]
+     location: Optional[str]
+     query_id: Optional[str]
+     job_id: Optional[str]
+     destination: Optional[table.TableReference]
+     total_rows: Optional[int]
+     total_bytes_processed: Optional[int]
+     slot_millis: Optional[int]
+
+
+ @dataclasses.dataclass(frozen=True)
+ class QueryReceivedEvent:
+     """Query received and acknowledged by the BigQuery API."""
+
+     billing_project: Optional[str]
+     location: Optional[str]
+     job_id: Optional[str]
+     statement_type: Optional[str]
+     state: Optional[str]
+     query_plan: Optional[list[google.cloud.bigquery.job.query.QueryPlanEntry]]
+
+
+ @dataclasses.dataclass(frozen=True)
+ class QuerySentEvent:
+     """Query sent to BigQuery."""
+
+     query: str
+     billing_project: Optional[str]
+     location: Optional[str]
+     job_id: Optional[str]
+     request_id: Optional[str]
+
+
+ class QueryRetryEvent(QuerySentEvent):
+     """Query sent another time because the previous failed."""
+
+
+ class QuerySentEventFactory:
+     """Creates a QuerySentEvent first, then QueryRetryEvent after that."""
+
+     def __init__(self):
+         self._event_constructor = QuerySentEvent
+
+     def __call__(self, **kwargs):
+         result = self._event_constructor(**kwargs)
+         self._event_constructor = QueryRetryEvent
+         return result
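A short sketch of the factory's behavior, assuming QuerySentEventFactory and the event classes above are in scope. The first call produces a QuerySentEvent; every later call (for example, when job_retry re-runs do_query) produces a QueryRetryEvent. The query text and identifiers below are illustrative:

factory = QuerySentEventFactory()

first = factory(
    query="SELECT 1",  # illustrative query and identifiers
    billing_project="my-project",
    location="US",
    job_id="job-attempt-1",
    request_id=None,
)
second = factory(
    query="SELECT 1",
    billing_project="my-project",
    location="US",
    job_id="job-attempt-2",
    request_id=None,
)

# The first event reports the initial send; subsequent events mark retries.
assert isinstance(first, QuerySentEvent) and not isinstance(first, QueryRetryEvent)
assert isinstance(second, QueryRetryEvent)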