@@ -54,26 +54,43 @@ def get_table_metadata(
54
54
55
55
cached_table = cache .get (table_ref )
56
56
if use_cache and cached_table is not None :
57
- snapshot_timestamp , _ = cached_table
58
-
59
- # Cache hit could be unexpected. See internal issue 329545805.
60
- # Raise a warning with more information about how to avoid the
61
- # problems with the cache.
62
- msg = bfe .format_message (
63
- f"Reading cached table from { snapshot_timestamp } to avoid "
64
- "incompatibilies with previous reads of this table. To read "
65
- "the latest version, set `use_cache=False` or close the "
66
- "current session with Session.close() or "
67
- "bigframes.pandas.close_session()."
68
- )
69
- # There are many layers before we get to (possibly) the user's code:
70
- # pandas.read_gbq_table
71
- # -> with_default_session
72
- # -> Session.read_gbq_table
73
- # -> _read_gbq_table
74
- # -> _get_snapshot_sql_and_primary_key
75
- # -> get_snapshot_datetime_and_table_metadata
76
- warnings .warn (msg , stacklevel = 7 )
57
+ snapshot_timestamp , table = cached_table
58
+
59
+ if is_time_travel_eligible (
60
+ bqclient = bqclient ,
61
+ table = table ,
62
+ columns = None ,
63
+ snapshot_time = snapshot_timestamp ,
64
+ filter_str = None ,
65
+ # Don't warn, because that will already have been taken care of.
66
+ should_warn = False ,
67
+ should_dry_run = False ,
68
+ ):
69
+ # This warning should only happen if the cached snapshot_time will
70
+ # have any effect on bigframes (b/437090788). For example, with
71
+ # cached query results, such as after re-running a query, time
72
+ # travel won't be applied and thus this check is irrelevent.
73
+ #
74
+ # In other cases, such as an explicit read_gbq_table(), Cache hit
75
+ # could be unexpected. See internal issue 329545805. Raise a
76
+ # warning with more information about how to avoid the problems
77
+ # with the cache.
78
+ msg = bfe .format_message (
79
+ f"Reading cached table from { snapshot_timestamp } to avoid "
80
+ "incompatibilies with previous reads of this table. To read "
81
+ "the latest version, set `use_cache=False` or close the "
82
+ "current session with Session.close() or "
83
+ "bigframes.pandas.close_session()."
84
+ )
85
+ # There are many layers before we get to (possibly) the user's code:
86
+ # pandas.read_gbq_table
87
+ # -> with_default_session
88
+ # -> Session.read_gbq_table
89
+ # -> _read_gbq_table
90
+ # -> _get_snapshot_sql_and_primary_key
91
+ # -> get_snapshot_datetime_and_table_metadata
92
+ warnings .warn (msg , category = bfe .TimeTravelCacheWarning , stacklevel = 7 )
93
+
77
94
return cached_table
78
95
79
96
table = bqclient .get_table (table_ref )
@@ -88,77 +105,114 @@ def get_table_metadata(
88
105
return cached_table
89
106
90
107
91
- def validate_table (
108
+ def is_time_travel_eligible (
92
109
bqclient : bigquery .Client ,
93
110
table : bigquery .table .Table ,
94
111
columns : Optional [Sequence [str ]],
95
112
snapshot_time : datetime .datetime ,
96
113
filter_str : Optional [str ] = None ,
97
- ) -> bool :
98
- """Validates that the table can be read, returns True iff snapshot is supported."""
114
+ * ,
115
+ should_warn : bool ,
116
+ should_dry_run : bool ,
117
+ ):
118
+ """Check if a table is eligible to use time-travel.
119
+
120
+
121
+ Args:
122
+ table: BigQuery table to check.
123
+ should_warn:
124
+ If true, raises a warning when time travel is disabled and the
125
+ underlying table is likely mutable.
126
+
127
+ Return:
128
+ bool:
129
+ True if there is a chance that time travel may be supported on this
130
+ table. If ``should_dry_run`` is True, then this is validated with a
131
+ ``dry_run`` query.
132
+ """
133
+
134
+ # user code
135
+ # -> pandas.read_gbq_table
136
+ # -> with_default_session
137
+ # -> session.read_gbq_table
138
+ # -> session._read_gbq_table
139
+ # -> loader.read_gbq_table
140
+ # -> is_time_travel_eligible
141
+ stacklevel = 7
99
142
100
- time_travel_not_found = False
101
143
# Anonymous dataset, does not support snapshot ever
102
144
if table .dataset_id .startswith ("_" ):
103
- pass
145
+ return False
104
146
105
147
# Only true tables support time travel
106
- elif table .table_id .endswith ("*" ):
107
- msg = bfe .format_message (
108
- "Wildcard tables do not support FOR SYSTEM_TIME AS OF queries. "
109
- "Attempting query without time travel. Be aware that "
110
- "modifications to the underlying data may result in errors or "
111
- "unexpected behavior."
112
- )
113
- warnings .warn (msg , category = bfe .TimeTravelDisabledWarning )
114
- elif table .table_type != "TABLE" :
115
- if table .table_type == "MATERIALIZED_VIEW" :
148
+ if table .table_id .endswith ("*" ):
149
+ if should_warn :
116
150
msg = bfe .format_message (
117
- "Materialized views do not support FOR SYSTEM_TIME AS OF queries. "
118
- "Attempting query without time travel. Be aware that as materialized views "
119
- "are updated periodically, modifications to the underlying data in the view may "
120
- "result in errors or unexpected behavior."
151
+ "Wildcard tables do not support FOR SYSTEM_TIME AS OF queries. "
152
+ "Attempting query without time travel. Be aware that "
153
+ "modifications to the underlying data may result in errors or "
154
+ "unexpected behavior."
121
155
)
122
- warnings .warn (msg , category = bfe .TimeTravelDisabledWarning )
123
- else :
124
- # table might support time travel, lets do a dry-run query with time travel
156
+ warnings .warn (
157
+ msg , category = bfe .TimeTravelDisabledWarning , stacklevel = stacklevel
158
+ )
159
+ return False
160
+ elif table .table_type != "TABLE" :
161
+ if table .table_type == "MATERIALIZED_VIEW" :
162
+ if should_warn :
163
+ msg = bfe .format_message (
164
+ "Materialized views do not support FOR SYSTEM_TIME AS OF queries. "
165
+ "Attempting query without time travel. Be aware that as materialized views "
166
+ "are updated periodically, modifications to the underlying data in the view may "
167
+ "result in errors or unexpected behavior."
168
+ )
169
+ warnings .warn (
170
+ msg , category = bfe .TimeTravelDisabledWarning , stacklevel = stacklevel
171
+ )
172
+ return False
173
+
174
+ # table might support time travel, lets do a dry-run query with time travel
175
+ if should_dry_run :
125
176
snapshot_sql = bigframes .session ._io .bigquery .to_query (
126
177
query_or_table = f"{ table .reference .project } .{ table .reference .dataset_id } .{ table .reference .table_id } " ,
127
178
columns = columns or (),
128
179
sql_predicate = filter_str ,
129
180
time_travel_timestamp = snapshot_time ,
130
181
)
131
182
try :
132
- # If this succeeds, we don't need to query without time travel, that would surely succeed
133
- bqclient .query_and_wait (
134
- snapshot_sql , job_config = bigquery .QueryJobConfig (dry_run = True )
183
+ # If this succeeds, we know that time travel will for sure work.
184
+ bigframes .session ._io .bigquery .start_query_with_client (
185
+ bq_client = bqclient ,
186
+ sql = snapshot_sql ,
187
+ job_config = bigquery .QueryJobConfig (dry_run = True ),
188
+ location = None ,
189
+ project = None ,
190
+ timeout = None ,
191
+ metrics = None ,
192
+ query_with_job = False ,
135
193
)
136
194
return True
195
+
137
196
except google .api_core .exceptions .NotFound :
138
- # note that a notfound caused by a simple typo will be
139
- # caught above when the metadata is fetched, not here
140
- time_travel_not_found = True
141
-
142
- # At this point, time travel is known to fail, but can we query without time travel?
143
- snapshot_sql = bigframes .session ._io .bigquery .to_query (
144
- query_or_table = f"{ table .reference .project } .{ table .reference .dataset_id } .{ table .reference .table_id } " ,
145
- columns = columns or (),
146
- sql_predicate = filter_str ,
147
- time_travel_timestamp = None ,
148
- )
149
- # Any errors here should just be raised to user
150
- bqclient .query_and_wait (
151
- snapshot_sql , job_config = bigquery .QueryJobConfig (dry_run = True )
152
- )
153
- if time_travel_not_found :
154
- msg = bfe .format_message (
155
- "NotFound error when reading table with time travel."
156
- " Attempting query without time travel. Warning: Without"
157
- " time travel, modifications to the underlying table may"
158
- " result in errors or unexpected behavior."
159
- )
160
- warnings .warn (msg , category = bfe .TimeTravelDisabledWarning )
161
- return False
197
+ # If system time isn't supported, it returns NotFound error?
198
+ # Note that a notfound caused by a simple typo will be
199
+ # caught above when the metadata is fetched, not here.
200
+ if should_warn :
201
+ msg = bfe .format_message (
202
+ "NotFound error when reading table with time travel."
203
+ " Attempting query without time travel. Warning: Without"
204
+ " time travel, modifications to the underlying table may"
205
+ " result in errors or unexpected behavior."
206
+ )
207
+ warnings .warn (
208
+ msg , category = bfe .TimeTravelDisabledWarning , stacklevel = stacklevel
209
+ )
210
+
211
+ # If we make it to here, we know for sure that time travel won't work.
212
+ return False
213
+ else :
214
+ # We haven't validated it, but there's a chance that time travel could work.
215
+ return True
162
216
163
217
164
218
def infer_unique_columns (
0 commit comments