@@ -45,7 +45,11 @@ def ai_forecast(
         result_sql = self._sql_generator.ai_forecast(
             source_sql=input_data.sql, options=options
         )
-        return self._session.read_gbq(result_sql)
+
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(result_sql, allow_large_results=True)


 class BqmlModel(BaseBqml):
@@ -95,7 +99,17 @@ def _apply_ml_tvf(
         )

         result_sql = apply_sql_tvf(input_sql)
-        df = self._session.read_gbq(result_sql, index_col=index_col_ids)
+        df = self._session.read_gbq_query(
+            result_sql,
+            index_col=index_col_ids,
+            # Many ML methods use nested JSON, which isn't yet compatible with
+            # joining local results. Also, there is a chance that the results
+            # are greater than 10 GB.
+            # TODO(b/395912450): Once the limitations with local data are
+            # resolved, consider setting allow_large_results only when expected
+            # data size is large.
+            allow_large_results=True,
+        )
         if df._has_index:
             df.index.names = index_labels
         # Restore column labels
@@ -159,7 +173,10 @@ def explain_predict(
     def global_explain(self, options: Mapping[str, bool]) -> bpd.DataFrame:
         sql = self._sql_generator.ml_global_explain(struct_options=options)
         return (
-            self._session.read_gbq(sql)
+            # TODO(b/395912450): Once the limitations with local data are
+            # resolved, consider setting allow_large_results only when expected
+            # data size is large.
+            self._session.read_gbq_query(sql, allow_large_results=True)
             .sort_values(by="attribution", ascending=False)
             .set_index("feature")
         )
@@ -234,26 +251,49 @@ def forecast(self, options: Mapping[str, int | float]) -> bpd.DataFrame:
         sql = self._sql_generator.ml_forecast(struct_options=options)
         timestamp_col_name = "forecast_timestamp"
         index_cols = [timestamp_col_name]
-        first_col_name = self._session.read_gbq(sql).columns.values[0]
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        first_col_name = self._session.read_gbq_query(
+            sql, allow_large_results=True
+        ).columns.values[0]
         if timestamp_col_name != first_col_name:
             index_cols.append(first_col_name)
-        return self._session.read_gbq(sql, index_col=index_cols).reset_index()
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(
+            sql, index_col=index_cols, allow_large_results=True
+        ).reset_index()

     def explain_forecast(self, options: Mapping[str, int | float]) -> bpd.DataFrame:
         sql = self._sql_generator.ml_explain_forecast(struct_options=options)
         timestamp_col_name = "time_series_timestamp"
         index_cols = [timestamp_col_name]
-        first_col_name = self._session.read_gbq(sql).columns.values[0]
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        first_col_name = self._session.read_gbq_query(
+            sql, allow_large_results=True
+        ).columns.values[0]
         if timestamp_col_name != first_col_name:
             index_cols.append(first_col_name)
-        return self._session.read_gbq(sql, index_col=index_cols).reset_index()
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(
+            sql, index_col=index_cols, allow_large_results=True
+        ).reset_index()

     def evaluate(self, input_data: Optional[bpd.DataFrame] = None):
         sql = self._sql_generator.ml_evaluate(
             input_data.sql if (input_data is not None) else None
         )

-        return self._session.read_gbq(sql)
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(sql, allow_large_results=True)

     def llm_evaluate(
         self,
@@ -262,42 +302,62 @@ def llm_evaluate(
     ):
         sql = self._sql_generator.ml_llm_evaluate(input_data.sql, task_type)

-        return self._session.read_gbq(sql)
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(sql, allow_large_results=True)

     def arima_evaluate(self, show_all_candidate_models: bool = False):
         sql = self._sql_generator.ml_arima_evaluate(show_all_candidate_models)

-        return self._session.read_gbq(sql)
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(sql, allow_large_results=True)

     def arima_coefficients(self) -> bpd.DataFrame:
         sql = self._sql_generator.ml_arima_coefficients()

-        return self._session.read_gbq(sql)
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(sql, allow_large_results=True)

     def centroids(self) -> bpd.DataFrame:
         assert self._model.model_type == "KMEANS"

         sql = self._sql_generator.ml_centroids()

-        return self._session.read_gbq(
-            sql, index_col=["centroid_id", "feature"]
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(
+            sql, index_col=["centroid_id", "feature"], allow_large_results=True
         ).reset_index()

     def principal_components(self) -> bpd.DataFrame:
         assert self._model.model_type == "PCA"

         sql = self._sql_generator.ml_principal_components()

-        return self._session.read_gbq(
-            sql, index_col=["principal_component_id", "feature"]
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(
+            sql,
+            index_col=["principal_component_id", "feature"],
+            allow_large_results=True,
         ).reset_index()

     def principal_component_info(self) -> bpd.DataFrame:
         assert self._model.model_type == "PCA"

         sql = self._sql_generator.ml_principal_component_info()

-        return self._session.read_gbq(sql)
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(sql, allow_large_results=True)

     def copy(self, new_model_name: str, replace: bool = False) -> BqmlModel:
         job_config = self._session._prepare_copy_job_config()
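
For context, a minimal usage sketch of the call pattern this change standardizes on. It is an illustration, not part of the diff: the model name and SQL are hypothetical placeholders, and it assumes the module-level bigframes.pandas entry point delegates to the same Session.read_gbq_query shown above.

import bigframes.pandas as bpd

# Hypothetical BQML query; any ML.* function output follows the same pattern.
sql = "SELECT * FROM ML.EVALUATE(MODEL `my_dataset.my_model`)"

# allow_large_results=True keeps the query off the faster local-results path,
# which (per the inline comments above) cannot yet handle nested JSON output
# or result sets larger than 10 GB.
df = bpd.read_gbq_query(sql, allow_large_results=True)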