 import bigframes.core.identifiers
 import bigframes.core.nodes as nodes
 import bigframes.core.ordering as order
-import bigframes.core.rewrite as rewrites
 import bigframes.core.schema
 import bigframes.core.tree_properties as tree_properties
 import bigframes.features
@@ -128,7 +127,7 @@ def to_sql(
         col_id_overrides = dict(col_id_overrides)
         col_id_overrides[internal_offset_col] = offset_column
         node = (
-            self._get_optimized_plan(array_value.node)
+            self._sub_cache_subtrees(array_value.node)
             if enable_cache
             else array_value.node
         )
@@ -279,7 +278,7 @@ def peek(
         """
         A 'peek' efficiently accesses a small number of rows in the dataframe.
         """
-        plan = self._get_optimized_plan(array_value.node)
+        plan = self._sub_cache_subtrees(array_value.node)
         if not tree_properties.can_fast_peek(plan):
             warnings.warn("Peeking this value cannot be done efficiently.")

@@ -314,15 +313,15 @@ def head(
             # No user-provided ordering, so just get any N rows, it's faster!
             return self.peek(array_value, n_rows)

-        plan = self._get_optimized_plan(array_value.node)
+        plan = self._sub_cache_subtrees(array_value.node)
         if not tree_properties.can_fast_head(plan):
             # If can't get head fast, we are going to need to execute the whole query
             # Will want to do this in a way such that the result is reusable, but the first
             # N values can be easily extracted.
             # This currently requires clustering on offsets.
             self._cache_with_offsets(array_value)
             # Get a new optimized plan after caching
-            plan = self._get_optimized_plan(array_value.node)
+            plan = self._sub_cache_subtrees(array_value.node)
             assert tree_properties.can_fast_head(plan)

         head_plan = generate_head_plan(plan, n_rows)
@@ -347,7 +346,7 @@ def get_row_count(self, array_value: bigframes.core.ArrayValue) -> int:
         if count is not None:
             return count
         else:
-            row_count_plan = self._get_optimized_plan(
+            row_count_plan = self._sub_cache_subtrees(
                 generate_row_count_plan(array_value.node)
             )
             sql = self.compiler.compile_unordered(row_count_plan)
@@ -359,7 +358,7 @@ def _local_get_row_count(
     ) -> Optional[int]:
         # optimized plan has cache materializations which will have row count metadata
         # that is more likely to be usable than original leaf nodes.
-        plan = self._get_optimized_plan(array_value.node)
+        plan = self._sub_cache_subtrees(array_value.node)
         return tree_properties.row_count(plan)

     # Helpers
@@ -424,21 +423,14 @@ def _wait_on_job(
         self.metrics.count_job_stats(query_job)
         return results_iterator

-    def _get_optimized_plan(self, node: nodes.BigFrameNode) -> nodes.BigFrameNode:
+    def _sub_cache_subtrees(self, node: nodes.BigFrameNode) -> nodes.BigFrameNode:
         """
         Takes the original expression tree and applies optimizations to accelerate execution.

         At present, the only optimization is to replace subtrees with cached previous materializations.
         """
         # Apply any rewrites *after* applying cache, as cache is sensitive to exact tree structure
-        optimized_plan = tree_properties.replace_nodes(
-            node, (dict(self._cached_executions))
-        )
-        if ENABLE_PRUNING:
-            used_fields = frozenset(field.id for field in optimized_plan.fields)
-            optimized_plan = optimized_plan.prune(used_fields)
-        optimized_plan = rewrites.replace_slice_ops(optimized_plan)
-        return optimized_plan
+        return tree_properties.replace_nodes(node, (dict(self._cached_executions)))

     def _is_trivially_executable(self, array_value: bigframes.core.ArrayValue):
         """
@@ -448,7 +440,7 @@ def _is_trivially_executable(self, array_value: bigframes.core.ArrayValue):
         # Once rewriting is available, will want to rewrite before
         # evaluating execution cost.
         return tree_properties.is_trivially_executable(
-            self._get_optimized_plan(array_value.node)
+            self._sub_cache_subtrees(array_value.node)
         )

     def _cache_with_cluster_cols(
@@ -457,7 +449,7 @@ def _cache_with_cluster_cols(
         """Executes the query and uses the resulting table to rewrite future executions."""

         sql, schema, ordering_info = self.compiler.compile_raw(
-            self._get_optimized_plan(array_value.node)
+            self._sub_cache_subtrees(array_value.node)
         )
         tmp_table = self._sql_as_cached_temp_table(
             sql,
@@ -474,7 +466,7 @@ def _cache_with_offsets(self, array_value: bigframes.core.ArrayValue):
         """Executes the query and uses the resulting table to rewrite future executions."""
         offset_column = bigframes.core.guid.generate_guid("bigframes_offsets")
         w_offsets, offset_column = array_value.promote_offsets()
-        sql = self.compiler.compile_unordered(self._get_optimized_plan(w_offsets.node))
+        sql = self.compiler.compile_unordered(self._sub_cache_subtrees(w_offsets.node))

         tmp_table = self._sql_as_cached_temp_table(
             sql,
@@ -510,7 +502,7 @@ def _simplify_with_caching(self, array_value: bigframes.core.ArrayValue):
         """Attempts to handle the complexity by caching duplicated subtrees and breaking the query into pieces."""
         # Apply existing caching first
         for _ in range(MAX_SUBTREE_FACTORINGS):
-            node_with_cache = self._get_optimized_plan(array_value.node)
+            node_with_cache = self._sub_cache_subtrees(array_value.node)
             if node_with_cache.planning_complexity < QUERY_COMPLEXITY_LIMIT:
                 return

@@ -567,7 +559,7 @@ def _validate_result_schema(
     ):
         actual_schema = tuple(bq_schema)
         ibis_schema = bigframes.core.compile.test_only_ibis_inferred_schema(
-            self._get_optimized_plan(array_value.node)
+            self._sub_cache_subtrees(array_value.node)
         )
         internal_schema = array_value.schema
         if not bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable:
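
The retained comment ("Apply any rewrites *after* applying cache, as cache is sensitive to exact tree structure") is the crux of this refactor: a rewrite applied before cache substitution changes node equality and would defeat the lookup against `self._cached_executions`. A hedged illustration with the same kind of toy node (again hypothetical, not bigframes classes):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass(frozen=True)
class PlanNode:
    op: str
    limit: Optional[int] = None


# A previous execution cached a materialization of this exact node.
original = PlanNode("scan_big_table")
cache = {original: PlanNode("scan_cached_temp_table")}


def push_down_limit(node: PlanNode, n: int) -> PlanNode:
    """A toy rewrite: baking a row limit into the node changes its structure."""
    return PlanNode(node.op, limit=n)


# Cache substitution first, rewrite afterwards: the lookup matches.
assert original in cache

# Rewrite first: the rewritten node no longer equals the cached key, so the cache misses.
assert push_down_limit(original, 10) not in cache
```

This is consistent with the rename: `_sub_cache_subtrees` now performs only the cache substitution, with pruning and slice-op rewrites removed from this path (and presumably applied elsewhere in the pipeline).
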