@@ -155,7 +155,13 @@ def __init__(
         self._transpose_cache: Optional[Block] = transpose_cache
 
     @classmethod
-    def from_local(cls, data: pd.DataFrame, session: bigframes.Session) -> Block:
+    def from_local(
+        cls,
+        data: pd.DataFrame,
+        session: bigframes.Session,
+        *,
+        cache_transpose: bool = True,
+    ) -> Block:
         # Assumes caller has already converted datatypes to bigframes ones.
         pd_data = data
         column_labels = pd_data.columns
@@ -169,12 +175,21 @@ def from_local(cls, data: pd.DataFrame, session: bigframes.Session) -> Block:
         pd_data = pd_data.reset_index(names=index_ids)
         as_pyarrow = pa.Table.from_pandas(pd_data, preserve_index=False)
         array_value = core.ArrayValue.from_pyarrow(as_pyarrow, session=session)
-        return cls(
+        block = cls(
             array_value,
             column_labels=column_labels,
             index_columns=index_ids,
             index_labels=index_labels,
         )
+        if cache_transpose:
+            try:
+                # this cache will help when aligning on axis=1
+                block = block.with_transpose_cache(
+                    cls.from_local(data.T, session, cache_transpose=False)
+                )
+            except Exception:
+                pass
+        return block
 
     @property
     def index(self) -> BlockIndexProperties:
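The cache_transpose keyword above mostly exists to terminate the recursion that builds the cache: from_local now calls itself on data.T, and that inner call must not try to cache a transpose of its own (any failure is deliberately swallowed so local construction never breaks). A minimal sketch of the pattern with a toy class, not the real Block, and with no BigQuery session involved:

    from dataclasses import dataclass
    from typing import Optional

    import pandas as pd

    @dataclass
    class ToyBlock:
        """Stand-in for Block; models only the transpose cache."""
        data: pd.DataFrame
        transpose_cache: Optional["ToyBlock"] = None

        @classmethod
        def from_local(cls, data: pd.DataFrame, *, cache_transpose: bool = True) -> "ToyBlock":
            block = cls(data)
            if cache_transpose:
                # One level deep only: the cached transpose skips caching its own transpose.
                block.transpose_cache = cls.from_local(data.T, cache_transpose=False)
            return block

    df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
    blk = ToyBlock.from_local(df)
    assert blk.transpose_cache is not None              # data.T is pre-built for axis=1 alignment
    assert blk.transpose_cache.transpose_cache is None  # the recursion stops after one level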
@@ -724,12 +739,18 @@ def with_column_labels(
                 f"The column labels size `{len(label_list)}` should equal to the value"
                 + f"columns size: {len(self.value_columns)}."
             )
-        return Block(
+        block = Block(
             self._expr,
             index_columns=self.index_columns,
             column_labels=label_list,
             index_labels=self.index.names,
         )
+        singleton_label = len(list(value)) == 1 and list(value)[0]
+        if singleton_label is not None and self._transpose_cache is not None:
+            new_cache, label_id = self._transpose_cache.create_constant(singleton_label)
+            new_cache = new_cache.set_index([label_id])
+            block = block.with_transpose_cache(new_cache)
+        return block
 
     def with_transpose_cache(self, transposed: Block):
         return Block(
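In the transpose cache, the column labels of the original block appear as the index values of the transposed block, so relabeling a single-column block also has to rewrite the cached index; that is what the create_constant plus set_index pair above does. A rough pandas analogy (illustrative only, not the Block API) of why the cache would otherwise go stale:

    import pandas as pd

    col = pd.DataFrame({"old": [1, 2, 3]})
    row = col.T                                  # cached transpose; its index is ["old"]
    col = col.set_axis(["new"], axis="columns")  # relabel the single column
    row.index = col.columns                      # refresh the cache's index to ["new"]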
@@ -1947,6 +1968,153 @@ def merge(
 
         return Block(expr, index_columns=index_columns, column_labels=labels)
 
+    def _align_both_axes(
+        self, other: Block, how: str
+    ) -> Tuple[Block, pd.Index, Sequence[Tuple[ex.Expression, ex.Expression]]]:
+        # Join rows
+        aligned_block, (get_column_left, get_column_right) = self.join(other, how=how)
+        # join columns schema
+        # indexers will be None for exact match
+        if self.column_labels.equals(other.column_labels):
+            columns, lcol_indexer, rcol_indexer = self.column_labels, None, None
+        else:
+            columns, lcol_indexer, rcol_indexer = self.column_labels.join(
+                other.column_labels, how="outer", return_indexers=True
+            )
+        lcol_indexer = (
+            lcol_indexer if (lcol_indexer is not None) else range(len(columns))
+        )
+        rcol_indexer = (
+            rcol_indexer if (rcol_indexer is not None) else range(len(columns))
+        )
+
+        left_input_lookup = (
+            lambda index: ex.free_var(get_column_left[self.value_columns[index]])
+            if index != -1
+            else ex.const(None)
+        )
+        right_input_lookup = (
+            lambda index: ex.free_var(get_column_right[other.value_columns[index]])
+            if index != -1
+            else ex.const(None)
+        )
+
+        left_inputs = [left_input_lookup(i) for i in lcol_indexer]
+        right_inputs = [right_input_lookup(i) for i in rcol_indexer]
+        return aligned_block, columns, tuple(zip(left_inputs, right_inputs))
+
+    def _align_axis_0(
+        self, other: Block, how: str
+    ) -> Tuple[Block, pd.Index, Sequence[Tuple[ex.Expression, ex.Expression]]]:
+        assert len(other.value_columns) == 1
+        aligned_block, (get_column_left, get_column_right) = self.join(other, how=how)
+
+        series_column_id = other.value_columns[0]
+        inputs = tuple(
+            (
+                ex.free_var(get_column_left[col]),
+                ex.free_var(get_column_right[series_column_id]),
+            )
+            for col in self.value_columns
+        )
+        return aligned_block, self.column_labels, inputs
+
+    def _align_series_block_axis_1(
+        self, other: Block, how: str
+    ) -> Tuple[Block, pd.Index, Sequence[Tuple[ex.Expression, ex.Expression]]]:
+        assert len(other.value_columns) == 1
+        if other._transpose_cache is None:
+            raise ValueError(
+                "Wrong align method, this approach requires transpose cache"
+            )
+
+        # Join rows
+        aligned_block, (get_column_left, get_column_right) = join_with_single_row(
+            self, other.transpose()
+        )
+        # join columns schema
+        # indexers will be None for exact match
+        if self.column_labels.equals(other.transpose().column_labels):
+            columns, lcol_indexer, rcol_indexer = self.column_labels, None, None
+        else:
+            columns, lcol_indexer, rcol_indexer = self.column_labels.join(
+                other.transpose().column_labels, how=how, return_indexers=True
+            )
+        lcol_indexer = (
+            lcol_indexer if (lcol_indexer is not None) else range(len(columns))
+        )
+        rcol_indexer = (
+            rcol_indexer if (rcol_indexer is not None) else range(len(columns))
+        )
+
+        left_input_lookup = (
+            lambda index: ex.free_var(get_column_left[self.value_columns[index]])
+            if index != -1
+            else ex.const(None)
+        )
+        right_input_lookup = (
+            lambda index: ex.free_var(
+                get_column_right[other.transpose().value_columns[index]]
+            )
+            if index != -1
+            else ex.const(None)
+        )
+
+        left_inputs = [left_input_lookup(i) for i in lcol_indexer]
+        right_inputs = [right_input_lookup(i) for i in rcol_indexer]
+        return aligned_block, columns, tuple(zip(left_inputs, right_inputs))
+
+    def _align_pd_series_axis_1(
+        self, other: pd.Series, how: str
+    ) -> Tuple[Block, pd.Index, Sequence[Tuple[ex.Expression, ex.Expression]]]:
+        if self.column_labels.equals(other.index):
+            columns, lcol_indexer, rcol_indexer = self.column_labels, None, None
+        else:
+            if not (self.column_labels.is_unique and other.index.is_unique):
+                raise ValueError("Cannot align non-unique indices")
+            columns, lcol_indexer, rcol_indexer = self.column_labels.join(
+                other.index, how=how, return_indexers=True
+            )
+        lcol_indexer = (
+            lcol_indexer if (lcol_indexer is not None) else range(len(columns))
+        )
+        rcol_indexer = (
+            rcol_indexer if (rcol_indexer is not None) else range(len(columns))
+        )
+
+        left_input_lookup = (
+            lambda index: ex.free_var(self.value_columns[index])
+            if index != -1
+            else ex.const(None)
+        )
+        right_input_lookup = (
+            lambda index: ex.const(other.iloc[index]) if index != -1 else ex.const(None)
+        )
+
+        left_inputs = [left_input_lookup(i) for i in lcol_indexer]
+        right_inputs = [right_input_lookup(i) for i in rcol_indexer]
+        return self, columns, tuple(zip(left_inputs, right_inputs))
+
+    def _apply_binop(
+        self,
+        op: ops.BinaryOp,
+        inputs: Sequence[Tuple[ex.Expression, ex.Expression]],
+        labels: pd.Index,
+        reverse: bool = False,
+    ) -> Block:
+        block = self
+        binop_result_ids = []
+        for left_input, right_input in inputs:
+            expr = (
+                op.as_expr(right_input, left_input)
+                if reverse
+                else op.as_expr(left_input, right_input)
+            )
+            block, result_col_id = block.project_expr(expr)
+            binop_result_ids.append(result_col_id)
+
+        return block.select_columns(binop_result_ids).with_column_labels(labels)
+
     def join(
         self,
         other: Block,
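All of the _align_* helpers above share the same column-matching step: pd.Index.join(..., return_indexers=True) yields the joined label set plus positional indexers, where -1 marks a label missing on that side and is turned into ex.const(None), i.e. a NULL input; when the labels match exactly, pandas returns None indexers and the helpers substitute range(len(columns)). _apply_binop then projects one expression per (left, right) pair. The pandas half of that logic can be exercised standalone:

    import pandas as pd

    left_cols = pd.Index(["a", "b", "c"])
    right_cols = pd.Index(["b", "c", "d"])

    columns, lidx, ridx = left_cols.join(right_cols, how="outer", return_indexers=True)
    print(list(columns))  # ['a', 'b', 'c', 'd']
    print(list(lidx))     # [0, 1, 2, -1]  -> no left column for 'd', so a NULL input
    print(list(ridx))     # [-1, 0, 1, 2]  -> no right column for 'a', so a NULL input

    # One (left, right) input pair per output column, in the order _apply_binop consumes them.
    pairs = list(zip(lidx, ridx))  # [(0, -1), (1, 0), (2, 1), (-1, 2)]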
@@ -2268,15 +2436,6 @@ def column_ids(self) -> Sequence[str]:
         """Column(s) to use as row labels."""
         return self._block._index_columns
 
-    def __repr__(self) -> str:
-        """Converts an Index to a string."""
-        # TODO(swast): Add a timeout here? If the query is taking a long time,
-        # maybe we just print the job metadata that we have so far?
-        # TODO(swast): Avoid downloading the whole index by using job
-        # metadata, like we do with DataFrame.
-        preview = self.to_pandas()
-        return repr(preview)
-
     def to_pandas(self) -> pd.Index:
         """Executes deferred operations and downloads the results."""
         if len(self.column_ids) == 0:
@@ -2371,6 +2530,61 @@ def join_indexless(
     )
 
 
+def join_with_single_row(
+    left: Block,
+    single_row_block: Block,
+) -> Tuple[Block, Tuple[Mapping[str, str], Mapping[str, str]],]:
+    """
+    Special join case where the other block is a single-row block.
+    This is not validated; the caller is responsible for not passing a multi-row block.
+    Preserves the index of the left block, ignoring the label of the other.
+    """
+    left_expr = left.expr
+    # ignore index columns by dropping them
+    right_expr = single_row_block.expr.select_columns(single_row_block.value_columns)
+    left_mappings = [
+        join_defs.JoinColumnMapping(
+            source_table=join_defs.JoinSide.LEFT,
+            source_id=id,
+            destination_id=guid.generate_guid(),
+        )
+        for id in left_expr.column_ids
+    ]
+    right_mappings = [
+        join_defs.JoinColumnMapping(
+            source_table=join_defs.JoinSide.RIGHT,
+            source_id=id,
+            destination_id=guid.generate_guid(),
+        )
+        for id in right_expr.column_ids  # skip index column
+    ]
+
+    join_def = join_defs.JoinDefinition(
+        conditions=(),
+        mappings=(*left_mappings, *right_mappings),
+        type="cross",
+    )
+    combined_expr = left_expr.join(
+        right_expr,
+        join_def=join_def,
+    )
+    get_column_left = join_def.get_left_mapping()
+    get_column_right = join_def.get_right_mapping()
+    # Drop original indices from each side and use the coalesced combination generated by the join.
+    index_cols_post_join = [get_column_left[id] for id in left.index_columns]
+
+    block = Block(
+        combined_expr,
+        index_columns=index_cols_post_join,
+        column_labels=left.column_labels.append(single_row_block.column_labels),
+        index_labels=[left.index.name],
+    )
+    return (
+        block,
+        (get_column_left, get_column_right),
+    )
+
+
 def join_mono_indexed(
     left: Block,
     right: Block,
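join_with_single_row is essentially a broadcast: a cross join against a one-row relation pairs that row with every row of the left block while keeping the left index. The pandas equivalent of the observable behavior (the real helper works on ArrayValue expressions and generated column IDs, not DataFrames):

    import pandas as pd

    left = pd.DataFrame({"x": [1, 2, 3]}, index=pd.Index([10, 20, 30], name="i"))
    single_row = pd.DataFrame({"a": [100], "b": [200]})  # exactly one row; its index is ignored

    out = left.merge(single_row, how="cross")  # each left row is paired with the same a/b values
    out.index = left.index                     # preserve the left block's index, as the helper does
    print(out)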
@@ -2558,7 +2772,7 @@ def coalesce_columns(
 ) -> Tuple[core.ArrayValue, Sequence[str]]:
     result_ids = []
     for left_id, right_id in zip(left_ids, right_ids):
-        if how == "left" or how == "inner":
+        if how == "left" or how == "inner" or how == "cross":
             result_ids.append(left_id)
             expr = expr.drop_columns([right_id])
         elif how == "right":