from __future__ import annotations

import typing
-from typing import Mapping, Sequence, Tuple, Union
+from typing import Hashable, Mapping, Optional, Sequence, Tuple, Union

+import google.cloud.bigquery as bigquery
import numpy as np
import pandas

@@ -33,16 +34,60 @@
import bigframes.core.utils as utils
import bigframes.dtypes
import bigframes.dtypes as bf_dtypes
+import bigframes.formatting_helpers as formatter
import bigframes.operations as ops
import bigframes.operations.aggregations as agg_ops
import third_party.bigframes_vendored.pandas.core.indexes.base as vendored_pandas_index

+if typing.TYPE_CHECKING:
+    import bigframes.dataframe
+    import bigframes.series
+

class Index(vendored_pandas_index.Index):
    __doc__ = vendored_pandas_index.Index.__doc__

-    def __init__(self, data: blocks.BlockHolder):
-        self._data = data
+    def __init__(
+        self,
+        data=None,
+        dtype=None,
+        *,
+        name=None,
+    ):
+        import bigframes.dataframe as df
+        import bigframes.series as series
+
+        if isinstance(data, blocks.Block):
+            block = data.select_columns([])
+        elif isinstance(data, df.DataFrame):
+            raise ValueError("Cannot construct index from dataframe.")
+        elif isinstance(data, series.Series) or isinstance(data, Index):
+            if isinstance(data, series.Series):
+                block = data._block
+                block = block.set_index(
+                    col_ids=[data._value_column],
+                )
+            elif isinstance(data, Index):
+                block = data._block
+            index = Index(data=block)
+            name = data.name if name is None else name
+            if name is not None:
+                index.name = name
+            if dtype is not None:
+                index = index.astype(dtype)
+            block = index._block
+        else:
+            pd_index = pandas.Index(data=data, dtype=dtype, name=name)
+            pd_df = pandas.DataFrame(index=pd_index)
+            block = df.DataFrame(pd_df)._block
+        self._query_job = None
+        self._block: blocks.Block = block
+
+    @classmethod
+    def from_frame(
+        cls, frame: Union[bigframes.series.Series, bigframes.dataframe.DataFrame]
+    ) -> Index:
+        return FrameIndex(frame)

    @property
    def name(self) -> blocks.Label:
@@ -55,15 +100,16 @@ def name(self, value: blocks.Label):
    @property
    def names(self) -> typing.Sequence[blocks.Label]:
        """Returns the names of the Index."""
-        return self._data._get_block()._index_labels
+        return self._block._index_labels

    @names.setter
    def names(self, values: typing.Sequence[blocks.Label]):
-        return self._data._set_block(self._block.with_index_labels(values))
+        new_block = self._block.with_index_labels(values)
+        self._block = new_block

    @property
    def nlevels(self) -> int:
-        return len(self._data._get_block().index_columns)
+        return len(self._block.index_columns)

    @property
    def values(self) -> np.ndarray:
@@ -75,7 +121,7 @@ def ndim(self) -> int:

    @property
    def shape(self) -> typing.Tuple[int]:
-        return (self._data._get_block().shape[0],)
+        return (self._block.shape[0],)

    @property
    def dtype(self):
@@ -107,9 +153,7 @@ def is_monotonic_increasing(self) -> bool:
        """
        return typing.cast(
            bool,
-            self._data._get_block().is_monotonic_increasing(
-                self._data._get_block().index_columns
-            ),
+            self._block.is_monotonic_increasing(self._block.index_columns),
        )

    @property
@@ -122,9 +166,7 @@ def is_monotonic_decreasing(self) -> bool:
        """
        return typing.cast(
            bool,
-            self._data._get_block().is_monotonic_decreasing(
-                self._data._get_block().index_columns
-            ),
+            self._block.is_monotonic_decreasing(self._block.index_columns),
        )

    @property
@@ -149,14 +191,65 @@ def has_duplicates(self) -> bool:
        duplicates_df = df.DataFrame(duplicates_block)
        return duplicates_df["is_duplicate"].any()

-    @property
-    def _block(self) -> blocks.Block:
-        return self._data._get_block()
-
    @property
    def T(self) -> Index:
        return self.transpose()

+    @property
+    def query_job(self) -> Optional[bigquery.QueryJob]:
+        """BigQuery job metadata for the most recent query.
+
+        Returns:
+            The most recent `QueryJob
+            <https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob>`_.
+        """
+        if self._query_job is None:
+            self._query_job = self._block._compute_dry_run()
+        return self._query_job
+
+    def __repr__(self) -> str:
+        # TODO(swast): Add a timeout here? If the query is taking a long time,
+        # maybe we just print the job metadata that we have so far?
+        # TODO(swast): Avoid downloading the whole series by using job
+        # metadata, like we do with DataFrame.
+        opts = bigframes.options.display
+        max_results = opts.max_rows
+        if opts.repr_mode == "deferred":
+            return formatter.repr_query_job(self.query_job)
+
+        pandas_df, _, query_job = self._block.retrieve_repr_request_results(max_results)
+        self._query_job = query_job
+        return repr(pandas_df.index)
+
+    def copy(self, name: Optional[Hashable] = None):
+        copy_index = Index(self._block)
+        if name is not None:
+            copy_index.name = name
+        return copy_index
+
+    def to_series(
+        self, index: Optional[Index] = None, name: Optional[Hashable] = None
+    ) -> bigframes.series.Series:
+        if self.nlevels != 1:
+            raise NotImplementedError(
+                f"Converting multi-index to series is not yet supported. {constants.FEEDBACK_LINK}"
+            )
+
+        import bigframes.series
+
+        name = self.name if name is None else name
+        if index is None:
+            return bigframes.series.Series(data=self, index=self, name=name)
+        else:
+            return bigframes.series.Series(data=self, index=Index(index), name=name)
+
+    def get_level_values(self, level) -> Index:
+        level_n = level if isinstance(level, int) else self.names.index(level)
+        block = self._block.drop_levels(
+            [self._block.index_columns[i] for i in range(self.nlevels) if i != level_n]
+        )
+        return Index(block)
+
    def _memory_usage(self) -> int:
        (n_rows,) = self.shape
        return sum(
@@ -180,7 +273,7 @@ def sort_values(self, *, ascending: bool = True, na_position: str = "last"):
            order.OrderingColumnReference(column, direction=direction, na_last=na_last)
            for column in index_columns
        ]
-        return Index._from_block(self._block.order_by(ordering))
+        return Index(self._block.order_by(ordering))

    def astype(
        self,
@@ -269,7 +362,7 @@ def rename(self, name: Union[str, Sequence[str]]) -> Index:
        names = [name] if isinstance(name, str) else list(name)
        if len(names) != self.nlevels:
            raise ValueError("'name' must be same length as levels")
-        return Index._from_block(self._block.with_index_labels(names))
+        return Index(self._block.with_index_labels(names))

    def drop(
        self,
@@ -291,17 +384,17 @@ def drop(
        )
        block = block.filter(condition_id, keep_null=True)
        block = block.drop_columns([condition_id])
-        return Index._from_block(block)
+        return Index(block)

    def dropna(self, how: str = "any") -> Index:
        if how not in ("any", "all"):
            raise ValueError("'how' must be one of 'any', 'all'")
        result = block_ops.dropna(self._block, self._block.index_columns, how=how)  # type: ignore
-        return Index._from_block(result)
+        return Index(result)

    def drop_duplicates(self, *, keep: str = "first") -> Index:
        block = block_ops.drop_duplicates(self._block, self._block.index_columns, keep)
-        return Index._from_block(block)
+        return Index(block)

    def isin(self, values) -> Index:
        if not utils.is_list_like(values):
@@ -330,7 +423,7 @@ def _apply_unary_expr(
            result_ids.append(result_id)

        block = block.set_index(result_ids, index_labels=self._block.index_labels)
-        return Index._from_block(block)
+        return Index(block)

    def _apply_aggregation(self, op: agg_ops.AggregateOp) -> typing.Any:
        if self.nlevels > 1:
@@ -344,7 +437,7 @@ def __getitem__(self, key: int) -> typing.Any:
                result_pd_df, _ = self._block.slice(key, key + 1, 1).to_pandas()
            else:  # special case, want [-1:] instead of [-1:0]
                result_pd_df, _ = self._block.slice(key).to_pandas()
-            if result_pd_df.empty:
+            if result_pd_df.index.empty:
                raise IndexError("single positional indexer is out-of-bounds")
            return result_pd_df.index[0]
        else:
@@ -367,11 +460,36 @@ def to_numpy(self, dtype=None, **kwargs) -> np.ndarray:
    def __len__(self):
        return self.shape[0]

-    @classmethod
-    def _from_block(cls, block: blocks.Block) -> Index:
-        import bigframes.dataframe as df

-        return Index(df.DataFrame(block))
+# Index that mutates the originating dataframe/series
+class FrameIndex(Index):
+    def __init__(
+        self,
+        series_or_dataframe: typing.Union[
+            bigframes.series.Series, bigframes.dataframe.DataFrame
+        ],
+    ):
+        super().__init__(series_or_dataframe._block)
+        self._whole_frame = series_or_dataframe
+
+    @property
+    def name(self) -> blocks.Label:
+        return self.names[0]
+
+    @name.setter
+    def name(self, value: blocks.Label):
+        self.names = [value]
+
+    @property
+    def names(self) -> typing.Sequence[blocks.Label]:
+        """Returns the names of the Index."""
+        return self._block._index_labels
+
+    @names.setter
+    def names(self, values: typing.Sequence[blocks.Label]):
+        new_block = self._whole_frame._get_block().with_index_labels(values)
+        self._whole_frame._set_block(new_block)
+        self._block = new_block


class IndexValue:
@@ -406,15 +524,6 @@ def dtypes(
    def session(self) -> core.Session:
        return self._expr.session

-    def __repr__(self) -> str:
-        """Converts an Index to a string."""
-        # TODO(swast): Add a timeout here? If the query is taking a long time,
-        # maybe we just print the job metadata that we have so far?
-        # TODO(swast): Avoid downloading the whole index by using job
-        # metadata, like we do with DataFrame.
-        preview = self.to_pandas()
-        return repr(preview)
-
    def to_pandas(self) -> pandas.Index:
        """Executes deferred operations and downloads the results."""
        # Project down to only the index column. So the query can be cached to visualize other data.
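
A minimal usage sketch (not part of the change itself) of the constructor and helpers added above. It assumes a configured BigQuery session, uses the Index and FrameIndex classes defined in this module, and the data values and names are made up:

    # New constructor path: build an Index from in-memory data (routed through pandas.Index).
    idx = Index([10, 20, 30], name="id")

    # Copy-style path: wrap an existing Index, optionally renaming and casting.
    idx_int = Index(idx, dtype="Int64", name="id_int")

    # Helpers added in this change (single-level index assumed).
    level0 = idx.get_level_values(0)  # returns an equivalent Index
    s = idx.to_series()               # Series whose values and index both come from idx

    # Index.from_frame returns a FrameIndex, which stays coupled to its source:
    # assigning to .name/.names writes the new labels back onto the originating
    # DataFrame/Series block rather than only updating the Index object.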