11"""Arrow Utilities Module (PRIVATE)."""
22
3+ from __future__ import annotations
4+
35import datetime
46import json
57import logging
6- from typing import Any , Dict , Optional , Tuple , cast
8+ from typing import Any , Tuple , cast
79
810import pyarrow as pa
911import pytz
1416_logger : logging .Logger = logging .getLogger (__name__ )
1517
1618
def _extract_partitions_from_path(path_root: str, path: str) -> dict[str, str]:
    """Extract Hive-style ``key=value`` partitions from an object path.

    Only the directory segments located between ``path_root`` and the file
    name are inspected; segments that belong to ``path_root`` itself are
    never reported as partitions.

    Parameters
    ----------
    path_root
        Root prefix of the dataset. A trailing ``/`` is appended when missing.
    path
        Full path of the object (root + partition directories + file name).

    Returns
    -------
    dict[str, str]
        Mapping of partition name to partition value, in path order.
        Empty when the path holds no ``key=value`` directory below the root.

    Raises
    ------
    Exception
        If ``path_root`` does not occur in ``path``.
    """
    path_root = path_root if path_root.endswith("/") else f"{path_root}/"
    if path_root not in path:
        raise Exception(f"Object {path} is not under the root path ({path_root}).")
    # Drop the file name but keep the trailing slash of its directory.
    path_wo_filename: str = path.rpartition("/")[0] + "/"
    # ``path_root`` already ends with "/", so it must be stripped as-is.
    # Appending another "/" (as the previous code did) never matched, which
    # left the root in place and leaked its own "key=value" segments into
    # the result. Everything after the root is the partition directory chain.
    path_wo_prefix: str = path_wo_filename.partition(path_root)[2]
    partitions: dict[str, str] = {}
    for segment in path_wo_prefix.split("/"):
        # Split on the first "=" only, so values may themselves contain "=".
        key, sep, value = segment.partition("=")
        if sep:
            partitions[key] = value
    return partitions
2931
3032
3133def _add_table_partitions (
3234 table : pa .Table ,
3335 path : str ,
34- path_root : Optional [ str ] ,
36+ path_root : str | None ,
3537) -> pa .Table :
3638 part = _extract_partitions_from_path (path_root , path ) if path_root else None
3739 if part :
@@ -59,7 +61,7 @@ def ensure_df_is_mutable(df: pd.DataFrame) -> pd.DataFrame:
5961 return df
6062
6163
62- def _apply_timezone (df : pd .DataFrame , metadata : Dict [str , Any ]) -> pd .DataFrame :
64+ def _apply_timezone (df : pd .DataFrame , metadata : dict [str , Any ]) -> pd .DataFrame :
6365 for c in metadata ["columns" ]:
6466 if "field_name" in c and c ["field_name" ] is not None :
6567 col_name = str (c ["field_name" ])
@@ -79,13 +81,13 @@ def _apply_timezone(df: pd.DataFrame, metadata: Dict[str, Any]) -> pd.DataFrame:
7981
8082def _table_to_df (
8183 table : pa .Table ,
82- kwargs : Dict [str , Any ],
84+ kwargs : dict [str , Any ],
8385) -> pd .DataFrame :
8486 """Convert a PyArrow table to a Pandas DataFrame and apply metadata.
8587
8688 This method should be used across the codebase to ensure this conversion is consistent.
8789 """
88- metadata : Dict [str , Any ] = {}
90+ metadata : dict [str , Any ] = {}
8991 if table .schema .metadata is not None and b"pandas" in table .schema .metadata :
9092 metadata = json .loads (table .schema .metadata [b"pandas" ])
9193
@@ -100,10 +102,10 @@ def _table_to_df(
100102
101103def _df_to_table (
102104 df : pd .DataFrame ,
103- schema : Optional [ pa .Schema ] = None ,
104- index : Optional [ bool ] = None ,
105- dtype : Optional [ Dict [ str , str ]] = None ,
106- cpus : Optional [ int ] = None ,
105+ schema : pa .Schema | None = None ,
106+ index : bool | None = None ,
107+ dtype : dict [ str , str ] | None = None ,
108+ cpus : int | None = None ,
107109) -> pa .Table :
108110 table : pa .Table = pa .Table .from_pandas (df = df , schema = schema , nthreads = cpus , preserve_index = index , safe = True )
109111 if dtype :
0 commit comments