1010class Glue :
1111 def __init__ (self , session ):
1212 self ._session = session
13+ self ._client_glue = session .boto3_session .client (
14+ service_name = "glue" , config = session .botocore_config )
15+
def get_table_dtypes(self, database, table):
    """
    Get all columns names and the related data types
    Partition columns are included after the regular columns.
    :param database: Glue database's name
    :param table: Glue table's name
    :return: A dictionary as {"col name": "col dtype"}
    """
    response = self._client_glue.get_table(DatabaseName=database,
                                           Name=table)
    logger.debug(f"get_table response:\n { response } ")
    table_info = response["Table"]
    dtypes = {}
    for col in table_info["StorageDescriptor"]["Columns"]:
        dtypes[col["Name"]] = col["Type"]
    # PartitionKeys is not a required field of a Glue table, so a table
    # without partitions must not fail with KeyError here.
    for par in table_info.get("PartitionKeys", []):
        dtypes[par["Name"]] = par["Type"]
    return dtypes
32+
33+ def get_table_python_types (self , database , table ):
34+ """
35+ Get all columns names and the related python types
36+ :param database: Glue database's name
37+ :param table: Glue table's name
38+ :return: A dictionary as {"col name": "col python type"}
39+ """
40+ dtypes = self .get_table_dtypes (database = database , table = table )
41+ return {k : Glue ._type_athena2python (v ) for k , v in dtypes .items ()}
42+
43+ @staticmethod
44+ def _type_athena2python (dtype ):
45+ dtype = dtype .lower ()
46+ if dtype == "int" :
47+ return int
48+ elif dtype == "bigint" :
49+ return int
50+ elif dtype == "float" :
51+ return float
52+ elif dtype == "double" :
53+ return float
54+ elif dtype == "boolean" :
55+ return bool
56+ elif dtype == "string" :
57+ return str
58+ else :
59+ raise UnsupportedType (f"Unsupported Athena type: { dtype } " )
1360
1461 def metadata_to_glue (
1562 self ,
@@ -53,20 +100,16 @@ def metadata_to_glue(
53100 )
54101
55102 def delete_table_if_exists (self , database , table ):
56- client = self ._session .boto3_session .client (
57- service_name = "glue" , config = self ._session .botocore_config )
58103 try :
59- client .delete_table (DatabaseName = database , Name = table )
60- except client .exceptions .EntityNotFoundException :
104+ self . _client_glue .delete_table (DatabaseName = database , Name = table )
105+ except self . _client_glue .exceptions .EntityNotFoundException :
61106 pass
62107
63108 def does_table_exists (self , database , table ):
64- client = self ._session .boto3_session .client (
65- service_name = "glue" , config = self ._session .botocore_config )
66109 try :
67- client .get_table (DatabaseName = database , Name = table )
110+ self . _client_glue .get_table (DatabaseName = database , Name = table )
68111 return True
69- except client .exceptions .EntityNotFoundException :
112+ except self . _client_glue .exceptions .EntityNotFoundException :
70113 return False
71114
72115 def create_table (self ,
@@ -76,8 +119,6 @@ def create_table(self,
76119 path ,
77120 file_format ,
78121 partition_cols = None ):
79- client = self ._session .boto3_session .client (
80- service_name = "glue" , config = self ._session .botocore_config )
81122 if file_format == "parquet" :
82123 table_input = Glue .parquet_table_definition (
83124 table , partition_cols , schema , path )
@@ -86,11 +127,10 @@ def create_table(self,
86127 schema , path )
87128 else :
88129 raise UnsupportedFileFormat (file_format )
89- client .create_table (DatabaseName = database , TableInput = table_input )
130+ self ._client_glue .create_table (DatabaseName = database ,
131+ TableInput = table_input )
90132
91133 def add_partitions (self , database , table , partition_paths , file_format ):
92- client = self ._session .boto3_session .client (
93- service_name = "glue" , config = self ._session .botocore_config )
94134 if not partition_paths :
95135 return None
96136 partitions = list ()
@@ -106,15 +146,13 @@ def add_partitions(self, database, table, partition_paths, file_format):
106146 for _ in range (pages_num ):
107147 page = partitions [:100 ]
108148 del partitions [:100 ]
109- client .batch_create_partition (DatabaseName = database ,
110- TableName = table ,
111- PartitionInputList = page )
149+ self . _client_glue .batch_create_partition (DatabaseName = database ,
150+ TableName = table ,
151+ PartitionInputList = page )
112152
113153 def get_connection_details (self , name ):
114- client = self ._session .boto3_session .client (
115- service_name = "glue" , config = self ._session .botocore_config )
116- return client .get_connection (Name = name ,
117- HidePassword = False )["Connection" ]
154+ return self ._client_glue .get_connection (
155+ Name = name , HidePassword = False )["Connection" ]
118156
119157 @staticmethod
120158 def _build_schema (dataframe , partition_cols , preserve_index ):
@@ -155,7 +193,7 @@ def _type_pandas2athena(dtype):
155193 elif dtype [:10 ] == "datetime64" :
156194 return "timestamp"
157195 else :
158- raise UnsupportedType ("Unsupported Pandas type: " + dtype )
196+ raise UnsupportedType (f "Unsupported Pandas type: { dtype } " )
159197
160198 @staticmethod
161199 def _parse_table_name (path ):
0 commit comments