61
61
from pyiceberg .transforms import BucketTransform , IdentityTransform
62
62
from pyiceberg .typedef import UTF8
63
63
from pyiceberg .types import (
64
+ BinaryType ,
64
65
BooleanType ,
66
+ DateType ,
67
+ DecimalType ,
68
+ DoubleType ,
69
+ FixedType ,
70
+ FloatType ,
65
71
IntegerType ,
72
+ ListType ,
66
73
LongType ,
74
+ MapType ,
67
75
NestedField ,
68
76
StringType ,
77
+ StructType ,
78
+ TimestampType ,
79
+ TimestamptzType ,
80
+ TimeType ,
81
+ UUIDType ,
69
82
)
70
83
71
84
HIVE_CATALOG_NAME = "hive"
@@ -181,15 +194,20 @@ def test_check_number_of_namespaces(table_schema_simple: Schema) -> None:
181
194
catalog .create_table ("table" , schema = table_schema_simple )
182
195
183
196
197
+ @pytest .mark .parametrize ("hive2_compatible" , [True , False ])
184
198
@patch ("time.time" , MagicMock (return_value = 12345 ))
185
- def test_create_table (table_schema_simple : Schema , hive_database : HiveDatabase , hive_table : HiveTable ) -> None :
199
+ def test_create_table (
200
+ table_schema_with_all_types : Schema , hive_database : HiveDatabase , hive_table : HiveTable , hive2_compatible : bool
201
+ ) -> None :
186
202
catalog = HiveCatalog (HIVE_CATALOG_NAME , uri = HIVE_METASTORE_FAKE_URL )
203
+ if hive2_compatible :
204
+ catalog = HiveCatalog (HIVE_CATALOG_NAME , uri = HIVE_METASTORE_FAKE_URL , ** {"hive.hive2-compatible" : "true" })
187
205
188
206
catalog ._client = MagicMock ()
189
207
catalog ._client .__enter__ ().create_table .return_value = None
190
208
catalog ._client .__enter__ ().get_table .return_value = hive_table
191
209
catalog ._client .__enter__ ().get_database .return_value = hive_database
192
- catalog .create_table (("default" , "table" ), schema = table_schema_simple , properties = {"owner" : "javaberg" })
210
+ catalog .create_table (("default" , "table" ), schema = table_schema_with_all_types , properties = {"owner" : "javaberg" })
193
211
194
212
called_hive_table : HiveTable = catalog ._client .__enter__ ().create_table .call_args [0 ][0 ]
195
213
# This one is generated within the function itself, so we need to extract
@@ -207,9 +225,27 @@ def test_create_table(table_schema_simple: Schema, hive_database: HiveDatabase,
207
225
retention = None ,
208
226
sd = StorageDescriptor (
209
227
cols = [
210
- FieldSchema (name = "foo" , type = "string" , comment = None ),
211
- FieldSchema (name = "bar" , type = "int" , comment = None ),
212
- FieldSchema (name = "baz" , type = "boolean" , comment = None ),
228
+ FieldSchema (name = 'boolean' , type = 'boolean' , comment = None ),
229
+ FieldSchema (name = 'integer' , type = 'int' , comment = None ),
230
+ FieldSchema (name = 'long' , type = 'bigint' , comment = None ),
231
+ FieldSchema (name = 'float' , type = 'float' , comment = None ),
232
+ FieldSchema (name = 'double' , type = 'double' , comment = None ),
233
+ FieldSchema (name = 'decimal' , type = 'decimal(32,3)' , comment = None ),
234
+ FieldSchema (name = 'date' , type = 'date' , comment = None ),
235
+ FieldSchema (name = 'time' , type = 'string' , comment = None ),
236
+ FieldSchema (name = 'timestamp' , type = 'timestamp' , comment = None ),
237
+ FieldSchema (
238
+ name = 'timestamptz' ,
239
+ type = 'timestamp' if hive2_compatible else 'timestamp with local time zone' ,
240
+ comment = None ,
241
+ ),
242
+ FieldSchema (name = 'string' , type = 'string' , comment = None ),
243
+ FieldSchema (name = 'uuid' , type = 'string' , comment = None ),
244
+ FieldSchema (name = 'fixed' , type = 'binary' , comment = None ),
245
+ FieldSchema (name = 'binary' , type = 'binary' , comment = None ),
246
+ FieldSchema (name = 'list' , type = 'array<string>' , comment = None ),
247
+ FieldSchema (name = 'map' , type = 'map<string,int>' , comment = None ),
248
+ FieldSchema (name = 'struct' , type = 'struct<inner_string:string,inner_int:int>' , comment = None ),
213
249
],
214
250
location = f"{ hive_database .locationUri } /table" ,
215
251
inputFormat = "org.apache.hadoop.mapred.FileInputFormat" ,
@@ -266,12 +302,46 @@ def test_create_table(table_schema_simple: Schema, hive_database: HiveDatabase,
266
302
location = metadata .location ,
267
303
table_uuid = metadata .table_uuid ,
268
304
last_updated_ms = metadata .last_updated_ms ,
269
- last_column_id = 3 ,
305
+ last_column_id = 22 ,
270
306
schemas = [
271
307
Schema (
272
- NestedField (field_id = 1 , name = "foo" , field_type = StringType (), required = False ),
273
- NestedField (field_id = 2 , name = "bar" , field_type = IntegerType (), required = True ),
274
- NestedField (field_id = 3 , name = "baz" , field_type = BooleanType (), required = False ),
308
+ NestedField (field_id = 1 , name = 'boolean' , field_type = BooleanType (), required = True ),
309
+ NestedField (field_id = 2 , name = 'integer' , field_type = IntegerType (), required = True ),
310
+ NestedField (field_id = 3 , name = 'long' , field_type = LongType (), required = True ),
311
+ NestedField (field_id = 4 , name = 'float' , field_type = FloatType (), required = True ),
312
+ NestedField (field_id = 5 , name = 'double' , field_type = DoubleType (), required = True ),
313
+ NestedField (field_id = 6 , name = 'decimal' , field_type = DecimalType (precision = 32 , scale = 3 ), required = True ),
314
+ NestedField (field_id = 7 , name = 'date' , field_type = DateType (), required = True ),
315
+ NestedField (field_id = 8 , name = 'time' , field_type = TimeType (), required = True ),
316
+ NestedField (field_id = 9 , name = 'timestamp' , field_type = TimestampType (), required = True ),
317
+ NestedField (field_id = 10 , name = 'timestamptz' , field_type = TimestamptzType (), required = True ),
318
+ NestedField (field_id = 11 , name = 'string' , field_type = StringType (), required = True ),
319
+ NestedField (field_id = 12 , name = 'uuid' , field_type = UUIDType (), required = True ),
320
+ NestedField (field_id = 13 , name = 'fixed' , field_type = FixedType (length = 12 ), required = True ),
321
+ NestedField (field_id = 14 , name = 'binary' , field_type = BinaryType (), required = True ),
322
+ NestedField (
323
+ field_id = 15 ,
324
+ name = 'list' ,
325
+ field_type = ListType (type = 'list' , element_id = 18 , element_type = StringType (), element_required = True ),
326
+ required = True ,
327
+ ),
328
+ NestedField (
329
+ field_id = 16 ,
330
+ name = 'map' ,
331
+ field_type = MapType (
332
+ type = 'map' , key_id = 19 , key_type = StringType (), value_id = 20 , value_type = IntegerType (), value_required = True
333
+ ),
334
+ required = True ,
335
+ ),
336
+ NestedField (
337
+ field_id = 17 ,
338
+ name = 'struct' ,
339
+ field_type = StructType (
340
+ NestedField (field_id = 21 , name = 'inner_string' , field_type = StringType (), required = False ),
341
+ NestedField (field_id = 22 , name = 'inner_int' , field_type = IntegerType (), required = True ),
342
+ ),
343
+ required = True ,
344
+ ),
275
345
schema_id = 0 ,
276
346
identifier_field_ids = [2 ],
277
347
)
0 commit comments