@@ -156,3 +156,40 @@ def test_enum_dtypes(self, dtype, values):
156
156
else :
157
157
assert enmr .dtype == enmr .values ().dtype == dtype
158
158
assert_array_equal (enmr .values (), values )
159
+
160
@pytest.mark.skipif(not has_pandas(), reason="pandas not installed")
def test_from_pandas_dtype_mismatch(self):
    """Append categorical DataFrames to an enumerated attribute and read them back.

    Two cases are written against the same schema, whose ``attr1`` attribute
    is backed by the enumeration ``["a", "b", "c"]``:

    1. the pandas categories are declared explicitly and equal the
       enumeration's values;
    2. the pandas categories are inferred from the data (``["b", "c"]``)
       and therefore differ from the enumeration's values.

    In both cases the values read back must be the original labels.
    """
    import pandas as pd

    schema = tiledb.ArraySchema(
        enums=[
            tiledb.Enumeration(name="enum1", values=["a", "b", "c"], ordered=False)
        ],
        domain=tiledb.Domain(
            tiledb.Dim(name="dim1", dtype=np.int32, domain=(0, 1))
        ),
        attrs=[tiledb.Attr(name="attr1", dtype=np.int32, enum_label="enum1")],
        sparse=True,
    )

    # The pandas categories match the TileDB enumeration's values.
    df1 = pd.DataFrame(data={"dim1": [0, 1], "attr1": ["b", "c"]})
    df1["attr1"] = pd.Categorical(values=df1.attr1, categories=["a", "b", "c"])

    array_path = self.path("arr1")
    tiledb.Array.create(array_path, schema)
    tiledb.from_pandas(array_path, df1, schema=schema, mode="append")

    # Read inside a context manager so the array handle is closed
    # deterministically instead of being left to garbage collection.
    with tiledb.open(array_path) as A:
        actual_values = A.df[:]["attr1"].values.tolist()
    assert actual_values == ["b", "c"]

    # The pandas categories do not match the TileDB enumeration's values:
    # astype("category") infers the categories from the data, i.e. ["b", "c"].
    df2 = pd.DataFrame(data={"dim1": [0, 1], "attr1": ["b", "c"]})
    df2["attr1"] = df2["attr1"].astype("category")

    array_path = self.path("arr2")
    tiledb.Array.create(array_path, schema)
    tiledb.from_pandas(array_path, df2, schema=schema, mode="append")

    with tiledb.open(array_path) as A:
        actual_values = A.df[:]["attr1"].values.tolist()
    assert actual_values == ["b", "c"]
0 commit comments