1 file changed: +9 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -1207,9 +1207,15 @@ def factorize(
12071207 # https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
12081208 data = data .cast (pa .int64 ())
12091209
1210- if pa .types .is_dictionary (data .type ) and null_encoding == "encode" :
1211- data = data .cast (data .type .value_type )
1212- encoded = data .dictionary_encode (null_encoding = null_encoding )
1210+ if pa .types .is_dictionary (data .type ):
1211+ if null_encoding == "encode" :
1212+ # dictionary encode does nothing if an already encoded array is given
1213+ data = data .cast (data .type .value_type )
1214+ encoded = data .dictionary_encode (null_encoding = null_encoding )
1215+ else :
1216+ encoded = data
1217+ else :
1218+ encoded = data .dictionary_encode (null_encoding = null_encoding )
12131219 if encoded .length () == 0 :
12141220 indices = np .array ([], dtype = np .intp )
12151221 uniques = type (self )(pa .chunked_array ([], type = encoded .type .value_type ))
You can’t perform that action at this time.
0 commit comments