|
18 | 18 | #include <Common/config_version.h> |
19 | 19 | #include <Common/formatReadable.h> |
20 | 20 | #include <Common/HashTable/HashSet.h> |
| 21 | +#include <DataTypes/DataTypeEnum.h> |
| 22 | +#include <Core/Block.h> |
| 23 | +#include <DataTypes/DataTypeCustom.h> |
| 24 | + |
21 | 25 |
|
22 | 26 | #if USE_SNAPPY |
23 | 27 | #include <snappy.h> |
@@ -337,6 +341,34 @@ struct ConverterString |
337 | 341 | } |
338 | 342 | }; |
339 | 343 |
|
| 344 | +template <typename T> |
| 345 | +struct ConverterEnumAsString |
| 346 | +{ |
| 347 | + using Statistics = StatisticsStringRef; |
| 348 | + |
| 349 | + explicit ConverterEnumAsString(const ColumnPtr & c, const DataTypePtr & enum_type_) |
| 350 | + : column(assert_cast<const ColumnVector<T> &>(*c)), enum_type(assert_cast<const DataTypeEnum<T> *>(enum_type_.get())) {} |
| 351 | + |
| 352 | + const ColumnVector<T> & column; |
| 353 | + const DataTypeEnum<T> * enum_type; |
| 354 | + PODArray<parquet::ByteArray> buf; |
| 355 | + |
| 356 | + const parquet::ByteArray * getBatch(size_t offset, size_t count) |
| 357 | + { |
| 358 | + buf.resize(count); |
| 359 | + |
| 360 | + const auto & data = column.getData(); |
| 361 | + |
| 362 | + for (size_t i = 0; i < count; ++i) |
| 363 | + { |
| 364 | + const T value = data[offset + i]; |
| 365 | + const StringRef s = enum_type->getNameForValue(value); |
| 366 | + buf[i] = parquet::ByteArray(static_cast<UInt32>(s.size), reinterpret_cast<const uint8_t *>(s.data)); |
| 367 | + } |
| 368 | + return buf.data(); |
| 369 | + } |
| 370 | +}; |
| 371 | + |
340 | 372 | struct ConverterFixedString |
341 | 373 | { |
342 | 374 | using Statistics = StatisticsFixedStringRef; |
@@ -991,8 +1023,24 @@ void writeColumnChunkBody( |
991 | 1023 | break; |
992 | 1024 | case TypeIndex::UInt16 : N(UInt16, Int32Type); break; |
993 | 1025 | case TypeIndex::UInt64 : N(UInt64, Int64Type); break; |
994 | | - case TypeIndex::Int8 : N(Int8, Int32Type); break; |
995 | | - case TypeIndex::Int16 : N(Int16, Int32Type); break; |
| 1026 | + case TypeIndex::Int8: |
| 1027 | + { |
| 1028 | + if (options.output_enum_as_byte_array && isEnum8(s.type)) |
| 1029 | + writeColumnImpl<parquet::ByteArrayType>( |
| 1030 | + s, options, out, ConverterEnumAsString<Int8>(s.primitive_column, s.type)); |
| 1031 | + else |
| 1032 | + N(Int8, Int32Type); |
| 1033 | + break; |
| 1034 | + } |
| 1035 | + case TypeIndex::Int16: |
| 1036 | + { |
| 1037 | + if (options.output_enum_as_byte_array && isEnum16(s.type)) |
| 1038 | + writeColumnImpl<parquet::ByteArrayType>( |
| 1039 | + s, options, out, ConverterEnumAsString<Int16>(s.primitive_column, s.type)); |
| 1040 | + else |
| 1041 | + N(Int16, Int32Type); |
| 1042 | + break; |
| 1043 | + } |
996 | 1044 | case TypeIndex::Int32 : N(Int32, Int32Type); break; |
997 | 1045 | case TypeIndex::Int64 : N(Int64, Int64Type); break; |
998 | 1046 |
|
|
0 commit comments