@@ -156,13 +156,15 @@ ColumnsDescription StoragePython::getTableStructureFromData(py::object data_sour
156156 RE2 pattern_decimal128 (R"( decimal128\((\d+),\s*(\d+)\))" );
157157 RE2 pattern_decimal256 (R"( decimal256\((\d+),\s*(\d+)\))" );
158158 RE2 pattern_date32 (R"( \bdate32\b)" );
159- RE2 pattern_date64 (R"( \bdate64\b)" );
159+ RE2 pattern_datatime64s (R"( \bdatetime64\[s\]|timestamp\[s\])" );
160+ RE2 pattern_date64 (R"( \bdate64\b|datetime64\[ms\]|timestamp\[ms\])" );
160161 RE2 pattern_time32 (R"( \btime32\b)" );
161- RE2 pattern_time64_us (R"( \btime64\[us\]\b)" );
162- RE2 pattern_time64_ns (R"( \btime64\[ns\]\b|<M8\[ns\])" );
162+ RE2 pattern_time64_us (R"( \btime64\[us\]\b|datetime64\[us\]|<M8\[us\] )" );
163+ RE2 pattern_time64_ns (R"( \btime64\[ns\]\b|datetime64\[ns\]| <M8\[ns\])" );
163164 RE2 pattern_string_binary (
164165 R"( \bstring\b|<class 'str'>|str|DataType\(string\)|DataType\(binary\)|binary\[pyarrow\]|dtype\[object_\]|
165166dtype\('S|dtype\('O|<class 'bytes'>|<class 'bytearray'>|<class 'memoryview'>|<class 'numpy.bytes_'>|<class 'numpy.str_'>|<class 'numpy.void)" );
167+ RE2 pattern_null (R"( \bnull\b)" );
166168
167169 // Iterate through each pair of name and type string in the schema
168170 for (const auto & [name, typeStr] : schema)
@@ -231,6 +233,10 @@ dtype\('S|dtype\('O|<class 'bytes'>|<class 'bytearray'>|<class 'memoryview'>|<cl
231233 {
232234 data_type = std::make_shared<DataTypeDate32>();
233235 }
236+ else if (RE2::PartialMatch (typeStr, pattern_datatime64s))
237+ {
238+ data_type = std::make_shared<DataTypeDateTime64>(0 ); // datetime64[s] corresponds to DateTime64(0)
239+ }
234240 else if (RE2::PartialMatch (typeStr, pattern_date64))
235241 {
236242 data_type = std::make_shared<DataTypeDateTime64>(3 ); // date64 corresponds to DateTime64(3)
@@ -251,9 +257,18 @@ dtype\('S|dtype\('O|<class 'bytes'>|<class 'bytearray'>|<class 'memoryview'>|<cl
251257 {
252258 data_type = std::make_shared<DataTypeString>();
253259 }
260+ else if (RE2::PartialMatch (typeStr, pattern_null))
261+ {
262+ // ClickHouse stores NULL masks in a separate file alongside the normal file with values.
263+ // Entries in the mask file let ClickHouse distinguish NULL from the default value of the
264+ // corresponding data type for each table row. That additional file cannot be produced from
265+ // the Python side, so columns whose type is null are mapped to String instead.
266+ // https://clickhouse.com/docs/en/sql-reference/data-types/nullable#storage-features
267+ data_type = std::make_shared<DataTypeString>();
268+ }
254269 else
255270 {
256- throw Exception (ErrorCodes::TYPE_MISMATCH, " Unrecognized data type: {}" , typeStr);
271+ throw Exception (ErrorCodes::TYPE_MISMATCH, " Unrecognized data type: {} on column {} " , typeStr, name );
257272 }
258273
259274 names_and_types.push_back ({name, data_type});
0 commit comments