Skip to content

Commit ffc395f

Browse files
authored
Merge pull request #330 from wudidapaopao/yuxiaozhe-dev
Implement JSON type support
2 parents 3d8be24 + 1d0c64a commit ffc395f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+2807
-344
lines changed

programs/local/CMakeLists.txt

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,50 @@
11
set (CLICKHOUSE_LOCAL_SOURCES LocalServer.cpp)
22

33
if (USE_PYTHON)
4-
set(CLICKHOUSE_LOCAL_SOURCES ${CLICKHOUSE_LOCAL_SOURCES} LocalChdb.cpp)
4+
set (CHDB_SOURCES
5+
FormatHelper.cpp
6+
ListScan.cpp
7+
LocalChdb.cpp
8+
LocalServer.cpp
9+
NumpyType.cpp
10+
PandasAnalyzer.cpp
11+
PandasDataFrame.cpp
12+
PandasScan.cpp
13+
PybindWrapper.cpp
14+
PythonConversion.cpp
15+
PythonDict.cpp
16+
PythonReader.cpp
17+
PythonTableCache.cpp
18+
PythonImportCache.cpp
19+
PythonImporter.cpp
20+
PythonSource.cpp
21+
PythonUtils.cpp
22+
StoragePython.cpp
23+
TableFunctionPython.cpp
24+
)
25+
set (CLICKHOUSE_LOCAL_SOURCES ${CLICKHOUSE_LOCAL_SOURCES} ${CHDB_SOURCES})
26+
527
# include path from shell cmd "python3 -m pybind11 --includes"
628
execute_process(COMMAND python3 -m pybind11 --includes
729
OUTPUT_VARIABLE PYBIND11_INCLUDES
830
OUTPUT_STRIP_TRAILING_WHITESPACE
931
)
1032
string(REGEX REPLACE ".*-I([^ ]+).*" "\\1" PYBIND11_INCLUDE_DIR ${PYBIND11_INCLUDES})
11-
include_directories(${PYBIND11_INCLUDE_DIR})
33+
include_directories(${PYBIND11_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
1234

1335
# include Python.h
1436
execute_process(COMMAND python3-config --includes
1537
OUTPUT_VARIABLE PYTHON_INCLUDES
1638
OUTPUT_STRIP_TRAILING_WHITESPACE
1739
)
1840
string(REGEX REPLACE ".*-I([^ ]+).*" "\\1" PYTHON_INCLUDE_DIR ${PYTHON_INCLUDES})
19-
set_source_files_properties(LocalChdb.cpp PROPERTIES INCLUDE_DIRECTORIES ${PYTHON_INCLUDE_DIR})
41+
42+
foreach(_file ${CHDB_SOURCES})
43+
set_source_files_properties(${_file}
44+
PROPERTIES INCLUDE_DIRECTORIES
45+
${PYTHON_INCLUDE_DIR}
46+
)
47+
endforeach(_file)
2048

2149
# get python version, something like python3.x
2250
execute_process(COMMAND python3 -c "import sys; print('python3.'+str(sys.version_info[1]))"
@@ -32,18 +60,27 @@ if (USE_PYTHON)
3260
# "-w -idirafter /usr/include -include x86_64-linux-gnu/${PYTHON_VERSION}/pyconfig.h"
3361
# )
3462
if (PYTHON_VERSION STREQUAL "python3.6" OR PYTHON_VERSION STREQUAL "python3.7" OR PYTHON_VERSION STREQUAL "python3.8")
35-
set_source_files_properties(LocalChdb.cpp PROPERTIES COMPILE_FLAGS
36-
"-w -idirafter /usr/include -include crypt.h"
37-
)
63+
foreach(_file ${CHDB_SOURCES})
64+
set_source_files_properties(${_file}
65+
PROPERTIES COMPILE_FLAGS
66+
"-w -idirafter /usr/include -include crypt.h"
67+
)
68+
endforeach(_file)
3869
else()
39-
set_source_files_properties(LocalChdb.cpp PROPERTIES COMPILE_FLAGS
40-
"-w"
41-
)
70+
foreach(_file ${CHDB_SOURCES})
71+
set_source_files_properties(${_file}
72+
PROPERTIES COMPILE_FLAGS
73+
"-w"
74+
)
75+
endforeach(_file)
4276
endif()
4377
elseif (OS_DARWIN)
44-
set_source_files_properties(LocalChdb.cpp PROPERTIES COMPILE_FLAGS
45-
"-w"
46-
)
78+
foreach(_file ${CHDB_SOURCES})
79+
set_source_files_properties(${_file}
80+
PROPERTIES COMPILE_FLAGS
81+
"-w"
82+
)
83+
endforeach(_file)
4784
endif()
4885
endif()
4986

programs/local/DatetimeCacheItem.h

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#pragma once
2+
3+
#include "PythonImportCacheItem.h"
4+
5+
namespace CHDB {
6+
7+
struct DatetimeDatetimeCacheItem : public PythonImportCacheItem
8+
{
9+
10+
public:
11+
DatetimeDatetimeCacheItem(PythonImportCacheItem * parent)
12+
: PythonImportCacheItem("datetime", parent), min("min", this), max("max", this), combine("combine", this)
13+
{
14+
}
15+
16+
~DatetimeDatetimeCacheItem() override = default;
17+
18+
PythonImportCacheItem min;
19+
PythonImportCacheItem max;
20+
PythonImportCacheItem combine;
21+
};
22+
23+
struct DatetimeDateCacheItem : public PythonImportCacheItem
24+
{
25+
26+
public:
27+
DatetimeDateCacheItem(PythonImportCacheItem * parent)
28+
: PythonImportCacheItem("date", parent), max("max", this), min("min", this)
29+
{
30+
}
31+
32+
~DatetimeDateCacheItem() override = default;
33+
34+
PythonImportCacheItem max;
35+
PythonImportCacheItem min;
36+
};
37+
38+
struct DatetimeCacheItem : public PythonImportCacheItem
39+
{
40+
41+
public:
42+
static constexpr const char *Name = "datetime";
43+
44+
public:
45+
DatetimeCacheItem()
46+
: PythonImportCacheItem("datetime"), date(this), time("time", this), timedelta("timedelta", this),
47+
datetime(this), timezone("timezone", this)
48+
{
49+
}
50+
51+
~DatetimeCacheItem() override = default;
52+
53+
DatetimeDateCacheItem date;
54+
PythonImportCacheItem time;
55+
PythonImportCacheItem timedelta;
56+
DatetimeDatetimeCacheItem datetime;
57+
PythonImportCacheItem timezone;
58+
};
59+
60+
} // namespace CHDB

programs/local/DecimalCacheItem.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#pragma once
2+
3+
#include "PythonImportCacheItem.h"
4+
5+
namespace CHDB {
6+
7+
struct DecimalCacheItem : public PythonImportCacheItem
8+
{
9+
public:
10+
static constexpr const char * Name = "decimal";
11+
12+
DecimalCacheItem() : PythonImportCacheItem("decimal"), Decimal("Decimal", this)
13+
{
14+
}
15+
16+
~DecimalCacheItem() override = default;
17+
18+
PythonImportCacheItem Decimal;
19+
};
20+
21+
} // namespace CHDB

programs/local/FormatHelper.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#include "FormatHelper.h"

#include <algorithm>
#include <cctype>
#include <string>
2+
3+
namespace CHDB {
4+
5+
/// Whether the most recently selected format can carry JSON values.
/// Starts as true and is only cleared by SetCurrentFormat() for formats on its deny-list.
/// Plain (non-atomic) flag: no synchronization is performed here.
static bool is_json_supported = true;

/// Records whether JSON is supported for the given format name.
///
/// The name is lower-cased before comparison, so matching is case-insensitive for ASCII.
/// JSON is reported as unsupported for "arrow", "parquet", "arrowstream", "protobuf",
/// "protobuflist" and "protobufsingle"; every other name enables it.
///
/// @param format  NUL-terminated format name, or nullptr to reset the flag to "supported".
void SetCurrentFormat(const char * format)
{
    if (!format)
    {
        is_json_supported = true;
        return;
    }

    std::string lower_format = format;
    /// std::tolower has undefined behaviour for negative `char` values (non-ASCII bytes),
    /// so every byte goes through `unsigned char` first.
    std::transform(
        lower_format.begin(),
        lower_format.end(),
        lower_format.begin(),
        [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

    static constexpr const char * formats_without_json[]
        = {"arrow", "parquet", "arrowstream", "protobuf", "protobuflist", "protobufsingle"};

    const bool is_denied = std::any_of(
        std::begin(formats_without_json),
        std::end(formats_without_json),
        [&lower_format](const char * name) { return lower_format == name; });

    is_json_supported = !is_denied;
}

/// Returns the flag last set by SetCurrentFormat() (true if it was never called).
bool isJSONSupported()
{
    return is_json_supported;
}
27+
28+
} // namespace CHDB

programs/local/FormatHelper.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#pragma once
2+
3+
#include <base/types.h>
4+
5+
namespace CHDB {
6+
7+
void SetCurrentFormat(const char * format);
8+
9+
bool isJSONSupported();
10+
11+
} // namespace CHDB

programs/local/ListScan.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#include "ListScan.h"
2+
#include "PythonConversion.h"
3+
4+
#include <Columns/ColumnObject.h>
5+
#include <DataTypes/IDataType.h>
6+
#include <DataTypes/Serializations/SerializationJSON.h>
7+
#include <IO/WriteHelpers.h>
8+
9+
namespace DB
10+
{
11+
12+
namespace ErrorCodes
13+
{
14+
extern const int BAD_ARGUMENTS;
15+
extern const int PY_EXCEPTION_OCCURED;
16+
}
17+
18+
}
19+
20+
using namespace DB;
21+
22+
namespace CHDB {
23+
24+
/// Builds a fresh column of `col_wrap.dest_type` from rows [cursor, cursor + count)
/// of the Python object held in `col_wrap.data`.
///
/// The wrapper is validated with innerCheck() first, which throws if the data is None
/// or the buffer is null. Rows are serialized with the destination type's default serialization.
///
/// @return the newly created column.
ColumnPtr ListScan::scanObject(
    const ColumnWrapper & col_wrap,
    const size_t cursor,
    const size_t count,
    const FormatSettings & format_settings)
{
    innerCheck(col_wrap);

    const auto & dest_type = col_wrap.dest_type;
    auto result_column = dest_type->createColumn();
    auto default_serialization = dest_type->getDefaultSerialization();

    innerScanObject(cursor, count, format_settings, default_serialization, col_wrap.data, result_column);
    return result_column;
}
40+
41+
void ListScan::scanObject(
42+
const size_t cursor,
43+
const size_t count,
44+
const FormatSettings & format_settings,
45+
const py::handle & obj,
46+
MutableColumnPtr & column)
47+
{
48+
auto data_type = std::make_shared<DataTypeObject>(DataTypeObject::SchemaFormat::JSON);
49+
SerializationPtr serialization = data_type->getDefaultSerialization();
50+
51+
innerScanObject(cursor, count, format_settings, serialization, obj, column);
52+
}
53+
54+
void ListScan::innerScanObject(
55+
const size_t cursor,
56+
const size_t count,
57+
const FormatSettings & format_settings,
58+
SerializationPtr & serialization,
59+
const py::handle & obj,
60+
MutableColumnPtr & column)
61+
{
62+
py::gil_scoped_acquire acquire;
63+
64+
auto list = obj.cast<py::list>();
65+
66+
for (size_t i = cursor; i < cursor + count; ++i)
67+
{
68+
auto item = list.attr("__getitem__")(i);
69+
if (!tryInsertJsonResult(item, format_settings, column, serialization))
70+
column->insertDefault();
71+
}
72+
}
73+
74+
void ListScan::innerCheck(const ColumnWrapper & col_wrap)
75+
{
76+
if (col_wrap.data.is_none())
77+
throw Exception(ErrorCodes::PY_EXCEPTION_OCCURED, "Column data is None");
78+
79+
if (!col_wrap.buf)
80+
throw Exception(ErrorCodes::PY_EXCEPTION_OCCURED, "Column buffer is null");
81+
}
82+
83+
} // namespace CHDB

programs/local/ListScan.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#pragma once
2+
3+
#include "PybindWrapper.h"
4+
#include "PythonUtils.h"
5+
6+
namespace CHDB {
7+
8+
class ListScan {
9+
public:
10+
static DB::ColumnPtr scanObject(
11+
const DB::ColumnWrapper & col_wrap,
12+
const size_t cursor,
13+
const size_t count,
14+
const DB::FormatSettings & format_settings);
15+
16+
static void scanObject(
17+
const size_t cursor,
18+
const size_t count,
19+
const DB::FormatSettings & format_settings,
20+
const py::handle & obj,
21+
DB::MutableColumnPtr & column);
22+
23+
private:
24+
static void innerCheck(const DB::ColumnWrapper & col_wrap);
25+
26+
static void innerScanObject(
27+
const size_t cursor,
28+
const size_t count,
29+
const DB::FormatSettings & format_settings,
30+
DB::SerializationPtr & serialization,
31+
const py::handle & obj,
32+
DB::MutableColumnPtr & column);
33+
};
34+
35+
} // namespace CHDB

0 commit comments

Comments
 (0)