Skip to content

Commit 054eef7

Browse files
authored
Introduce clang tidy and fix part of the issues. (#3104)
* Introduce clang tidy and fix part of the issues. * Fix compilation. * Fixed clang-tidy install. * Fixed running clang-tidy. * More tidy issues fixed. * Fix linter script.
1 parent e2bffb3 commit 054eef7

20 files changed

+604
-198
lines changed

.github/workflows/pg-extension-build.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,22 @@ jobs:
110110
python3 scripts/build_pg_ext.py prod --deeplake-static --pg-versions "${PG_VERSION}"
111111
echo -e "${GREEN}Done.${DEFAULT}"
112112
113+
- name: install clang-tidy
114+
shell: bash
115+
run: |-
116+
echo -e "${YELLOW}Installing clang-tidy...${DEFAULT}"
117+
yum install -y clang-tools-extra
118+
clang-tidy --version
119+
echo -e "${GREEN}Clang-tidy installed.${DEFAULT}"
120+
121+
- name: run clang-tidy
122+
shell: bash
123+
continue-on-error: true
124+
run: |-
125+
echo -e "${YELLOW}Running clang-tidy static analysis...${DEFAULT}"
126+
bash scripts/run_clang_tidy.sh builds/deeplake-pg-prod
127+
echo -e "${GREEN}Clang-tidy analysis complete.${DEFAULT}"
128+
113129
- name: extract version
114130
id: extract-version
115131
shell: bash

Taskfile.yml

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
version: '3'
2+
3+
vars:
4+
BUILD_MODE: '{{.BUILD_MODE | default "dev"}}'
5+
PG_VERSION: 18
6+
POSTGRES_SOURCE: "cpp/.ext/postgres-REL_{{.PG_VERSION}}_0"
7+
POSTGRES_INSTALL: "{{.POSTGRES_SOURCE}}/install"
8+
POSTGRES_DATA: "{{.POSTGRES_SOURCE}}/data"
9+
POSTGRES_LOG: "postgres/tests/taskfile_server.log"
10+
BUILD_DIR: "builds/deeplake-pg-{{.BUILD_MODE}}"
11+
USER:
12+
sh: echo ${USER:-postgres}
13+
14+
tasks:
15+
build:
16+
desc: "Build the pg_deeplake PostgreSQL extension (incremental)"
17+
cmds:
18+
- python3 scripts/build_pg_ext.py
19+
20+
"build:dev":
21+
desc: "Full build in dev mode"
22+
cmds:
23+
- python3 scripts/build_pg_ext.py dev
24+
25+
"build:prod":
26+
desc: "Full build in production mode"
27+
cmds:
28+
- python3 scripts/build_pg_ext.py prod
29+
30+
"build:debug":
31+
desc: "Full build in debug mode"
32+
cmds:
33+
- python3 scripts/build_pg_ext.py debug
34+
35+
lint:
36+
desc: "Run clang-tidy linter on cpp/deeplake_pg"
37+
deps:
38+
- build
39+
cmds:
40+
- bash scripts/run_clang_tidy.sh {{.BUILD_DIR}}
41+
42+
"lint:file":
43+
desc: "Run clang-tidy on a specific file"
44+
dir: cpp/deeplake_pg
45+
cmds:
46+
- |
47+
if [ -z "{{.FILE}}" ]; then
48+
echo "Error: Please specify FILE parameter"
49+
echo "Usage: task lint:file FILE=table_am.cpp"
50+
exit 1
51+
fi
52+
clang-tidy -p "../../{{.BUILD_DIR}}" "{{.FILE}}"
53+
54+
test:
55+
desc: "Run PostgreSQL extension tests (Python pytest)"
56+
dir: postgres/tests/py_tests
57+
cmds:
58+
- pytest -v {{.CLI_ARGS}}
59+
60+
"test:parallel":
61+
desc: "Run tests in parallel using pytest-xdist"
62+
dir: postgres/tests/py_tests
63+
cmds:
64+
- pytest -v -n auto {{.CLI_ARGS}}
65+
66+
"test:coverage":
67+
desc: "Run tests with coverage report"
68+
dir: postgres/tests/py_tests
69+
cmds:
70+
- pytest -v --cov=. --cov-report=html --cov-report=term {{.CLI_ARGS}}
71+
- echo "Coverage report generated in postgres/tests/py_tests/htmlcov/index.html"
72+
73+
"test:single":
74+
desc: "Run a single test file"
75+
dir: postgres/tests/py_tests
76+
cmds:
77+
- |
78+
if [ -z "{{.TEST}}" ]; then
79+
echo "Error: Please specify TEST parameter"
80+
echo "Usage: task test:single TEST=test_basic_ops.py"
81+
exit 1
82+
fi
83+
pytest -v "{{.TEST}}" {{.CLI_ARGS}}
84+
85+
run:
86+
desc: "Start PostgreSQL server with pg_deeplake extension loaded"
87+
deps:
88+
- build
89+
cmds:
90+
- |
91+
echo "Starting PostgreSQL server with pg_deeplake extension..."
92+
93+
PG_CTL="{{.POSTGRES_INSTALL}}/bin/pg_ctl"
94+
INITDB="{{.POSTGRES_INSTALL}}/bin/initdb"
95+
PSQL="{{.POSTGRES_INSTALL}}/bin/psql"
96+
DATA_DIR="{{.POSTGRES_DATA}}"
97+
INSTALL_DIR="{{.POSTGRES_INSTALL}}"
98+
LOG_FILE="{{.POSTGRES_LOG}}"
99+
100+
if [ -f "$DATA_DIR/postmaster.pid" ]; then
101+
echo "PostgreSQL server appears to be running. Use 'task stop' to stop it first."
102+
exit 1
103+
fi
104+
105+
if [ ! -d "$DATA_DIR" ]; then
106+
echo "Initializing PostgreSQL database cluster..."
107+
"$INITDB" -D "$DATA_DIR" -U "{{.USER}}"
108+
109+
echo "shared_preload_libraries = 'pg_deeplake'" >> "$DATA_DIR/postgresql.conf"
110+
echo "max_connections = 100" >> "$DATA_DIR/postgresql.conf"
111+
fi
112+
113+
echo "Installing pg_deeplake extension files..."
114+
EXT_DIR="$INSTALL_DIR/share/extension"
115+
LIB_DIR="$INSTALL_DIR/lib"
116+
mkdir -p "$EXT_DIR"
117+
118+
cp postgres/*.control "$EXT_DIR/" 2>/dev/null || true
119+
cp postgres/*.sql "$EXT_DIR/" 2>/dev/null || grep -v "utils.psql" || true
120+
cp postgres/pg_deeplake_{{.PG_VERSION}}.so "$LIB_DIR/pg_deeplake.so" 2>/dev/null || true
121+
122+
export LD_LIBRARY_PATH="$LIB_DIR:${LD_LIBRARY_PATH:-}"
123+
124+
echo "Starting PostgreSQL server..."
125+
"$PG_CTL" -D "$DATA_DIR" -l "$LOG_FILE" start
126+
127+
sleep 2
128+
129+
echo "Creating pg_deeplake extension..."
130+
"$PSQL" -U "{{.USER}}" -d postgres -c "CREATE EXTENSION IF NOT EXISTS pg_deeplake;"
131+
132+
echo ""
133+
echo "✓ PostgreSQL server is running with pg_deeplake extension loaded"
134+
echo ""
135+
echo "Connection details:"
136+
echo " Host: localhost"
137+
echo " Database: postgres"
138+
echo " User: {{.USER}}"
139+
echo ""
140+
echo "Connect with: psql -U {{.USER}} -d postgres"
141+
echo "Stop server with: task stop"
142+
echo "View logs: tail -f {{.POSTGRES_LOG}}"
143+
144+
stop:
145+
desc: "Stop the PostgreSQL server"
146+
cmds:
147+
- |
148+
PG_CTL="{{.POSTGRES_INSTALL}}/bin/pg_ctl"
149+
DATA_DIR="{{.POSTGRES_DATA}}"
150+
151+
if [ ! -f "$DATA_DIR/postmaster.pid" ]; then
152+
echo "PostgreSQL server is not running"
153+
exit 0
154+
fi
155+
156+
echo "Stopping PostgreSQL server..."
157+
"$PG_CTL" -D "$DATA_DIR" stop -m fast
158+
echo "✓ PostgreSQL server stopped"
159+
160+
status:
161+
desc: "Check PostgreSQL server status"
162+
cmds:
163+
- |
164+
PG_CTL="{{.POSTGRES_INSTALL}}/bin/pg_ctl"
165+
DATA_DIR="{{.POSTGRES_DATA}}"
166+
167+
"$PG_CTL" -D "$DATA_DIR" status || echo "PostgreSQL server is not running"
168+
169+
clean:
170+
desc: "Clean build artifacts and test outputs"
171+
cmds:
172+
- rm -rf builds/
173+
- rm -rf postgres/tests/logs/*
174+
- rm -rf postgres/tests/results/*
175+
- rm -rf postgres/tests/py_tests/htmlcov/
176+
- rm -rf postgres/tests/py_tests/.pytest_cache/
177+
- rm -rf postgres/tests/py_tests/.coverage
178+
- find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
179+
- echo "✓ Cleaned build artifacts and test outputs"
180+
181+
"clean:db":
182+
desc: "Clean PostgreSQL data directory"
183+
cmds:
184+
- task: stop
185+
- rm -rf "{{.POSTGRES_DATA}}"
186+
- echo "✓ PostgreSQL data directory cleaned"
187+
188+
dev:
189+
desc: "Full development workflow (build, lint, test)"
190+
cmds:
191+
- task: build
192+
- task: lint
193+
- task: test
194+
195+
help:
196+
desc: "Show available tasks"
197+
cmds:
198+
- task --list

cpp/deeplake_pg/.clang-tidy

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
---
2+
# Clang-Tidy configuration for pg_deeplake
3+
Checks: >
4+
bugprone-*,
5+
cert-*,
6+
cppcoreguidelines-*,
7+
modernize-*,
8+
performance-*,
9+
readability-*,
10+
clang-analyzer-*,
11+
misc-*,
12+
-modernize-use-trailing-return-type,
13+
-readability-identifier-length,
14+
-readability-function-cognitive-complexity,
15+
-cppcoreguidelines-avoid-non-const-global-variables,
16+
-cppcoreguidelines-pro-type-vararg,
17+
-cppcoreguidelines-pro-bounds-pointer-arithmetic,
18+
-cppcoreguidelines-pro-type-reinterpret-cast,
19+
-cppcoreguidelines-pro-bounds-array-to-pointer-decay,
20+
-bugprone-easily-swappable-parameters,
21+
-cppcoreguidelines-pro-type-const-cast
22+
23+
WarningsAsErrors: ''
24+
25+
CheckOptions:
26+
- key: readability-identifier-naming.NamespaceCase
27+
value: lower_case
28+
- key: readability-identifier-naming.ClassCase
29+
value: lower_case
30+
- key: readability-identifier-naming.StructCase
31+
value: CamelCase
32+
- key: readability-identifier-naming.FunctionCase
33+
value: lower_case
34+
- key: readability-identifier-naming.VariableCase
35+
value: lower_case
36+
- key: readability-identifier-naming.ConstantCase
37+
value: UPPER_CASE
38+
- key: readability-identifier-naming.ParameterCase
39+
value: lower_case
40+
- key: readability-identifier-naming.EnumConstantCase
41+
value: CamelCase
42+
- key: cppcoreguidelines-avoid-magic-numbers.IgnoredIntegerValues
43+
value: '0;1;2;3;4;-1'
44+
- key: cppcoreguidelines-avoid-magic-numbers.IgnoredFloatingPointValues
45+
value: '0.0;1.0;-1.0'

cpp/deeplake_pg/deeplake_executor.cpp

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ void analyze_plan(PlannedStmt* plan)
4444
if (plan->permInfos == nullptr) {
4545
return;
4646
}
47-
ListCell* lc;
47+
ListCell* lc = nullptr;
4848
bool all_tables_are_deeplake = true;
4949
foreach (lc, plan->permInfos) {
5050
RTEPermissionInfo* perminfo = (RTEPermissionInfo*)lfirst(lc);
@@ -69,7 +69,7 @@ void analyze_plan(PlannedStmt* plan)
6969
while ((col = bms_next_member(perminfo->selectedCols, col)) >= 0) {
7070
// bms_next_member returns 0-based index, but AttrNumber is 1-based
7171
// The bitmapset stores (attnum - FirstLowInvalidHeapAttributeNumber)
72-
AttrNumber attnum = col + FirstLowInvalidHeapAttributeNumber;
72+
AttrNumber attnum = static_cast<AttrNumber>(col + FirstLowInvalidHeapAttributeNumber);
7373
if (attnum <= 0) { // Only positive attribute numbers are real columns
7474
continue;
7575
}
@@ -104,7 +104,8 @@ struct DeeplakeExecutorState
104104
size_t total_rows = 0;
105105

106106
DeeplakeExecutorState()
107-
: printer("DeeplakeExecutorState")
107+
: css{}
108+
, printer("DeeplakeExecutorState")
108109
{
109110
}
110111

@@ -133,8 +134,8 @@ struct DeeplakeExecutorState
133134
// Simple executor state for COUNT(*) fast path
134135
struct CountExecutorState
135136
{
136-
CustomScanState css;
137-
int64_t count_value;
137+
CustomScanState css{};
138+
int64_t count_value = 0;
138139
bool returned = false;
139140
};
140141

@@ -205,10 +206,10 @@ Datum deeplake_sample_to_datum(
205206
try {
206207
if (!type_is_array(target_type) && nd::dtype_is_numeric(samples.dtype())) {
207208
return nd::switch_numeric_dtype(samples.dtype(), [&]<typename T>() {
208-
return pg::utils::pointer_to_datum<T>(samples.data().data(), target_type, attr_typmod, index);
209+
return pg::utils::pointer_to_datum<T>(samples.data().data(), target_type, attr_typmod, static_cast<int64_t>(index));
209210
});
210211
}
211-
nd::array sample = (samples.dimensions() == 0 ? samples : samples[index]);
212+
nd::array sample = (samples.dimensions() == 0 ? samples : samples[static_cast<int64_t>(index)]);
212213
if (sample.is_none()) {
213214
is_null = true;
214215
return (Datum)0;
@@ -340,8 +341,8 @@ void deeplake_executor_explain(CustomScanState* node, List* ancestors, ExplainSt
340341
DeeplakeExecutorState* state = (DeeplakeExecutorState*)node;
341342
ExplainPropertyText("DeepLake Query", state->query_string.c_str(), es);
342343
if (state->total_rows > 0) {
343-
ExplainPropertyInteger("Rows", nullptr, state->total_rows, es);
344-
ExplainPropertyInteger("Chunks", nullptr, state->duckdb_result.get_chunk_count(), es);
344+
ExplainPropertyInteger("Rows", nullptr, static_cast<int64>(state->total_rows), es);
345+
ExplainPropertyInteger("Chunks", nullptr, static_cast<int64>(state->duckdb_result.get_chunk_count()), es);
345346
}
346347
}
347348

@@ -429,7 +430,7 @@ void count_executor_explain(CustomScanState* node, List* ancestors, ExplainState
429430
List* create_simple_targetlist(List* original_targetlist)
430431
{
431432
List* new_targetlist = NIL;
432-
ListCell* lc;
433+
ListCell* lc = nullptr;
433434
AttrNumber attno = 1;
434435

435436
foreach (lc, original_targetlist) {

cpp/deeplake_pg/duckdb_deeplake_convert.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ nd::array to_deeplake_value(std::shared_ptr<duckdb::Vector>&& vector, size_t tot
233233
const auto dt = nd::dtype_enum<T>::value;
234234
auto data_span = std::span<const uint8_t>(
235235
reinterpret_cast<const uint8_t*>(src_data),
236-
total_rows * sizeof(T)
236+
static_cast<size_t>(total_rows) * sizeof(T)
237237
);
238238
return nd::array(nd::impl::std_span_array_nd(std::move(vector), data_span,
239239
icm::shape{static_cast<int64_t>(total_rows)}, dt));

cpp/deeplake_pg/duckdb_deeplake_scan.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ class deeplake_scan_function_helper
595595
// Copy actual data to grandchild vector
596596
auto* data_ptr = duckdb::FlatVector::GetData<T>(grandchild_vec);
597597
const T* array_data = reinterpret_cast<const T*>(sample.data().data());
598-
std::memcpy(data_ptr + grandchild_offset, array_data, nrows * ncols * sizeof(T));
598+
std::memcpy(data_ptr + grandchild_offset, array_data, nrows * static_cast<size_t>(ncols) * sizeof(T));
599599

600600
// Log first few values being written
601601
elog(LOG, " WRITE: copying %ld elements to grandchild at offset %zu",
@@ -673,7 +673,7 @@ class deeplake_scan_function_helper
673673
if constexpr (std::is_arithmetic_v<T>) {
674674
auto* list_data = duckdb::FlatVector::GetData<T>(list_entry_vec);
675675
const T* array_data = reinterpret_cast<const T*>(sample.data().data());
676-
std::memcpy(list_data + offset, array_data, array_len * sizeof(T));
676+
std::memcpy(list_data + offset, array_data, static_cast<size_t>(array_len) * sizeof(T));
677677
} else if constexpr (std::is_same_v<T, std::span<const uint8_t>>) {
678678
auto* list_data = duckdb::FlatVector::GetData<duckdb::string_t>(list_entry_vec);
679679
for (int64_t i = 0; i < array_len; ++i) {
@@ -828,7 +828,7 @@ class deeplake_scan_function_helper
828828
}
829829
return;
830830
}
831-
std::memcpy(duckdb::FlatVector::GetData<T>(output_vector), value_ptr, batch_size * sizeof(T));
831+
std::memcpy(duckdb::FlatVector::GetData<T>(output_vector), value_ptr, static_cast<size_t>(batch_size) * sizeof(T));
832832
} else if constexpr (std::is_same_v<T, nd::dict>) {
833833
auto* duckdb_data = duckdb::FlatVector::GetData<duckdb::string_t>(output_vector);
834834
for (duckdb::idx_t row_in_batch = 0; row_in_batch < batch_size; ++row_in_batch) {

cpp/deeplake_pg/duckdb_pg_convert.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -411,8 +411,8 @@ Datum duckdb_value_to_pg_datum(
411411
int total_elements = dim0 * dim1;
412412

413413
// Allocate flat arrays for all elements
414-
Datum* elem_datums = (Datum*)palloc(total_elements * sizeof(Datum));
415-
bool* elem_nulls = (bool*)palloc(total_elements * sizeof(bool));
414+
Datum* elem_datums = (Datum*)palloc(static_cast<size_t>(total_elements) * sizeof(Datum));
415+
bool* elem_nulls = (bool*)palloc(static_cast<size_t>(total_elements) * sizeof(bool));
416416

417417
// Get the data vector (grandchild of the outer list)
418418
auto& data_vec = ListVector::GetEntry(list_child);
@@ -493,8 +493,8 @@ Datum duckdb_value_to_pg_datum(
493493
}
494494

495495
// Convert each element
496-
Datum* elem_datums = (Datum*)palloc(list_size * sizeof(Datum));
497-
bool* elem_nulls = (bool*)palloc(list_size * sizeof(bool));
496+
Datum* elem_datums = (Datum*)palloc(static_cast<size_t>(list_size) * sizeof(Datum));
497+
bool* elem_nulls = (bool*)palloc(static_cast<size_t>(list_size) * sizeof(bool));
498498

499499
for (idx_t i = 0; i < list_size; i++) {
500500
elem_datums[i] = duckdb_value_to_pg_datum(list_child,

0 commit comments

Comments
 (0)