Skip to content

Commit c8c4e3c

Browse files
committed
More tidy issues fixed.
1 parent c849a3c commit c8c4e3c

12 files changed

+343
-156
lines changed

Taskfile.yml

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
version: '3'

vars:
  BUILD_MODE: '{{.BUILD_MODE | default "dev"}}'
  PG_VERSION: 18
  POSTGRES_SOURCE: "cpp/.ext/postgres-REL_{{.PG_VERSION}}_0"
  POSTGRES_INSTALL: "{{.POSTGRES_SOURCE}}/install"
  POSTGRES_DATA: "{{.POSTGRES_SOURCE}}/data"
  POSTGRES_LOG: "postgres/tests/taskfile_server.log"
  BUILD_DIR: "builds/deeplake-pg-{{.BUILD_MODE}}"
  # Fall back to "postgres" when $USER is unset (e.g. minimal CI containers).
  USER:
    sh: echo ${USER:-postgres}

tasks:
  build:
    desc: "Build the pg_deeplake PostgreSQL extension (incremental)"
    cmds:
      - python3 scripts/build_pg_ext.py

  "build:dev":
    desc: "Full build in dev mode"
    cmds:
      - python3 scripts/build_pg_ext.py dev

  "build:prod":
    desc: "Full build in production mode"
    cmds:
      - python3 scripts/build_pg_ext.py prod

  "build:debug":
    desc: "Full build in debug mode"
    cmds:
      - python3 scripts/build_pg_ext.py debug

  lint:
    desc: "Run clang-tidy linter on cpp/deeplake_pg"
    deps:
      - build
    cmds:
      - bash scripts/run_clang_tidy.sh {{.BUILD_DIR}}

  "lint:file":
    desc: "Run clang-tidy on a specific file"
    dir: cpp/deeplake_pg
    cmds:
      - |
        if [ -z "{{.FILE}}" ]; then
          echo "Error: Please specify FILE parameter"
          echo "Usage: task lint:file FILE=table_am.cpp"
          exit 1
        fi
        clang-tidy -p "../../{{.BUILD_DIR}}" "{{.FILE}}"

  test:
    desc: "Run PostgreSQL extension tests (Python pytest)"
    dir: postgres/tests/py_tests
    cmds:
      - pytest -v {{.CLI_ARGS}}

  "test:parallel":
    desc: "Run tests in parallel using pytest-xdist"
    dir: postgres/tests/py_tests
    cmds:
      - pytest -v -n auto {{.CLI_ARGS}}

  "test:coverage":
    desc: "Run tests with coverage report"
    dir: postgres/tests/py_tests
    cmds:
      - pytest -v --cov=. --cov-report=html --cov-report=term {{.CLI_ARGS}}
      - echo "Coverage report generated in postgres/tests/py_tests/htmlcov/index.html"

  "test:single":
    desc: "Run a single test file"
    dir: postgres/tests/py_tests
    cmds:
      - |
        if [ -z "{{.TEST}}" ]; then
          echo "Error: Please specify TEST parameter"
          echo "Usage: task test:single TEST=test_basic_ops.py"
          exit 1
        fi
        pytest -v "{{.TEST}}" {{.CLI_ARGS}}

  run:
    desc: "Start PostgreSQL server with pg_deeplake extension loaded"
    deps:
      - build
    cmds:
      - |
        echo "Starting PostgreSQL server with pg_deeplake extension..."

        PG_CTL="{{.POSTGRES_INSTALL}}/bin/pg_ctl"
        INITDB="{{.POSTGRES_INSTALL}}/bin/initdb"
        PSQL="{{.POSTGRES_INSTALL}}/bin/psql"
        DATA_DIR="{{.POSTGRES_DATA}}"
        INSTALL_DIR="{{.POSTGRES_INSTALL}}"
        LOG_FILE="{{.POSTGRES_LOG}}"

        # postmaster.pid is PostgreSQL's lock file; its presence means a server
        # is (or recently was) running on this data directory.
        if [ -f "$DATA_DIR/postmaster.pid" ]; then
          echo "PostgreSQL server appears to be running. Use 'task stop' to stop it first."
          exit 1
        fi

        if [ ! -d "$DATA_DIR" ]; then
          echo "Initializing PostgreSQL database cluster..."
          "$INITDB" -D "$DATA_DIR" -U "{{.USER}}"

          echo "shared_preload_libraries = 'pg_deeplake'" >> "$DATA_DIR/postgresql.conf"
          echo "max_connections = 100" >> "$DATA_DIR/postgresql.conf"
        fi

        echo "Installing pg_deeplake extension files..."
        EXT_DIR="$INSTALL_DIR/share/extension"
        LIB_DIR="$INSTALL_DIR/lib"
        # Create both copy targets; previously only $EXT_DIR was created, so the
        # .so copy below could fail silently on a fresh install tree.
        mkdir -p "$EXT_DIR" "$LIB_DIR"

        cp postgres/*.control "$EXT_DIR/" 2>/dev/null || true
        # NOTE(review): the original line was
        #   cp postgres/*.sql "$EXT_DIR/" 2>/dev/null || grep -v "utils.psql" || true
        # The stray `|| grep -v "utils.psql"` ran grep with no input on cp failure,
        # blocking on stdin. Presumably the intent was to exclude utils.psql from
        # the copy — TODO confirm; for now all .sql files are copied best-effort.
        cp postgres/*.sql "$EXT_DIR/" 2>/dev/null || true
        cp postgres/pg_deeplake_{{.PG_VERSION}}.so "$LIB_DIR/pg_deeplake.so" 2>/dev/null || true

        export LD_LIBRARY_PATH="$LIB_DIR:${LD_LIBRARY_PATH:-}"

        echo "Starting PostgreSQL server..."
        "$PG_CTL" -D "$DATA_DIR" -l "$LOG_FILE" start

        # Give the postmaster a moment to accept connections before psql runs.
        sleep 2

        echo "Creating pg_deeplake extension..."
        "$PSQL" -U "{{.USER}}" -d postgres -c "CREATE EXTENSION IF NOT EXISTS pg_deeplake;"

        echo ""
        echo "✓ PostgreSQL server is running with pg_deeplake extension loaded"
        echo ""
        echo "Connection details:"
        echo "  Host: localhost"
        echo "  Database: postgres"
        echo "  User: {{.USER}}"
        echo ""
        echo "Connect with: psql -U {{.USER}} -d postgres"
        echo "Stop server with: task stop"
        echo "View logs: tail -f {{.POSTGRES_LOG}}"

  stop:
    desc: "Stop the PostgreSQL server"
    cmds:
      - |
        PG_CTL="{{.POSTGRES_INSTALL}}/bin/pg_ctl"
        DATA_DIR="{{.POSTGRES_DATA}}"

        if [ ! -f "$DATA_DIR/postmaster.pid" ]; then
          echo "PostgreSQL server is not running"
          exit 0
        fi

        echo "Stopping PostgreSQL server..."
        # "fast" mode disconnects clients and shuts down cleanly without
        # waiting for sessions to finish.
        "$PG_CTL" -D "$DATA_DIR" stop -m fast
        echo "✓ PostgreSQL server stopped"

  status:
    desc: "Check PostgreSQL server status"
    cmds:
      - |
        PG_CTL="{{.POSTGRES_INSTALL}}/bin/pg_ctl"
        DATA_DIR="{{.POSTGRES_DATA}}"

        "$PG_CTL" -D "$DATA_DIR" status || echo "PostgreSQL server is not running"

  clean:
    desc: "Clean build artifacts and test outputs"
    cmds:
      - rm -rf builds/
      - rm -rf postgres/tests/logs/*
      - rm -rf postgres/tests/results/*
      - rm -rf postgres/tests/py_tests/htmlcov/
      - rm -rf postgres/tests/py_tests/.pytest_cache/
      - rm -rf postgres/tests/py_tests/.coverage
      - find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
      - echo "✓ Cleaned build artifacts and test outputs"

  "clean:db":
    desc: "Clean PostgreSQL data directory"
    cmds:
      - task: stop
      - rm -rf "{{.POSTGRES_DATA}}"
      - echo "✓ PostgreSQL data directory cleaned"

  dev:
    desc: "Full development workflow (build, lint, test)"
    cmds:
      - task: build
      - task: lint
      - task: test

  help:
    desc: "Show available tasks"
    cmds:
      - task --list

cpp/deeplake_pg/deeplake_executor.cpp

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ void analyze_plan(PlannedStmt* plan)
4444
if (plan->permInfos == nullptr) {
4545
return;
4646
}
47-
ListCell* lc;
47+
ListCell* lc = nullptr;
4848
bool all_tables_are_deeplake = true;
4949
foreach (lc, plan->permInfos) {
5050
RTEPermissionInfo* perminfo = (RTEPermissionInfo*)lfirst(lc);
@@ -69,7 +69,7 @@ void analyze_plan(PlannedStmt* plan)
6969
while ((col = bms_next_member(perminfo->selectedCols, col)) >= 0) {
7070
// bms_next_member returns 0-based index, but AttrNumber is 1-based
7171
// The bitmapset stores (attnum - FirstLowInvalidHeapAttributeNumber)
72-
AttrNumber attnum = col + FirstLowInvalidHeapAttributeNumber;
72+
AttrNumber attnum = static_cast<AttrNumber>(col + FirstLowInvalidHeapAttributeNumber);
7373
if (attnum <= 0) { // Only positive attribute numbers are real columns
7474
continue;
7575
}
@@ -104,7 +104,8 @@ struct DeeplakeExecutorState
104104
size_t total_rows = 0;
105105

106106
DeeplakeExecutorState()
107-
: printer("DeeplakeExecutorState")
107+
: css{}
108+
, printer("DeeplakeExecutorState")
108109
{
109110
}
110111

@@ -133,8 +134,8 @@ struct DeeplakeExecutorState
133134
// Simple executor state for COUNT(*) fast path
134135
struct CountExecutorState
135136
{
136-
CustomScanState css;
137-
int64_t count_value;
137+
CustomScanState css{};
138+
int64_t count_value = 0;
138139
bool returned = false;
139140
};
140141

@@ -205,10 +206,10 @@ Datum deeplake_sample_to_datum(
205206
try {
206207
if (!type_is_array(target_type) && nd::dtype_is_numeric(samples.dtype())) {
207208
return nd::switch_numeric_dtype(samples.dtype(), [&]<typename T>() {
208-
return pg::utils::pointer_to_datum<T>(samples.data().data(), target_type, attr_typmod, index);
209+
return pg::utils::pointer_to_datum<T>(samples.data().data(), target_type, attr_typmod, static_cast<int64_t>(index));
209210
});
210211
}
211-
nd::array sample = (samples.dimensions() == 0 ? samples : samples[index]);
212+
nd::array sample = (samples.dimensions() == 0 ? samples : samples[static_cast<int64_t>(index)]);
212213
if (sample.is_none()) {
213214
is_null = true;
214215
return (Datum)0;
@@ -340,8 +341,8 @@ void deeplake_executor_explain(CustomScanState* node, List* ancestors, ExplainSt
340341
DeeplakeExecutorState* state = (DeeplakeExecutorState*)node;
341342
ExplainPropertyText("DeepLake Query", state->query_string.c_str(), es);
342343
if (state->total_rows > 0) {
343-
ExplainPropertyInteger("Rows", nullptr, state->total_rows, es);
344-
ExplainPropertyInteger("Chunks", nullptr, state->duckdb_result.get_chunk_count(), es);
344+
ExplainPropertyInteger("Rows", nullptr, static_cast<int64>(state->total_rows), es);
345+
ExplainPropertyInteger("Chunks", nullptr, static_cast<int64>(state->duckdb_result.get_chunk_count()), es);
345346
}
346347
}
347348

@@ -429,7 +430,7 @@ void count_executor_explain(CustomScanState* node, List* ancestors, ExplainState
429430
List* create_simple_targetlist(List* original_targetlist)
430431
{
431432
List* new_targetlist = NIL;
432-
ListCell* lc;
433+
ListCell* lc = nullptr;
433434
AttrNumber attno = 1;
434435

435436
foreach (lc, original_targetlist) {

cpp/deeplake_pg/duckdb_deeplake_convert.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ nd::array to_deeplake_value(std::shared_ptr<duckdb::Vector>&& vector, size_t tot
233233
const auto dt = nd::dtype_enum<T>::value;
234234
auto data_span = std::span<const uint8_t>(
235235
reinterpret_cast<const uint8_t*>(src_data),
236-
total_rows * sizeof(T)
236+
static_cast<size_t>(total_rows) * sizeof(T)
237237
);
238238
return nd::array(nd::impl::std_span_array_nd(std::move(vector), data_span,
239239
icm::shape{static_cast<int64_t>(total_rows)}, dt));

cpp/deeplake_pg/duckdb_deeplake_scan.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ class deeplake_scan_function_helper
595595
// Copy actual data to grandchild vector
596596
auto* data_ptr = duckdb::FlatVector::GetData<T>(grandchild_vec);
597597
const T* array_data = reinterpret_cast<const T*>(sample.data().data());
598-
std::memcpy(data_ptr + grandchild_offset, array_data, nrows * ncols * sizeof(T));
598+
std::memcpy(data_ptr + grandchild_offset, array_data, nrows * static_cast<size_t>(ncols) * sizeof(T));
599599

600600
// Log first few values being written
601601
elog(LOG, " WRITE: copying %ld elements to grandchild at offset %zu",
@@ -673,7 +673,7 @@ class deeplake_scan_function_helper
673673
if constexpr (std::is_arithmetic_v<T>) {
674674
auto* list_data = duckdb::FlatVector::GetData<T>(list_entry_vec);
675675
const T* array_data = reinterpret_cast<const T*>(sample.data().data());
676-
std::memcpy(list_data + offset, array_data, array_len * sizeof(T));
676+
std::memcpy(list_data + offset, array_data, static_cast<size_t>(array_len) * sizeof(T));
677677
} else if constexpr (std::is_same_v<T, std::span<const uint8_t>>) {
678678
auto* list_data = duckdb::FlatVector::GetData<duckdb::string_t>(list_entry_vec);
679679
for (int64_t i = 0; i < array_len; ++i) {
@@ -828,7 +828,7 @@ class deeplake_scan_function_helper
828828
}
829829
return;
830830
}
831-
std::memcpy(duckdb::FlatVector::GetData<T>(output_vector), value_ptr, batch_size * sizeof(T));
831+
std::memcpy(duckdb::FlatVector::GetData<T>(output_vector), value_ptr, static_cast<size_t>(batch_size) * sizeof(T));
832832
} else if constexpr (std::is_same_v<T, nd::dict>) {
833833
auto* duckdb_data = duckdb::FlatVector::GetData<duckdb::string_t>(output_vector);
834834
for (duckdb::idx_t row_in_batch = 0; row_in_batch < batch_size; ++row_in_batch) {

cpp/deeplake_pg/duckdb_pg_convert.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -411,8 +411,8 @@ Datum duckdb_value_to_pg_datum(
411411
int total_elements = dim0 * dim1;
412412

413413
// Allocate flat arrays for all elements
414-
Datum* elem_datums = (Datum*)palloc(total_elements * sizeof(Datum));
415-
bool* elem_nulls = (bool*)palloc(total_elements * sizeof(bool));
414+
Datum* elem_datums = (Datum*)palloc(static_cast<size_t>(total_elements) * sizeof(Datum));
415+
bool* elem_nulls = (bool*)palloc(static_cast<size_t>(total_elements) * sizeof(bool));
416416

417417
// Get the data vector (grandchild of the outer list)
418418
auto& data_vec = ListVector::GetEntry(list_child);
@@ -493,8 +493,8 @@ Datum duckdb_value_to_pg_datum(
493493
}
494494

495495
// Convert each element
496-
Datum* elem_datums = (Datum*)palloc(list_size * sizeof(Datum));
497-
bool* elem_nulls = (bool*)palloc(list_size * sizeof(bool));
496+
Datum* elem_datums = (Datum*)palloc(static_cast<size_t>(list_size) * sizeof(Datum));
497+
bool* elem_nulls = (bool*)palloc(static_cast<size_t>(list_size) * sizeof(bool));
498498

499499
for (idx_t i = 0; i < list_size; i++) {
500500
elem_datums[i] = duckdb_value_to_pg_datum(list_child,

cpp/deeplake_pg/extension_init.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ void transform_jsonb_arrow_quals(Node** nodeptr)
317317

318318
if (IsA(node, BoolExpr)) {
319319
BoolExpr* b = (BoolExpr*)node;
320-
ListCell* lc;
320+
ListCell* lc = nullptr;
321321
foreach(lc, b->args) {
322322
transform_jsonb_arrow_quals((Node**)&lfirst(lc));
323323
}

cpp/deeplake_pg/hybrid_query_merge.hpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,13 @@ inline nd::array softmax(const query_core::query_result& result, double max_weig
3535
return nd::none(nd::dtype::float32, 0);
3636
}
3737

38-
const size_t num_scores = shape[0];
38+
const size_t num_scores = static_cast<size_t>(shape[0]);
3939
std::vector<float> exp_scores;
4040
exp_scores.reserve(num_scores);
4141

4242
// Compute exponentials with clamping
4343
for (size_t i = 0; i < num_scores; ++i) {
44-
float score = result.scores[i].value<float>(0);
44+
float score = result.scores[static_cast<size_t>(i)].value<float>(0);
4545
float clamped_score = std::min(score, static_cast<float>(max_weight));
4646
exp_scores.push_back(std::exp(clamped_score));
4747
}
@@ -86,10 +86,10 @@ inline query_core::query_result merge_query_results(
8686
if (!embedding_result.is_empty() && !emb_normalized.is_none()) {
8787
auto emb_shape = emb_normalized.shape();
8888
if (!emb_shape.empty()) {
89-
const size_t num_emb = emb_shape[0];
89+
const size_t num_emb = static_cast<size_t>(emb_shape[0]);
9090
for (size_t i = 0; i < num_emb; ++i) {
91-
int64_t idx = embedding_result.indices[i];
92-
float score = emb_normalized[i].value<float>(0) * embedding_weight;
91+
int64_t idx = embedding_result.indices[static_cast<size_t>(i)];
92+
float score = emb_normalized[static_cast<size_t>(i)].value<float>(0) * static_cast<float>(embedding_weight);
9393
combined_scores[idx] = score;
9494
}
9595
}
@@ -99,10 +99,10 @@ inline query_core::query_result merge_query_results(
9999
if (!text_result.is_empty() && !text_normalized.is_none()) {
100100
auto text_shape = text_normalized.shape();
101101
if (!text_shape.empty()) {
102-
const size_t num_text = text_shape[0];
102+
const size_t num_text = static_cast<size_t>(text_shape[0]);
103103
for (size_t i = 0; i < num_text; ++i) {
104-
int64_t idx = text_result.indices[i];
105-
float score = text_normalized[i].value<float>(0) * text_weight;
104+
int64_t idx = text_result.indices[static_cast<size_t>(i)];
105+
float score = text_normalized[static_cast<size_t>(i)].value<float>(0) * static_cast<float>(text_weight);
106106
combined_scores[idx] += score; // Add to existing score if index already exists
107107
}
108108
}

0 commit comments

Comments
 (0)