Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 22 additions & 12 deletions cpp/deeplake_pg/duckdb_deeplake_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,14 +481,19 @@ class deeplake_scan_function_helper
auto value = base::string_view_cast<const unsigned char>(sample.data());
std::string uuid_str(reinterpret_cast<const char*>(value.data()), value.size());

// Use DuckDB's UUID::FromString to parse UUID string
try {
auto uuid_value = duckdb::UUID::FromString(uuid_str);
auto* duckdb_data = duckdb::FlatVector::GetData<duckdb::hugeint_t>(output_vector);
duckdb_data[row_in_batch] = uuid_value;
} catch (...) {
// If parsing fails, set to NULL
// Treat empty string as NULL for UUID columns
if (uuid_str.empty()) {
duckdb::FlatVector::SetNull(output_vector, row_in_batch, true);
} else {
// Use DuckDB's UUID::FromString to parse UUID string
try {
auto uuid_value = duckdb::UUID::FromString(uuid_str);
auto* duckdb_data = duckdb::FlatVector::GetData<duckdb::hugeint_t>(output_vector);
duckdb_data[row_in_batch] = uuid_value;
} catch (...) {
// If parsing fails, set to NULL
duckdb::FlatVector::SetNull(output_vector, row_in_batch, true);
}
}
}
}
Expand Down Expand Up @@ -843,12 +848,17 @@ class deeplake_scan_function_helper
auto value = td.get_streamers().value<std::string_view>(col_idx, row_idx);
if (is_uuid) {
std::string str_value(value);
duckdb::hugeint_t uuid_value;
if (!duckdb::UUID::FromString(str_value, uuid_value)) {
elog(ERROR, "Failed to parse UUID string: %s", str_value.c_str());
// Treat empty string as NULL for UUID columns
if (str_value.empty()) {
duckdb::FlatVector::SetNull(output_vector, row_in_batch, true);
} else {
duckdb::hugeint_t uuid_value;
if (!duckdb::UUID::FromString(str_value, uuid_value)) {
elog(ERROR, "Failed to parse UUID string: %s", str_value.c_str());
}
auto* duckdb_data = duckdb::FlatVector::GetData<duckdb::hugeint_t>(output_vector);
duckdb_data[row_in_batch] = uuid_value;
}
auto* duckdb_data = duckdb::FlatVector::GetData<duckdb::hugeint_t>(output_vector);
duckdb_data[row_in_batch] = uuid_value;
} else {
auto* duckdb_data = duckdb::FlatVector::GetData<duckdb::string_t>(output_vector);
duckdb_data[row_in_batch] =
Expand Down
5 changes: 2 additions & 3 deletions cpp/deeplake_pg/extension_init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1299,9 +1299,9 @@ static void executor_run(QueryDesc* query_desc, ScanDirection direction, uint64
pg::query_info::cleanup();
pg::table_storage::instance().reset_requested_columns();
} catch (const std::exception& e) {
elog(WARNING, "Error during transaction cleanup: %s", e.what());
// Silently handle cleanup errors
Copy link

Copilot AI Jan 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While silencing errors may prevent cascading failures, completely silent error handling makes debugging difficult. Consider using a safer logging mechanism (e.g., appending to a file) or add a comment explaining what specific cascading error this prevents and how to debug issues when cleanup fails.

Copilot uses AI. Check for mistakes.
} catch (...) {
elog(WARNING, "Unknown error during transaction cleanup");
// Silently handle cleanup errors
}
PG_RE_THROW();
}
Expand Down Expand Up @@ -1332,7 +1332,6 @@ static void executor_end(QueryDesc* query_desc)
// Error occurred during flush - rollback and suppress to prevent cascade
// This prevents "Deeplake does not support transaction aborts" cascade
pg::table_storage::instance().rollback_all();
elog(WARNING, "Failed to flush data during executor end, changes rolled back");
// Don't re-throw - let the transaction abort naturally
FlushErrorState();
}
Expand Down
40 changes: 1 addition & 39 deletions cpp/deeplake_pg/reporter.hpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
#pragma once

#include <base/backtrace.hpp>
#include <base/memory_info.hpp>
#include <base/system_report.hpp>

#include <chrono>
#include <csignal>
#include <string>
Expand All @@ -12,27 +8,8 @@ namespace pg {

extern bool print_runtime_stats;

inline static void print_memory_report()
{
base::memory_info info;

base::system_report::get_meminfo(info);
const auto msg = fmt::format("Memory Report:\n"
"RSS: ({:.2f} MB)"
", VM: ({:.2f} MB)"
", Peak: ({:.2f} MB)",
info.process_vm_rss / (1024.0 * 1024.0),
info.process_vm_size / (1024.0 * 1024.0),
info.process_peak_mem / (1024.0 * 1024.0));
elog(INFO, "%s", msg.c_str());
}

// Handler installed for fatal signals.
//
// POSIX (signal-safety(7)) permits only async-signal-safe calls inside a
// signal handler. elog(), base::backtrace() and fmt-based formatting all
// allocate, so invoking them here can deadlock on the malloc lock or corrupt
// backend state when the signal interrupts an allocation. The handler
// therefore does no logging and terminates immediately via _exit() (which is
// async-signal-safe, unlike exit()), propagating the signal number as the
// process exit status. Diagnose crashes from core dumps / the postmaster's
// signal reporting rather than in-process logging.
inline static void signal_handler(int signal)
{
    _exit(signal);
}
Comment on lines 11 to 14
Copy link

Copilot AI Jan 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The signal_handler function now only calls _exit without any logging or diagnostic information. While this may be intentional to fix unsafe logging, it makes debugging signal-related crashes much more difficult. Consider adding a comment explaining why logging was removed and what alternative debugging approach should be used.

Copilot uses AI. Check for mistakes.

Expand Down Expand Up @@ -61,22 +38,7 @@ struct runtime_printer
}
}

// Logs elapsed wall-clock time for the named task when pg::print_runtime_stats
// is enabled. PERIOD selects the reporting unit; the label falls back to "ms"
// for any ratio other than micro/nano/seconds.
~runtime_printer()
{
    if (pg::print_runtime_stats) {
        const auto end_time = std::chrono::high_resolution_clock::now();
        const auto duration = std::chrono::duration<double, PERIOD>(end_time - start_time_).count();

        std::string period_name = "ms";
        if constexpr (std::is_same_v<PERIOD, std::micro>) {
            period_name = "µs";
        } else if constexpr (std::is_same_v<PERIOD, std::nano>) {
            period_name = "ns";
        } else if constexpr (std::is_same_v<PERIOD, std::ratio<1>>) {
            period_name = "s";
        }

        // task_name_ is a std::string_view and is not guaranteed to be
        // NUL-terminated, so passing task_name_.data() to the printf-style
        // elog() could read past the end of the view. Make a terminated copy.
        const std::string task_label{task_name_};
        elog(INFO, "%s: %.2f %s", task_label.c_str(), duration, period_name.data());
    }
}
~runtime_printer() = default;
Copy link

Copilot AI Jan 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The runtime_printer destructor is now defaulted, but the class still stores start_time_ member that appears unused. This creates dead code - either remove the start_time_ member variable or restore the timing functionality with a safer logging approach.

Copilot uses AI. Check for mistakes.

private:
std::string_view task_name_;
Expand Down
76 changes: 76 additions & 0 deletions postgres/tests/py_tests/test_uuid.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,79 @@ async def test_uuid_type(db_conn: asyncpg.Connection):
finally:
# Cleanup
await db_conn.execute("DROP TABLE IF EXISTS people CASCADE")


@pytest.mark.asyncio
async def test_uuid_empty_string_handling(db_conn: asyncpg.Connection):
    """
    Verify that empty strings stored in deeplake UUID columns behave as NULL.

    Regression test for the bug where adding a UUID column to a table with
    existing rows caused a "Failed to parse UUID string:" error on the next
    query, because pre-existing rows hold empty strings rather than UUIDs.

    Exercised scenario:
    1. Create a table without a UUID column.
    2. Add a UUID column (existing rows get NULL/empty values).
    3. Query the table (must not crash).
    4. Insert a row with a NULL UUID.
    5. Query several more times to confirm scans remain stable.
    """
    assertions = Assertions(db_conn)

    page_query = "SELECT * FROM (SELECT * FROM uuid_test ORDER BY id) LIMIT 20 OFFSET 0"

    try:
        # Start with a table that has no UUID column.
        await db_conn.execute("""
            CREATE TABLE uuid_test (
                id SERIAL PRIMARY KEY,
                name TEXT NOT NULL
            ) USING deeplake
        """)

        # An empty table must scan cleanly.
        fetched = await db_conn.fetch(page_query)
        assert len(fetched) == 0, f"Expected 0 rows, got {len(fetched)}"

        # Add the UUID column after the table already exists.
        await db_conn.execute("""
            ALTER TABLE uuid_test ADD COLUMN uu UUID
        """)

        # This scan previously crashed with: ERROR: Failed to parse UUID string:
        fetched = await db_conn.fetch(page_query)
        assert len(fetched) == 0, f"Expected 0 rows after adding UUID column, got {len(fetched)}"

        # Insert a row with an empty name and a NULL UUID.
        await db_conn.execute("""
            INSERT INTO uuid_test (name, uu) VALUES ('', NULL)
        """)

        fetched = await db_conn.fetch(page_query)
        assert len(fetched) == 1, f"Expected 1 row after insert, got {len(fetched)}"
        assert fetched[0]['uu'] is None, "UUID should be NULL"

        # Repeat the scan to confirm results stay stable across queries.
        for _ in range(2):
            fetched = await db_conn.fetch(page_query)
            assert len(fetched) == 1, f"Expected 1 row, got {len(fetched)}"

        print("✓ Test passed: UUID empty string handling works correctly")

    finally:
        # Cleanup
        await db_conn.execute("DROP TABLE IF EXISTS uuid_test CASCADE")