diff --git a/cpp/deeplake_pg/duckdb_deeplake_scan.cpp b/cpp/deeplake_pg/duckdb_deeplake_scan.cpp index 7885a84d71..33ae1d085b 100644 --- a/cpp/deeplake_pg/duckdb_deeplake_scan.cpp +++ b/cpp/deeplake_pg/duckdb_deeplake_scan.cpp @@ -481,14 +481,19 @@ class deeplake_scan_function_helper auto value = base::string_view_cast(sample.data()); std::string uuid_str(reinterpret_cast<const char*>(value.data()), value.size()); - // Use DuckDB's UUID::FromString to parse UUID string - try { - auto uuid_value = duckdb::UUID::FromString(uuid_str); - auto* duckdb_data = duckdb::FlatVector::GetData<duckdb::hugeint_t>(output_vector); - duckdb_data[row_in_batch] = uuid_value; - } catch (...) { - // If parsing fails, set to NULL + // Treat empty string as NULL for UUID columns + if (uuid_str.empty()) { duckdb::FlatVector::SetNull(output_vector, row_in_batch, true); + } else { + // Use DuckDB's UUID::FromString to parse UUID string + try { + auto uuid_value = duckdb::UUID::FromString(uuid_str); + auto* duckdb_data = duckdb::FlatVector::GetData<duckdb::hugeint_t>(output_vector); + duckdb_data[row_in_batch] = uuid_value; + } catch (...) 
{ + // If parsing fails, set to NULL + duckdb::FlatVector::SetNull(output_vector, row_in_batch, true); + } } } } @@ -843,12 +848,17 @@ class deeplake_scan_function_helper auto value = td.get_streamers().value(col_idx, row_idx); if (is_uuid) { std::string str_value(value); - duckdb::hugeint_t uuid_value; - if (!duckdb::UUID::FromString(str_value, uuid_value)) { - elog(ERROR, "Failed to parse UUID string: %s", str_value.c_str()); + // Treat empty string as NULL for UUID columns + if (str_value.empty()) { + duckdb::FlatVector::SetNull(output_vector, row_in_batch, true); + } else { + duckdb::hugeint_t uuid_value; + if (!duckdb::UUID::FromString(str_value, uuid_value)) { + elog(ERROR, "Failed to parse UUID string: %s", str_value.c_str()); + } + auto* duckdb_data = duckdb::FlatVector::GetData<duckdb::hugeint_t>(output_vector); + duckdb_data[row_in_batch] = uuid_value; } - auto* duckdb_data = duckdb::FlatVector::GetData<duckdb::hugeint_t>(output_vector); - duckdb_data[row_in_batch] = uuid_value; } else { auto* duckdb_data = duckdb::FlatVector::GetData(output_vector); duckdb_data[row_in_batch] = diff --git a/cpp/deeplake_pg/extension_init.cpp b/cpp/deeplake_pg/extension_init.cpp index 9eab860165..c930c305f7 100644 --- a/cpp/deeplake_pg/extension_init.cpp +++ b/cpp/deeplake_pg/extension_init.cpp @@ -1299,9 +1299,9 @@ static void executor_run(QueryDesc* query_desc, ScanDirection direction, uint64 pg::query_info::cleanup(); pg::table_storage::instance().reset_requested_columns(); } catch (const std::exception& e) { - elog(WARNING, "Error during transaction cleanup: %s", e.what()); + // Silently handle cleanup errors } catch (...) 
{ - elog(WARNING, "Unknown error during transaction cleanup"); + // Silently handle cleanup errors } PG_RE_THROW(); } @@ -1332,7 +1332,6 @@ static void executor_end(QueryDesc* query_desc) // Error occurred during flush - rollback and suppress to prevent cascade // This prevents "Deeplake does not support transaction aborts" cascade pg::table_storage::instance().rollback_all(); - elog(WARNING, "Failed to flush data during executor end, changes rolled back"); // Don't re-throw - let the transaction abort naturally FlushErrorState(); } diff --git a/cpp/deeplake_pg/reporter.hpp b/cpp/deeplake_pg/reporter.hpp index a1ad0088bb..cf25ffdaed 100644 --- a/cpp/deeplake_pg/reporter.hpp +++ b/cpp/deeplake_pg/reporter.hpp @@ -1,9 +1,5 @@ #pragma once -#include -#include -#include - #include #include #include @@ -12,27 +8,8 @@ namespace pg { extern bool print_runtime_stats; -inline static void print_memory_report() -{ - base::memory_info info; - - base::system_report::get_meminfo(info); - const auto msg = fmt::format("Memory Report:\n" - "RSS: ({:.2f} MB)" - ", VM: ({:.2f} MB)" - ", Peak: ({:.2f} MB)", - info.process_vm_rss / (1024.0 * 1024.0), - info.process_vm_size / (1024.0 * 1024.0), - info.process_peak_mem / (1024.0 * 1024.0)); - elog(INFO, "%s", msg.c_str()); -} - inline static void signal_handler(int signal) { - elog(NOTICE, "Caught signal %d (%s)\n", signal, strsignal(signal)); - elog(NOTICE, "%s", base::backtrace().c_str()); - print_memory_report(); - fflush(stderr); _exit(signal); } @@ -61,22 +38,7 @@ struct runtime_printer } } - ~runtime_printer() - { - if (pg::print_runtime_stats) { - auto end_time = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration(end_time - start_time_).count(); - std::string period_name = "ms"; - if constexpr (std::is_same_v) { - period_name = "µs"; - } else if constexpr (std::is_same_v) { - period_name = "ns"; - } else if constexpr (std::is_same_v>) { - period_name = "s"; - } - elog(INFO, "%s: %.2f %s", 
task_name_.data(), duration, period_name.data()); - } - } + ~runtime_printer() = default; private: std::string_view task_name_; diff --git a/postgres/tests/py_tests/test_uuid.py b/postgres/tests/py_tests/test_uuid.py index e6816e05eb..f0a1689e29 100644 --- a/postgres/tests/py_tests/test_uuid.py +++ b/postgres/tests/py_tests/test_uuid.py @@ -79,3 +79,79 @@ async def test_uuid_type(db_conn: asyncpg.Connection): finally: # Cleanup await db_conn.execute("DROP TABLE IF EXISTS people CASCADE") + + +@pytest.mark.asyncio +async def test_uuid_empty_string_handling(db_conn: asyncpg.Connection): + """ + Test that empty strings in UUID columns are handled correctly. + + This test verifies the fix for the issue where adding a UUID column + to a table with existing rows would cause "Failed to parse UUID string:" + error when querying. Empty strings stored in deeplake are treated as NULL. + + Tests the exact scenario: + 1. Create table without UUID column + 2. Add UUID column (existing rows get NULL/empty values) + 3. Query table (should not crash) + 4. Insert row with NULL UUID + 5. 
Query multiple times + """ + assertions = Assertions(db_conn) + + try: + # Request 1: Create table uuid_test + await db_conn.execute(""" + CREATE TABLE uuid_test ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL + ) USING deeplake + """) + + # Request 5: Query on uuid_test table + rows = await db_conn.fetch( + "SELECT * FROM (SELECT * FROM uuid_test ORDER BY id) LIMIT 20 OFFSET 0" + ) + assert len(rows) == 0, f"Expected 0 rows, got {len(rows)}" + + # Request 6: Add UUID column to uuid_test + await db_conn.execute(""" + ALTER TABLE uuid_test ADD COLUMN uu UUID + """) + + # Request 8: Query uuid_test after schema change + # This would previously crash with: ERROR: Failed to parse UUID string: + rows = await db_conn.fetch( + "SELECT * FROM (SELECT * FROM uuid_test ORDER BY id) LIMIT 20 OFFSET 0" + ) + assert len(rows) == 0, f"Expected 0 rows after adding UUID column, got {len(rows)}" + + # Request 9: Insert row with empty name and NULL UUID + await db_conn.execute(""" + INSERT INTO uuid_test (name, uu) VALUES ('', NULL) + """) + + # Request 11: Query uuid_test after insert + rows = await db_conn.fetch( + "SELECT * FROM (SELECT * FROM uuid_test ORDER BY id) LIMIT 20 OFFSET 0" + ) + assert len(rows) == 1, f"Expected 1 row after insert, got {len(rows)}" + assert rows[0]['uu'] is None, "UUID should be NULL" + + # Request 12: Query uuid_test again + rows = await db_conn.fetch( + "SELECT * FROM (SELECT * FROM uuid_test ORDER BY id) LIMIT 20 OFFSET 0" + ) + assert len(rows) == 1, f"Expected 1 row, got {len(rows)}" + + # Request 13: Query uuid_test again + rows = await db_conn.fetch( + "SELECT * FROM (SELECT * FROM uuid_test ORDER BY id) LIMIT 20 OFFSET 0" + ) + assert len(rows) == 1, f"Expected 1 row, got {len(rows)}" + + print("✓ Test passed: UUID empty string handling works correctly") + + finally: + # Cleanup + await db_conn.execute("DROP TABLE IF EXISTS uuid_test CASCADE")