Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cpp/deeplake_pg/nd_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,11 @@ inline Datum nd_to_datum(const nd::array& curr_val, Oid attr_typeid, int32_t typ
}
case UUIDOID: {
auto str = base::string_view_cast(base::span_cast<const char>(curr_val.data()));
// Treat empty string as NULL for UUID columns (same as duckdb_deeplake_scan.cpp)
if (str.empty()) {
// Return NULL datum - caller must set is_null flag appropriately
return (Datum)0;
}
Comment on lines +665 to +669
Copy link

Copilot AI Jan 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function returns a NULL datum but relies on the caller to set the is_null flag appropriately. This creates an ambiguous return value where (Datum)0 could represent either NULL or a valid zero datum. Consider returning a std::pair<Datum, bool> similar to table_scan::get_datum to explicitly communicate the null status, or document the expected caller behavior more clearly in the function signature.

Copilot uses AI. Check for mistakes.
// CStringGetDatum expects a null-terminated string, hence copy the string
std::string str_copy(str.data(), str.size());
Datum uuid = DirectFunctionCall1(uuid_in, CStringGetDatum(str_copy.c_str()));
Expand Down
4 changes: 4 additions & 0 deletions cpp/deeplake_pg/table_scan_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ inline std::pair<Datum, bool> table_scan::get_datum(int32_t column_number, int64
}
case UUIDOID: {
auto str = table_data_.get_streamers().value<std::string_view>(column_number, row_number);
// Treat empty string as NULL for UUID columns (same as duckdb_deeplake_scan.cpp)
if (str.empty()) {
return {(Datum)0, true};
}
std::string str_copy(str.data(), str.size());
Datum uuid = DirectFunctionCall1(uuid_in, CStringGetDatum(str_copy.c_str()));
return {uuid, false};
Expand Down
171 changes: 171 additions & 0 deletions postgres/tests/py_tests/test_uuid_null_update_bug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
"""
Test for UUID NULL value bug during UPDATE operations.

Bug: When a table has a UUID column with NULL values and another column (e.g., bool),
attempting to update the non-UUID column fails with "invalid value for type uuid".

Reproduction steps:
1. Create table with UUID column and bool column using DEEPLAKE access method
2. Add rows with NULL UUID values
3. Update the bool column value
4. Expected: Update should succeed
5. Actual: Fails with "invalid value for type uuid"
"""
import pytest
import asyncpg
from test_utils.assertions import Assertions


@pytest.mark.asyncio
async def test_uuid_null_update_bug(db_conn: asyncpg.Connection):
"""
Test that updating non-UUID columns works when UUID column contains NULL values.

This reproduces the bug where updating a bool column fails when there's a
UUID column with NULL values in the same table.
"""
assertions = Assertions(db_conn)

try:
# Step 1: Create table with UUID and bool columns using DEEPLAKE access method
await db_conn.execute("""
CREATE TABLE test_uuid_bool (
id SERIAL PRIMARY KEY,
uuid_col UUID,
bool_col BOOL
) USING deeplake
""")
print("✓ Created table with UUID and bool columns")

# Step 2: Add rows with NULL/empty values
await db_conn.execute("""
INSERT INTO test_uuid_bool (uuid_col, bool_col) VALUES
(NULL, NULL),
(NULL, FALSE),
(NULL, TRUE)
""")
print("✓ Inserted rows with NULL UUID values")

# Verify initial state
rows = await db_conn.fetch("SELECT * FROM test_uuid_bool ORDER BY id")
assert len(rows) == 3, f"Expected 3 rows, got {len(rows)}"
assert all(row['uuid_col'] is None for row in rows), "All UUID values should be NULL"
print(f"✓ Verified initial state: {len(rows)} rows with NULL UUIDs")

# Step 3: Attempt to update the bool column
# This is where the bug occurs - it should succeed but fails with
# "invalid value for type uuid"
try:
await db_conn.execute("""
UPDATE test_uuid_bool
SET bool_col = TRUE
WHERE id = 1
""")
print("✓ Successfully updated bool column (bug is fixed!)")
except Exception as e:
print(f"✗ Failed to update bool column: {e}")
raise AssertionError(f"Update failed with error: {e}. This is the bug we're testing for.")

# Verify the update worked
row = await db_conn.fetchrow("SELECT * FROM test_uuid_bool WHERE id = 1")
assert row['bool_col'] is True, "bool_col should be TRUE after update"
assert row['uuid_col'] is None, "uuid_col should still be NULL"
print("✓ Update succeeded and values are correct")

# Test additional update scenarios
await db_conn.execute("""
UPDATE test_uuid_bool
SET bool_col = FALSE
WHERE uuid_col IS NULL
""")
print("✓ Batch update with NULL UUID condition succeeded")

# Verify all bool values are now FALSE
rows = await db_conn.fetch("SELECT * FROM test_uuid_bool ORDER BY id")
assert all(row['bool_col'] is False for row in rows), "All bool values should be FALSE"
print("✓ Batch update worked correctly")

finally:
# Cleanup
await db_conn.execute("DROP TABLE IF EXISTS test_uuid_bool CASCADE")


@pytest.mark.asyncio
async def test_uuid_null_multiple_columns_update(db_conn: asyncpg.Connection):
"""
Test updating multiple different column types when UUID column has NULL values.

This is a more comprehensive test covering multiple data types.
"""
try:
# Create table with UUID and various other column types
await db_conn.execute("""
CREATE TABLE test_uuid_multi (
id SERIAL PRIMARY KEY,
uuid_col UUID,
bool_col BOOL,
int_col INT,
text_col TEXT
) USING deeplake
""")
print("✓ Created table with UUID and multiple column types")

# Insert rows with NULL UUID values
await db_conn.execute("""
INSERT INTO test_uuid_multi (uuid_col, bool_col, int_col, text_col) VALUES
(NULL, NULL, NULL, NULL),
(NULL, TRUE, 42, 'test'),
('550e8400-e29b-41d4-a716-446655440000'::uuid, FALSE, 100, 'with_uuid')
""")
print("✓ Inserted rows with mixed UUID values (NULL and non-NULL)")

# Test updating bool column where UUID is NULL
await db_conn.execute("""
UPDATE test_uuid_multi
SET bool_col = TRUE
WHERE id = 1
""")
print("✓ Updated bool column for row with NULL UUID")

# Test updating int column where UUID is NULL
await db_conn.execute("""
UPDATE test_uuid_multi
SET int_col = 999
WHERE id = 1
""")
print("✓ Updated int column for row with NULL UUID")

# Test updating text column where UUID is NULL
await db_conn.execute("""
UPDATE test_uuid_multi
SET text_col = 'updated'
WHERE id = 1
""")
print("✓ Updated text column for row with NULL UUID")

# Test updating multiple columns at once
await db_conn.execute("""
UPDATE test_uuid_multi
SET bool_col = FALSE, int_col = 777, text_col = 'multi_update'
WHERE id = 2
""")
print("✓ Updated multiple columns at once for row with NULL UUID")

# Verify all updates worked correctly
row = await db_conn.fetchrow("SELECT * FROM test_uuid_multi WHERE id = 1")
assert row['bool_col'] is True, "bool_col should be TRUE"
assert row['int_col'] == 999, "int_col should be 999"
assert row['text_col'] == 'updated', "text_col should be 'updated'"
assert row['uuid_col'] is None, "uuid_col should still be NULL"

row = await db_conn.fetchrow("SELECT * FROM test_uuid_multi WHERE id = 2")
assert row['bool_col'] is False, "bool_col should be FALSE"
assert row['int_col'] == 777, "int_col should be 777"
assert row['text_col'] == 'multi_update', "text_col should be 'multi_update'"
assert row['uuid_col'] is None, "uuid_col should still be NULL"

print("✓ All updates verified correctly")

finally:
# Cleanup
await db_conn.execute("DROP TABLE IF EXISTS test_uuid_multi CASCADE")