Skip to content

Commit e2bffb3

Browse files
authored
Fix UUID empty string handling in UPDATE operations (#3106)
1 parent 63fbb67 commit e2bffb3

File tree

3 files changed

+180
-0
lines changed

3 files changed

+180
-0
lines changed

cpp/deeplake_pg/nd_utils.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,11 @@ inline Datum nd_to_datum(const nd::array& curr_val, Oid attr_typeid, int32_t typ
662662
}
663663
case UUIDOID: {
664664
auto str = base::string_view_cast(base::span_cast<const char>(curr_val.data()));
665+
// Treat empty string as NULL for UUID columns (same as duckdb_deeplake_scan.cpp)
666+
if (str.empty()) {
667+
// Return NULL datum - caller must set is_null flag appropriately
668+
return (Datum)0;
669+
}
665670
// CStringGetDatum expects a null-terminated string, hence copy the string
666671
std::string str_copy(str.data(), str.size());
667672
Datum uuid = DirectFunctionCall1(uuid_in, CStringGetDatum(str_copy.c_str()));

cpp/deeplake_pg/table_scan_impl.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,10 @@ inline std::pair<Datum, bool> table_scan::get_datum(int32_t column_number, int64
115115
}
116116
case UUIDOID: {
117117
auto str = table_data_.get_streamers().value<std::string_view>(column_number, row_number);
118+
// Treat empty string as NULL for UUID columns (same as duckdb_deeplake_scan.cpp)
119+
if (str.empty()) {
120+
return {(Datum)0, true};
121+
}
118122
std::string str_copy(str.data(), str.size());
119123
Datum uuid = DirectFunctionCall1(uuid_in, CStringGetDatum(str_copy.c_str()));
120124
return {uuid, false};
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
"""
2+
Test for UUID NULL value bug during UPDATE operations.
3+
4+
Bug: When a table has a UUID column with NULL values and another column (e.g., bool),
5+
attempting to update the non-UUID column fails with "invalid value for type uuid".
6+
7+
Reproduction steps:
8+
1. Create table with UUID column and bool column using DEEPLAKE access method
9+
2. Add rows with NULL UUID values
10+
3. Update the bool column value
11+
4. Expected: Update should succeed
12+
5. Actual: Fails with "invalid value for type uuid"
13+
"""
14+
import pytest
15+
import asyncpg
16+
from test_utils.assertions import Assertions
17+
18+
19+
@pytest.mark.asyncio
async def test_uuid_null_update_bug(db_conn: asyncpg.Connection):
    """
    Test that updating non-UUID columns works when UUID column contains NULL values.

    Regression test for the bug where updating a bool column failed with
    "invalid value for type uuid" when the same table had a UUID column
    holding NULL values.

    Args:
        db_conn: Open asyncpg connection (presumably supplied by a pytest
            fixture defined elsewhere in the test suite).
    """
    try:
        # Drop any table left behind by an interrupted earlier run so that the
        # CREATE TABLE below cannot fail because the table already exists.
        await db_conn.execute("DROP TABLE IF EXISTS test_uuid_bool CASCADE")

        # Step 1: Create table with UUID and bool columns using DEEPLAKE access method
        await db_conn.execute("""
            CREATE TABLE test_uuid_bool (
                id SERIAL PRIMARY KEY,
                uuid_col UUID,
                bool_col BOOL
            ) USING deeplake
        """)
        print("✓ Created table with UUID and bool columns")

        # Step 2: Add rows whose UUID column is NULL, with every bool state
        # (NULL, FALSE, TRUE) represented.
        await db_conn.execute("""
            INSERT INTO test_uuid_bool (uuid_col, bool_col) VALUES
                (NULL, NULL),
                (NULL, FALSE),
                (NULL, TRUE)
        """)
        print("✓ Inserted rows with NULL UUID values")

        # Verify initial state
        rows = await db_conn.fetch("SELECT * FROM test_uuid_bool ORDER BY id")
        assert len(rows) == 3, f"Expected 3 rows, got {len(rows)}"
        assert all(row['uuid_col'] is None for row in rows), "All UUID values should be NULL"
        print(f"✓ Verified initial state: {len(rows)} rows with NULL UUIDs")

        # Step 3: Attempt to update the bool column
        # This is where the bug occurs - it should succeed but fails with
        # "invalid value for type uuid"
        try:
            await db_conn.execute("""
                UPDATE test_uuid_bool
                SET bool_col = TRUE
                WHERE id = 1
            """)
            print("✓ Successfully updated bool column (bug is fixed!)")
        except Exception as e:
            print(f"✗ Failed to update bool column: {e}")
            # Chain the original exception explicitly so the database error and
            # its traceback are reported as the direct cause of the failure.
            raise AssertionError(
                f"Update failed with error: {e}. This is the bug we're testing for."
            ) from e

        # Verify the update worked
        row = await db_conn.fetchrow("SELECT * FROM test_uuid_bool WHERE id = 1")
        assert row['bool_col'] is True, "bool_col should be TRUE after update"
        assert row['uuid_col'] is None, "uuid_col should still be NULL"
        print("✓ Update succeeded and values are correct")

        # Test additional update scenarios: filter on the NULL UUID column itself
        await db_conn.execute("""
            UPDATE test_uuid_bool
            SET bool_col = FALSE
            WHERE uuid_col IS NULL
        """)
        print("✓ Batch update with NULL UUID condition succeeded")

        # Verify all bool values are now FALSE
        rows = await db_conn.fetch("SELECT * FROM test_uuid_bool ORDER BY id")
        assert all(row['bool_col'] is False for row in rows), "All bool values should be FALSE"
        print("✓ Batch update worked correctly")

    finally:
        # Cleanup
        await db_conn.execute("DROP TABLE IF EXISTS test_uuid_bool CASCADE")
91+
92+
93+
@pytest.mark.asyncio
async def test_uuid_null_multiple_columns_update(db_conn: asyncpg.Connection):
    """
    Test updating multiple different column types when UUID column has NULL values.

    Covers bool, int and text columns, updated one at a time and all together,
    on rows whose UUID column is NULL. Also checks that a row inserted with a
    real UUID still holds that UUID afterwards, so treating empty UUID storage
    as NULL does not affect valid values.

    Args:
        db_conn: Open asyncpg connection (presumably supplied by a pytest
            fixture defined elsewhere in the test suite).
    """
    # The one non-NULL UUID inserted below; kept in a name so the INSERT and the
    # final verification cannot drift apart.
    known_uuid = '550e8400-e29b-41d4-a716-446655440000'

    try:
        # Drop any table left behind by an interrupted earlier run so that the
        # CREATE TABLE below cannot fail because the table already exists.
        await db_conn.execute("DROP TABLE IF EXISTS test_uuid_multi CASCADE")

        # Create table with UUID and various other column types
        await db_conn.execute("""
            CREATE TABLE test_uuid_multi (
                id SERIAL PRIMARY KEY,
                uuid_col UUID,
                bool_col BOOL,
                int_col INT,
                text_col TEXT
            ) USING deeplake
        """)
        print("✓ Created table with UUID and multiple column types")

        # Insert rows with NULL UUID values (ids 1 and 2) and one real UUID (id 3)
        await db_conn.execute(f"""
            INSERT INTO test_uuid_multi (uuid_col, bool_col, int_col, text_col) VALUES
                (NULL, NULL, NULL, NULL),
                (NULL, TRUE, 42, 'test'),
                ('{known_uuid}'::uuid, FALSE, 100, 'with_uuid')
        """)
        print("✓ Inserted rows with mixed UUID values (NULL and non-NULL)")

        # Test updating bool column where UUID is NULL
        await db_conn.execute("""
            UPDATE test_uuid_multi
            SET bool_col = TRUE
            WHERE id = 1
        """)
        print("✓ Updated bool column for row with NULL UUID")

        # Test updating int column where UUID is NULL
        await db_conn.execute("""
            UPDATE test_uuid_multi
            SET int_col = 999
            WHERE id = 1
        """)
        print("✓ Updated int column for row with NULL UUID")

        # Test updating text column where UUID is NULL
        await db_conn.execute("""
            UPDATE test_uuid_multi
            SET text_col = 'updated'
            WHERE id = 1
        """)
        print("✓ Updated text column for row with NULL UUID")

        # Test updating multiple columns at once
        await db_conn.execute("""
            UPDATE test_uuid_multi
            SET bool_col = FALSE, int_col = 777, text_col = 'multi_update'
            WHERE id = 2
        """)
        print("✓ Updated multiple columns at once for row with NULL UUID")

        # Verify all updates worked correctly
        row = await db_conn.fetchrow("SELECT * FROM test_uuid_multi WHERE id = 1")
        assert row['bool_col'] is True, "bool_col should be TRUE"
        assert row['int_col'] == 999, "int_col should be 999"
        assert row['text_col'] == 'updated', "text_col should be 'updated'"
        assert row['uuid_col'] is None, "uuid_col should still be NULL"

        row = await db_conn.fetchrow("SELECT * FROM test_uuid_multi WHERE id = 2")
        assert row['bool_col'] is False, "bool_col should be FALSE"
        assert row['int_col'] == 777, "int_col should be 777"
        assert row['text_col'] == 'multi_update', "text_col should be 'multi_update'"
        assert row['uuid_col'] is None, "uuid_col should still be NULL"

        # The row with a real UUID was never updated: it must come back
        # unchanged, and in particular its UUID must not have become NULL.
        row = await db_conn.fetchrow("SELECT * FROM test_uuid_multi WHERE id = 3")
        assert row['uuid_col'] is not None, "non-NULL uuid_col must not become NULL"
        assert str(row['uuid_col']) == known_uuid, "uuid_col should keep its original value"
        assert row['bool_col'] is False, "bool_col of untouched row should be FALSE"
        assert row['int_col'] == 100, "int_col of untouched row should be 100"
        assert row['text_col'] == 'with_uuid', "text_col of untouched row should be 'with_uuid'"

        print("✓ All updates verified correctly")

    finally:
        # Cleanup
        await db_conn.execute("DROP TABLE IF EXISTS test_uuid_multi CASCADE")

0 commit comments

Comments
 (0)