Skip to content

Commit 3a46d98

Browse files
sambhavclaude
andcommitted
Fix purge_deleted() to only delete tombstones on PostgreSQL backend
The purge_deleted() method's base class contract and docstring specify that it deletes "tombstones" (soft-deleted objects). The memory backend correctly operates only on tombstone storage (self._cemetery). However, the PostgreSQL backend's DELETE queries lacked a deleted = TRUE filter, meaning purge_deleted with a before timestamp or max_retained would delete both live records and tombstones. This can result in silent data loss when running the purge_deleted maintenance script. What changed: - Added AND deleted = TRUE to the before-path DELETE query - Added WHERE deleted = TRUE to the max_retained-path CTE so the ranking window only considers tombstones - Added three test cases that create live records + tombstones, call purge_deleted, and verify only tombstones are removed Test coverage: - test_purge_deleted_with_before_only_deletes_tombstones: Tests the before parameter path with mixed live/deleted objects - test_purge_deleted_with_max_retained_only_affects_tombstones: Tests the max_retained parameter with 3 live + 5 tombstones - test_purge_deleted_without_before_only_deletes_tombstones: Tests purging all tombstones while preserving live records Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent e57fe60 commit 3a46d98

File tree

2 files changed

+100
-0
lines changed

2 files changed

+100
-0
lines changed

kinto/core/storage/postgresql/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,7 @@ def purge_deleted(
607607
FROM objects
608608
WHERE {parent_id_filter}
609609
{resource_name_filter}
610+
AND deleted = TRUE
610611
{conditions_filter}
611612
"""
612613

@@ -625,6 +626,7 @@ def purge_deleted(
625626
ORDER BY last_modified DESC
626627
) AS rn
627628
FROM objects
629+
WHERE deleted = TRUE
628630
)
629631
DELETE FROM objects
630632
WHERE id IN (

kinto/core/storage/testing.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1404,6 +1404,104 @@ def test_purge_deleted_remove_with_max_count_per_collection(self):
14041404
objects = self.storage.list_all(include_deleted=True, **cid1_other_kw)
14051405
self.assertEqual(len(objects), 5)
14061406

1407+
def test_purge_deleted_with_before_only_deletes_tombstones(self):
1408+
# Create live records
1409+
live1 = self.create_object()
1410+
time.sleep(0.001)
1411+
live2 = self.create_object()
1412+
time.sleep(0.001)
1413+
1414+
# Create tombstones
1415+
deleted1 = self.create_object()
1416+
self.storage.delete(object_id=deleted1["id"], **self.storage_kw)
1417+
time.sleep(0.001)
1418+
deleted2 = self.create_object()
1419+
self.storage.delete(object_id=deleted2["id"], **self.storage_kw)
1420+
1421+
# Verify we have 2 live + 2 deleted
1422+
all_objects = self.storage.list_all(include_deleted=True, **self.storage_kw)
1423+
self.assertEqual(len(all_objects), 4)
1424+
live_objects = self.storage.list_all(include_deleted=False, **self.storage_kw)
1425+
self.assertEqual(len(live_objects), 2)
1426+
1427+
# Purge tombstones older than the most recent deletion
1428+
before_timestamp = max(deleted1["last_modified"], deleted2["last_modified"])
1429+
num_removed = self.storage.purge_deleted(before=before_timestamp, **self.storage_kw)
1430+
1431+
# Should only remove one tombstone (the older one)
1432+
self.assertEqual(num_removed, 1)
1433+
1434+
# Verify live records are still there
1435+
live_objects_after = self.storage.list_all(include_deleted=False, **self.storage_kw)
1436+
self.assertEqual(len(live_objects_after), 2)
1437+
live_ids = {obj["id"] for obj in live_objects_after}
1438+
self.assertEqual(live_ids, {live1["id"], live2["id"]})
1439+
1440+
# Verify one tombstone remains
1441+
all_objects_after = self.storage.list_all(include_deleted=True, **self.storage_kw)
1442+
self.assertEqual(len(all_objects_after), 3) # 2 live + 1 tombstone
1443+
1444+
def test_purge_deleted_with_max_retained_only_affects_tombstones(self):
1445+
# Create live records
1446+
for _ in range(3):
1447+
self.create_object()
1448+
time.sleep(0.001)
1449+
1450+
# Create tombstones
1451+
for _ in range(5):
1452+
obj = self.create_object()
1453+
self.storage.delete(object_id=obj["id"], **self.storage_kw)
1454+
time.sleep(0.001)
1455+
1456+
# Verify we have 3 live + 5 deleted
1457+
all_objects = self.storage.list_all(include_deleted=True, **self.storage_kw)
1458+
self.assertEqual(len(all_objects), 8)
1459+
live_objects = self.storage.list_all(include_deleted=False, **self.storage_kw)
1460+
self.assertEqual(len(live_objects), 3)
1461+
1462+
# Purge tombstones, keeping only 2 most recent
1463+
num_removed = self.storage.purge_deleted(max_retained=2, **self.storage_kw)
1464+
1465+
# Should remove 3 tombstones (5 total - 2 retained)
1466+
self.assertEqual(num_removed, 3)
1467+
1468+
# Verify all live records are still there
1469+
live_objects_after = self.storage.list_all(include_deleted=False, **self.storage_kw)
1470+
self.assertEqual(len(live_objects_after), 3)
1471+
1472+
# Verify only 2 tombstones remain
1473+
all_objects_after = self.storage.list_all(include_deleted=True, **self.storage_kw)
1474+
self.assertEqual(len(all_objects_after), 5) # 3 live + 2 tombstones
1475+
1476+
def test_purge_deleted_without_before_only_deletes_tombstones(self):
1477+
# Create mix of live and deleted objects
1478+
live1 = self.create_object()
1479+
time.sleep(0.001)
1480+
1481+
deleted1 = self.create_object()
1482+
self.storage.delete(object_id=deleted1["id"], **self.storage_kw)
1483+
time.sleep(0.001)
1484+
1485+
live2 = self.create_object()
1486+
time.sleep(0.001)
1487+
1488+
deleted2 = self.create_object()
1489+
self.storage.delete(object_id=deleted2["id"], **self.storage_kw)
1490+
1491+
# Verify we have 2 live + 2 deleted
1492+
all_objects = self.storage.list_all(include_deleted=True, **self.storage_kw)
1493+
self.assertEqual(len(all_objects), 4)
1494+
1495+
# Purge all tombstones
1496+
num_removed = self.storage.purge_deleted(**self.storage_kw)
1497+
self.assertEqual(num_removed, 2)
1498+
1499+
# Verify only live records remain
1500+
all_objects_after = self.storage.list_all(include_deleted=True, **self.storage_kw)
1501+
self.assertEqual(len(all_objects_after), 2)
1502+
live_ids = {obj["id"] for obj in all_objects_after}
1503+
self.assertEqual(live_ids, {live1["id"], live2["id"]})
1504+
14071505
#
14081506
# Sorting
14091507
#

0 commit comments

Comments
 (0)