1+ -- Migration 008: Clean up invalid data before applying stricter constraints in migration 009
2+ -- This migration removes or fixes data that would violate constraints introduced in the next migration
3+
BEGIN;

-- Pre-cleanup audit: report how many rows each cleanup step of this migration
-- is expected to touch. This block only reads from servers and raises NOTICEs.
DO $$
DECLARE
    invalid_name_count   INTEGER;
    empty_version_count  INTEGER;
    invalid_status_count INTEGER;
    duplicate_count      INTEGER;
BEGIN
    -- A valid name is two segments separated by '/', each starting and ending
    -- with an alphanumeric character. A missing name is tested explicitly:
    -- NULL NOT SIMILAR TO ... evaluates to NULL, so without the IS NULL check
    -- rows that have no name at all would be silently skipped.
    SELECT COUNT(*) INTO invalid_name_count
    FROM servers
    WHERE value->>'name' IS NULL
       OR value->>'name' NOT SIMILAR TO '[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9]/[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9]';

    -- Servers whose version is missing or the empty string.
    SELECT COUNT(*) INTO empty_version_count
    FROM servers
    WHERE value->>'version' IS NULL
       OR value->>'version' = '';

    -- Servers whose status is present, non-empty, and not one of the
    -- recognized values. NULL and '' statuses are deliberately not counted.
    SELECT COUNT(*) INTO invalid_status_count
    FROM servers
    WHERE value->>'status' IS NOT NULL
      AND value->>'status' != ''
      AND value->>'status' NOT IN ('active', 'deprecated', 'deleted');

    -- Number of distinct name+version pairs that occur more than once.
    -- Counted before any deletion, so pairs that are about to be removed as
    -- invalid are included here as well.
    SELECT COUNT(*) INTO duplicate_count
    FROM (
        SELECT 1
        FROM servers
        GROUP BY value->>'name', value->>'version'
        HAVING COUNT(*) > 1
    ) AS dups;

    -- Emit one NOTICE per cleanup category that has work to do.
    IF invalid_name_count > 0 OR empty_version_count > 0 THEN
        RAISE NOTICE 'Deleting % servers with invalid names and % servers with empty versions',
            invalid_name_count, empty_version_count;
    END IF;

    IF invalid_status_count > 0 THEN
        RAISE NOTICE 'Fixing % servers with invalid status values (changing to ''active'')',
            invalid_status_count;
    END IF;

    IF duplicate_count > 0 THEN
        RAISE NOTICE 'Found % duplicate name+version combinations to clean up', duplicate_count;
    END IF;
END $$;

-- Delete servers with missing/invalid names or missing/empty versions.
-- These cannot be reasonably repaired, and would violate the constraints
-- introduced by the next migration.
-- The predicate mirrors the audit counts above, including the explicit
-- IS NULL test on name (NOT SIMILAR TO alone never matches a NULL name).
DELETE FROM servers
WHERE value->>'name' IS NULL
   OR value->>'name' NOT SIMILAR TO '[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9]/[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9]'
   OR value->>'version' IS NULL
   OR value->>'version' = '';
62+
-- Normalize unrecognized status values to 'active'.
-- Only non-empty statuses outside the allowed set are rewritten. A NULL status
-- makes the NOT IN test evaluate to NULL, and '' is listed explicitly, so rows
-- with a missing or empty status are left untouched.
UPDATE servers
SET value = jsonb_set(value, '{status}', '"active"')
WHERE value->>'status' NOT IN ('', 'active', 'deprecated', 'deleted');
70+
-- Collapse duplicate name+version pairs down to a single row.
-- A row is removed whenever another row with the same name and version has a
-- greater version_id, so the row with the highest version_id in each group
-- survives.
-- NOTE(review): this treats the highest version_id as the most recently added
-- row; that holds only if version_id values are generated in increasing
-- order -- confirm against the table definition.
DELETE FROM servers AS older
USING servers AS newer
WHERE newer.value->>'name' = older.value->>'name'
  AND newer.value->>'version' = older.value->>'version'
  AND newer.version_id > older.version_id;
80+
-- Final report: announce how many server rows survived the cleanup, then
-- commit the transaction that wraps the whole migration.
DO $$
DECLARE
    servers_left INTEGER;
BEGIN
    servers_left := (SELECT COUNT(*) FROM servers);
    RAISE NOTICE 'Data cleanup complete. % servers remaining in database.', servers_left;
END $$;

COMMIT;
0 commit comments