dump/restore revival (#155)

3nids · web-flow · commit d99e127913f2 · 2026-01-21T21:05:16.000+01:00
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -79,6 +79,10 @@ jobs:
             exit 1
           fi
 
+      - name: Run dump and restore tests
+        shell: bash
+        run: ./test/test_dump_restore.sh
+
       # - name: Run migrations tests
       #   shell: bash
       #   run: ./test/test_pum.sh
diff --git a/.github/workflows/windows-tests.yml b/.github/workflows/windows-tests.yml
@@ -73,6 +73,10 @@ jobs:
       - name: Run base tests
         run: nose2 -v
 
+      - name: Run dump and restore tests
+        shell: bash
+        run: ./test/test_dump_restore.sh
+
       # - name: Run migrations tests
       #   shell: bash
       #   run: ./test/test_pum.sh
diff --git a/pum/checker.py b/pum/checker.py
@@ -1,6 +1,7 @@
 from dataclasses import dataclass, field
 from datetime import datetime
 from enum import Enum
+import re
 
 import psycopg
 
@@ -339,9 +340,23 @@ def check_constraints(self):
                     ORDER BY n.nspname, cl.relname, c.conname
                     """
 
+        # Normalization function for constraint records
+        def normalize_constraint_record(record_dict, col_names):
+            """Normalize constraint definitions in a record."""
+            normalized = record_dict.copy()
+            if "constraint_definition" in normalized and normalized["constraint_definition"]:
+                normalized["constraint_definition"] = self.__normalize_constraint_definition(
+                    normalized["constraint_definition"]
+                )
+            return normalized
+
         # Execute both queries and combine results
-        passed_keys, diffs_keys = self.__check_equals(key_query)
-        passed_checks, diffs_checks = self.__check_equals(check_query)
+        passed_keys, diffs_keys = self.__check_equals(
+            key_query, normalize_func=normalize_constraint_record
+        )
+        passed_checks, diffs_checks = self.__check_equals(
+            check_query, normalize_func=normalize_constraint_record
+        )
 
         return (passed_keys and passed_checks, diffs_keys + diffs_checks)
 
@@ -527,11 +542,57 @@ def check_rules(self):
 
         return self.__check_equals(query)
 
-    def __check_equals(self, query) -> tuple[bool, list[DifferenceItem]]:
+    @staticmethod
+    def __normalize_constraint_definition(definition: str) -> str:
+        """Normalize a constraint definition for comparison.
+
+        PostgreSQL may represent functionally equivalent constraints differently,
+        especially after dump/restore operations. This function normalizes common
+        variations to enable accurate comparison.
+
+        Args:
+            definition: The constraint definition string from pg_get_constraintdef()
+
+        Returns:
+            Normalized constraint definition
+        """
+        if not definition:
+            return definition
+
+        # Normalize different ARRAY representations:
+        # Before: (ARRAY['a'::type, 'b'::type])::type[] OR ARRAY[('a'::type)::text, ...]
+        # After: ARRAY['a'::type, 'b'::type] once redundant parens and text-like casts are removed
+
+        # Strategy: apply the regex rewrites below in sequence to strip redundant
+        # parentheses and casts, then collapse whitespace; no values are reordered
+
+        # Remove extra parentheses around cast ARRAY expressions
+        # ((ARRAY[...])::type[]) -> ARRAY[...]::type[]
+        definition = re.sub(r"\(\(ARRAY\[(.*?)\]\)::(.*?)\[\]\)", r"ARRAY[\1]::\2[]", definition)
+
+        # Also remove parentheses without cast: (ARRAY[...]) -> ARRAY[...]
+        definition = re.sub(r"\(ARRAY\[([^\]]+)\]\)", r"ARRAY[\1]", definition)
+
+        # Normalize array element casts: ('value'::type1)::type2 -> 'value'::type1
+        # Handles double-cast elements; NOTE(review): \w+ only matches a one-word type2
+        definition = re.sub(r"\('([^']+)'::([^)]+)\)::(\w+)", r"'\1'::\2", definition)
+
+        # Remove trailing array cast that may be present or absent: ::text[] or ::character varying[]
+        # This is safe because the type information is already in each array element
+        definition = re.sub(r"::(?:text|character varying)\[\]", "", definition)
+
+        # Remove extra whitespace and normalize spacing
+        definition = re.sub(r"\s+", " ", definition).strip()
+
+        return definition
+
+    def __check_equals(self, query, normalize_func=None) -> tuple[bool, list[DifferenceItem]]:
         """Check if the query results on the two databases are equals.
 
         Args:
             query: The SQL query to execute on both databases.
+            normalize_func: Optional function to normalize specific fields in records.
+                Should accept (dict, col_names) and return normalized dict.
 
         Returns:
             tuple: A tuple containing:
@@ -554,6 +615,14 @@ def __check_equals(self, query) -> tuple[bool, list[DifferenceItem]]:
         structured1 = [dict(zip(col_names, record)) for record in records1]
         structured2 = [dict(zip(col_names, record)) for record in records2]
 
+        # Apply normalization if provided
+        if normalize_func:
+            structured1 = [normalize_func(r, col_names) for r in structured1]
+            structured2 = [normalize_func(r, col_names) for r in structured2]
+            # Recreate records from normalized structured data
+            records1 = [tuple(r[col] for col in col_names) for r in structured1]
+            records2 = [tuple(r[col] for col in col_names) for r in structured2]
+
         # Create sets for comparison
         set1 = {str(tuple(r)) for r in records1}
         set2 = {str(tuple(r)) for r in records2}
diff --git a/pum/cli.py b/pum/cli.py
@@ -17,7 +17,7 @@
 from .upgrader import Upgrader
 from .parameter import ParameterType
 from .schema_migrations import SchemaMigrations
-from .dumper import DumpFormat
+from .dumper import DumpFormat, Dumper
 
 
 def setup_logging(verbosity: int = 0):
@@ -408,7 +408,6 @@ def cli() -> int:  # noqa: PLR0912
                     raise ValueError(f"Unsupported parameter type for {p[0]}: {param.type}")
             logger.debug(f"Parameters: {parameters}")
 
-        pum = Pum(args.pg_connection, config)
         exit_code = 0
 
         if args.command == "info":
@@ -465,9 +464,21 @@ def cli() -> int:  # noqa: PLR0912
                     logger.error(f"Unknown action: {args.action}")
                     exit_code = 1
         elif args.command == "dump":
-            pass
+            dumper = Dumper(args.pg_connection, args.file)
+            dumper.pg_dump(
+                exclude_schema=args.exclude_schema or [],
+                format=args.format,
+            )
+            logger.info(f"Database dumped to {args.file}")
         elif args.command == "restore":
-            pum.run_restore(args.pg_connection, args.file, args.x, args.exclude_schema)
+            dumper = Dumper(args.pg_connection, args.file)
+            try:
+                dumper.pg_restore(exclude_schema=args.exclude_schema or [])
+                logger.info(f"Database restored from {args.file}")
+            except Exception as e:
+                if not args.x:
+                    raise
+                logger.warning(f"Restore completed with errors (ignored): {e}")
         elif args.command == "baseline":
             sm = SchemaMigrations(config=config)
             if not sm.exists(connection=conn):
diff --git a/test/test_dump_restore.sh b/test/test_dump_restore.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+
+# Script to test the PUM dump and restore CLI commands
+# This script sets up a test database, dumps it, restores to another database,
+# and checks that they are identical
+
+set -e
+
+# Configuration
+PG_SERVICE1="pum_test"
+PG_SERVICE2="pum_test_2"
+TEST_DIR="test/data/checker_test"
+DUMP_FILE="/tmp/pum_test_dump.backup"
+
+echo "🔧 Testing PUM dump and restore commands..."
+echo ""
+
+# Clean databases
+echo "🧹 Cleaning test databases..."
+psql service=$PG_SERVICE1 -c "DROP SCHEMA IF EXISTS pum_test_checker CASCADE; DROP TABLE IF EXISTS public.pum_migrations;" 2>/dev/null || true
+psql service=$PG_SERVICE2 -c "DROP SCHEMA IF EXISTS pum_test_checker CASCADE; DROP TABLE IF EXISTS public.pum_migrations;" 2>/dev/null || true
+
+# Install version 1.1.0 on first database
+echo "📦 Installing version 1.1.0 on $PG_SERVICE1..."
+pum -p $PG_SERVICE1 -d $TEST_DIR install
+echo "✅ Installation complete"
+echo ""
+
+# Dump the first database
+echo "💾 Dumping $PG_SERVICE1 to $DUMP_FILE..."
+pum -p $PG_SERVICE1 -d $TEST_DIR dump -f custom -N public "$DUMP_FILE"
+echo "✅ Dump complete"
+echo ""
+
+# Check that the dump file exists and report its size
+if [ ! -f "$DUMP_FILE" ]; then
+    echo "❌ Dump file not created!"
+    exit 1
+fi
+
+FILE_SIZE=$(stat -f%z "$DUMP_FILE" 2>/dev/null || stat -c%s "$DUMP_FILE" 2>/dev/null)
+echo "📊 Dump file size: $FILE_SIZE bytes"
+echo ""
+
+# Restore to the second database
+echo "📥 Restoring dump to $PG_SERVICE2..."
+pum -p $PG_SERVICE2 -d $TEST_DIR restore -N public "$DUMP_FILE"
+echo "✅ Restore complete"
+echo ""
+
+# Run checker to verify databases are identical
+echo "🔍 Running checker to verify databases are identical..."
+set +e  # Don't exit on error for this command
+
+pum -p $PG_SERVICE1 -d $TEST_DIR check $PG_SERVICE2 -N public
+CHECKER_EXIT=$?
+
+set -e
+
+if [ $CHECKER_EXIT -eq 0 ]; then
+    echo "✅ SUCCESS! Databases are identical after dump and restore."
+    # Clean up dump file
+    rm -f "$DUMP_FILE"
+    exit 0
+elif [ $CHECKER_EXIT -eq 1 ]; then
+    echo "❌ FAIL! Differences found between databases after dump and restore."
+    # Keep dump file for inspection
+    echo "⚠️  Dump file kept at: $DUMP_FILE"
+    exit 1
+else
+    echo "❌ Checker failed with exit code $CHECKER_EXIT"
+    exit $CHECKER_EXIT
+fi