Skip to content

Commit bb73d04

Browse files
committed
refactor: logical restore file system redesign
1 parent 9228eaa commit bb73d04

File tree

17 files changed

+613
-507
lines changed

17 files changed

+613
-507
lines changed

CLAUDE.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ Self-hosted database branching service for PostgreSQL databases.
77
- API server (Go, Gin, SQLite, OpenAPI): `cmd/server/main.go`
88
- Workers (asynq): `cmd/worker/main.go`
99
- Landing page (Next.js): `site`
10-
- Admin dashboard UI (Vite, React, TypeScript, Tailwind, shadcn): `cmd/worker/main.go`
10+
- Admin dashboard UI (Vite, React, TypeScript, Tailwind, shadcn): `web/`
1111

1212
## Key details
1313
- Tasks: `Makefile`
1414
- [MUST USE] OpenAPI generated APIs: `web/src/lib/openapi.ts`, `web/src/hooks/use-api.ts`. Never use `fetch` for API requests.
15-
- Cloudformation template for VM setup: `scripts/server_setup.sh`
15+
- VM setup script: `scripts/server_setup.sh`
1616
- Models: `internal/models/models.go`
1717
- API endpoints: `internal/server/server.go`

cmd/asynqmon/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,4 @@ func main() {
2727

2828
log.Printf("Starting Asynqmon on :%s with Redis at %s", port, redisAddr)
2929
log.Fatal(http.ListenAndServe(":"+port, h))
30-
}
30+
}

cmd/worker/main.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,12 @@ func main() {
6666
// Register task handlers
6767
mux := asynq.NewServeMux()
6868

69-
// pg_dump/restore workflow tasks (local execution)
70-
mux.HandleFunc(tasks.TypePgDumpRestoreExecute, func(ctx context.Context, t *asynq.Task) error {
71-
return workers.HandlePgDumpRestoreExecute(ctx, t, asynqClient, db, cfg, log)
69+
// Logical restore workflow tasks (local execution)
70+
mux.HandleFunc(tasks.TypeTriggerLogicalRestore, func(ctx context.Context, t *asynq.Task) error {
71+
return workers.HandleTriggerLogicalRestore(ctx, t, asynqClient, db, cfg, log)
7272
})
73-
mux.HandleFunc(tasks.TypePgDumpRestoreWaitComplete, func(ctx context.Context, t *asynq.Task) error {
74-
return workers.HandlePgDumpRestoreWaitComplete(ctx, t, asynqClient, db, log)
73+
mux.HandleFunc(tasks.TypeLogicalRestoreWaitComplete, func(ctx context.Context, t *asynq.Task) error {
74+
return workers.HandleLogicalRestoreWaitComplete(ctx, t, asynqClient, db, log)
7575
})
7676

7777
// Start refresh scheduler goroutine (checks every hour for instances needing refresh)

internal/anonymize/anonymize.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ import (
1313

1414
// TablePrimaryKey holds the primary key column for a table
1515
type TablePrimaryKey struct {
16-
Table string
17-
PKColumn string // Empty string means no PK found, will use ctid
16+
Table string
17+
PKColumn string // Empty string means no PK found, will use ctid
1818
}
1919

2020
// GenerateSQL generates anonymization SQL from rules
@@ -202,9 +202,9 @@ func quoteIdentifier(name string) string {
202202

203203
// ApplyParams contains parameters needed to apply anonymization rules
204204
type ApplyParams struct {
205-
DatabaseName string
205+
DatabaseName string
206206
PostgresVersion string
207-
PostgresPort int
207+
PostgresPort int
208208
}
209209

210210
// Apply loads and applies anonymization rules to a database

internal/branches/create-branch.sh

Lines changed: 65 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -26,88 +26,90 @@ echo "BRANCH_CREATION_STARTED=true"
2626

2727
# Input parameters
2828
BRANCH_NAME="{{.BranchName}}"
29-
DATASET_NAME="{{.DatasetName}}"
29+
DATASET_NAME="{{.DatasetName}}" # e.g., tank/restore_20250915120000
30+
RESTORE_PORT="{{.RestorePort}}" # Port of the restore's PostgreSQL cluster
3031
USER="{{.User}}"
3132
PASSWORD="{{.Password}}"
3233
PG_VERSION="{{.PgVersion}}"
3334
CUSTOM_POSTGRESQL_CONF="{{.CustomPostgresqlConf}}"
3435

3536
echo "DEBUG: Parameters loaded successfully"
36-
37-
# Derive PostgreSQL port from version (hardcoded ports: PG14→5414, PG15→5415, etc.)
38-
RESTORE_PORT="54${PG_VERSION}"
39-
echo "DEBUG: PostgreSQL version ${PG_VERSION}, using port ${RESTORE_PORT}"
37+
echo "DEBUG: PostgreSQL version ${PG_VERSION}, restore port ${RESTORE_PORT}"
38+
echo "DEBUG: Cloning from restore dataset: ${DATASET_NAME}"
4039

4140
# Configuration
4241
PORT_RANGE_START=15432
4342
PORT_RANGE_END=16432
4443

4544
BRANCH_MOUNTPOINT="/opt/branchd/${BRANCH_NAME}"
46-
# Branch PostgreSQL data directory (in 'main' subdirectory after ZFS clone)
47-
BRANCH_PGDATA="${BRANCH_MOUNTPOINT}/main"
45+
# Branch PostgreSQL data directory (in 'data' subdirectory after ZFS clone from restore)
46+
BRANCH_PGDATA="${BRANCH_MOUNTPOINT}/data"
4847
PORT_ALLOCATION_LOCK="/tmp/branchd-port-allocation.lock"
4948
SERVICE_NAME="branchd-branch-${BRANCH_NAME}"
5049

51-
echo "Creating branch: ${BRANCH_NAME} from dataset: ${DATASET_NAME}"
50+
echo "Creating branch: ${BRANCH_NAME}"
51+
echo "Source restore dataset: ${DATASET_NAME}"
52+
echo "Branch mountpoint: ${BRANCH_MOUNTPOINT}"
53+
echo "Branch data directory: ${BRANCH_PGDATA}"
5254

5355
# Cleanup function to be called on exit
54-
# cleanup() {
55-
# local exit_code=$?
56-
# local signal_received=${1:-}
57-
58-
# # Clean up if script failed OR was interrupted by signal
59-
# if [ $exit_code -ne 0 ] || [ -n "$signal_received" ]; then
60-
# if [ -n "$signal_received" ]; then
61-
# echo "Script interrupted by signal $signal_received, cleaning up..."
62-
# else
63-
# echo "Script failed with exit code $exit_code, cleaning up..."
64-
# fi
65-
66-
# # Stop and remove systemd service first
67-
# # Check if service file exists (avoid pipefail issues with grep)
68-
# if systemctl list-unit-files "${SERVICE_NAME}.service" 2>/dev/null | grep -q .; then
69-
# echo "Stopping and removing systemd service..."
70-
# sudo systemctl stop "${SERVICE_NAME}" 2>/dev/null || true
71-
# sudo systemctl disable "${SERVICE_NAME}" 2>/dev/null || true
72-
# sudo rm -f "/etc/systemd/system/${SERVICE_NAME}.service"
73-
# sudo systemctl daemon-reload
74-
# fi
75-
76-
# # Kill any remaining PostgreSQL processes for this branch
77-
# if [ -n "${BRANCH_NAME:-}" ]; then
78-
# echo "Killing any remaining PostgreSQL processes..."
79-
# sudo pkill -f "postgres.*${BRANCH_NAME}" 2>/dev/null || true
80-
# sleep 2 # Give processes time to exit
81-
# fi
82-
83-
# # Remove ZFS snapshot and any dependent clones recursively
84-
# if sudo zfs list -t snapshot "${DATASET_NAME}@${BRANCH_NAME}" >/dev/null 2>&1; then
85-
# echo "Removing ZFS snapshot and dependent clones..."
86-
# sudo zfs destroy -R "${DATASET_NAME}@${BRANCH_NAME}" || echo "Warning: Failed to remove snapshot and clones"
87-
88-
# # Remove leftover mountpoint directory (zfs destroy unmounts but leaves directory)
89-
# if [ -d "${BRANCH_MOUNTPOINT}" ]; then
90-
# echo "Removing mountpoint directory ${BRANCH_MOUNTPOINT}..."
91-
# sudo rmdir "${BRANCH_MOUNTPOINT}" 2>/dev/null || sudo rm -rf "${BRANCH_MOUNTPOINT}"
92-
# fi
93-
# fi
94-
95-
# # Close UFW port if it was opened
96-
# if [ -n "${AVAILABLE_PORT:-}" ]; then
97-
# echo "Closing UFW port ${AVAILABLE_PORT}..."
98-
# sudo ufw --force delete allow "${AVAILABLE_PORT}/tcp" 2>/dev/null || true
99-
# fi
100-
# fi
101-
102-
# # Always remove lock files if we created them
103-
# rm -f "${PORT_ALLOCATION_LOCK}" 2>/dev/null || true
104-
# }
56+
cleanup() {
57+
local exit_code=$?
58+
local signal_received=${1:-}
59+
60+
# Clean up if script failed OR was interrupted by signal
61+
if [ $exit_code -ne 0 ] || [ -n "$signal_received" ]; then
62+
if [ -n "$signal_received" ]; then
63+
echo "Script interrupted by signal $signal_received, cleaning up..."
64+
else
65+
echo "Script failed with exit code $exit_code, cleaning up..."
66+
fi
67+
68+
# Stop and remove systemd service first
69+
# Check if service file exists (avoid pipefail issues with grep)
70+
if systemctl list-unit-files "${SERVICE_NAME}.service" 2>/dev/null | grep -q .; then
71+
echo "Stopping and removing systemd service..."
72+
sudo systemctl stop "${SERVICE_NAME}" 2>/dev/null || true
73+
sudo systemctl disable "${SERVICE_NAME}" 2>/dev/null || true
74+
sudo rm -f "/etc/systemd/system/${SERVICE_NAME}.service"
75+
sudo systemctl daemon-reload
76+
fi
77+
78+
# Kill any remaining PostgreSQL processes for this branch
79+
if [ -n "${BRANCH_NAME:-}" ]; then
80+
echo "Killing any remaining PostgreSQL processes..."
81+
sudo pkill -f "postgres.*${BRANCH_NAME}" 2>/dev/null || true
82+
sleep 2 # Give processes time to exit
83+
fi
84+
85+
# Remove ZFS snapshot and any dependent clones recursively
86+
if sudo zfs list -t snapshot "${DATASET_NAME}@${BRANCH_NAME}" >/dev/null 2>&1; then
87+
echo "Removing ZFS snapshot and dependent clones..."
88+
sudo zfs destroy -R "${DATASET_NAME}@${BRANCH_NAME}" || echo "Warning: Failed to remove snapshot and clones"
89+
90+
# Remove leftover mountpoint directory (zfs destroy unmounts but leaves directory)
91+
if [ -d "${BRANCH_MOUNTPOINT}" ]; then
92+
echo "Removing mountpoint directory ${BRANCH_MOUNTPOINT}..."
93+
sudo rmdir "${BRANCH_MOUNTPOINT}" 2>/dev/null || sudo rm -rf "${BRANCH_MOUNTPOINT}"
94+
fi
95+
fi
96+
97+
# Close UFW port if it was opened
98+
if [ -n "${AVAILABLE_PORT:-}" ]; then
99+
echo "Closing UFW port ${AVAILABLE_PORT}..."
100+
sudo ufw --force delete allow "${AVAILABLE_PORT}/tcp" 2>/dev/null || true
101+
fi
102+
fi
103+
104+
# Always remove the port-allocation lock file (done unconditionally, on success or failure, without checking which process created it)
105+
rm -f "${PORT_ALLOCATION_LOCK}" 2>/dev/null || true
106+
}
105107

106108
# Signal handlers for graceful cleanup
107-
# cleanup_on_signal() {
108-
# cleanup "$1"
109-
# exit 1
110-
# }
109+
cleanup_on_signal() {
110+
cleanup "$1"
111+
exit 1
112+
}
111113

112114
# Find available port with locking to prevent race conditions
113115
find_available_port() {

0 commit comments

Comments
 (0)