Skip to content

Commit 20fbf62

Browse files
committed
Add automatic weekly VACUUM to prevent unbounded database growth
Problem:
- Database grows indefinitely despite purging old data
- Deleted rows create holes in the middle of the file
- New data is appended to the end of the file
- Incremental VACUUM can't reclaim the holes in the middle
- Result: file size never shrinks, it only grows

Solution: implement a weekly VACUUM worker that runs Sunday at 3 AM:
1. Checks whether fragmentation is > 15% (worth doing)
2. Uses VACUUM INTO to create a compacted copy (doesn't block reads)
3. Swaps the files atomically (brief interruption)
4. Keeps the old file as a dated backup

Benefits:
- Database file size stays reasonable (~2GB steady state)
- Runs during low-traffic hours (Sunday 3 AM)
- VACUUM INTO doesn't block read operations while the copy is being created
- Only brief downtime during the file swap
- Automatic backup kept
- First run tomorrow (Sunday Oct 12) to measure actual performance

Expected behavior:
- Current 9.5GB database with 16% fragmentation
- After VACUUM: ~8GB (once the backlog is cleared: ~1.5-2GB)
- Maintains a healthy database size long-term
1 parent 86e1c7d commit 20fbf62

File tree

2 files changed

+150
-0
lines changed

2 files changed

+150
-0
lines changed

archive.go

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@ package main
22

33
import (
44
"context"
5+
"database/sql"
56
"encoding/json"
67
"fmt"
8+
"os"
79
"sync"
810
"time"
911

@@ -486,3 +488,148 @@ func (app app) processPurgeOperations(ctx context.Context) error {
486488

487489
return nil
488490
}
491+
492+
// vacuumWorker runs in a separate goroutine and performs weekly database compaction.
493+
// It runs on Sunday mornings at 3 AM to minimize impact on users.
494+
//
495+
// The worker uses VACUUM INTO to create a compacted copy of the database without
496+
// blocking read operations. Once complete, it swaps the files during a brief pause.
497+
//
498+
// This prevents database file size from growing indefinitely due to fragmentation
499+
// from the continuous cycle of deleting old data and adding new data.
500+
func (app app) vacuumWorker(ctx context.Context) {
501+
logger := app.logger
502+
503+
// Recover from panics to prevent worker from dying
504+
defer func() {
505+
if r := recover(); r != nil {
506+
logger.Error("Vacuum worker panic recovered", fmt.Errorf("panic: %v", r))
507+
}
508+
}()
509+
510+
logger.Info("Vacuum worker started - will run Sundays at 3 AM")
511+
512+
// Check every hour if it's time to vacuum
513+
ticker := time.NewTicker(1 * time.Hour)
514+
defer ticker.Stop()
515+
516+
for {
517+
select {
518+
case <-ticker.C:
519+
now := time.Now()
520+
521+
// Only run on Sunday between 3 AM and 4 AM
522+
if now.Weekday() != time.Sunday {
523+
continue
524+
}
525+
if now.Hour() != 3 {
526+
continue
527+
}
528+
529+
logger.Info("Starting weekly database VACUUM")
530+
531+
// Check if vacuum is needed
532+
_, freelist, fragmentation, err := app.ndb.getDatabaseStats()
533+
if err != nil {
534+
logger.Error("Failed to get database stats", err)
535+
continue
536+
}
537+
538+
logger.Info("Database stats before VACUUM",
539+
"fragmentation_pct", fragmentation,
540+
"freelist_pages", freelist)
541+
542+
if fragmentation < 15.0 {
543+
logger.Info("Fragmentation low - skipping VACUUM",
544+
"fragmentation_pct", fragmentation)
545+
continue
546+
}
547+
548+
// Perform VACUUM INTO (creates compacted copy)
549+
err = app.performWeeklyVacuum(ctx)
550+
if err != nil {
551+
logger.Error("Weekly VACUUM failed", err)
552+
continue
553+
}
554+
555+
logger.Info("Weekly VACUUM completed successfully")
556+
557+
// Sleep for remainder of hour to avoid running multiple times
558+
time.Sleep(55 * time.Minute)
559+
560+
case <-ctx.Done():
561+
logger.Info("Vacuum worker shutting down")
562+
return
563+
}
564+
}
565+
}
566+
567+
// performWeeklyVacuum creates a compacted copy of the database and swaps it in
568+
func (app app) performWeeklyVacuum(ctx context.Context) error {
569+
logger := app.logger
570+
ndb := app.ndb
571+
572+
// Create compacted copy using VACUUM INTO
573+
newDBPath := fmt.Sprintf("%s/frontpage_new.sqlite", ndb.sqliteDataDir)
574+
oldDBPath := fmt.Sprintf("%s/frontpage.sqlite", ndb.sqliteDataDir)
575+
backupPath := fmt.Sprintf("%s/frontpage_backup_%s.sqlite",
576+
ndb.sqliteDataDir,
577+
time.Now().Format("2006_01_02"))
578+
579+
logger.Info("Creating compacted database copy", "target", newDBPath)
580+
581+
// Use VACUUM INTO to create compacted copy (doesn't block reads)
582+
_, err := ndb.db.Exec(fmt.Sprintf("VACUUM INTO '%s'", newDBPath))
583+
if err != nil {
584+
return errors.Wrap(err, "VACUUM INTO failed")
585+
}
586+
587+
logger.Info("Compacted database created successfully")
588+
logger.Info("Swapping database files - brief service interruption expected")
589+
590+
// Close current connection
591+
err = ndb.db.Close()
592+
if err != nil {
593+
return errors.Wrap(err, "failed to close database")
594+
}
595+
596+
// Rename old database as backup
597+
err = os.Rename(oldDBPath, backupPath)
598+
if err != nil {
599+
return errors.Wrap(err, "failed to backup old database")
600+
}
601+
602+
// Move new database into place
603+
err = os.Rename(newDBPath, oldDBPath)
604+
if err != nil {
605+
// Try to restore backup
606+
os.Rename(backupPath, oldDBPath)
607+
return errors.Wrap(err, "failed to move new database")
608+
}
609+
610+
// Reconnect to new database
611+
logger.Info("Reconnecting to compacted database")
612+
newDB, err := sql.Open("sqlite3_ext",
613+
fmt.Sprintf("file:%s?_journal_mode=WAL&_busy_timeout=5000", oldDBPath))
614+
if err != nil {
615+
return errors.Wrap(err, "failed to reconnect to database")
616+
}
617+
618+
ndb.db = newDB
619+
620+
// Get new stats
621+
_, freelist, fragmentation, err := ndb.getDatabaseStats()
622+
if err != nil {
623+
logger.Error("Failed to get stats after VACUUM", err)
624+
} else {
625+
logger.Info("Database stats after VACUUM",
626+
"fragmentation_pct", fragmentation,
627+
"freelist_pages", freelist)
628+
}
629+
630+
logger.Info("Database swap complete",
631+
"old_backup", backupPath,
632+
"note", "Old database kept as backup for 24 hours")
633+
634+
return nil
635+
}

main.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ func main() {
3030
// Start the purge worker (runs during idle time between crawls)
3131
go app.purgeWorker(ctx)
3232

33+
// Start the vacuum worker (runs Sunday early morning)
34+
go app.vacuumWorker(ctx)
35+
3336
// Listen for a soft kill signal (INT, TERM, HUP)
3437
c := make(chan os.Signal, 1)
3538
signal.Notify(c, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP)

0 commit comments

Comments
 (0)