Skip to content

Commit 7929581

Browse files
committed
feat: Implement automatic garbage collection when GC threshold exceeded
Signed-off-by: Lorenzo Buitizon <the.keikun@gmail.com>
1 parent 7852795 commit 7929581

6 files changed

Lines changed: 516 additions & 0 deletions

File tree

cmd/init.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,8 @@ func runInit(cmd *cobra.Command) error {
332332
dedupMaxChunkSize,
333333
dedupGCThreshold,
334334
true, // index enabled
335+
// Default automatic GC settings
336+
true, "1h", 1000, true, ".sietch/logs/gc.log", 5000, "",
335337
)
336338

337339
// Initialize RSA config if not present

cmd/root.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,17 @@ Copyright © 2025 SubstantialCattle5, nilaysharan.com
44
package cmd
55

66
import (
7+
"context"
78
"fmt"
89
"os"
10+
"os/signal"
11+
"syscall"
12+
"time"
913

1014
"github.com/spf13/cobra"
15+
16+
"github.com/substantialcattle5/sietch/internal/fs"
17+
"github.com/substantialcattle5/sietch/internal/gc"
1118
)
1219

1320
// rootCmd represents the base command when called without any subcommands
@@ -24,12 +31,66 @@ encrypted data across machines, even with limited connectivity.`,
2431
// Execute adds all child commands to the root command and sets flags appropriately.
2532
// This is called by main.main(). It only needs to happen once to the rootCmd.
2633
func Execute() {
34+
// Create context that can be cancelled
35+
ctx, cancel := context.WithCancel(context.Background())
36+
defer cancel()
37+
38+
// Setup signal handling for graceful shutdown
39+
sigChan := make(chan os.Signal, 1)
40+
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
41+
42+
// Start GC manager in background if in a vault
43+
var gcCancel context.CancelFunc
44+
if shouldStartGC() {
45+
gcCtx, gcCancelFunc := context.WithCancel(ctx)
46+
gcCancel = gcCancelFunc
47+
48+
go func() {
49+
if err := gc.StartGlobalGC(gcCtx); err != nil {
50+
// Log error but don't fail execution
51+
fmt.Printf("Warning: Failed to start automatic GC: %v\n", err)
52+
}
53+
}()
54+
}
55+
56+
// Handle shutdown
57+
go func() {
58+
sig := <-sigChan
59+
fmt.Printf("\nReceived signal %v, shutting down...\n", sig)
60+
61+
// Cancel GC manager
62+
if gcCancel != nil {
63+
gcCancel()
64+
time.Sleep(100 * time.Millisecond) // Give GC time to stop
65+
}
66+
67+
cancel()
68+
os.Exit(0)
69+
}()
70+
71+
// Execute the command
2772
if err := rootCmd.Execute(); err != nil {
2873
fmt.Println(err)
2974
os.Exit(1)
3075
}
3176
}
3277

78+
// shouldStartGC determines if GC should be started
79+
func shouldStartGC() bool {
80+
// Check if we're in a vault directory
81+
vaultRoot, err := fs.FindVaultRoot()
82+
if err != nil {
83+
return false
84+
}
85+
86+
// Check if vault is initialized
87+
if !fs.IsVaultInitialized(vaultRoot) {
88+
return false
89+
}
90+
91+
return true
92+
}
93+
3394
func init() {
3495
// Here you will define your flags and configuration settings.
3596
// Cobra supports persistent flags, which, if defined here,

cmd/scaffold.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ func runScaffold(templateName, name, path string, force bool) error {
132132
cfg.DedupMaxSize,
133133
cfg.DedupGCThreshold,
134134
cfg.DedupIndexEnabled,
135+
// Default automatic GC settings
136+
true, "1h", 1000, true, ".sietch/logs/gc.log", 5000, "",
135137
)
136138

137139
// Initialize RSA config if not present

internal/config/vault.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,20 @@ type DeduplicationConfig struct {
9292
GCThreshold int `yaml:"gc_threshold"` // Unreferenced chunk count before GC suggestion
9393
IndexEnabled bool `yaml:"index_enabled"` // Enable chunk index for faster lookups
9494
// CrossFileDedup bool `yaml:"cross_file_dedup"` // Enable deduplication across different files
95+
96+
// Automatic GC settings
97+
AutoGC AutoGCConfig `yaml:"auto_gc,omitempty"` // Automatic garbage collection settings
98+
}
99+
100+
// AutoGCConfig holds the settings that control automatic garbage collection.
type AutoGCConfig struct {
	// Enabled turns automatic GC on or off.
	Enabled bool `yaml:"enabled"`

	// CheckInterval is how often to check, written as a duration string
	// (e.g., "1h", "30m").
	CheckInterval string `yaml:"check_interval"`

	// AutoGCThreshold is the threshold for automatic GC (overrides the
	// vault default).
	AutoGCThreshold int `yaml:"auto_gc_threshold"`

	// EnableLogging enables GC logging.
	EnableLogging bool `yaml:"enable_logging"`

	// LogFile is the log file path.
	LogFile string `yaml:"log_file"`

	// AlertThreshold is the unreferenced chunk count above which an alert
	// is raised.
	AlertThreshold int `yaml:"alert_threshold"`

	// AlertWebhook is the webhook URL for alerts.
	AlertWebhook string `yaml:"alert_webhook"`
}
96110

97111
// SyncConfig contains synchronization settings
@@ -193,6 +207,8 @@ func BuildVaultConfig(
193207
keyConfig,
194208
// Default deduplication settings
195209
true, "content", "1KB", "64MB", 1000, true,
210+
// Default automatic GC settings
211+
true, "1h", 1000, true, ".sietch/logs/gc.log", 5000, "",
196212
)
197213
}
198214

@@ -207,6 +223,8 @@ func BuildVaultConfigWithDeduplication(
207223
// Deduplication parameters
208224
enableDedup bool, dedupStrategy, dedupMinSize, dedupMaxSize string,
209225
dedupGCThreshold int, dedupIndexEnabled bool,
226+
autoGCEnabled bool, autoGCCheckInterval string, autoGCThreshold int,
227+
autoGCLogging bool, autoGCLogFile string, autoGCAlertThreshold int, autoGCWebhook string,
210228
) VaultConfig {
211229
config := VaultConfig{
212230
VaultID: vaultID,
@@ -237,6 +255,15 @@ func BuildVaultConfigWithDeduplication(
237255
config.Deduplication.GCThreshold = dedupGCThreshold
238256
config.Deduplication.IndexEnabled = dedupIndexEnabled
239257

258+
// Set automatic GC configuration
259+
config.Deduplication.AutoGC.Enabled = autoGCEnabled
260+
config.Deduplication.AutoGC.CheckInterval = autoGCCheckInterval
261+
config.Deduplication.AutoGC.AutoGCThreshold = autoGCThreshold
262+
config.Deduplication.AutoGC.EnableLogging = autoGCLogging
263+
config.Deduplication.AutoGC.LogFile = autoGCLogFile
264+
config.Deduplication.AutoGC.AlertThreshold = autoGCAlertThreshold
265+
config.Deduplication.AutoGC.AlertWebhook = autoGCWebhook
266+
240267
// Set sync configuration
241268
config.Sync.Mode = syncMode
242269
config.Sync.KnownPeers = []string{} // Initialize as empty array

internal/gc/manager.go

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
package gc
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"path/filepath"
7+
"time"
8+
9+
"github.com/substantialcattle5/sietch/internal/config"
10+
"github.com/substantialcattle5/sietch/internal/deduplication"
11+
"github.com/substantialcattle5/sietch/internal/fs"
12+
)
13+
14+
// Manager manages automatic garbage collection for Sietch vaults
15+
type Manager struct {
16+
vaultRoot string
17+
monitor *Monitor
18+
config config.VaultConfig
19+
started bool
20+
}
21+
22+
// NewManager creates a new GC manager for a vault
23+
func NewManager(vaultRoot string) (*Manager, error) {
24+
// Check if vault is initialized
25+
if !fs.IsVaultInitialized(vaultRoot) {
26+
return nil, fmt.Errorf("vault not initialized")
27+
}
28+
29+
// Load vault configuration
30+
vaultConfig, err := config.LoadVaultConfig(vaultRoot)
31+
if err != nil {
32+
return nil, fmt.Errorf("failed to load vault configuration: %w", err)
33+
}
34+
35+
return &Manager{
36+
vaultRoot: vaultRoot,
37+
config: *vaultConfig,
38+
started: false,
39+
}, nil
40+
}
41+
42+
// Start begins automatic GC monitoring for the vault
43+
func (m *Manager) Start(ctx context.Context) error {
44+
if m.started {
45+
return fmt.Errorf("GC manager already started")
46+
}
47+
48+
// Check if automatic GC is enabled
49+
if !m.config.Deduplication.AutoGC.Enabled {
50+
return fmt.Errorf("automatic GC is disabled in vault configuration")
51+
}
52+
53+
// Parse check interval
54+
checkInterval, err := time.ParseDuration(m.config.Deduplication.AutoGC.CheckInterval)
55+
if err != nil {
56+
return fmt.Errorf("invalid check interval '%s': %w", m.config.Deduplication.AutoGC.CheckInterval, err)
57+
}
58+
59+
// Create monitor configuration
60+
monitorConfig := MonitorConfig{
61+
Enabled: m.config.Deduplication.AutoGC.Enabled,
62+
CheckInterval: checkInterval,
63+
AutoGCThreshold: m.getEffectiveGCThreshold(),
64+
EnableLogging: m.config.Deduplication.AutoGC.EnableLogging,
65+
LogFile: m.getEffectiveLogFile(),
66+
AlertThreshold: m.config.Deduplication.AutoGC.AlertThreshold,
67+
AlertWebhook: m.config.Deduplication.AutoGC.AlertWebhook,
68+
}
69+
70+
// Create and start monitor
71+
monitor, err := NewMonitor(m.vaultRoot, monitorConfig, m.config.Deduplication)
72+
if err != nil {
73+
return fmt.Errorf("failed to create GC monitor: %w", err)
74+
}
75+
76+
if err := monitor.Start(ctx); err != nil {
77+
return fmt.Errorf("failed to start GC monitor: %w", err)
78+
}
79+
80+
m.monitor = monitor
81+
m.started = true
82+
83+
return nil
84+
}
85+
86+
// Stop halts automatic GC monitoring
87+
func (m *Manager) Stop() error {
88+
if !m.started {
89+
return fmt.Errorf("GC manager not started")
90+
}
91+
92+
err := m.monitor.Stop()
93+
m.started = false
94+
return err
95+
}
96+
97+
// IsRunning returns whether the GC manager is currently running
98+
func (m *Manager) IsRunning() bool {
99+
return m.started && m.monitor.IsRunning()
100+
}
101+
102+
// GetStats returns current deduplication statistics
103+
func (m *Manager) GetStats() (deduplication.DeduplicationStats, error) {
104+
if !m.started {
105+
return deduplication.DeduplicationStats{}, fmt.Errorf("GC manager not started")
106+
}
107+
return m.monitor.GetStats(), nil
108+
}
109+
110+
// getEffectiveGCThreshold returns the effective GC threshold to use
111+
func (m *Manager) getEffectiveGCThreshold() int {
112+
if m.config.Deduplication.AutoGC.AutoGCThreshold > 0 {
113+
return m.config.Deduplication.AutoGC.AutoGCThreshold
114+
}
115+
return m.config.Deduplication.GCThreshold
116+
}
117+
118+
// getEffectiveLogFile returns the effective log file path
119+
func (m *Manager) getEffectiveLogFile() string {
120+
if m.config.Deduplication.AutoGC.LogFile != "" {
121+
// Convert relative path to absolute path within vault
122+
if !filepath.IsAbs(m.config.Deduplication.AutoGC.LogFile) {
123+
return filepath.Join(m.vaultRoot, m.config.Deduplication.AutoGC.LogFile)
124+
}
125+
return m.config.Deduplication.AutoGC.LogFile
126+
}
127+
return filepath.Join(m.vaultRoot, ".sietch", "logs", "gc.log")
128+
}
129+
130+
// Global variable to hold the active GC manager
131+
var activeManager *Manager
132+
133+
// StartGlobalGC starts automatic GC for the vault in the current directory
134+
func StartGlobalGC(ctx context.Context) error {
135+
vaultRoot, err := fs.FindVaultRoot()
136+
if err != nil {
137+
// Not in a vault, silently ignore
138+
return nil
139+
}
140+
141+
manager, err := NewManager(vaultRoot)
142+
if err != nil {
143+
return fmt.Errorf("failed to create GC manager: %w", err)
144+
}
145+
146+
if err := manager.Start(ctx); err != nil {
147+
return fmt.Errorf("failed to start GC manager: %w", err)
148+
}
149+
150+
activeManager = manager
151+
return nil
152+
}
153+
154+
// StopGlobalGC stops the global GC manager
155+
func StopGlobalGC() error {
156+
if activeManager == nil {
157+
return nil
158+
}
159+
160+
err := activeManager.Stop()
161+
activeManager = nil
162+
return err
163+
}
164+
165+
// GetGlobalGCManager returns the active GC manager
166+
func GetGlobalGCManager() *Manager {
167+
return activeManager
168+
}

0 commit comments

Comments
 (0)