|
1 | 1 | package postgres_monitoring_collectors |
2 | 2 |
|
3 | | -type DbMonitoringBackgroundService struct{} |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "fmt" |
| 6 | + "log/slog" |
| 7 | + "postgresus-backend/internal/config" |
| 8 | + "postgresus-backend/internal/features/databases" |
| 9 | + "postgresus-backend/internal/features/databases/databases/postgresql" |
| 10 | + postgres_monitoring_metrics "postgresus-backend/internal/features/monitoring/postgres/metrics" |
| 11 | + postgres_monitoring_settings "postgresus-backend/internal/features/monitoring/postgres/settings" |
| 12 | + "sync" |
| 13 | + "sync/atomic" |
| 14 | + "time" |
| 15 | + |
| 16 | + "github.com/google/uuid" |
| 17 | + "github.com/jackc/pgx/v5" |
| 18 | +) |
| 19 | + |
| 20 | +type DbMonitoringBackgroundService struct { |
| 21 | + databaseService *databases.DatabaseService |
| 22 | + monitoringSettingsService *postgres_monitoring_settings.PostgresMonitoringSettingsService |
| 23 | + metricsService *postgres_monitoring_metrics.PostgresMonitoringMetricService |
| 24 | + logger *slog.Logger |
| 25 | + isRunning int32 |
| 26 | + lastRunTimes map[uuid.UUID]time.Time |
| 27 | + lastRunTimesMutex sync.RWMutex |
| 28 | +} |
| 29 | + |
| 30 | +func (s *DbMonitoringBackgroundService) Run() { |
| 31 | + for { |
| 32 | + if config.IsShouldShutdown() { |
| 33 | + s.logger.Info("stopping background monitoring tasks") |
| 34 | + return |
| 35 | + } |
| 36 | + |
| 37 | + s.processMonitoringTasks() |
| 38 | + time.Sleep(1 * time.Second) |
| 39 | + } |
| 40 | +} |
| 41 | + |
| 42 | +func (s *DbMonitoringBackgroundService) processMonitoringTasks() { |
| 43 | + if !atomic.CompareAndSwapInt32(&s.isRunning, 0, 1) { |
| 44 | + s.logger.Warn("skipping background task execution, previous task still running") |
| 45 | + return |
| 46 | + } |
| 47 | + defer atomic.StoreInt32(&s.isRunning, 0) |
| 48 | + |
| 49 | + dbsWithEnabledDbMonitoring, err := s.monitoringSettingsService.GetAllDbsWithEnabledDbMonitoring() |
| 50 | + if err != nil { |
| 51 | + s.logger.Error("failed to get all databases with enabled db monitoring", "error", err) |
| 52 | + return |
| 53 | + } |
| 54 | + |
| 55 | + for _, dbSettings := range dbsWithEnabledDbMonitoring { |
| 56 | + s.processDatabase(&dbSettings) |
| 57 | + } |
| 58 | +} |
| 59 | + |
| 60 | +func (s *DbMonitoringBackgroundService) processDatabase( |
| 61 | + settings *postgres_monitoring_settings.PostgresMonitoringSettings, |
| 62 | +) { |
| 63 | + db, err := s.databaseService.GetDatabaseByID(settings.DatabaseID) |
| 64 | + if err != nil { |
| 65 | + s.logger.Error("failed to get database by id", "error", err) |
| 66 | + return |
| 67 | + } |
| 68 | + |
| 69 | + if db.Type != databases.DatabaseTypePostgres { |
| 70 | + return |
| 71 | + } |
| 72 | + |
| 73 | + if !s.isReadyForNextRun(settings) { |
| 74 | + return |
| 75 | + } |
| 76 | + |
| 77 | + err = s.collectAndSaveMetrics(db, settings) |
| 78 | + if err != nil { |
| 79 | + s.logger.Error("failed to collect and save metrics", "error", err) |
| 80 | + return |
| 81 | + } |
| 82 | + |
| 83 | + s.updateLastRunTime(db) |
| 84 | +} |
| 85 | + |
| 86 | +func (s *DbMonitoringBackgroundService) collectAndSaveMetrics( |
| 87 | + db *databases.Database, |
| 88 | + settings *postgres_monitoring_settings.PostgresMonitoringSettings, |
| 89 | +) error { |
| 90 | + if db.Postgresql == nil { |
| 91 | + return nil |
| 92 | + } |
| 93 | + |
| 94 | + s.logger.Debug("collecting metrics for database", "database_id", db.ID) |
| 95 | + |
| 96 | + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) |
| 97 | + defer cancel() |
| 98 | + |
| 99 | + conn, err := s.connectToDatabase(ctx, db) |
| 100 | + if err != nil { |
| 101 | + return fmt.Errorf("failed to connect to database: %w", err) |
| 102 | + } |
| 103 | + |
| 104 | + if conn == nil { |
| 105 | + return nil |
| 106 | + } |
| 107 | + |
| 108 | + defer func() { |
| 109 | + if closeErr := conn.Close(ctx); closeErr != nil { |
| 110 | + s.logger.Error("Failed to close connection", "error", closeErr) |
| 111 | + } |
| 112 | + }() |
| 113 | + |
| 114 | + var metrics []postgres_monitoring_metrics.PostgresMonitoringMetric |
| 115 | + now := time.Now().UTC() |
| 116 | + |
| 117 | + if settings.IsDbResourcesMonitoringEnabled { |
| 118 | + dbMetrics, err := s.collectDatabaseResourceMetrics(ctx, conn, db.ID, now) |
| 119 | + if err != nil { |
| 120 | + s.logger.Error("failed to collect database resource metrics", "error", err) |
| 121 | + } else { |
| 122 | + metrics = append(metrics, dbMetrics...) |
| 123 | + } |
| 124 | + } |
| 125 | + |
| 126 | + if len(metrics) > 0 { |
| 127 | + if err := s.metricsService.Insert(metrics); err != nil { |
| 128 | + return fmt.Errorf("failed to insert metrics: %w", err) |
| 129 | + } |
| 130 | + s.logger.Debug( |
| 131 | + "successfully collected and saved metrics", |
| 132 | + "count", |
| 133 | + len(metrics), |
| 134 | + "database_id", |
| 135 | + db.ID, |
| 136 | + ) |
| 137 | + } |
| 138 | + |
| 139 | + return nil |
| 140 | +} |
| 141 | + |
| 142 | +func (s *DbMonitoringBackgroundService) isReadyForNextRun( |
| 143 | + settings *postgres_monitoring_settings.PostgresMonitoringSettings, |
| 144 | +) bool { |
| 145 | + s.lastRunTimesMutex.RLock() |
| 146 | + defer s.lastRunTimesMutex.RUnlock() |
| 147 | + |
| 148 | + if s.lastRunTimes == nil { |
| 149 | + return true |
| 150 | + } |
| 151 | + |
| 152 | + lastRun, exists := s.lastRunTimes[settings.DatabaseID] |
| 153 | + if !exists { |
| 154 | + return true |
| 155 | + } |
| 156 | + |
| 157 | + return time.Since(lastRun) >= time.Duration(settings.MonitoringIntervalSeconds)*time.Second |
| 158 | +} |
| 159 | + |
| 160 | +func (s *DbMonitoringBackgroundService) updateLastRunTime(db *databases.Database) { |
| 161 | + s.lastRunTimesMutex.Lock() |
| 162 | + defer s.lastRunTimesMutex.Unlock() |
| 163 | + |
| 164 | + if s.lastRunTimes == nil { |
| 165 | + s.lastRunTimes = make(map[uuid.UUID]time.Time) |
| 166 | + } |
| 167 | + s.lastRunTimes[db.ID] = time.Now().UTC() |
| 168 | +} |
| 169 | + |
| 170 | +func (s *DbMonitoringBackgroundService) connectToDatabase( |
| 171 | + ctx context.Context, |
| 172 | + db *databases.Database, |
| 173 | +) (*pgx.Conn, error) { |
| 174 | + if db.Postgresql == nil { |
| 175 | + return nil, nil |
| 176 | + } |
| 177 | + |
| 178 | + if db.Postgresql.Database == nil || *db.Postgresql.Database == "" { |
| 179 | + return nil, nil |
| 180 | + } |
| 181 | + |
| 182 | + connStr := s.buildConnectionString(db.Postgresql) |
| 183 | + return pgx.Connect(ctx, connStr) |
| 184 | +} |
| 185 | + |
| 186 | +func (s *DbMonitoringBackgroundService) buildConnectionString( |
| 187 | + pg *postgresql.PostgresqlDatabase, |
| 188 | +) string { |
| 189 | + sslMode := "disable" |
| 190 | + if pg.IsHttps { |
| 191 | + sslMode = "require" |
| 192 | + } |
| 193 | + |
| 194 | + return fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s sslmode=%s", |
| 195 | + pg.Host, |
| 196 | + pg.Port, |
| 197 | + pg.Username, |
| 198 | + pg.Password, |
| 199 | + *pg.Database, |
| 200 | + sslMode, |
| 201 | + ) |
| 202 | +} |
| 203 | + |
| 204 | +func (s *DbMonitoringBackgroundService) collectDatabaseResourceMetrics( |
| 205 | + ctx context.Context, |
| 206 | + conn *pgx.Conn, |
| 207 | + databaseID uuid.UUID, |
| 208 | + timestamp time.Time, |
| 209 | +) ([]postgres_monitoring_metrics.PostgresMonitoringMetric, error) { |
| 210 | + var metrics []postgres_monitoring_metrics.PostgresMonitoringMetric |
| 211 | + |
| 212 | + // Collect I/O statistics |
| 213 | + ioMetrics, err := s.collectIOMetrics(ctx, conn, databaseID, timestamp) |
| 214 | + if err != nil { |
| 215 | + s.logger.Warn("failed to collect I/O metrics", "error", err) |
| 216 | + } else { |
| 217 | + metrics = append(metrics, ioMetrics...) |
| 218 | + } |
| 219 | + |
| 220 | + // Collect memory usage (approximation based on buffer usage) |
| 221 | + ramMetric, err := s.collectRAMUsageMetric(ctx, conn, databaseID, timestamp) |
| 222 | + if err != nil { |
| 223 | + s.logger.Warn("failed to collect RAM usage metric", "error", err) |
| 224 | + } else { |
| 225 | + metrics = append(metrics, ramMetric) |
| 226 | + } |
| 227 | + |
| 228 | + return metrics, nil |
| 229 | +} |
| 230 | + |
| 231 | +func (s *DbMonitoringBackgroundService) collectIOMetrics( |
| 232 | + ctx context.Context, |
| 233 | + conn *pgx.Conn, |
| 234 | + databaseID uuid.UUID, |
| 235 | + timestamp time.Time, |
| 236 | +) ([]postgres_monitoring_metrics.PostgresMonitoringMetric, error) { |
| 237 | + var blocksRead, blocksHit int64 |
| 238 | + query := ` |
| 239 | + SELECT |
| 240 | + COALESCE(SUM(blks_read), 0) as total_reads, |
| 241 | + COALESCE(SUM(blks_hit), 0) as total_hits |
| 242 | + FROM pg_stat_database |
| 243 | + WHERE datname = current_database() |
| 244 | + ` |
| 245 | + |
| 246 | + err := conn.QueryRow(ctx, query).Scan(&blocksRead, &blocksHit) |
| 247 | + if err != nil { |
| 248 | + return nil, err |
| 249 | + } |
| 250 | + |
| 251 | + // Calculate I/O activity as total blocks accessed (PostgreSQL block size is typically 8KB) |
| 252 | + const pgBlockSize = 8192 // 8KB |
| 253 | + totalIOBytes := float64((blocksRead + blocksHit) * pgBlockSize) |
| 254 | + |
| 255 | + return []postgres_monitoring_metrics.PostgresMonitoringMetric{ |
| 256 | + { |
| 257 | + DatabaseID: databaseID, |
| 258 | + Metric: postgres_monitoring_metrics.MetricsTypeDbIO, |
| 259 | + ValueType: postgres_monitoring_metrics.MetricsValueTypeByte, |
| 260 | + Value: totalIOBytes, |
| 261 | + CreatedAt: timestamp, |
| 262 | + }, |
| 263 | + }, nil |
| 264 | +} |
| 265 | + |
| 266 | +func (s *DbMonitoringBackgroundService) collectRAMUsageMetric( |
| 267 | + ctx context.Context, |
| 268 | + conn *pgx.Conn, |
| 269 | + databaseID uuid.UUID, |
| 270 | + timestamp time.Time, |
| 271 | +) (postgres_monitoring_metrics.PostgresMonitoringMetric, error) { |
| 272 | + var sharedBuffers int64 |
| 273 | + query := ` |
| 274 | + SELECT |
| 275 | + COALESCE(SUM(blks_hit), 0) * 8192 as buffer_usage |
| 276 | + FROM pg_stat_database |
| 277 | + WHERE datname = current_database() |
| 278 | + ` |
| 279 | + |
| 280 | + err := conn.QueryRow(ctx, query).Scan(&sharedBuffers) |
| 281 | + if err != nil { |
| 282 | + return postgres_monitoring_metrics.PostgresMonitoringMetric{}, err |
| 283 | + } |
| 284 | + |
| 285 | + return postgres_monitoring_metrics.PostgresMonitoringMetric{ |
| 286 | + DatabaseID: databaseID, |
| 287 | + Metric: postgres_monitoring_metrics.MetricsTypeDbRAM, |
| 288 | + ValueType: postgres_monitoring_metrics.MetricsValueTypeByte, |
| 289 | + Value: float64(sharedBuffers), |
| 290 | + CreatedAt: timestamp, |
| 291 | + }, nil |
| 292 | +} |
0 commit comments