Skip to content

Commit f6c1ad1

Browse files
committed
feat(observability): 修复Prometheus告警规则支持并优化规则同步机制
- 在prometheus.yml中配置告警规则文件路径
- 修改docker-compose.yml挂载规则目录
- 重构AlertService,移除本地文件存储,直接写入容器
- 添加容器内规则文件写入的容错机制
1 parent 54b8f4d commit f6c1ad1

File tree

3 files changed

+43
-31
lines changed

3 files changed

+43
-31
lines changed

internal/prometheus_adapter/service/alert_service.go

Lines changed: 38 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"net/http"
77
"os"
88
"os/exec"
9-
"path/filepath"
109
"strings"
1110

1211
"github.com/qiniu/zeroops/internal/prometheus_adapter/client"
@@ -17,24 +16,16 @@ import (
1716

1817
// AlertService 告警服务 - 仅负责与Prometheus交互,不持久化存储规则(仅在内存中缓存当前规则)
1918
type AlertService struct {
20-
promClient *client.PrometheusClient
21-
rulesFilePath string
19+
promClient *client.PrometheusClient
2220
// 内存中缓存当前规则,用于增量更新
2321
currentRules []model.AlertRule
2422
currentRuleMetas []model.AlertRuleMeta
2523
}
2624

2725
// NewAlertService 创建告警服务
2826
func NewAlertService(promClient *client.PrometheusClient) *AlertService {
29-
rulesFilePath := os.Getenv("PROMETHEUS_RULES_FILE")
30-
if rulesFilePath == "" {
31-
// 在本地生成规则文件,用于调试和后续同步到远程容器
32-
rulesFilePath = "./prometheus_rules/alert_rules.yml"
33-
}
34-
3527
return &AlertService{
3628
promClient: promClient,
37-
rulesFilePath: rulesFilePath,
3829
currentRules: []model.AlertRule{},
3930
currentRuleMetas: []model.AlertRuleMeta{},
4031
}
@@ -221,34 +212,50 @@ func (s *AlertService) buildExpression(rule *model.AlertRule, meta *model.AlertR
221212

222213
// writeRulesFile 写入规则文件
223214
func (s *AlertService) writeRulesFile(rules *model.PrometheusRuleFile) error {
224-
// 确保目录存在
225-
dir := filepath.Dir(s.rulesFilePath)
226-
if err := os.MkdirAll(dir, 0755); err != nil {
227-
return fmt.Errorf("failed to create rules directory: %w", err)
228-
}
229-
230215
// 序列化为YAML
231216
data, err := yaml.Marshal(rules)
232217
if err != nil {
233218
return fmt.Errorf("failed to marshal rules: %w", err)
234219
}
235220

236-
// 写入文件
237-
if err := os.WriteFile(s.rulesFilePath, data, 0644); err != nil {
238-
return fmt.Errorf("failed to write rules file: %w", err)
221+
// 获取容器名称
222+
containerName := os.Getenv("PROMETHEUS_CONTAINER")
223+
if containerName == "" {
224+
containerName = "mock-s3-prometheus"
239225
}
240226

241-
log.Info().
242-
Str("file", s.rulesFilePath).
243-
Int("groups", len(rules.Groups)).
244-
Msg("Prometheus rules file updated locally")
227+
// 直接写入到容器内的规则目录
228+
// 使用docker exec和cat heredoc写入文件
229+
cmd := exec.Command("docker", "exec", containerName, "sh", "-c",
230+
fmt.Sprintf("cat > /etc/prometheus/rules/alert_rules.yml << 'EOF'\n%s\nEOF", string(data)))
245231

246-
// 同步到 Prometheus 容器
247-
if err := s.syncToPrometheusContainer(); err != nil {
248-
log.Warn().Err(err).Msg("Failed to sync rules to Prometheus container")
249-
// 不返回错误,因为本地文件已经生成成功
232+
if output, err := cmd.CombinedOutput(); err != nil {
233+
// 如果直接写入容器失败,尝试使用临时文件+docker cp
234+
log.Warn().
235+
Err(err).
236+
Str("output", string(output)).
237+
Msg("Failed to write directly to container, trying docker cp")
238+
239+
// 写入临时文件
240+
tmpFile := "/tmp/prometheus_alert_rules.yml"
241+
if err := os.WriteFile(tmpFile, data, 0644); err != nil {
242+
return fmt.Errorf("failed to write temp rules file: %w", err)
243+
}
244+
245+
// 使用docker cp复制到容器
246+
if err := s.syncRuleFileToContainer(tmpFile); err != nil {
247+
return fmt.Errorf("failed to sync to container: %w", err)
248+
}
249+
250+
// 清理临时文件
251+
os.Remove(tmpFile)
250252
}
251253

254+
log.Info().
255+
Str("container", containerName).
256+
Int("groups", len(rules.Groups)).
257+
Msg("Prometheus rules file updated in container")
258+
252259
return nil
253260
}
254261

@@ -275,8 +282,8 @@ func (s *AlertService) reloadPrometheus() error {
275282
return nil
276283
}
277284

278-
// syncToPrometheusContainer 同步规则文件到本地 Prometheus 容器
279-
func (s *AlertService) syncToPrometheusContainer() error {
285+
// syncRuleFileToContainer 同步规则文件到容器
286+
func (s *AlertService) syncRuleFileToContainer(filePath string) error {
280287
// 获取容器名称,默认为 mock-s3-prometheus
281288
containerName := os.Getenv("PROMETHEUS_CONTAINER")
282289
if containerName == "" {
@@ -293,14 +300,14 @@ func (s *AlertService) syncToPrometheusContainer() error {
293300
}
294301

295302
// 2. 将规则文件拷贝到容器内
296-
cmdCopy := exec.Command("docker", "cp", s.rulesFilePath, fmt.Sprintf("%s:/etc/prometheus/rules/alert_rules.yml", containerName))
303+
cmdCopy := exec.Command("docker", "cp", filePath, fmt.Sprintf("%s:/etc/prometheus/rules/alert_rules.yml", containerName))
297304
if output, err := cmdCopy.CombinedOutput(); err != nil {
298305
return fmt.Errorf("failed to copy rules file to container: %w, output: %s", err, string(output))
299306
}
300307

301308
log.Info().
302309
Str("container", containerName).
303-
Str("file", s.rulesFilePath).
310+
Str("file", filePath).
304311
Msg("Rules synced to Prometheus container")
305312

306313
// 3. 确保 Prometheus 配置包含 rule_files

mock/s3/deployments/docker-compose.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ services:
8484
volumes:
8585
- prometheus-data:/prometheus
8686
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
87+
- ./prometheus/rules:/etc/prometheus/rules:rw
8788
command:
8889
- '--config.file=/etc/prometheus/prometheus.yml'
8990
- '--storage.tsdb.path=/prometheus'

mock/s3/deployments/observability/prometheus.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ global:
55
cluster: mock-s3
66
environment: docker
77

8+
# 告警规则文件
9+
rule_files:
10+
- "/etc/prometheus/rules/*.yml"
11+
812
scrape_configs:
913
# Prometheus自身的指标
1014
- job_name: 'prometheus'

0 commit comments

Comments
 (0)