66 "net/http"
77 "os"
88 "os/exec"
9+ "path/filepath"
10+ "strconv"
911 "strings"
1012
1113 "github.com/qiniu/zeroops/internal/prometheus_adapter/client"
@@ -20,15 +22,167 @@ type AlertService struct {
2022 // 内存中缓存当前规则,用于增量更新
2123 currentRules []model.AlertRule
2224 currentRuleMetas []model.AlertRuleMeta
25+ // 本地规则文件路径
26+ localRulesPath string
2327}
2428
2529// NewAlertService 创建告警服务
2630func NewAlertService (promClient * client.PrometheusClient ) * AlertService {
27- return & AlertService {
31+ service := & AlertService {
2832 promClient : promClient ,
2933 currentRules : []model.AlertRule {},
3034 currentRuleMetas : []model.AlertRuleMeta {},
35+ localRulesPath : "../rules/alert_rules.yml" ,
3136 }
37+
38+ // 启动时尝试加载本地规则
39+ if err := service .LoadRulesFromFile (); err != nil {
40+ log .Warn ().Err (err ).Msg ("Failed to load rules from file, starting with empty rules" )
41+ }
42+
43+ return service
44+ }
45+
46+ // ========== 持久化方法 ==========
47+
48+ // LoadRulesFromFile 从本地文件加载规则
49+ func (s * AlertService ) LoadRulesFromFile () error {
50+ // 检查文件是否存在
51+ if _ , err := os .Stat (s .localRulesPath ); os .IsNotExist (err ) {
52+ log .Info ().Str ("path" , s .localRulesPath ).Msg ("Local rules file does not exist, skipping load" )
53+ return nil
54+ }
55+
56+ // 读取文件内容
57+ data , err := os .ReadFile (s .localRulesPath )
58+ if err != nil {
59+ return fmt .Errorf ("failed to read local rules file: %w" , err )
60+ }
61+
62+ // 解析规则文件
63+ var rulesFile model.PrometheusRuleFile
64+ if err := yaml .Unmarshal (data , & rulesFile ); err != nil {
65+ return fmt .Errorf ("failed to parse rules file: %w" , err )
66+ }
67+
68+ // 从Prometheus格式转换回内部格式
69+ s .currentRules = []model.AlertRule {}
70+ s .currentRuleMetas = []model.AlertRuleMeta {}
71+
72+ // 用于去重的map
73+ ruleMap := make (map [string ]* model.AlertRule )
74+
75+ for _ , group := range rulesFile .Groups {
76+ for _ , rule := range group .Rules {
77+ // 提取基础规则信息
78+ ruleName := rule .Alert
79+
80+ // 从annotations中获取description
81+ description := ""
82+ if desc , ok := rule .Annotations ["description" ]; ok {
83+ description = desc
84+ }
85+
86+ // 从labels中获取severity
87+ severity := "warning"
88+ if sev , ok := rule .Labels ["severity" ]; ok {
89+ severity = sev
90+ delete (rule .Labels , "severity" ) // 移除severity,剩下的是meta的labels
91+ }
92+
93+ // 创建或更新规则模板
94+ if _ , exists := ruleMap [ruleName ]; ! exists {
95+ alertRule := model.AlertRule {
96+ Name : ruleName ,
97+ Description : description ,
98+ Expr : rule .Expr ,
99+ Severity : severity ,
100+ }
101+
102+ // 解析For字段获取WatchTime
103+ if rule .For != "" {
104+ // 简单解析,假设格式为 "300s" 或 "5m"
105+ if strings .HasSuffix (rule .For , "s" ) {
106+ if seconds , err := strconv .Atoi (strings .TrimSuffix (rule .For , "s" )); err == nil {
107+ alertRule .WatchTime = seconds
108+ }
109+ } else if strings .HasSuffix (rule .For , "m" ) {
110+ if minutes , err := strconv .Atoi (strings .TrimSuffix (rule .For , "m" )); err == nil {
111+ alertRule .WatchTime = minutes * 60
112+ }
113+ }
114+ }
115+
116+ ruleMap [ruleName ] = & alertRule
117+ s .currentRules = append (s .currentRules , alertRule )
118+ }
119+
120+ // 创建元信息
121+ if len (rule .Labels ) > 0 {
122+ labelsJSON , _ := json .Marshal (rule .Labels )
123+ meta := model.AlertRuleMeta {
124+ AlertName : ruleName ,
125+ Labels : string (labelsJSON ),
126+ }
127+
128+ // 从表达式中提取threshold(简单实现)
129+ // 假设表达式类似 "metric > 80" 或 "metric{labels} > 80"
130+ parts := strings .Split (rule .Expr , " " )
131+ if len (parts ) >= 3 {
132+ if threshold , err := strconv .ParseFloat (parts [len (parts )- 1 ], 64 ); err == nil {
133+ meta .Threshold = threshold
134+ }
135+ }
136+
137+ s .currentRuleMetas = append (s .currentRuleMetas , meta )
138+ }
139+ }
140+ }
141+
142+ log .Info ().
143+ Int ("rules" , len (s .currentRules )).
144+ Int ("metas" , len (s .currentRuleMetas )).
145+ Str ("path" , s .localRulesPath ).
146+ Msg ("Loaded rules from local file" )
147+
148+ return nil
149+ }
150+
151+ // SaveRulesToFile 保存规则到本地文件
152+ func (s * AlertService ) SaveRulesToFile () error {
153+ // 确保目录存在
154+ dir := filepath .Dir (s .localRulesPath )
155+ if err := os .MkdirAll (dir , 0755 ); err != nil {
156+ return fmt .Errorf ("failed to create rules directory: %w" , err )
157+ }
158+
159+ // 构建Prometheus规则文件格式
160+ prometheusRules := s .buildPrometheusRules (s .currentRules , s .currentRuleMetas )
161+
162+ // 序列化为YAML
163+ data , err := yaml .Marshal (prometheusRules )
164+ if err != nil {
165+ return fmt .Errorf ("failed to marshal rules: %w" , err )
166+ }
167+
168+ // 写入文件
169+ if err := os .WriteFile (s .localRulesPath , data , 0644 ); err != nil {
170+ return fmt .Errorf ("failed to write rules file: %w" , err )
171+ }
172+
173+ log .Info ().
174+ Int ("rules" , len (s .currentRules )).
175+ Int ("metas" , len (s .currentRuleMetas )).
176+ Str ("path" , s .localRulesPath ).
177+ Msg ("Saved rules to local file" )
178+
179+ return nil
180+ }
181+
182+ // Shutdown 优雅关闭,保存当前规则
183+ func (s * AlertService ) Shutdown () error {
184+ log .Info ().Msg ("Shutting down alert service, saving rules..." )
185+ return s .SaveRulesToFile ()
32186}
33187
34188// ========== 公开 API 方法 ==========
0 commit comments