Skip to content

Commit 021a598

Browse files
committed
调试告警规则调整
1 parent 910cfe5 commit 021a598

File tree

23 files changed

+181
-122
lines changed

23 files changed

+181
-122
lines changed

client/src/api/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import axios from 'axios'
22

33
// 创建 axios 实例
44
const api = axios.create({
5+
// baseURL: (import.meta as any).env?.VITE_API_BASE_URL || 'http://10.210.10.33:8080',
56
timeout: 10000,
67
headers: {
78
'Content-Type': 'application/json'

client/src/mock/services.ts

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1845,27 +1845,9 @@ loadServiceAlertStatus()
18451845
*/
18461846
export const serviceVersionAlertStatusMap: Record<string, Record<string, ServiceAlertStatus>> = {}
18471847

1848-
const saveServiceVersionAlertStatus = () => {
1849-
try {
1850-
localStorage.setItem('serviceVersionAlertStatusMap', JSON.stringify(serviceVersionAlertStatusMap))
1851-
console.log('服务版本告警状态已保存到 localStorage')
1852-
} catch (error) {
1853-
console.error('保存服务版本告警状态失败:', error)
1854-
}
1855-
}
1848+
const saveServiceVersionAlertStatus = () => {}
18561849

1857-
const loadServiceVersionAlertStatus = () => {
1858-
try {
1859-
const data = localStorage.getItem('serviceVersionAlertStatusMap')
1860-
if (data) {
1861-
const parsed = JSON.parse(data)
1862-
Object.assign(serviceVersionAlertStatusMap, parsed)
1863-
console.log('已从 localStorage 加载服务版本告警状态')
1864-
}
1865-
} catch (error) {
1866-
console.error('从 localStorage 加载服务版本告警状态失败:', error)
1867-
}
1868-
}
1850+
const loadServiceVersionAlertStatus = () => {}
18691851

18701852
/**
18711853
* 根据告警状态更新服务版本状态
@@ -1919,8 +1901,7 @@ export const clearServiceVersionAlertStatus = (serviceName: string, version?: st
19191901
console.log(`已清除服务 ${serviceName} ${version ? '版本 ' + version : '所有版本'} 的告警状态`)
19201902
}
19211903

1922-
// 页面加载时恢复服务版本告警状态
1923-
loadServiceVersionAlertStatus()
1904+
// 页面加载时不再从 localStorage 恢复服务版本告警状态(禁用持久化)
19241905

19251906
// ==================== 发布任务状态管理 ====================
19261907
// 管理服务的发布任务状态,用于显示发布指示器

client/vite.config.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ export default defineConfig({
1818
server: {
1919
proxy: {
2020
'/v1': {
21-
target: 'http://127.0.0.1:8080',
21+
target: 'http://10.210.10.33:8080',
2222
changeOrigin: true,
2323
secure: false,
2424
}

cmd/zeroops/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
FROM golang:1.24-alpine AS builder
1+
FROM docker.m.daocloud.io/library/golang:1.24-alpine AS builder
22
WORKDIR /src
33
COPY go.mod go.sum ./
44
RUN go mod download
55
COPY . .
66
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /out/zeroops ./cmd/zeroops
77

8-
FROM gcr.io/distroless/base-debian12
8+
FROM gcr.m.daocloud.io/distroless/base-debian12:nonroot
99
WORKDIR /app
1010
COPY --from=builder /out/zeroops /app/zeroops
1111
# 复制配置文件目录

cmd/zeroops/main.go

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import (
66
"strconv"
77
"time"
88

9-
"github.com/fox-gonic/fox"
9+
"github.com/gin-gonic/gin"
1010
alertapi "github.com/qiniu/zeroops/internal/alerting/api"
1111
adb "github.com/qiniu/zeroops/internal/alerting/database"
1212
"github.com/qiniu/zeroops/internal/alerting/service/healthcheck"
@@ -96,22 +96,24 @@ func main() {
9696
go rem.Start(ctx, alertCh)
9797

9898
// start Prometheus anomaly detection scheduler
99-
promInterval := parseDuration(cfg.Alerting.Prometheus.SchedulerInterval, 6*time.Hour)
99+
promInterval := parseDuration(cfg.Alerting.Prometheus.SchedulerInterval, 5*time.Minute)
100100
promStep := parseDuration(cfg.Alerting.Prometheus.QueryStep, time.Minute)
101101
promRange := parseDuration(cfg.Alerting.Prometheus.QueryRange, 6*time.Hour)
102102
promCfg := healthcheck.NewPrometheusConfigFromApp(&cfg.Alerting.Prometheus)
103-
promClient := healthcheck.NewPrometheusClient(promCfg)
103+
anomalyDetectClient := healthcheck.NewAnomalyDetectClient(promCfg)
104104
go healthcheck.StartPrometheusScheduler(ctx, healthcheck.PrometheusDeps{
105-
DB: alertDB,
106-
PrometheusClient: promClient,
107-
Interval: promInterval,
108-
QueryStep: promStep,
109-
QueryRange: promRange,
110-
RulesetBase: cfg.Alerting.Ruleset.APIBase,
111-
RulesetTimeout: parseDuration(cfg.Alerting.Ruleset.APITimeout, 10*time.Second),
105+
DB: alertDB,
106+
AnomalyDetectClient: anomalyDetectClient,
107+
Interval: promInterval,
108+
QueryStep: promStep,
109+
QueryRange: promRange,
110+
RulesetBase: cfg.Alerting.Ruleset.APIBase,
111+
RulesetTimeout: parseDuration(cfg.Alerting.Ruleset.APITimeout, 10*time.Second),
112112
})
113113

114-
router := fox.New()
114+
router := gin.New()
115+
router.Use(gin.Logger())
116+
router.Use(gin.Recovery())
115117
router.Use(middleware.Authentication)
116118
alertapi.NewApiWithConfig(router, cfg)
117119
if err := serviceManagerSrv.UseApi(router); err != nil {

configs/alerting/rules.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"expr": "histogram_quantile(0.98, sum(rate(http_latency_seconds_bucket{}[2m])) by (service, service_version, le))",
77
"op": ">",
88
"severity": "P0",
9-
"watch_time": "5 minutes",
9+
"watch_time": "5m",
1010
"metas": [
1111
{ "labels": { "service": "storage-service", "service_version": "1.0.0" }, "threshold": 1000 },
1212
{ "labels": { "service": "queue-service", "service_version": "1.0.0" }, "threshold": 1000 },
@@ -19,7 +19,7 @@
1919
"expr": "histogram_quantile(0.98, sum(rate(http_latency_seconds_bucket{}[2m])) by (service, service_version, le))",
2020
"op": ">",
2121
"severity": "P1",
22-
"watch_time": "4 minutes",
22+
"watch_time": "4m",
2323
"metas": [
2424
{ "labels": { "service": "storage-service", "service_version": "1.0.0" }, "threshold": 500 },
2525
{ "labels": { "service": "queue-service", "service_version": "1.0.0" }, "threshold": 500 },
@@ -31,7 +31,7 @@
3131
"description":"HTTP error rate by service P0",
3232
"op":">",
3333
"severity":"P0",
34-
"watch_time":"5 minutes",
34+
"watch_time":"5m",
3535
"expr":"sum(rate(http_latency_seconds_count{\"http.status_code\"=~\"4..|5..\", \"http.route\"!=\"/metrics\"}[2m])) by (service, service_version) / sum(rate(http_latency_seconds_count{\"http.route\"!=\"/metrics\"}[2m])) by (service, service_version)",
3636
"metas":[
3737
{"labels":{"service":"storage-service","service_version":"1.0.0"},"threshold":5},
@@ -44,7 +44,7 @@
4444
"description":"HTTP error rate by service P1",
4545
"op":">",
4646
"severity":"P1",
47-
"watch_time":"5 minutes",
47+
"watch_time":"5m",
4848
"expr":"sum(rate(http_latency_seconds_count{\"http.status_code\"=~\"4..|5..\", \"http.route\"!=\"/metrics\"}[2m])) by (service, service_version) / sum(rate(http_latency_seconds_count{\"http.route\"!=\"/metrics\"}[2m])) by (service, service_version)",
4949
"metas":[
5050
{"labels":{"service":"storage-service","service_version":"1.0.0"},"threshold":3},

docs/alerting/database-design.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
## 数据表设计
1515

16-
### 1) talert_issues(告警问题表)
16+
### 1) alert_issues(告警问题表)
1717

1818
存储告警问题的主要信息。
1919

@@ -23,7 +23,7 @@
2323
| state | enum(Closed, Open) | 问题状态 |
2424
| level | varchar(32) | 告警等级:如 P0/P1/Px |
2525
| alert_state | enum(Pending, Restored, AutoRestored, InProcessing) | 处理状态 |
26-
| title | varchar(255) | 告警标题 |
26+
| title | varchar(255) | 告警标题 |
2727
| labels | json | 标签,格式:[{key, value}] |
2828
| alert_since | TIMESTAMP(6) | 告警发生时间 |
2929
| resolved_at | TIMESTAMP(6) | 告警结束时间 |
@@ -64,8 +64,6 @@
6464
| labels | text | labels 的 JSON 字符串表示(规范化后) |
6565
| old_threshold | numeric | 旧阈值(可空) |
6666
| new_threshold | numeric | 新阈值(可空) |
67-
| old_watch | interval | 旧观察窗口(可空) |
68-
| new_watch | interval | 新观察窗口(可空) |
6967

7068

7169
**索引建议:**

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ module github.com/qiniu/zeroops
33
go 1.24
44

55
require (
6-
github.com/fox-gonic/fox v0.0.6
6+
github.com/gin-gonic/gin v1.10.1
77
github.com/google/uuid v1.6.0
88
github.com/jackc/pgx/v5 v5.5.5
99
github.com/lib/pq v1.10.9

internal/alerting/README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,9 +170,9 @@ docker exec -i zeroops-postgres-1 psql -U postgres -d zeroops -c \
170170
"CREATE TABLE IF NOT EXISTS alert_issue_comments (issue_id text, create_at timestamp, content text, PRIMARY KEY(issue_id, create_at));"
171171
```
172172

173-
### 2) 初始化/重置规则表(alert_rules / alert_rule_metas)
173+
### 2) 初始化/重置规则表(alert_rules / alert_rule_metas / alert_meta_change_logs)
174174

175-
注意:该脚本会 DROP 并重建 `alert_rules``alert_rule_metas`,仅用于本地/开发环境。
175+
注意:该脚本会 DROP 并重建 `alert_rules`、`alert_rule_metas`、`alert_meta_change_logs`,仅用于本地/开发环境。
176176

177177
脚本位置:`scripts/sql/alert_rules_bootstrap.sql`
178178

@@ -191,12 +191,13 @@ psql -U postgres -d zeroops -f scripts/sql/alert_rules_bootstrap.sql
191191
```bash
192192
docker exec -i zeroops-postgres-1 psql -U postgres -d zeroops -c "SELECT name, severity FROM alert_rules;"
193193
docker exec -i zeroops-postgres-1 psql -U postgres -d zeroops -c "SELECT alert_name, labels, threshold FROM alert_rule_metas;"
194+
docker exec -i zeroops-postgres-1 psql -U postgres -d zeroops -c "SELECT alert_name, change_type, change_time FROM alert_meta_change_logs;"
194195
```
195196

196197
### 2) 清空数据库与缓存(可选,保证从空开始)
197198

198199
```bash
199-
docker exec -i zeroops-pg psql -U postgres -d zeroops -c "TRUNCATE TABLE alert_issue_comments, service_states, alert_issues;"
200+
docker exec -i zeroops-pg psql -U postgres -d zeroops -c "TRUNCATE TABLE alert_issue_comments, service_states, alert_issues, alert_meta_change_logs;"
200201
docker exec -i zeroops-redis redis-cli --raw DEL $(docker exec -i zeroops-redis redis-cli --raw KEYS 'alert:*' | tr '\n' ' ') 2>/dev/null || true
201202
docker exec -i zeroops-redis redis-cli --raw DEL $(docker exec -i zeroops-redis redis-cli --raw KEYS 'service_state:*' | tr '\n' ' ') 2>/dev/null || true
202203
```

internal/alerting/api/api.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package api
33
import (
44
"fmt"
55

6-
"github.com/fox-gonic/fox"
6+
"github.com/gin-gonic/gin"
77
adb "github.com/qiniu/zeroops/internal/alerting/database"
88
"github.com/qiniu/zeroops/internal/alerting/service/healthcheck"
99
receiver "github.com/qiniu/zeroops/internal/alerting/service/receiver"
@@ -12,15 +12,15 @@ import (
1212

1313
type Api struct{}
1414

15-
func NewApi(router *fox.Engine) *Api { return NewApiWithConfig(router, nil) }
15+
func NewApi(router *gin.Engine) *Api { return NewApiWithConfig(router, nil) }
1616

17-
func NewApiWithConfig(router *fox.Engine, cfg *config.Config) *Api {
17+
func NewApiWithConfig(router *gin.Engine, cfg *config.Config) *Api {
1818
api := &Api{}
1919
api.setupRouters(router, cfg)
2020
return api
2121
}
2222

23-
func (api *Api) setupRouters(router *fox.Engine, cfg *config.Config) {
23+
func (api *Api) setupRouters(router *gin.Engine, cfg *config.Config) {
2424
var h *receiver.Handler
2525
var alertDB *adb.Database
2626
if cfg != nil {

0 commit comments

Comments
 (0)