Skip to content

Commit 9a094d9

Browse files
authored
Merge pull request #94 from acd19ml/develop
feat(Metrics&Alert): 完成告警规则调整记录
2 parents 16078a2 + 188bdb0 commit 9a094d9

File tree

29 files changed

+541
-163
lines changed

29 files changed

+541
-163
lines changed

client/src/api/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import axios from 'axios'
22

33
// 创建 axios 实例
44
const api = axios.create({
5+
// baseURL: (import.meta as any).env?.VITE_API_BASE_URL || 'http://10.210.10.33:8080',
56
timeout: 10000,
67
headers: {
78
'Content-Type': 'application/json'

client/src/mock/services.ts

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1845,27 +1845,9 @@ loadServiceAlertStatus()
18451845
*/
18461846
export const serviceVersionAlertStatusMap: Record<string, Record<string, ServiceAlertStatus>> = {}
18471847

1848-
const saveServiceVersionAlertStatus = () => {
1849-
try {
1850-
localStorage.setItem('serviceVersionAlertStatusMap', JSON.stringify(serviceVersionAlertStatusMap))
1851-
console.log('服务版本告警状态已保存到 localStorage')
1852-
} catch (error) {
1853-
console.error('保存服务版本告警状态失败:', error)
1854-
}
1855-
}
1848+
const saveServiceVersionAlertStatus = () => {}
18561849

1857-
const loadServiceVersionAlertStatus = () => {
1858-
try {
1859-
const data = localStorage.getItem('serviceVersionAlertStatusMap')
1860-
if (data) {
1861-
const parsed = JSON.parse(data)
1862-
Object.assign(serviceVersionAlertStatusMap, parsed)
1863-
console.log('已从 localStorage 加载服务版本告警状态')
1864-
}
1865-
} catch (error) {
1866-
console.error('从 localStorage 加载服务版本告警状态失败:', error)
1867-
}
1868-
}
1850+
const loadServiceVersionAlertStatus = () => {}
18691851

18701852
/**
18711853
* 根据告警状态更新服务版本状态
@@ -1919,8 +1901,7 @@ export const clearServiceVersionAlertStatus = (serviceName: string, version?: st
19191901
console.log(`已清除服务 ${serviceName} ${version ? '版本 ' + version : '所有版本'} 的告警状态`)
19201902
}
19211903

1922-
// 页面加载时恢复服务版本告警状态
1923-
loadServiceVersionAlertStatus()
1904+
// 页面加载时不再从 localStorage 恢复服务版本告警状态(禁用持久化)
19241905

19251906
// ==================== 发布任务状态管理 ====================
19261907
// 管理服务的发布任务状态,用于显示发布指示器

client/src/views/ChangeLogView.vue

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,31 @@
8484
import { ref, computed, onMounted, watch } from 'vue'
8585
import { useAppStore, type ChangeItem, type AlarmChangeItem } from '@/stores/app'
8686
import { mockApi } from '@/mock/api'
87-
import type { DeploymentChangelogResponse, DeploymentChangelogItem, AlertRuleChangelogResponse, AlertRuleChangeItem } from '@/mock/services'
87+
import { apiService } from '@/api'
88+
import type { DeploymentChangelogResponse } from '@/mock/services'
8889
import ChangeCard from '@/components/ChangeCard.vue'
8990
import AlarmChangeCard from '@/components/AlarmChangeCard.vue'
9091
import { ArrowLeft, Loading } from '@element-plus/icons-vue'
9192
93+
interface AlertRuleChangeValue {
94+
name: string
95+
old: string
96+
new: string
97+
}
98+
99+
interface AlertRuleChangeItem {
100+
name: string
101+
editTime: string
102+
scope: string
103+
values: AlertRuleChangeValue[]
104+
reason: string
105+
}
106+
107+
interface AlertRuleChangelogResponse {
108+
items: AlertRuleChangeItem[]
109+
next?: string
110+
}
111+
92112
const appStore = useAppStore()
93113
94114
const activeTab = ref('service')
@@ -160,7 +180,7 @@ const transformAlertRuleChangelogToAlarmChangeItems = (changelogData: AlertRuleC
160180
const serviceName = item.scope?.startsWith('service:') ? item.scope.slice('service:'.length) + '服务' : '全局服务'
161181
162182
// 构建变更描述
163-
const changeDescription = item.values.map(value => {
183+
const changeDescription = item.values.map((value) => {
164184
return `${value.name}: ${value.old} -> ${value.new}`
165185
}).join(', ')
166186
@@ -200,21 +220,21 @@ const loadDeploymentChangelog = async (start?: string, limit?: number) => {
200220
}
201221
}
202222
203-
// 加载告警规则变更记录
223+
// 加载告警规则变更记录(使用真实 API)
204224
const loadAlertRuleChangelog = async (start?: string, limit?: number) => {
205225
if (alertRuleLoading.value) return // 防止重复加载
206226
207227
try {
208228
alertRuleLoading.value = true
209229
error.value = null
210230
211-
const response = await mockApi.getAlertRuleChangelog(start, limit)
212-
alertRuleChangelog.value = response
231+
const response = await apiService.getAlertRuleChangelog(start, limit ?? 10)
232+
alertRuleChangelog.value = response.data
213233
214234
// 转换数据格式
215-
alarmChangeItems.value = transformAlertRuleChangelogToAlarmChangeItems(response.items)
235+
alarmChangeItems.value = transformAlertRuleChangelogToAlarmChangeItems(response.data.items)
216236
217-
console.log('告警规则变更记录加载成功:', response)
237+
console.log('告警规则变更记录加载成功:', response.data)
218238
} catch (err) {
219239
error.value = '加载告警规则变更记录失败'
220240
console.error('加载告警规则变更记录失败:', err)

client/vite.config.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ export default defineConfig({
1818
server: {
1919
proxy: {
2020
'/v1': {
21-
target: 'http://127.0.0.1:8080',
21+
target: 'http://10.210.10.33:8080',
2222
changeOrigin: true,
2323
secure: false,
2424
}

cmd/zeroops/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
FROM golang:1.24-alpine AS builder
1+
FROM docker.m.daocloud.io/library/golang:1.24-alpine AS builder
22
WORKDIR /src
33
COPY go.mod go.sum ./
44
RUN go mod download
55
COPY . .
66
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /out/zeroops ./cmd/zeroops
77

8-
FROM gcr.io/distroless/base-debian12
8+
FROM gcr.m.daocloud.io/distroless/base-debian12:nonroot
99
WORKDIR /app
1010
COPY --from=builder /out/zeroops /app/zeroops
1111
# 复制配置文件目录

cmd/zeroops/main.go

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import (
66
"strconv"
77
"time"
88

9-
"github.com/fox-gonic/fox"
9+
"github.com/gin-gonic/gin"
1010
alertapi "github.com/qiniu/zeroops/internal/alerting/api"
1111
adb "github.com/qiniu/zeroops/internal/alerting/database"
1212
"github.com/qiniu/zeroops/internal/alerting/service/healthcheck"
@@ -96,22 +96,24 @@ func main() {
9696
go rem.Start(ctx, alertCh)
9797

9898
// start Prometheus anomaly detection scheduler
99-
promInterval := parseDuration(cfg.Alerting.Prometheus.SchedulerInterval, 6*time.Hour)
99+
promInterval := parseDuration(cfg.Alerting.Prometheus.SchedulerInterval, 5*time.Minute)
100100
promStep := parseDuration(cfg.Alerting.Prometheus.QueryStep, time.Minute)
101101
promRange := parseDuration(cfg.Alerting.Prometheus.QueryRange, 6*time.Hour)
102102
promCfg := healthcheck.NewPrometheusConfigFromApp(&cfg.Alerting.Prometheus)
103-
promClient := healthcheck.NewPrometheusClient(promCfg)
103+
anomalyDetectClient := healthcheck.NewAnomalyDetectClient(promCfg)
104104
go healthcheck.StartPrometheusScheduler(ctx, healthcheck.PrometheusDeps{
105-
DB: alertDB,
106-
PrometheusClient: promClient,
107-
Interval: promInterval,
108-
QueryStep: promStep,
109-
QueryRange: promRange,
110-
RulesetBase: cfg.Alerting.Ruleset.APIBase,
111-
RulesetTimeout: parseDuration(cfg.Alerting.Ruleset.APITimeout, 10*time.Second),
105+
DB: alertDB,
106+
AnomalyDetectClient: anomalyDetectClient,
107+
Interval: promInterval,
108+
QueryStep: promStep,
109+
QueryRange: promRange,
110+
RulesetBase: cfg.Alerting.Ruleset.APIBase,
111+
RulesetTimeout: parseDuration(cfg.Alerting.Ruleset.APITimeout, 10*time.Second),
112112
})
113113

114-
router := fox.New()
114+
router := gin.New()
115+
router.Use(gin.Logger())
116+
router.Use(gin.Recovery())
115117
router.Use(middleware.Authentication)
116118
alertapi.NewApiWithConfig(router, cfg)
117119
if err := serviceManagerSrv.UseApi(router); err != nil {

configs/alerting/rules.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"expr": "histogram_quantile(0.98, sum(rate(http_latency_seconds_bucket{}[2m])) by (service, service_version, le))",
77
"op": ">",
88
"severity": "P0",
9-
"watch_time": "5 minutes",
9+
"watch_time": "5m",
1010
"metas": [
1111
{ "labels": { "service": "storage-service", "service_version": "1.0.0" }, "threshold": 1000 },
1212
{ "labels": { "service": "queue-service", "service_version": "1.0.0" }, "threshold": 1000 },
@@ -19,7 +19,7 @@
1919
"expr": "histogram_quantile(0.98, sum(rate(http_latency_seconds_bucket{}[2m])) by (service, service_version, le))",
2020
"op": ">",
2121
"severity": "P1",
22-
"watch_time": "4 minutes",
22+
"watch_time": "4m",
2323
"metas": [
2424
{ "labels": { "service": "storage-service", "service_version": "1.0.0" }, "threshold": 500 },
2525
{ "labels": { "service": "queue-service", "service_version": "1.0.0" }, "threshold": 500 },
@@ -31,7 +31,7 @@
3131
"description":"HTTP error rate by service P0",
3232
"op":">",
3333
"severity":"P0",
34-
"watch_time":"5 minutes",
34+
"watch_time":"5m",
3535
"expr":"sum(rate(http_latency_seconds_count{\"http.status_code\"=~\"4..|5..\", \"http.route\"!=\"/metrics\"}[2m])) by (service, service_version) / sum(rate(http_latency_seconds_count{\"http.route\"!=\"/metrics\"}[2m])) by (service, service_version)",
3636
"metas":[
3737
{"labels":{"service":"storage-service","service_version":"1.0.0"},"threshold":5},
@@ -44,7 +44,7 @@
4444
"description":"HTTP error rate by service P1",
4545
"op":">",
4646
"severity":"P1",
47-
"watch_time":"5 minutes",
47+
"watch_time":"5m",
4848
"expr":"sum(rate(http_latency_seconds_count{\"http.status_code\"=~\"4..|5..\", \"http.route\"!=\"/metrics\"}[2m])) by (service, service_version) / sum(rate(http_latency_seconds_count{\"http.route\"!=\"/metrics\"}[2m])) by (service, service_version)",
4949
"metas":[
5050
{"labels":{"service":"storage-service","service_version":"1.0.0"},"threshold":3},
File renamed without changes.

docs/alerting/api.md

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,49 @@ curl -X POST http://localhost:8080/v1/integrations/alertmanager/webhook \
315315
}'
316316
```
317317

318+
### 4. 获取告警规则变更记录
319+
320+
用于查询统一化告警规则的变更记录(阈值、观察窗口等),支持按时间游标分页。
321+
322+
**请求:**
323+
```http
324+
GET /v1/changelog/alertrules?start={start}&limit={limit}
325+
```
326+
327+
**查询参数:**
328+
329+
| 参数名 | 类型 | 必填 | 说明 |
330+
|--------|------|------|------|
331+
| start | string | 否 | 游标时间(ISO 8601)。第一页可不传;翻页使用上次响应的 `next` |
332+
| limit | integer || 返回数量,范围 1-100 |
333+
334+
分页说明:按 `change_time` 倒序返回,`start` 为上界(`<= start`)。响应中的 `next` 为当前页最后一条的 `editTime`。由于上界为闭区间,使用 `next` 翻页时,下一页的首条记录可能与上一页的末条记录重复,客户端需按需去重。
335+
336+
**响应示例:**
337+
```json
338+
{
339+
"items": [
340+
{
341+
"name": "http_request_latency_p98_seconds_P1",
342+
"editTime": "2024-01-03T03:00:00Z",
343+
"scope": "",
344+
"values": [
345+
{"name": "threshold", "old": "10", "new": "15"}
346+
],
347+
"reason": "Update"
348+
}
349+
],
350+
"next": "2024-01-03T03:00:00Z"
351+
}
352+
```
353+
354+
**状态码:**
355+
- `200 OK`: 成功
356+
- `400 Bad Request`: 参数错误
357+
- `401 Unauthorized`: 认证失败
358+
- `500 Internal Server Error`: 服务器内部错误
359+
318360
## 版本历史
319361

362+
- **v1.1** (2025-10-07): 新增 `GET /v1/changelog/alertrules`
320363
- **v1.0** (2025-09-11): 初始版本,支持基础的告警列表和详情查询

docs/alerting/database-design.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
## 数据表设计
1515

16-
### 1) talert_issues(告警问题表)
16+
### 1) alert_issues(告警问题表)
1717

1818
存储告警问题的主要信息。
1919

@@ -23,7 +23,7 @@
2323
| state | enum(Closed, Open) | 问题状态 |
2424
| level | varchar(32) | 告警等级:如 P0/P1/Px |
2525
| alert_state | enum(Pending, Restored, AutoRestored, InProcessing) | 处理状态 |
26-
| title | varchar(255) | 告警标题 |
26+
| title | varchar(255) | 告警标题 |
2727
| labels | json | 标签,格式:[{key, value}] |
2828
| alert_since | TIMESTAMP(6) | 告警发生时间 |
2929
| resolved_at | TIMESTAMP(6) | 告警结束时间 |
@@ -64,8 +64,6 @@
6464
| labels | text | labels 的 JSON 字符串表示(规范化后) |
6565
| old_threshold | numeric | 旧阈值(可空) |
6666
| new_threshold | numeric | 新阈值(可空) |
67-
| old_watch | interval | 旧观察窗口(可空) |
68-
| new_watch | interval | 新观察窗口(可空) |
6967

7068

7169
**索引建议:**

0 commit comments

Comments
 (0)