Skip to content

Commit 0971950

Browse files
authored
Automated cherry pick of #23878: feat(monitor): 支持指定过去时间段返回报警资源最多的top5监控策略 (#23880)
* feat(monitor): 支持指定过去时间段返回报警资源最多的top5监控策略 * feat(monitor): 支持指定过去时间段返回报警数量最多的top5资源 * feat(monitor): 支持指定过去时间段某监控策略下各监控指标报警资源最多的top5资源 * feat(monitor): 支持获取过去时间段各项目下报警资源数量
1 parent 201a1e3 commit 0971950

File tree

13 files changed

+672
-14
lines changed

13 files changed

+672
-14
lines changed

cmd/climc/shell/monitor/alertrecord.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,5 @@ func init() {
2525
cmd.Show(new(options.AlertRecordShowOptions))
2626
cmd.GetProperty(new(options.AlertRecordTotalOptions))
2727
cmd.GetProperty(new(options.AlertRecordHistoryAlertOptions))
28+
cmd.GetProperty(new(options.AlertRecordProjectAlertResourceCountOptions))
2829
}

pkg/apis/monitor/alertrecord.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,27 @@ func (self AlertRecordHistoryAlertData) GetMetricTags() map[string]string {
111111
// AlertRecordHistoryAlert wraps a list of AlertRecordHistoryAlertData entries, serialized under the "data" key.
type AlertRecordHistoryAlert struct {
112112
Data []AlertRecordHistoryAlertData `json:"data"`
113113
}
114+
115+
// ProjectAlertResourceCountData is one row of the alerting-resource statistics, grouped by scope.
116+
type ProjectAlertResourceCountData struct {
117+
Scope string `json:"scope"` // system/domain/project
118+
DomainId string `json:"domain_id"` // domain ID (set for domain/project scope)
119+
Domain string `json:"domain"` // domain name (set for domain/project scope)
120+
ProjectId string `json:"project_id"` // project ID (set for project scope)
121+
Project string `json:"project"` // project name (set for project scope)
122+
ResCount int64 `json:"res_count"` // number of alerting resources
123+
}
124+
125+
// ProjectAlertResourceCount is the result of the alerting-resource statistics query; rows are serialized under the "data" key.
126+
type ProjectAlertResourceCount struct {
127+
Data []ProjectAlertResourceCountData `json:"data"`
128+
}
129+
130+
// ProjectAlertResourceCountInput is the query input for the per-project alerting-resource statistics.
131+
type ProjectAlertResourceCountInput struct {
132+
StartTime time.Time `json:"start_time"` // start of the queried time range
133+
EndTime time.Time `json:"end_time"` // end of the queried time range
134+
ResType string `json:"res_type"` // optional resource type filter
135+
AlertId string `json:"alert_id"` // optional alert ID filter
136+
Scope string `json:"scope"` // system/domain/project; the handler treats an empty value as system
137+
}

pkg/apis/monitor/commalert.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414

1515
package monitor
1616

17+
import (
18+
time "time"
19+
)
20+
1721
const (
1822
ALERT_STATUS_READY = "ready"
1923
ALERT_STATUS_DELETE = "start_delete"
@@ -113,6 +117,16 @@ type CommonAlertQuery struct {
113117
Operator string `json:"operator"`
114118
}
115119

120+
// TopQueryInput holds the common time range and top-N parameters used by top queries.
121+
type TopQueryInput struct {
122+
// Start of the queried time range.
123+
StartTime time.Time `json:"start_time"`
124+
// End of the queried time range.
125+
EndTime time.Time `json:"end_time"`
126+
// Number of top entries to return (documented default: 5); a pointer, so an absent value is distinguishable from 0.
127+
Top *int `json:"top"`
128+
}
129+
116130
type CommonAlertListInput struct {
117131
AlertListInput
118132
//V1AlertListInput
@@ -125,6 +139,8 @@ type CommonAlertListInput struct {
125139
ResType []string `json:"res_type"`
126140
UsedBy string `json:"used_by"`
127141
Name string `json:"name"`
142+
// Top 查询参数(用于统计报警资源最多的监控策略)
143+
TopQueryInput
128144
}
129145

130146
type CommonAlertUpdateInput struct {

pkg/apis/monitor/monitor_resource.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ type MonitorResourceListInput struct {
4545
AlertStates []string `json:"alert_states"`
4646

4747
ResName string `json:"res_name"`
48+
// Top 查询参数(用于统计报警数量最多的资源)
49+
TopQueryInput
4850
}
4951

5052
type MonitorResourceDetails struct {

pkg/apis/monitor/monitor_resource_alert.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ type MonitorResourceJointListInput struct {
3838
Level string `json:"level"`
3939
// 查询所有状态
4040
AllState bool `json:"all_state"`
41+
// Top 查询参数(用于统计各监控指标报警资源最多的资源)
42+
TopQueryInput
4143
}
4244

4345
type MonitorResourceJointCreateInput struct {

pkg/mcclient/options/monitor/alertrecord.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,22 @@ func (o *AlertRecordHistoryAlertOptions) Property() string {
7777
return "history-alert"
7878
}
7979

80+
// AlertRecordProjectAlertResourceCountOptions holds the CLI options for the
// "project-alert-resource-count" alert-record property query.
// NOTE(review): the help text advertises RFC3339, but the default value
// "2025-01-01 00:00:00" is not RFC3339, and StartTime and EndTime share the
// same fixed default, so an invocation without explicit times presumably
// queries a zero-length window in the past — confirm this is intended.
type AlertRecordProjectAlertResourceCountOptions struct {
81+
StartTime time.Time `help:"start time (RFC3339 format)" json:"start_time" default:"2025-01-01 00:00:00"`
82+
EndTime time.Time `help:"end time (RFC3339 format)" json:"end_time" default:"2025-01-01 00:00:00"`
83+
ResType string `help:"resource type" json:"res_type"`
84+
AlertId string `help:"alert id" json:"alert_id"`
85+
Scope string `help:"scope" json:"scope" choices:"system|domain|project"`
86+
}
87+
88+
// Params converts the options into request parameters via options.StructToParams.
func (o *AlertRecordProjectAlertResourceCountOptions) Params() (jsonutils.JSONObject, error) {
89+
return options.StructToParams(o)
90+
}
91+
92+
// Property returns the name of the property queried by this command: "project-alert-resource-count".
func (o *AlertRecordProjectAlertResourceCountOptions) Property() string {
93+
return "project-alert-resource-count"
94+
}
95+
8096
type AlertRecordShieldListOptions struct {
8197
options.BaseListOptions
8298

pkg/mcclient/options/monitor/commonalert.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
package monitor
1616

1717
import (
18+
"time"
19+
1820
"yunion.io/x/jsonutils"
1921
"yunion.io/x/pkg/errors"
2022

@@ -27,9 +29,12 @@ import (
2729
// CommonAlertListOptions holds the CLI list options for common alerts.
// StartTime, EndTime and Top are the time-range and top-N parameters added by this change.
type CommonAlertListOptions struct {
2830
options.BaseListOptions
2931
// Alert type
30-
AlertType string `help:"common alert type" choices:"normal|system"`
31-
Level string `help:"common alert notify level" choices:"normal|important|fatal"`
32-
MonitorResourceId []string `help:"monitor resource id"`
32+
AlertType string `help:"common alert type" choices:"normal|system"`
33+
Level string `help:"common alert notify level" choices:"normal|important|fatal"`
34+
MonitorResourceId []string `help:"monitor resource id"`
35+
StartTime time.Time `help:"start time, format: 2025-01-01 00:00:00" json:"start_time"`
36+
EndTime time.Time `help:"end time, format: 2025-01-01 00:00:00" json:"end_time"`
37+
Top int `help:"top" json:"top"`
3338
}
3439

3540
func (o *CommonAlertListOptions) Params() (jsonutils.JSONObject, error) {

pkg/mcclient/options/monitor/monitor_resource.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
package monitor
1616

1717
import (
18+
"time"
19+
1820
"yunion.io/x/jsonutils"
1921

2022
"yunion.io/x/onecloud/pkg/mcclient/options"
@@ -33,10 +35,13 @@ func (o *MonitorResourceJointAlertOptions) Property() string {
3335

3436
// MonitorResourceListOptions holds the CLI list options for monitor resources.
// StartTime, EndTime and Top are the time-range and top-N parameters added by this change.
type MonitorResourceListOptions struct {
3537
options.BaseListOptions
36-
ResType string `help:"filter by resource type" json:"res_type"`
37-
ResId []string `help:"filter by resource id" json:"res_id"`
38-
ResName string `help:"filter by resource name" json:"res_name"`
39-
AlertStates []string `help:"filter by alert state" json:"alert_states"`
38+
ResType string `help:"filter by resource type" json:"res_type"`
39+
ResId []string `help:"filter by resource id" json:"res_id"`
40+
ResName string `help:"filter by resource name" json:"res_name"`
41+
AlertStates []string `help:"filter by alert state" json:"alert_states"`
42+
StartTime time.Time `help:"start time for top query, format: 2025-01-01 00:00:00" json:"start_time"`
43+
EndTime time.Time `help:"end time for top query, format: 2025-01-01 00:00:00" json:"end_time"`
44+
Top int `help:"return top N resources by alert count (default: 5)" json:"top"`
4045
}
4146

4247
func (o *MonitorResourceListOptions) Params() (jsonutils.JSONObject, error) {

pkg/mcclient/options/monitor/monitor_resource_alert.go

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,24 @@
1515
package monitor
1616

1717
import (
18+
"time"
19+
1820
"yunion.io/x/jsonutils"
1921

2022
"yunion.io/x/onecloud/pkg/mcclient/options"
2123
)
2224

2325
// MonitorResourceAlertListOptions holds the CLI list options for monitor-resource/alert joint records.
// StartTime, EndTime and Top are the time-range and top-N parameters added by this change.
type MonitorResourceAlertListOptions struct {
2426
options.BaseListOptions
25-
MonitorResourceId string `help:"ID of monitor resource" json:"monitor_resource_id"`
26-
AlertId string `help:"ID of alert" json:"alert_id"`
27-
Alerting bool `help:"search alerting resource" json:"alerting"`
28-
SendState string `json:"send_state"`
29-
AllState bool `help:"Show all state" json:"all_state"`
30-
Ip string `help:"IP address" json:"ip"`
27+
MonitorResourceId string `help:"ID of monitor resource" json:"monitor_resource_id"`
28+
AlertId string `help:"ID of alert" json:"alert_id"`
29+
Alerting bool `help:"search alerting resource" json:"alerting"`
30+
SendState string `json:"send_state"`
31+
AllState bool `help:"Show all state" json:"all_state"`
32+
Ip string `help:"IP address" json:"ip"`
33+
StartTime time.Time `help:"start time for top query, format: 2025-01-01 00:00:00" json:"start_time"`
34+
EndTime time.Time `help:"end time for top query, format: 2025-01-01 00:00:00" json:"end_time"`
35+
Top int `help:"return top N resources by alert count (default: 5)" json:"top"`
3136
}
3237

3338
func (o *MonitorResourceAlertListOptions) GetMasterOpt() string {

pkg/monitor/models/alertrecord.go

Lines changed: 170 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,10 +330,16 @@ func (record *SAlertRecord) CustomizeCreate(
330330
query jsonutils.JSONObject,
331331
data jsonutils.JSONObject,
332332
) error {
333-
err := record.SMonitorScopedResource.CustomizeCreate(ctx, userCred, ownerId, query, data)
333+
/*err := record.SMonitorScopedResource.CustomizeCreate(ctx, userCred, ownerId, query, data)
334334
if err != nil {
335335
return err
336+
}*/
337+
alert, err := AlertManager.GetAlert(record.AlertId)
338+
if err != nil {
339+
return errors.Wrapf(err, "GetAlert %s", record.AlertId)
336340
}
341+
record.DomainId = alert.GetDomainId()
342+
record.ProjectId = alert.GetProjectId()
337343
obj, err := db.NewModelObject(AlertRecordManager)
338344
if err != nil {
339345
return errors.Wrapf(err, "NewModelObject %s", AlertRecordManager.Keyword())
@@ -532,3 +538,166 @@ func (manager *SAlertRecordManager) GetPropertyHistoryAlert(
532538
}
533539
return result, nil
534540
}
541+
542+
// GetPropertyProjectAlertResourceCount 获取指定时间段内各项目下的报警资源数量
543+
func (manager *SAlertRecordManager) GetPropertyProjectAlertResourceCount(
544+
ctx context.Context,
545+
userCred mcclient.TokenCredential,
546+
input monitor.ProjectAlertResourceCountInput,
547+
) (*monitor.ProjectAlertResourceCount, error) {
548+
// 验证时间段参数
549+
if input.StartTime.IsZero() || input.EndTime.IsZero() {
550+
return nil, httperrors.NewInputParameterError("start_time and end_time must be specified")
551+
}
552+
if input.StartTime.After(input.EndTime) {
553+
return nil, httperrors.NewInputParameterError("start_time must be before end_time")
554+
}
555+
556+
// 构建查询
557+
q := manager.Query()
558+
q = q.GE("created_at", input.StartTime).LE("created_at", input.EndTime)
559+
q = q.IsNotEmpty("res_ids")
560+
561+
// 应用权限过滤
562+
scope := rbacscope.ScopeSystem
563+
if input.Scope != "" {
564+
scope = rbacscope.TRbacScope(input.Scope)
565+
}
566+
q = manager.SMonitorScopedResourceManager.FilterByOwner(ctx, q, manager, userCred, userCred, scope)
567+
568+
// 如果指定了 ResType,添加过滤条件
569+
if input.ResType != "" {
570+
q = q.Equals("res_type", input.ResType)
571+
}
572+
573+
// 如果指定了 AlertId,添加过滤条件
574+
if input.AlertId != "" {
575+
q = q.Equals("alert_id", input.AlertId)
576+
}
577+
578+
// 执行查询获取所有记录
579+
alerts := make([]SAlertRecord, 0)
580+
err := q.All(&alerts)
581+
if err != nil {
582+
return nil, errors.Wrap(err, "query alert records")
583+
}
584+
585+
// 按 scope 分组统计唯一资源数量
586+
// systemResourceSet = set of resource IDs (system scope)
587+
// domainResourceSet[domainId] = set of resource IDs (domain scope)
588+
// projectResourceSet[domainId][projectId] = set of resource IDs (project scope)
589+
systemResourceSet := sets.NewString()
590+
domainResourceSet := make(map[string]sets.String)
591+
projectResourceSet := make(map[string]map[string]sets.String)
592+
domainIds := sets.NewString()
593+
projectIds := sets.NewString()
594+
595+
for _, alert := range alerts {
596+
if len(alert.ResIds) == 0 {
597+
continue
598+
}
599+
domainId := alert.DomainId
600+
projectId := alert.ProjectId
601+
602+
// 解析 res_ids(逗号分隔)
603+
resIds := strings.Split(alert.ResIds, ",")
604+
for _, resId := range resIds {
605+
resId = strings.TrimSpace(resId)
606+
if len(resId) == 0 {
607+
continue
608+
}
609+
610+
// 根据 domainId 和 projectId 判断 scope
611+
if domainId == "" && projectId == "" {
612+
// system scope
613+
systemResourceSet.Insert(resId)
614+
} else if domainId != "" && projectId == "" {
615+
// domain scope
616+
domainIds.Insert(domainId)
617+
if domainResourceSet[domainId] == nil {
618+
domainResourceSet[domainId] = sets.NewString()
619+
}
620+
domainResourceSet[domainId].Insert(resId)
621+
} else if domainId != "" && projectId != "" {
622+
// project scope
623+
domainIds.Insert(domainId)
624+
projectIds.Insert(projectId)
625+
if projectResourceSet[domainId] == nil {
626+
projectResourceSet[domainId] = make(map[string]sets.String)
627+
}
628+
if projectResourceSet[domainId][projectId] == nil {
629+
projectResourceSet[domainId][projectId] = sets.NewString()
630+
}
631+
projectResourceSet[domainId][projectId].Insert(resId)
632+
}
633+
}
634+
}
635+
636+
// 获取项目和域的名称
637+
domainMap := make(map[string]string)
638+
if domainIds.Len() > 0 {
639+
domains := []db.STenant{}
640+
err = db.TenantCacheManager.GetDomainQuery().In("id", domainIds.List()).All(&domains)
641+
if err != nil {
642+
return nil, errors.Wrap(err, "GetDomainQuery.In.All")
643+
}
644+
for _, domain := range domains {
645+
domainMap[domain.Id] = domain.Name
646+
}
647+
}
648+
649+
projectMap := make(map[string]string)
650+
if projectIds.Len() > 0 {
651+
projects := []db.STenant{}
652+
err = db.TenantCacheManager.GetTenantQuery().In("id", projectIds.List()).All(&projects)
653+
if err != nil {
654+
return nil, errors.Wrap(err, "GetTenantQuery.In.All")
655+
}
656+
for _, project := range projects {
657+
projectMap[project.Id] = project.Name
658+
}
659+
}
660+
661+
// 构建返回结果
662+
result := &monitor.ProjectAlertResourceCount{
663+
Data: make([]monitor.ProjectAlertResourceCountData, 0),
664+
}
665+
666+
// system scope
667+
if systemResourceSet.Len() > 0 {
668+
result.Data = append(result.Data, monitor.ProjectAlertResourceCountData{
669+
Scope: string(rbacscope.ScopeSystem),
670+
ResCount: int64(systemResourceSet.Len()),
671+
})
672+
}
673+
674+
// domain scope
675+
for domainId, resourceSet := range domainResourceSet {
676+
if resourceSet.Len() > 0 {
677+
result.Data = append(result.Data, monitor.ProjectAlertResourceCountData{
678+
Scope: string(rbacscope.ScopeDomain),
679+
DomainId: domainId,
680+
Domain: domainMap[domainId],
681+
ResCount: int64(resourceSet.Len()),
682+
})
683+
}
684+
}
685+
686+
// project scope
687+
for domainId, projects := range projectResourceSet {
688+
for projectId, resourceSet := range projects {
689+
if resourceSet.Len() > 0 {
690+
result.Data = append(result.Data, monitor.ProjectAlertResourceCountData{
691+
Scope: string(rbacscope.ScopeProject),
692+
DomainId: domainId,
693+
Domain: domainMap[domainId],
694+
ProjectId: projectId,
695+
Project: projectMap[projectId],
696+
ResCount: int64(resourceSet.Len()),
697+
})
698+
}
699+
}
700+
}
701+
702+
return result, nil
703+
}

0 commit comments

Comments
 (0)