Skip to content

Commit 822ff84

Browse files
committed
Fix bug where the dimensions returned by DescribeBaseMetrics and GetMonitorData do not match
1 parent 044b83d commit 822ff84

File tree

6 files changed

+87
-51
lines changed

6 files changed

+87
-51
lines changed

pkg/collector/product.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ func (c *TcProductCollector) LoadMetricsByProductConf() error {
9696
level.Error(c.logger).Log("msg", "create metric series err", "err", err, "Namespace", c.Namespace, "name", mname)
9797
continue
9898
}
99+
level.Info(c.logger).Log("msg", "found instances", "count", len(series), "Namespace", c.Namespace, "name", mname)
99100
err = nm.LoadSeries(series)
100101
if err != nil {
101102
level.Error(c.logger).Log("msg", "load metric series err", "err", err, "Namespace", c.Namespace, "name", mname)
@@ -227,7 +228,7 @@ func (c *TcProductCollector) initQuerys() (err error) {
227228
return e
228229
}
229230
c.Querys = append(c.Querys, q)
230-
numSeries += len(q.Metric.Series)
231+
numSeries += len(q.Metric.SeriesCache.Series)
231232
}
232233
level.Info(c.logger).Log("msg", "Init all query ok", "Namespace", c.Namespace, "numMetric", len(c.Querys), "numSeries", numSeries)
233234
return
@@ -276,11 +277,13 @@ func (r *TcProductCollectorReloader) Run() {
276277
time.Sleep(r.relodInterval)
277278

278279
for {
280+
level.Info(r.logger).Log("msg", "start reload product metadata", "Namespace", r.collector.Namespace)
279281
e := r.reloadMetricsByProductConf()
280282
if e != nil {
281283
level.Error(r.logger).Log("msg", "reload product error", "err", e,
282284
"namespace", r.collector.Namespace)
283285
}
286+
level.Info(r.logger).Log("msg", "complete reload product metadata", "Namespace", r.collector.Namespace)
284287
select {
285288
case <-r.ctx.Done():
286289
return

pkg/config/config.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,13 +106,10 @@ type TencentProduct struct {
106106
}
107107

108108
func (p *TencentProduct) IsReloadEnable() bool {
109-
if len(p.OnlyIncludeMetrics) > 0 {
110-
return false
111-
}
112109
if util.IsStrInList(constant.NotSupportInstanceNamespaces, p.Namespace) {
113110
return false
114111
}
115-
return p.AllInstances
112+
return true
116113
}
117114

118115
type TencentConfig struct {

pkg/metric/label.go

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ type TcmLabels struct {
3030
}
3131

3232
// 根据标签名, 获取所有标签的值
33-
func (l *TcmLabels) GetValues(filters map[string]string, ins instance.TcInstance) (values []string, err error) {
33+
func (l *TcmLabels) GetValues(filters map[string]string, ins instance.TcInstance) map[string]string {
3434
lowerKeyFilters := map[string]string{}
3535
for k, v := range filters {
3636
lowerKeyFilters[strings.ToLower(k)] = v
@@ -41,25 +41,18 @@ func (l *TcmLabels) GetValues(filters map[string]string, ins instance.TcInstance
4141
v, ok := lowerKeyFilters[strings.ToLower(name)]
4242
if ok {
4343
nameValues[name] = v
44-
} else {
45-
nameValues[name] = ""
4644
}
4745
}
4846
for _, name := range l.instanceLabelNames {
4947
v, e := ins.GetFieldValueByName(name)
50-
if e != nil {
51-
nameValues[name] = ""
52-
} else {
48+
if e == nil && v != "" {
5349
nameValues[name] = v
5450
}
5551
}
5652
for name, value := range l.constLabels {
5753
nameValues[name] = value
5854
}
59-
for _, name := range l.Names {
60-
values = append(values, nameValues[name])
61-
}
62-
return
55+
return nameValues
6356
}
6457

6558
func NewTcmLabels(qln []string, iln []string, cl Labels) (*TcmLabels, error) {

pkg/metric/metric.go

Lines changed: 59 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,31 @@ import (
1010
"github.com/tencentyun/tencentcloud-exporter/pkg/util"
1111
)
1212

13+
type SeriesCache struct {
14+
Series map[string]*TcmSeries // 包含的多个时间线
15+
// Cache the label names, because in some cases the dimensions returned by DescribeBaseMetrics and GetMonitorData do not match
16+
LabelNames map[string]struct{}
17+
}
18+
19+
func newCache() *SeriesCache {
20+
return &SeriesCache{
21+
Series: make(map[string]*TcmSeries),
22+
LabelNames: make(map[string]struct{}),
23+
}
24+
}
25+
26+
type Desc struct {
27+
FQName string
28+
Help string
29+
}
30+
1331
// 代表一个指标, 包含多个时间线
1432
type TcmMetric struct {
1533
Id string
16-
Meta *TcmMeta // 指标元数据
17-
Labels *TcmLabels // 指标labels
18-
Series map[string]*TcmSeries // 包含的多个时间线
19-
StatPromDesc map[string]*prometheus.Desc // 按统计纬度的Desc, max、min、avg、last
34+
Meta *TcmMeta // 指标元数据
35+
Labels *TcmLabels // 指标labels
36+
SeriesCache *SeriesCache
37+
StatPromDesc map[string]Desc // 按统计纬度的Desc, max、min、avg、last
2038
Conf *TcmMetricConfig
2139
seriesLock sync.Mutex
2240
}
@@ -25,11 +43,16 @@ func (m *TcmMetric) LoadSeries(series []*TcmSeries) error {
2543
m.seriesLock.Lock()
2644
defer m.seriesLock.Unlock()
2745

28-
newSeries := make(map[string]*TcmSeries)
46+
newSeriesCache := newCache()
47+
2948
for _, s := range series {
30-
newSeries[s.Id] = s
49+
newSeriesCache.Series[s.Id] = s
50+
// add label names
51+
for key, _ := range s.QueryLabels {
52+
newSeriesCache.LabelNames[key] = struct{}{}
53+
}
3154
}
32-
m.Series = newSeries
55+
m.SeriesCache = newSeriesCache
3356
return nil
3457
}
3558

@@ -73,21 +96,34 @@ func (m *TcmMetric) GetLatestPromMetrics(repo TcmMetricRepository) (pms []promet
7396
return nil, err
7497
}
7598
}
76-
values, err := m.Labels.GetValues(samples.Series.QueryLabels, samples.Series.Instance)
77-
if err != nil {
78-
return nil, err
99+
labels := m.Labels.GetValues(samples.Series.QueryLabels, samples.Series.Instance)
100+
// add all dimensions from cloud monitor into prom labels
101+
for _, dim := range point.Dimensions {
102+
labels[*dim.Name] = *dim.Value
79103
}
104+
var names []string
105+
var values []string
106+
for k, v := range labels {
107+
names = append(names, util.ToUnderlineLower(k))
108+
values = append(values, v)
109+
}
110+
newDesc := prometheus.NewDesc(
111+
desc.FQName,
112+
desc.Help,
113+
names,
114+
nil,
115+
)
80116
var pm prometheus.Metric
81117
if m.Conf.StatDelaySeconds > 0 {
82118
pm = prometheus.NewMetricWithTimestamp(time.Unix(int64(point.Timestamp), 0), prometheus.MustNewConstMetric(
83-
desc,
119+
newDesc,
84120
prometheus.GaugeValue,
85121
point.Value,
86122
values...,
87123
))
88124
} else {
89125
pm = prometheus.MustNewConstMetric(
90-
desc,
126+
newDesc,
91127
prometheus.GaugeValue,
92128
point.Value,
93129
values...,
@@ -102,7 +138,7 @@ func (m *TcmMetric) GetLatestPromMetrics(repo TcmMetricRepository) (pms []promet
102138

103139
func (m *TcmMetric) GetSeriesSplitByBatch(batch int) (steps [][]*TcmSeries) {
104140
var series []*TcmSeries
105-
for _, s := range m.Series {
141+
for _, s := range m.SeriesCache.Series {
106142
series = append(series, s)
107143
}
108144

@@ -130,7 +166,7 @@ func NewTcmMetric(meta *TcmMeta, conf *TcmMetricConfig) (*TcmMetric, error) {
130166
return nil, err
131167
}
132168

133-
statDescs := make(map[string]*prometheus.Desc)
169+
statDescs := make(map[string]Desc)
134170
statType, err := meta.GetStatType(conf.StatPeriodSeconds)
135171
if err != nil {
136172
return nil, err
@@ -142,10 +178,6 @@ func NewTcmMetric(meta *TcmMeta, conf *TcmMetricConfig) (*TcmMetric, error) {
142178
statType,
143179
*meta.m.Meaning.Zh,
144180
)
145-
var lnames []string
146-
for _, name := range labels.Names {
147-
lnames = append(lnames, util.ToUnderlineLower(name))
148-
}
149181
for _, s := range conf.StatTypes {
150182
var st string
151183
if s == "last" {
@@ -176,20 +208,18 @@ func NewTcmMetric(meta *TcmMeta, conf *TcmMetricConfig) (*TcmMetric, error) {
176208
st,
177209
)
178210
fqName = strings.ToLower(fqName)
179-
desc := prometheus.NewDesc(
180-
fqName,
181-
help,
182-
lnames,
183-
nil,
184-
)
185-
statDescs[strings.ToLower(s)] = desc
211+
statDescs[strings.ToLower(s)] = Desc{
212+
FQName: fqName,
213+
Help: help,
214+
}
186215
}
187216

188217
m := &TcmMetric{
189-
Id: id,
190-
Meta: meta,
191-
Labels: labels,
192-
Series: map[string]*TcmSeries{},
218+
Id: id,
219+
Meta: meta,
220+
Labels: labels,
221+
SeriesCache: newCache(),
222+
193223
StatPromDesc: statDescs,
194224
Conf: conf,
195225
}

pkg/metric/repository.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,15 +228,22 @@ func (repo *TcmMetricRepositoryImpl) buildSamples(
228228
) (*TcmSamples, map[string]string, error) {
229229
ql := map[string]string{}
230230
for _, dimension := range points.Dimensions {
231+
name := *dimension.Name
231232
if *dimension.Value != "" {
232-
ql[*dimension.Name] = *dimension.Value
233+
_, ok := m.SeriesCache.LabelNames[name]
234+
if !ok {
235+
// if the dimension is not among the query label names, it must be ignored
236+
// because the series id is the md5 of the query labels
237+
continue
238+
}
239+
ql[name] = *dimension.Value
233240
}
234241
}
235242
sid, e := GetTcmSeriesId(m, ql)
236243
if e != nil {
237244
return nil, ql, fmt.Errorf("get series id fail")
238245
}
239-
s, ok := m.Series[sid]
246+
s, ok := m.SeriesCache.Series[sid]
240247
if !ok {
241248
return nil, ql, fmt.Errorf("response data point not match series")
242249
}

pkg/metric/sample.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ import (
88

99
// 代表一个数据点
1010
type TcmSample struct {
11-
Timestamp float64
12-
Value float64
11+
Timestamp float64
12+
Value float64
13+
Dimensions []*monitor.Dimension
1314
}
1415

1516
// 代表一个时间线的多个数据点
@@ -57,8 +58,9 @@ func (s *TcmSamples) GetAvgPoint() (point *TcmSample, err error) {
5758
}
5859
avg := sum / float64(len(s.Samples))
5960
sample := &TcmSample{
60-
Timestamp: s.Samples[len(s.Samples)-1].Timestamp,
61-
Value: avg,
61+
Timestamp: s.Samples[len(s.Samples)-1].Timestamp,
62+
Value: avg,
63+
Dimensions: s.Samples[len(s.Samples)-1].Dimensions,
6264
}
6365
return sample, nil
6466
}
@@ -78,7 +80,11 @@ func NewTcmSamples(series *TcmSeries, p *monitor.DataPoint) (s *TcmSamples, err
7880
}
7981

8082
for i := 0; i < len(p.Timestamps); i++ {
81-
s.Samples = append(s.Samples, &TcmSample{*p.Timestamps[i], *p.Values[i]})
83+
s.Samples = append(s.Samples, &TcmSample{
84+
Timestamp: *p.Timestamps[i],
85+
Value: *p.Values[i],
86+
Dimensions: p.Dimensions,
87+
})
8288
}
8389
return
8490
}

0 commit comments

Comments
 (0)