Skip to content

Commit 5271f6c

Browse files
authored
Merge pull request #100 from BattleL/dev/fix-client-err
feat: set custom monitor client and retry when GetMonitorData context…
2 parents fc341cf + c7420de commit 5271f6c

File tree

3 files changed

+65
-18
lines changed

3 files changed

+65
-18
lines changed

pkg/client/client.go

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
package client
22

33
import (
4-
"github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common/regions"
4+
"net"
55
"net/http"
66
"net/url"
7+
"time"
8+
9+
"github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common/regions"
710

811
cbs "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/cbs/v20170312"
912
cdb "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/cdb/v20170320"
@@ -45,7 +48,29 @@ func NewMonitorClient(cred common.CredentialIface, conf *config.TencentConfig, r
4548
} else {
4649
cpf.HttpProfile.Endpoint = "monitor.tencentcloudapi.com"
4750
}
48-
return monitor.NewClient(cred, region, cpf)
51+
return newClient(cred, region, cpf)
52+
}
53+
54+
func newClient(credential common.CredentialIface,
55+
region string, clientProfile *profile.ClientProfile) (client *monitor.Client, err error) {
56+
client = &monitor.Client{}
57+
transport := &http.Transport{
58+
Proxy: http.ProxyFromEnvironment,
59+
DialContext: (&net.Dialer{
60+
Timeout: 30 * time.Second,
61+
KeepAlive: 5 * time.Second,
62+
}).DialContext,
63+
ForceAttemptHTTP2: true,
64+
MaxIdleConns: 0,
65+
IdleConnTimeout: 30 * time.Second,
66+
TLSHandshakeTimeout: 30 * time.Second,
67+
ExpectContinueTimeout: 1 * time.Second,
68+
}
69+
clientProfile.HttpProfile.ReqTimeout = 5
70+
client.Init(region).
71+
WithCredential(credential).
72+
WithProfile(clientProfile).WithHttpTransport(transport)
73+
return
4974
}
5075

5176
func NewMongodbClient(cred common.CredentialIface, conf *config.TencentConfig) (*mongodb.Client, error) {

pkg/collector/product.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,13 +244,14 @@ func (c *TcProductCollector) Collect(ch chan<- prometheus.Metric) (err error) {
244244
for _, query := range c.Querys {
245245
go func(q *metric.TcmQuery) {
246246
defer wg.Done()
247-
pms, err := q.GetPromMetrics()
248-
if err != nil {
247+
pms, err0 := q.GetPromMetrics()
248+
if err0 != nil {
249249
level.Error(c.logger).Log(
250250
"msg", "Get samples fail",
251251
"err", err,
252252
"metric", q.Metric.Id,
253253
)
254+
err = err0
254255
} else {
255256
for _, pm := range pms {
256257
ch <- pm

pkg/metric/repository.go

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package metric
33
import (
44
"context"
55
"fmt"
6+
"strings"
67
"time"
78

89
"github.com/tencentyun/tencentcloud-exporter/pkg/util"
@@ -134,15 +135,12 @@ func (repo *TcmMetricRepositoryImpl) GetSamples(s *TcmSeries, st int64, et int64
134135
request.EndTime = &etStr
135136
}
136137

138+
start := time.Now()
137139
response := &v20180724.GetMonitorDataResponse{}
138-
if repo.IsInternational && s.Metric.Meta.ProductName == "QAAP" {
139-
response, err = repo.monitorClientInSinapore.GetMonitorData(request)
140-
} else if util.IsStrInList(config.QcloudNamespace, s.Metric.Meta.ProductName) {
141-
response, err = repo.monitorClientInGuangzhou.GetMonitorData(request)
142-
} else {
143-
response, err = repo.monitorClient.GetMonitorData(request)
144-
}
140+
response, err = repo.getMonitorDataWithRetry(s.Metric.Meta.ProductName, request)
145141
if err != nil {
142+
level.Error(repo.logger).Log(
143+
"request start time ", stStr, "duration ", time.Since(start).Seconds(), "err ", err.Error())
146144
return
147145
}
148146

@@ -157,6 +155,29 @@ func (repo *TcmMetricRepositoryImpl) GetSamples(s *TcmSeries, st int64, et int64
157155
return
158156
}
159157

158+
func (repo *TcmMetricRepositoryImpl) getMonitorDataWithRetry(
159+
productName string, request *monitor.GetMonitorDataRequest) (*v20180724.GetMonitorDataResponse, error) {
160+
var lastErr error
161+
monitorClient := repo.monitorClient
162+
if repo.IsInternational && productName == "QAAP" {
163+
monitorClient = repo.monitorClientInSinapore
164+
} else if util.IsStrInList(config.QcloudNamespace, productName) {
165+
monitorClient = repo.monitorClientInGuangzhou
166+
}
167+
for i := 0; i < 3; i++ {
168+
resp, err := monitorClient.GetMonitorData(request)
169+
if err != nil {
170+
if strings.Contains(err.Error(), context.DeadlineExceeded.Error()) {
171+
lastErr = err
172+
continue
173+
}
174+
return nil, err
175+
}
176+
return resp, nil
177+
}
178+
return nil, lastErr
179+
}
180+
160181
func (repo *TcmMetricRepositoryImpl) ListSamples(m *TcmMetric, st int64, et int64) ([]*TcmSamples, error) {
161182
var samplesList []*TcmSamples
162183
for _, seriesList := range m.GetSeriesSplitByBatch(repo.queryMetricBatchSize) {
@@ -188,17 +209,17 @@ func (repo *TcmMetricRepositoryImpl) listSampleByBatch(
188209

189210
request := repo.buildGetMonitorDataRequest(m, seriesList, st, et)
190211

212+
start := time.Now()
191213
response := &v20180724.GetMonitorDataResponse{}
192-
if repo.IsInternational && m.Meta.ProductName == "QAAP" {
193-
response, err = repo.monitorClientInSinapore.GetMonitorData(request)
194-
} else if util.IsStrInList(config.QcloudNamespace, m.Meta.ProductName) {
195-
response, err = repo.monitorClientInGuangzhou.GetMonitorData(request)
196-
} else {
197-
response, err = repo.monitorClient.GetMonitorData(request)
198-
}
214+
response, err = repo.getMonitorDataWithRetry(m.Meta.ProductName, request)
199215
if err != nil {
216+
level.Error(repo.logger).Log(
217+
"request start time ", *request.StartTime,
218+
"duration ", time.Since(start).Seconds(),
219+
"err ", err.Error())
200220
return nil, err
201221
}
222+
202223
for _, points := range response.Response.DataPoints {
203224
samples, ql, e := repo.buildSamples(m, points)
204225
if e != nil {

0 commit comments

Comments
 (0)