Skip to content

Commit 9b3c8fb

Browse files
meilirensheng2020jackblack369
authored and committed
[refact][monitor] Automatically generate monitor target.
1 parent 88274f1 commit 9b3c8fb

File tree

6 files changed

+100
-145
lines changed

6 files changed

+100
-145
lines changed

scripts/monitor/README.md

Lines changed: 8 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,8 @@ monitor
88
├── grafana # grafana相关目录
99
│ ├── dashboards # grafana所有dashboards的json文件存放目录,grafana将从该目录加载文件来创建dashboards;
1010
| | |
11-
│ │ ├── etcd.json
1211
│ │ ├── mds.json
13-
│ │ ├── metaserver.json
12+
│ │ ├── remotecache.json
1413
│ │ └── client.json
1514
│ ├── grafana.ini # grafana的启动配置文件,将映射到容器的 `/etc/grafana/grafana.ini` 上
1615
│ ├── provisioning # grafana预配置相关目录,将映射到容器的`/etc/grafana/provisioning`上
@@ -114,7 +113,7 @@ ff6f895bf9b1 grafana/grafana "/run.sh"
114113

115114
* 手工修改
116115

117-
你可以手工修改监控目标文件targets.json和etcd_targets.json来新增或者删除目标,如下所示:
116+
你可以手工修改监控目标文件targets.json来新增或者删除目标,如下所示:
118117

119118
targets.json:
120119
```
@@ -129,16 +128,6 @@ targets.json:
129128
"172.20.0.12:7700"
130129
]
131130
},
132-
{
133-
"labels": {
134-
"job": "metaserver"
135-
},
136-
"targets": [
137-
"172.20.0.10:6800",
138-
"172.20.0.11:6800",
139-
"172.20.0.12:6800",
140-
]
141-
},
142131
{
143132
"labels": {
144133
"job": "client"
@@ -161,22 +150,6 @@ targets.json:
161150
]
162151
```
163152

164-
etcd_targets.json:
165-
```
166-
[
167-
{
168-
"labels": {
169-
"job": "etcd"
170-
},
171-
"targets": [
172-
"172.20.0.10:2379",
173-
"172.20.0.11:2379",
174-
"172.20.0.12:2379",
175-
]
176-
}
177-
]
178-
```
179-
180153
* 自动更新
181154

182155
自动更新需要依赖于dingo工具,确保已经安装好dingo工具和配置文件($HOME/.dingo/dingo.yaml),并在PATH配置好dingo安装路径。
@@ -192,7 +165,12 @@ dingo version
192165
```
193166
nohup python3 target_json.py &
194167
```
195-
target_json.py工具每隔30秒会通过dingo工具拉去集群信息,并更新targets.json文件,后续新增挂载点或者卸载挂载点,都将会自动更新。
168+
target_json.py工具默认每隔60秒会通过dingo工具拉取集群信息,并更新targets.json文件,后续新增挂载点或者卸载挂载点,都将会自动更新。
169+
170+
你也可以通过参数来指定时间间隔和次数:
171+
```
172+
python3 target_json.py --interval 10 --count 2
173+
```
196174

197175
# 7.监控系统的访问
198176

scripts/monitor/prometheus/data/etcd_targets.json

Lines changed: 0 additions & 9 deletions
This file was deleted.
Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,31 @@
1-
[
2-
{
3-
"labels": {
4-
"job": "mds"
5-
},
6-
"targets": [
7-
]
8-
},
9-
{
10-
"labels": {
11-
"job": "metaserver"
12-
},
13-
"targets": [
14-
]
15-
},
16-
{
17-
"labels": {
18-
"job": "client"
19-
},
20-
"targets": [
21-
]
22-
}
23-
]
1+
[
2+
{
3+
"labels": {
4+
"job": "mds"
5+
},
6+
"targets": [
7+
"10.220.32.16:6900",
8+
"10.220.32.17:6900",
9+
"10.220.32.18:6900"
10+
]
11+
},
12+
{
13+
"labels": {
14+
"job": "client"
15+
},
16+
"targets": [
17+
"10.220.32.40:30030",
18+
"10.220.32.40:30020"
19+
]
20+
},
21+
{
22+
"labels": {
23+
"job": "remotecache"
24+
},
25+
"targets": [
26+
"10.220.32.18:30020",
27+
"10.220.32.17:30020",
28+
"10.220.32.16:30020"
29+
]
30+
}
31+
]

scripts/monitor/prometheus/prometheus.yml

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,3 @@ scrape_configs:
3131
metrics_path: 'brpc_metrics'
3232
file_sd_configs:
3333
- files: ['/prometheus/targets.json']
34-
35-
- job_name: 'etcd_metrics'
36-
metrics_path: 'metrics'
37-
file_sd_configs:
38-
- files: ['/prometheus/etcd_targets.json']
39-
40-
- job_name: 'node'
41-
# Override the global default and scrape targets from this job every 5 seconds.
42-
scrape_interval: 5s
43-
static_configs:
44-
- targets: []
45-
labels:
46-
group: 'metric1'

scripts/monitor/target.ini

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
[path]
2-
target_path=prometheus/data/targets.json
3-
etcd_target_path=prometheus/data/etcd_targets.json
2+
target_path=prometheus/data/targets.json

scripts/monitor/target_json.py

Lines changed: 60 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,19 @@
88
import configparser
99
import subprocess
1010
import re
11+
import argparse
1112

1213
DingoFS_TOOL = "dingo"
1314
HOSTNAME_PORT_REGEX = r"[^\"\ ]\S*:\d+"
1415
IP_PORT_REGEX = r"[0-9]+(?:\.[0-9]+){3}:\d+"
1516

1617
targetPath=None
17-
etcdTargetPath=None
1818

1919
def loadConf():
2020
global targetPath
21-
global etcdTargetPath
2221
conf=configparser.ConfigParser()
2322
conf.read("target.ini")
2423
targetPath=conf.get("path", "target_path")
25-
etcdTargetPath=conf.get("path", "etcd_target_path")
2624

2725
def runDingofsToolCommand(command):
2826
cmd = [DingoFS_TOOL]+command
@@ -41,63 +39,37 @@ def loadMdsServer():
4139
label = lablesValue(None, "mds")
4240
if ret == 0 :
4341
data = json.loads(output)
44-
for mdsInfo in data["result"]:
45-
# hostname:port:path
46-
dummyAddr = mdsInfo["dummyAddr"]
47-
mdsServers.append(dummyAddr)
42+
result = data["result"]
43+
for mdsInfo in result["mdses"]:
44+
location = mdsInfo["location"]
45+
mdsServers.append(ipPort2Addr(location["host"],location["port"]))
4846
return unitValue(label, mdsServers)
4947

50-
def loadMetaServer():
51-
ret, data = runDingofsToolCommand(["list", "topology","--format=json"])
52-
if ret == 0:
53-
jsonData = json.loads(data)
54-
jsonData = jsonData["result"]
55-
56-
metaservers = []
57-
if jsonData is not None:
58-
for pool in jsonData["poollist"]:
59-
for zone in pool["zoneList"]:
60-
for server in zone["serverList"]:
61-
for metaserver in server["metaserverList"]:
62-
metaservers.append(metaserver)
63-
targets = []
64-
labels = lablesValue(None, "metaserver")
65-
for server in metaservers:
66-
targets.append(ipPort2Addr(server["externalIp"], server["externalPort"]))
67-
targets = list(set(targets))
68-
return unitValue(labels, targets)
69-
70-
def loadEtcdServer():
71-
ret, output = runDingofsToolCommand(["status","etcd","--format=json"])
72-
etcdServers = []
73-
label = lablesValue(None, "etcd")
74-
if ret == 0 :
75-
data = json.loads(output)
76-
for etcdInfo in data["result"]:
77-
etcdAddr = etcdInfo["addr"]
78-
etcdServers.append(etcdAddr)
79-
return unitValue(label, etcdServers)
80-
8148
def loadClient():
8249
ret, output = runDingofsToolCommand(["list","mountpoint","--format=json"])
8350
clients = []
8451
label = lablesValue(None, "client")
8552
if ret == 0 :
8653
data = json.loads(output)
87-
for fsinfo in data["result"]:
88-
# hostname:port:path
89-
mountpoint = str(fsinfo["mountpoint"])
90-
muontListData=mountpoint.split(":")
91-
clients.append(muontListData[0] + ":" + muontListData[1])
54+
result = data["result"]
55+
for fsinfo in result["fsInfos"]:
56+
mountPoints = fsinfo.get("mountPoints")
57+
if mountPoints is None:
58+
continue
59+
for mountpoint in mountPoints:
60+
clients.append(ipPort2Addr(mountpoint["hostname"],mountpoint["port"]))
9261
return unitValue(label, clients)
9362

94-
def loadType(hostType):
95-
ret, output = runDingofsToolCommand(["status-%s"%hostType])
96-
targets = []
97-
if ret == 0:
98-
targets = re.findall(IP_PORT_REGEX, str(output))
99-
labels = lablesValue(None, hostType)
100-
return unitValue(labels, targets)
63+
def loadRemoteCacheServer():
64+
ret, output = runDingofsToolCommand(["list","cachemember","--format=json"])
65+
cacheServers = []
66+
label = lablesValue(None, "remotecache")
67+
if ret == 0 :
68+
data = json.loads(output)
69+
result = data["result"]
70+
for cacheMember in result["members"]:
71+
cacheServers.append(ipPort2Addr(cacheMember["ip"],cacheMember["port"]))
72+
return unitValue(label, cacheServers)
10173

10274
def ipPort2Addr(ip, port):
10375
return str(ip) + ":" + str(port)
@@ -119,22 +91,18 @@ def unitValue(lables, targets):
11991
return unit
12092

12193

122-
def refresh():
94+
def refresh(isShow=False):
12395
targets = []
124-
etcd_targets = []
12596

12697
# load mds
12798
mdsServers = loadMdsServer()
12899
targets.append(mdsServers)
129-
# load metaserver
130-
metaServers = loadMetaServer()
131-
targets.append(metaServers)
132100
# load client
133101
client = loadClient()
134102
targets.append(client)
135-
# load etcd
136-
etcdServers = loadEtcdServer()
137-
etcd_targets.append(etcdServers)
103+
# load cachemember
104+
cachemember = loadRemoteCacheServer()
105+
targets.append(cachemember)
138106

139107
with open(targetPath+'.new', 'w', 0o777) as fd:
140108
json.dump(targets, fd, indent=4)
@@ -144,17 +112,41 @@ def refresh():
144112
os.rename(targetPath+'.new', targetPath)
145113
os.chmod(targetPath, 0o777)
146114

147-
with open(etcdTargetPath+'.new', 'w', 0o777) as etcd_fd:
148-
json.dump(etcd_targets, etcd_fd, indent=4)
149-
etcd_fd.flush()
150-
os.fsync(etcd_fd.fileno())
151-
152-
os.rename(etcdTargetPath+'.new', etcdTargetPath)
153-
os.chmod(etcdTargetPath, 0o777)
115+
if isShow:
116+
print(json.dumps(targets, indent=4))
154117

155118
if __name__ == '__main__':
119+
parser = argparse.ArgumentParser(
120+
description='generate target for dingofs monitor',
121+
formatter_class=argparse.RawDescriptionHelpFormatter
122+
)
123+
parser.add_argument('--interval',
124+
type=int,
125+
default=60,
126+
help='execute internal(s), default 60s')
127+
128+
parser.add_argument('--count',
129+
type=int,
130+
help='execute count, default infinite, always execute')
131+
132+
parser.add_argument('--show',
133+
type=bool,
134+
default=False,
135+
help='show target info, default False')
136+
137+
args = parser.parse_args()
138+
139+
interval = args.interval
140+
count = args.count
141+
isShow = args.show
142+
143+
print("Realtime update target is running, ","interval:", interval, "count:", count, "show:", isShow)
144+
145+
current_count = 0
156146
while True:
147+
current_count += 1
157148
loadConf()
158-
refresh()
159-
# refresh every 30s
160-
time.sleep(30)
149+
refresh(isShow)
150+
if count is not None and current_count >= count:
151+
break
152+
time.sleep(interval)

0 commit comments

Comments
 (0)