
Commit 469178d

Merge pull request #101 from ARGOeu/devel
Preparing for Release
2 parents f526422 + 277f734

90 files changed: +9441 −1937 lines changed

.travis.yml

Lines changed: 5 additions & 5 deletions
@@ -8,9 +8,9 @@ python:
 script:
   - pip install -r ./bin/requirements.txt
   - pytest
-  - cd flink_jobs/ams_ingest_metric/ && mvn test
-  - cd ../batch_ar && mvn test
-  - cd ../batch_status && mvn test
-  - cd ../stream_status && mvn test
-  - cd ../ams_ingest_sync && mvn test
+  - cd flink_jobs/ams_ingest_metric/ && travis_wait mvn test
+  - cd ../batch_ar && travis_wait mvn test
+  - cd ../batch_status && travis_wait mvn test
+  - cd ../stream_status && travis_wait mvn test
+  - cd ../ams_ingest_sync && travis_wait mvn test

README.md

Lines changed: 33 additions & 0 deletions
@@ -57,6 +57,10 @@ Job optional cli parameters:
 
 `--ams.verify` : optional turn on/off ssl verify
 
+### Restart strategy
+Job has a fixed delay restart strategy. If it fails, it will try to restart for a maximum of 10 attempts with a retry interval of 2 minutes
+between each attempt
+
 ### Metric data hbase schema
 
 Metric data are stored in hbase tables using different namespaces for different tenants (e.g. hbase table name = '{TENANT_name}:metric_data')
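The restart-strategy note added above corresponds to Flink's fixed delay restart behavior. As an illustration only (the jobs configure this internally; this snippet is not taken from the repository), the equivalent settings in a standard Flink configuration file would look roughly like:

```
restart-strategy: fixed-delay
restart-strategy.fixed-delay.attempts: 10
restart-strategy.fixed-delay.delay: 2 min
```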
@@ -127,6 +131,9 @@ Job required cli parameters:
 
 `--ams.verify` : optional turn on/off ssl verify
 
+### Restart strategy
+Job has a fixed delay restart strategy. If it fails, it will try to restart for a maximum of 10 attempts with a retry interval of 2 minutes
+between each attempt
 
 ### Stream Status
 
@@ -210,6 +217,9 @@ Other optional cli parameters
 
 `--ams.verify` : optional turn on/off ssl verify
 
+### Restart strategy
+Job has a fixed delay restart strategy. If it fails, it will try to restart for a maximum of 10 attempts with a retry interval of 2 minutes
+between each attempt
 
 
 ### Status events schema
@@ -233,6 +243,25 @@ Status events are generated as JSON messages that are defined by the following c
 A metric data message can produce zero, one or more status metric events. The system analyzes the new status introduced by the metric and then aggregates on top levels to see if any other status changes are produced.
 If a status of an item actually changes an appropriate status event is produced based on the item type (endpoint_group,service,endpoint,metric).
 
+## Threshold rule files
+Each report can be accompanied by a threshold rules file which includes rules on low level metric data which may accompany a monitoring message with the field 'actual_data'.
+The rule file is in JSON format and has the following schema:
+```
+{
+  "rules": [
+    {
+      "group": "site-101",
+      "host": "host.foo",
+      "metric": "org.namespace.metric",
+      "thresholds": "firstlabel=10s;30;50:60;0;100 secondlabel=5;0:10;20:30;50;30"
+    }
+  ]
+}
+```
+Each rule has multiple thresholds separated by whitespace. Each threshold has the following format:
+`firstlabel=10s;30;50:60;0;100` which corresponds to `{{label}}={{value}}{{uom}};{{warning-range}};{{critical-range}};{{min}};{{max}}`. Each range is in the form of `{{floor}}:{{ceiling}}`, but some shortcuts can be taken in declarations.
+
+
 ## Batch Status
 
 Flink batch job that calculates status results for a specific date
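To make the threshold format added above concrete, here is a minimal, hypothetical Python sketch (not part of this commit; the real parsing is done inside the Flink jobs) that splits a `thresholds` string into the documented fields:

```
import re

def parse_threshold(expression):
    # expression format: {{label}}={{value}}{{uom}};{{warning-range}};{{critical-range}};{{min}};{{max}}
    label, data = expression.split("=", 1)
    parts = data.split(";")
    # the first field is a numeric value followed by an optional unit of measurement, e.g. "10s"
    value, uom = re.match(r"^([-+]?[0-9.]+)(.*)$", parts[0]).groups()
    threshold = {"label": label, "value": float(value), "uom": uom}
    threshold.update(dict(zip(["warning", "critical", "min", "max"], parts[1:])))
    return threshold

# each rule's "thresholds" field holds several expressions separated by whitespace
rule = "firstlabel=10s;30;50:60;0;100 secondlabel=5;0:10;20:30;50;30"
for expression in rule.split():
    print(parse_threshold(expression))
```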
@@ -273,6 +302,8 @@ Job required cli parameters:
 
 `--mongo.method` : MongoDB method to be used when storing the results ~ either: `insert` or `upsert`
 
+`--thr` : (optional) file location of threshold rules
+
 
 ## Batch AR
 
@@ -318,6 +349,8 @@ Job required cli parameters:
 
 `--mongo.method` : MongoDB method to be used when storing the results ~ either: `insert` or `upsert`
 
+`--thr` : (optional) file location of threshold rules
+
 
 ## Flink job names
 Running flink jobs can be listed either in flink dashboard by visiting `http://{{flink.webui.host}}:{{flink.webui.port}}`

bin/ar_job_submit.py

Lines changed: 102 additions & 89 deletions
@@ -5,88 +5,101 @@
 import argparse
 import datetime
 from snakebite.client import Client
-import ConfigParser
 import logging
 from urlparse import urlparse
-from utils.argo_log import ArgoLogger
 from utils.argo_mongo import ArgoMongoClient
-from utils.common import cmd_toString, date_rollback, flink_job_submit, hdfs_check_path
+from utils.common import cmd_to_string, date_rollback, flink_job_submit, hdfs_check_path, get_log_conf, get_config_paths
 from utils.update_profiles import ArgoProfileManager
+from utils.argo_config import ArgoConfig
+from utils.recomputations import upload_recomputations
 
-def compose_hdfs_commands(year, month, day, args, config, logger):
+
+log = logging.getLogger(__name__)
+
+
+def compose_hdfs_commands(year, month, day, args, config):
 
     # set up the hdfs client to be used in order to check the files
-    client = Client(config.get("HDFS", "hdfs_host"), config.getint("HDFS", "hdfs_port"), use_trash=False)
+    namenode = config.get("HDFS", "namenode")
+    client = Client(namenode.hostname, namenode.port, use_trash=False)
 
     # hdfs sync path for the tenant
-    hdfs_sync = config.get("HDFS", "hdfs_sync")
-    hdfs_sync = hdfs_sync.replace("{{hdfs_host}}", config.get("HDFS", "hdfs_host"))
-    hdfs_sync = hdfs_sync.replace("{{hdfs_port}}", config.get("HDFS", "hdfs_port"))
-    hdfs_sync = hdfs_sync.replace("{{hdfs_user}}", config.get("HDFS", "hdfs_user"))
-    hdfs_sync = hdfs_sync.replace("{{tenant}}", args.Tenant)
-
-    # hdfs metric path for the tenant
-    hdfs_metric = config.get("HDFS", "hdfs_metric")
-    hdfs_metric = hdfs_metric.replace("{{hdfs_host}}", config.get("HDFS", "hdfs_host"))
-    hdfs_metric = hdfs_metric.replace("{{hdfs_port}}", config.get("HDFS", "hdfs_port"))
-    hdfs_metric = hdfs_metric.replace("{{hdfs_user}}", config.get("HDFS", "hdfs_user"))
-    hdfs_metric = hdfs_metric.replace("{{tenant}}", args.Tenant)
+
+    hdfs_user = config.get("HDFS", "user")
+    tenant = args.tenant
+    hdfs_sync = config.get("HDFS", "path_sync")
+    hdfs_sync = hdfs_sync.fill(namenode=namenode.geturl(), hdfs_user=hdfs_user, tenant=tenant).geturl()
+
+    hdfs_metric = config.get("HDFS", "path_metric")
+
+    hdfs_metric = hdfs_metric.fill(namenode=namenode.geturl(), hdfs_user=hdfs_user, tenant=tenant).geturl()
 
     # dictionary holding all the commands with their respective arguments' name
-    hdfs_commands = {}
+    hdfs_commands = dict()
 
     # file location of previous day's metric data (local or hdfs)
-    hdfs_commands["--pdata"] = hdfs_check_path(hdfs_metric+"/"+str(datetime.date(year, month, day) - datetime.timedelta(1)), logger, client)
+    hdfs_commands["--pdata"] = hdfs_check_path(
+        hdfs_metric + "/" + str(datetime.date(year, month, day) - datetime.timedelta(1)), client)
 
     # file location of target day's metric data (local or hdfs)
-    hdfs_commands["--mdata"] = hdfs_check_path(hdfs_metric+"/"+args.Date, logger, client)
+    hdfs_commands["--mdata"] = hdfs_check_path(hdfs_metric + "/" + args.date, client)
 
     # file location of report configuration json file (local or hdfs)
-    hdfs_commands["--conf"] = hdfs_check_path(hdfs_sync+"/"+args.Tenant+"_"+args.Report+"_cfg.json", logger, client)
+    hdfs_commands["--conf"] = hdfs_check_path(hdfs_sync + "/" + args.tenant+"_"+args.report+"_cfg.json", client)
 
     # file location of metric profile (local or hdfs)
-    hdfs_commands["--mps"] = date_rollback(hdfs_sync+"/"+args.Report+"/"+"metric_profile_"+"{{date}}"+".avro", year, month, day, config, logger, client)
+    hdfs_commands["--mps"] = date_rollback(
+        hdfs_sync + "/" + args.report + "/" + "metric_profile_" + "{{date}}" + ".avro", year, month, day, config,
+        client)
 
     # file location of operations profile (local or hdfs)
-    hdfs_commands["--ops"] = hdfs_check_path(hdfs_sync+"/"+args.Tenant+"_ops.json", logger, client)
+    hdfs_commands["--ops"] = hdfs_check_path(hdfs_sync+"/"+args.tenant+"_ops.json", client)
 
     # file location of aggregations profile (local or hdfs)
-    hdfs_commands["--apr"] = hdfs_check_path(hdfs_sync+"/"+args.Tenant+"_"+args.Report+"_ap.json", logger, client)
+    hdfs_commands["--apr"] = hdfs_check_path(hdfs_sync+"/"+args.tenant+"_"+args.report+"_ap.json", client)
+
+    if args.thresholds:
+        # file location of thresholds rules file (local or hdfs)
+        hdfs_commands["--thr"] = hdfs_check_path(
+            os.path.join(hdfs_sync, "".join([args.tenant, "_", args.report, "_thresholds.json"])), client)
 
     # file location of endpoint group topology file (local or hdfs)
-    hdfs_commands["-egp"] = date_rollback(hdfs_sync+"/"+args.Report+"/"+"group_endpoints_"+"{{date}}"+".avro", year, month, day, config, logger, client)
+    hdfs_commands["-egp"] = date_rollback(
+        hdfs_sync + "/" + args.report + "/" + "group_endpoints_" + "{{date}}" + ".avro", year, month, day, config,
+        client)
 
     # file location of group of groups topology file (local or hdfs)
-    hdfs_commands["-ggp"] = date_rollback(hdfs_sync+"/"+args.Report+"/"+"group_groups_"+"{{date}}"+".avro", year, month, day, config, logger, client)
+    hdfs_commands["-ggp"] = date_rollback(hdfs_sync + "/" + args.report + "/" + "group_groups_" + "{{date}}" + ".avro",
+                                          year, month, day, config, client)
 
     # file location of weights file (local or hdfs)
-    hdfs_commands["--weights"] = date_rollback(hdfs_sync+"/"+args.Report+"/weights_"+"{{date}}"+".avro", year, month, day, config, logger, client)
+    hdfs_commands["--weights"] = date_rollback(hdfs_sync + "/" + args.report + "/weights_" + "{{date}}" + ".avro", year,
                                               month, day, config, client)
 
     # file location of downtimes file (local or hdfs)
-    hdfs_commands["--downtimes"] = hdfs_check_path(hdfs_sync+"/"+args.Report+"/downtimes_"+str(datetime.date(year, month, day))+".avro", logger, client)
+    hdfs_commands["--downtimes"] = hdfs_check_path(
+        hdfs_sync + "/" + args.report + "/downtimes_" + str(datetime.date(year, month, day)) + ".avro", client)
 
     # file location of recomputations file (local or hdfs)
     # first check if there is a recomputations file for the given date
-    if client.test(urlparse(hdfs_sync+"/recomp_"+args.Date+".json").path, exists=True):
-        hdfs_commands["--rec"] = hdfs_sync+"/recomp_"+args.Date+".json"
+    # recomputation lies in the hdfs in the form of
+    # /sync/recomp_TENANTNAME_ReportName_2018-08-02.json
+    if client.test(urlparse(hdfs_sync+"/recomp_"+args.tenant+"_"+args.report+"_"+args.date+".json").path, exists=True):
+        hdfs_commands["--rec"] = hdfs_sync+"/recomp_"+args.date+".json"
     else:
-        hdfs_commands["--rec"] = hdfs_check_path(hdfs_sync+"/recomp.json", logger, client)
+        hdfs_commands["--rec"] = hdfs_check_path(hdfs_sync+"/recomp.json", client)
 
     return hdfs_commands
 
 
-def compose_command(config, args, hdfs_commands, logger=None):
+def compose_command(config, args, hdfs_commands):
 
-    # job sumbission command
+    # job submission command
     cmd_command = []
 
-    if args.Sudo is True:
+    if args.sudo is True:
         cmd_command.append("sudo")
 
-    # create a simple stream_handler whenever tetsing
-    if logger is None:
-        logger = ArgoLogger()
-
     # flink executable
     cmd_command.append(config.get("FLINK", "path"))
 
@@ -102,105 +115,105 @@ def compose_command(config, args, hdfs_commands, logger=None):
 
     # date the report will run for
     cmd_command.append("--run.date")
-    cmd_command.append(args.Date)
+    cmd_command.append(args.date)
 
     # MongoDB uri for outputting the results to (e.g. mongodb://localhost:21017/example_db)
     cmd_command.append("--mongo.uri")
-    mongo_tenant = "TENANTS:"+args.Tenant+":MONGO"
-    mongo_uri = config.get(mongo_tenant, "mongo_uri")
-    mongo_uri = mongo_uri.replace("{{mongo_host}}", config.get(mongo_tenant, "mongo_host"))
-    mongo_uri = mongo_uri.replace("{{mongo_port}}", config.get(mongo_tenant, "mongo_port"))
-    cmd_command.append(mongo_uri)
-
-    if args.Method == "insert":
-        argo_mongo_client = ArgoMongoClient(args, config, logger, ["service_ar", "endpoint_group_ar"])
+    group_tenant = "TENANTS:"+args.tenant
+    mongo_endpoint = config.get("MONGO","endpoint").geturl()
+    mongo_uri = config.get(group_tenant, "mongo_uri").fill(mongo_endpoint=mongo_endpoint, tenant=args.tenant)
+    cmd_command.append(mongo_uri.geturl())
+
+    if args.method == "insert":
+        argo_mongo_client = ArgoMongoClient(args, config, ["service_ar", "endpoint_group_ar"])
         argo_mongo_client.mongo_clean_ar(mongo_uri)
 
     # MongoDB method to be used when storing the results, either insert or upsert
     cmd_command.append("--mongo.method")
-    cmd_command.append(args.Method)
+    cmd_command.append(args.method)
 
     # add the hdfs commands
     for command in hdfs_commands:
         cmd_command.append(command)
         cmd_command.append(hdfs_commands[command])
 
-    # ams proxy
-    if config.getboolean("AMS", "proxy_enabled"):
+    # get optional ams proxy
+    proxy = config.get("AMS", "proxy")
+    if proxy is not None:
         cmd_command.append("--ams.proxy")
-        cmd_command.append(config.get("AMS", "ams_proxy"))
+        cmd_command.append(proxy.geturl())
 
     # ssl verify
     cmd_command.append("--ams.verify")
-    if config.getboolean("AMS", "ssl_enabled"):
-        cmd_command.append("true")
+    ams_verify = config.get("AMS", "verify")
+    if ams_verify is not None:
+        cmd_command.append(str(ams_verify).lower())
     else:
-        cmd_command.append("false")
+        # by default assume ams verify is always true
+        cmd_command.append("true")
 
     return cmd_command
 
 
 def main(args=None):
 
-    # make sure the argument are in the correct form
-    args.Tenant = args.Tenant.upper()
-    args.Method = args.Method.lower()
-
-    year, month, day = [int(x) for x in args.Date.split("-")]
+    # Get configuration paths
+    conf_paths = get_config_paths(args.config)
 
-    # set up the config parser
-    config = ConfigParser.ConfigParser()
+    # Get logger config file
+    get_log_conf(conf_paths['log'])
 
-    # check if config file has been given as cli argument else
-    # check if config file resides in /etc/argo-streaming/ folder else
-    # check if config file resides in local folder
-    if args.ConfigPath is None:
-        if os.path.isfile("/etc/argo-streaming/conf/conf.cfg"):
-            config.read("/etc/argo-streaming/conf/conf.cfg")
-        else:
-            config.read("../conf/conf.cfg")
-    else:
-        config.read(args.ConfigPath)
+    # Get main configuration and schema
+    config = ArgoConfig(conf_paths["main"], conf_paths["schema"])
 
-    # set up the logger
-    logger = ArgoLogger(log_name="batch-ar", config=config)
+    year, month, day = [int(x) for x in args.date.split("-")]
 
     # check if configuration for the given tenant exists
-    if not config.has_section("TENANTS:"+args.Tenant):
-        logger.print_and_log(logging.CRITICAL, "Tenant: "+args.Tenant+" doesn't exist.", 1)
+    if not config.has("TENANTS:"+args.tenant):
+        log.info("Tenant: "+args.tenant+" doesn't exist.")
+        sys.exit(1)
 
-    # call update profiles
-    profile_mgr = ArgoProfileManager(args.ConfigPath)
-    profile_mgr.profile_update_check(args.Tenant, args.Report)
+    # check and upload recomputations
+    upload_recomputations(args.tenant, args.report, args.date, config)
 
+    # optional call to update profiles
+    if args.profile_check:
+        profile_mgr = ArgoProfileManager(config)
+        profile_type_checklist = ["operations", "aggregations", "reports", "thresholds"]
+        for profile_type in profile_type_checklist:
+            profile_mgr.profile_update_check(args.tenant, args.report, profile_type)
 
     # dictionary containing the argument's name and the command assosciated with each name
-    hdfs_commands = compose_hdfs_commands(year, month, day, args, config, logger)
+    hdfs_commands = compose_hdfs_commands(year, month, day, args, config)
 
-    cmd_command = compose_command(config, args, hdfs_commands, logger)
+    cmd_command = compose_command(config, args, hdfs_commands)
 
-    logger.print_and_log(logging.INFO, "Getting ready to submit job")
-    logger.print_and_log(logging.INFO, cmd_toString(cmd_command)+"\n")
+    log.info("Getting ready to submit job")
+    log.info(cmd_to_string(cmd_command)+"\n")
 
     # submit the script's command
-    flink_job_submit(config, logger, cmd_command)
+    flink_job_submit(config, cmd_command)
 
 
 if __name__ == "__main__":
 
     parser = argparse.ArgumentParser(description="Batch A/R Job submit script")
     parser.add_argument(
-        "-t", "--Tenant", type=str, help="Name of the tenant", required=True)
+        "-t", "--tenant", metavar="STRING", help="Name of the tenant", required=True, dest="tenant")
     parser.add_argument(
-        "-r", "--Report", type=str, help="Report status", required=True)
+        "-r", "--report", metavar="STRING", help="Report status", required=True, dest="report")
     parser.add_argument(
-        "-d", "--Date", type=str, help="Date to run the job for", required=True)
+        "-d", "--date", metavar="DATE(YYYY-MM-DD)", help="Date to run the job for", required=True, dest="date")
     parser.add_argument(
-        "-m", "--Method", type=str, help="Insert or Upsert data in mongoDB", required=True)
+        "-m", "--method", metavar="KEYWORD(insert|upsert)", help="Insert or Upsert data in mongoDB", required=True, dest="method")
     parser.add_argument(
-        "-c", "--ConfigPath", type=str, help="Path for the config file")
+        "-c", "--config", metavar="PATH", help="Path for the config file", dest="config")
     parser.add_argument(
-        "-u", "--Sudo", help="Run the submition as superuser", action="store_true")
+        "-u", "--sudo", help="Run the submition as superuser", action="store_true")
+    parser.add_argument("--profile-check", help="check if profiles are up to date before running job",
+                        dest="profile_check", action="store_true")
+    parser.add_argument("--thresholds", help="check and use threshold rule file if exists",
+                        dest="thresholds", action="store_true")
 
     # Pass the arguments to main method
     sys.exit(main(parser.parse_args()))
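Based on the argument parser above, a typical invocation of the updated submit script might look like the following; the tenant name, report name and config path are placeholders, not values taken from this commit:

```
./bin/ar_job_submit.py -t TENANT_A -r Critical -d 2018-08-02 -m insert \
    -c /etc/argo-streaming/conf/conf.cfg --profile-check --thresholds
```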
