Skip to content

Commit 7e1cf64

Browse files
Initial integration with slack notifications on remote failure (#344)
* Initial integration with slack notifications on remote failure
1 parent 1f95e02 commit 7e1cf64

File tree

5 files changed

+149
-11
lines changed

5 files changed

+149
-11
lines changed

poetry.lock

Lines changed: 42 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "redisbench-admin"
3-
version = "0.8.1"
3+
version = "0.8.2"
44
description = "Redis benchmark run helper. A wrapper around Redis and Redis Modules benchmark tools ( ftsb_redisearch, memtier_benchmark, redis-benchmark, aibench, etc... )."
55
authors = ["filipecosta90 <[email protected]>","Redis Performance Group <[email protected]>"]
66
readme = "README.md"
@@ -44,6 +44,9 @@ scikit-learn = "^0.22.2"
4444
Jinja2 = "^3.0.3"
4545
watchdog = "^2.1.6"
4646
redis = "4.2.2"
47+
slack-sdk = "^3.15.2"
48+
slack-bolt = "^1.13.0"
49+
certifi = "^2021.10.8"
4750

4851
[tool.poetry.dev-dependencies]
4952
pytest = "^4.6"

redisbench_admin/run/common.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@
5252
)
5353

5454
BENCHMARK_REPETITIONS = int(os.getenv("BENCHMARK_REPETITIONS", 1))
55+
# circleci related info
56+
CIRCLE_BUILD_URL = os.getenv("CIRCLE_BUILD_URL", None)
57+
CIRCLE_JOB = os.getenv("CIRCLE_JOB", None)
58+
WH_TOKEN = os.getenv("PERFORMANCE_WH_TOKEN", None)
5559
REDIS_BINARY = os.getenv("REDIS_BINARY", "redis-server")
5660

5761

redisbench_admin/run_remote/notifications.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# BSD 3-Clause License
2+
#
3+
# Copyright (c) 2022., Redis Labs Modules
4+
# All rights reserved.
5+
#
6+
7+
import logging
8+
9+
10+
def generate_failure_notification(
    webhook_client,
    job_name,
    https_link,
    failure_reason,
    gh_org,
    gh_repo,
    branch=None,
    tag=None,
):
    """Send a failure notification for a CI job through a webhook client.

    Builds a plain-text headline plus a single mrkdwn "section" block and
    passes both to ``webhook_client.send(text=..., blocks=...)``. If the
    response's ``status_code`` is not 200, the response body is logged as an
    error; no exception is raised for a failed delivery.

    Args:
        webhook_client: Object exposing ``send(text=..., blocks=...)`` whose
            return value has ``status_code`` and ``body`` attributes.
        job_name: Name of the CI job that failed.
        https_link: URL used for the "Check CI job details" link.
        failure_reason: Short description of why the job failed.
        gh_org: GitHub organization name used in the message.
        gh_repo: GitHub repository name used in the message.
        branch: Optional branch name to mention in the message.
        tag: Optional tag name to mention; when given it replaces the
            branch detail.

    Returns:
        None.
    """
    headline_text = "{}/{} FAILED job {} due to {}".format(
        gh_org, gh_repo, job_name, failure_reason
    )

    extra_detail = ""
    if branch is not None:
        extra_detail = "This job was on a branch named {}\n".format(branch)
    if tag is not None:
        # Intentionally overrides the branch detail when both are provided.
        extra_detail = "This job was on a tag named {}\n".format(tag)

    message_body = (
        "{}/{} job name {} failed due to *{}*.\n{}<{}|Check CI job details>\n".format(
            gh_org, gh_repo, job_name, failure_reason, extra_detail, https_link
        )
    )
    blocks = [
        {
            "type": "section",
            "text": {
                "type": "mrkdwn",
                "text": message_body,
            },
        }
    ]

    response = webhook_client.send(text=headline_text, blocks=blocks)
    if response.status_code != 200:
        # The original wrote `"...{}".response.body`, which raised
        # AttributeError on the str literal instead of logging the error.
        logging.error(
            "Error while sending slack notification. Error message %s",
            response.body,
        )

redisbench_admin/run_remote/run_remote.py

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@
2525
from redisbench_admin.run.common import (
2626
get_start_time_vars,
2727
BENCHMARK_REPETITIONS,
28+
CIRCLE_BUILD_URL,
29+
CIRCLE_JOB,
30+
WH_TOKEN,
2831
get_setup_type_and_primaries_count,
2932
common_properties_log,
3033
print_results_table_stdout,
@@ -41,6 +44,7 @@
4144
from redisbench_admin.run.ssh import ssh_pem_check
4245
from redisbench_admin.run_remote.args import TF_OVERRIDE_NAME
4346
from redisbench_admin.run_remote.consts import min_recommended_benchmark_duration
47+
from redisbench_admin.run_remote.notifications import generate_failure_notification
4448
from redisbench_admin.run_remote.remote_client import run_remote_client_tool
4549
from redisbench_admin.run_remote.remote_db import (
4650
remote_tmpdir_prune,
@@ -75,6 +79,7 @@
7579
EC2_REGION,
7680
)
7781

82+
from slack_sdk.webhook import WebhookClient
7883

7984
# 7 days expire
8085
STALL_INFO_DAYS = 7
@@ -116,6 +121,8 @@ def run_remote_command_logic(args, project_name, project_version):
116121
profilers_enabled = args.enable_profilers
117122
keep_env_and_topo = args.keep_env_and_topo
118123

124+
webhook_url = "https://hooks.slack.com/services/{}".format(WH_TOKEN)
125+
119126
if args.skip_env_vars_verify is False:
120127
check_ec2_env()
121128

@@ -144,14 +151,39 @@ def run_remote_command_logic(args, project_name, project_version):
144151
clusterconfig,
145152
) = prepare_benchmark_definitions(args)
146153

154+
ci_job_link = CIRCLE_BUILD_URL
155+
ci_job_name = CIRCLE_JOB
156+
failure_reason = ""
157+
webhook_notifications_active = False
158+
webhook_client_slack = None
159+
if ci_job_link is not None:
160+
logging.info(
161+
"Detected where in a CI flow named {}. Here's the reference link: {}".format(
162+
ci_job_name, ci_job_link
163+
)
164+
)
165+
webhook_notifications_active = True
166+
webhook_client_slack = WebhookClient(webhook_url)
167+
147168
return_code = 0
148169
if benchmark_defs_result is False:
149170
return_code = 1
150171
if args.fail_fast:
151-
logging.critical(
152-
"Detected errors while preparing benchmark definitions. Exiting right away!"
153-
)
154-
exit(1)
172+
failure_reason = "Detected errors while preparing benchmark definitions"
173+
logging.critical("{}. Exiting right away!".format(failure_reason))
174+
if webhook_notifications_active:
175+
generate_failure_notification(
176+
webhook_client_slack,
177+
ci_job_name,
178+
ci_job_link,
179+
failure_reason,
180+
tf_github_org,
181+
tf_github_repo,
182+
tf_github_branch,
183+
None,
184+
)
185+
186+
exit(return_code)
155187

156188
remote_envs = {}
157189
dirname = "."
@@ -872,10 +904,13 @@ def run_remote_command_logic(args, project_name, project_version):
872904
tsname_project_total_failures,
873905
)
874906
return_code |= 1
907+
failure_reason = "Some unexpected exception was caught during remote work on test named {}".format(
908+
test_name
909+
)
875910
logging.critical(
876-
"Some unexpected exception was caught "
877-
"during remote work. Failing test...."
911+
"{}. Failing test....".format(failure_reason)
878912
)
913+
879914
logging.critical(sys.exc_info()[0])
880915
print("-" * 60)
881916
traceback.print_exc(file=sys.stdout)
@@ -940,6 +975,20 @@ def run_remote_command_logic(args, project_name, project_version):
940975
EXPIRE_TIME_SECS_PROFILE_KEYS,
941976
profile_markdown_str,
942977
)
978+
979+
if return_code != 0 and webhook_notifications_active:
980+
if failure_reason == "":
981+
failure_reason = "Some unexpected exception was caught during remote work"
982+
generate_failure_notification(
983+
webhook_client_slack,
984+
ci_job_name,
985+
ci_job_link,
986+
failure_reason,
987+
tf_github_org,
988+
tf_github_repo,
989+
tf_github_branch,
990+
None,
991+
)
943992
exit(return_code)
944993

945994

0 commit comments

Comments
 (0)