Skip to content

Commit 2d33fc6

Browse files
authored
A few improvements to bazel ci scripts (#2076)
`aggregate_incompatible_flags_test_result.py`: - Stripped out the timestamps that Buildkite adds to job logs. - Added support for collecting incompatible flag test results for https://buildkite.com/bazel/bcr-bazel-compatibility-test `bazelci.py`: - Added support for overriding the Bazel version in the task config before task expansion. - Added support for `concurrency` and `concurrency_group` to limit CI resource usage. - Avoided hitting 429 Too Many Requests errors while fetching a large number of Buildkite job logs.
1 parent 7fe019f commit 2d33fc6

File tree

2 files changed

+137
-57
lines changed

2 files changed

+137
-57
lines changed

buildkite/aggregate_incompatible_flags_test_result.py

Lines changed: 85 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,26 @@
3131

3232
FLAG_LINE_PATTERN = re.compile(r"\s*(?P<flag>--\S+)\s*")
3333

34+
MODULE_VERSION_PATTERN = re.compile(r'(?P<module_version>[a-z](?:[a-z0-9._-]*[a-z0-9])?@[^\s]+)')
35+
36+
BAZEL_TEAM_OWNED_MODULES = frozenset([
37+
"bazel-skylib",
38+
"rules_android",
39+
"rules_android_ndk",
40+
"rules_cc",
41+
"rules_java",
42+
"rules_license",
43+
"rules_pkg",
44+
"rules_platform",
45+
"rules_shell",
46+
"rules_testing",
47+
])
48+
49+
PROJECT = "module" if PIPELINE == "bcr-bazel-compatibility-test" else "project"
50+
51+
MAX_LOG_FETCHER_THREADS = 30
52+
LOG_FETCHER_SEMAPHORE = threading.Semaphore(MAX_LOG_FETCHER_THREADS)
53+
3454
class LogFetcher(threading.Thread):
3555
def __init__(self, job, client):
3656
threading.Thread.__init__(self)
@@ -39,7 +59,8 @@ def __init__(self, job, client):
3959
self.log = None
4060

4161
def run(self):
42-
self.log = self.client.get_build_log(self.job)
62+
with LOG_FETCHER_SEMAPHORE:
63+
self.log = self.client.get_build_log(self.job)
4364

4465

4566
def process_build_log(failed_jobs_per_flag, already_failing_jobs, log, job):
@@ -59,6 +80,10 @@ def handle_failing_flags(line):
5980
if index_success == -1 or index_failure == -1:
6081
raise bazelci.BuildkiteException("Cannot recognize log of " + job["web_url"])
6182
for line in log[index_failure:].split("\n"):
83+
# Strip out BuildKite timestamp prefix
84+
line = re.sub(r'\x1b.*?\x07', '', line.strip())
85+
if not line:
86+
break
6287
handle_failing_flags(line)
6388
log = log[0 : log.rfind("+++ Result")]
6489

@@ -67,6 +92,12 @@ def handle_failing_flags(line):
6792
already_failing_jobs.append(job)
6893

6994

95+
def extract_module_version(line):
    """Return the first ``module@version`` token found in ``line``.

    The token is whatever MODULE_VERSION_PATTERN captures in its
    ``module_version`` group. Returns None when the pattern does not occur
    anywhere in ``line``.
    """
    found = MODULE_VERSION_PATTERN.search(line)
    return found.group("module_version") if found else None
99+
100+
70101
def extract_flag(line):
71102
match = FLAG_LINE_PATTERN.match(line)
72103
if match:
@@ -77,19 +108,28 @@ def get_html_link_text(content, link):
77108
return f'<a href="{link}" target="_blank">{content}</a>'
78109

79110

111+
def is_project_owned_by_bazel_team(project):
    """Tell whether ``project`` has to be migrated by the Bazel team.

    Args:
        project: A downstream pipeline name, or a ``module@version`` string.
            May be None or empty when the project name could not be parsed
            from a job name.

    Returns:
        True if the project is marked ``owned_by_bazel`` in
        bazelci.DOWNSTREAM_PROJECTS (only consulted for the downstream
        pipeline), or if the part before ``@`` is listed in
        BAZEL_TEAM_OWNED_MODULES. False otherwise.
    """
    if not project:
        # get_project_name() yields None when no module@version token can be
        # extracted; without this guard project.split() below would raise
        # AttributeError and abort the whole report.
        return False
    if (
        bazelci.is_downstream_pipeline()
        and project in bazelci.DOWNSTREAM_PROJECTS
        and bazelci.DOWNSTREAM_PROJECTS[project].get("owned_by_bazel")
    ):
        # Check the downstream projects definition.
        return True
    # Parse the module name and check if it's bazel team owned.
    return project.split("@")[0] in BAZEL_TEAM_OWNED_MODULES
121+
80122
# Check if any of the given jobs needs to be migrated by the Bazel team
81123
def needs_bazel_team_migrate(jobs):
82124
for job in jobs:
83-
pipeline, _ = get_pipeline_and_platform(job)
84-
if pipeline in bazelci.DOWNSTREAM_PROJECTS and bazelci.DOWNSTREAM_PROJECTS[pipeline].get(
85-
"owned_by_bazel"
86-
):
125+
project = get_project_name(job)
126+
if is_project_owned_by_bazel_team(project):
87127
return True
88128
return False
89129

90130

91131
def print_flags_ready_to_flip(failed_jobs_per_flag, incompatible_flags):
92-
info_text1 = ["#### The following flags didn't break any passing projects"]
132+
info_text1 = [f"#### The following flags didn't break any passing {PROJECT}s"]
93133
for flag in sorted(list(incompatible_flags.keys())):
94134
if flag not in failed_jobs_per_flag:
95135
html_link_text = get_html_link_text(":github:", incompatible_flags[flag])
@@ -99,7 +139,7 @@ def print_flags_ready_to_flip(failed_jobs_per_flag, incompatible_flags):
99139
info_text1 = []
100140

101141
info_text2 = [
102-
"#### The following flags didn't break any passing Bazel team owned/co-owned projects"
142+
f"#### The following flags didn't break any passing Bazel team owned/co-owned {PROJECT}s"
103143
]
104144
for flag, jobs in failed_jobs_per_flag.items():
105145
if flag not in incompatible_flags:
@@ -128,7 +168,7 @@ def print_already_fail_jobs(already_failing_jobs):
128168

129169

130170
def print_projects_need_to_migrate(failed_jobs_per_flag):
131-
info_text = ["#### The following projects need migration"]
171+
info_text = [f"#### The following {PROJECT}s need migration"]
132172
jobs_need_migration = {}
133173
for jobs in failed_jobs_per_flag.values():
134174
for job in jobs.values():
@@ -141,14 +181,14 @@ def print_projects_need_to_migrate(failed_jobs_per_flag):
141181

142182
projects = set()
143183
for job in job_list:
144-
project, _ = get_pipeline_and_platform(job)
184+
project = get_project_name(job)
145185
projects.add(project)
146186
project_num = len(projects)
147187

148188
s1 = "" if project_num == 1 else "s"
149189
s2 = "s" if project_num == 1 else ""
150190
info_text.append(
151-
f"<details><summary>{project_num} project{s1} need{s2} migration, click to see details</summary><ul>"
191+
f"<details><summary>{project_num} {PROJECT}{s1} need{s2} migration, click to see details</summary><ul>"
152192
)
153193

154194
entries = merge_and_format_jobs(job_list, " <li><strong>{}</strong>: {}</li>")
@@ -179,62 +219,68 @@ def print_flags_need_to_migrate(failed_jobs_per_flag, incompatible_flags):
179219
if jobs:
180220
github_url = incompatible_flags[flag]
181221
info_text = [f"* **{flag}** " + get_html_link_text(":github:", github_url)]
182-
jobs_per_pipeline = merge_jobs(jobs.values())
183-
for pipeline, platforms in jobs_per_pipeline.items():
222+
jobs_per_project = merge_jobs(jobs.values())
223+
for project, platforms in jobs_per_project.items():
184224
bazel_mark = ""
185-
if pipeline in bazelci.DOWNSTREAM_PROJECTS and bazelci.DOWNSTREAM_PROJECTS[
186-
pipeline
187-
].get("owned_by_bazel"):
225+
if is_project_owned_by_bazel_team(project):
188226
bazel_mark = ":bazel:"
189227
platforms_text = ", ".join(platforms)
190-
info_text.append(f" - {bazel_mark}**{pipeline}**: {platforms_text}")
228+
info_text.append(f" - {bazel_mark}**{project}**: {platforms_text}")
191229
# Use flag as the context so that each flag gets a different info box.
192230
print_info(flag, "error", info_text)
193231
printed_flag_boxes = True
194232
if not printed_flag_boxes:
195233
return
196234
info_text = [
197-
"#### Downstream projects need to migrate for the following flags:",
235+
"#### Projects need to migrate for the following flags:",
198236
"Projects marked with :bazel: need to be migrated by the Bazel team.",
199237
]
200238
print_info("flags_need_to_migrate", "error", info_text)
201239

202240

203241
def merge_jobs(jobs):
204-
jobs_per_pipeline = collections.defaultdict(list)
242+
jobs_per_project = collections.defaultdict(list)
205243
for job in sorted(jobs, key=lambda s: s["name"].lower()):
206-
pipeline, platform = get_pipeline_and_platform(job)
207-
jobs_per_pipeline[pipeline].append(get_html_link_text(platform, job["web_url"]))
208-
return jobs_per_pipeline
244+
project = get_project_name(job)
245+
platform_label = get_platform_emoji_name(job)
246+
jobs_per_project[project].append(get_html_link_text(platform_label, job["web_url"]))
247+
return jobs_per_project
209248

210249

211250
def merge_and_format_jobs(jobs, line_pattern):
212-
# Merges all jobs for a single pipeline into one line.
251+
# Merges all jobs for a single project into one line.
213252
# Example:
214-
# pipeline (platform1)
215-
# pipeline (platform2)
216-
# pipeline (platform3)
253+
# project (platform1)
254+
# project (platform2)
255+
# project (platform3)
217256
# with line_pattern ">> {}: {}" becomes
218-
# >> pipeline: platform1, platform2, platform3
219-
jobs_per_pipeline = merge_jobs(jobs)
257+
# >> project: platform1, platform2, platform3
258+
jobs_per_project = merge_jobs(jobs)
220259
return [
221-
line_pattern.format(pipeline, ", ".join(platforms))
222-
for pipeline, platforms in jobs_per_pipeline.items()
260+
line_pattern.format(project, ", ".join(platforms))
261+
for project, platforms in jobs_per_project.items()
223262
]
224263

225264

226-
def get_pipeline_and_platform(job):
265+
def get_project_name(job):
    """Derive the project (or module) name from a job's display name.

    The platform label is removed from ``job["name"]`` first. For the
    downstream pipeline the result is the text before the first ``-`` and
    before the first ``(``, stripped of surrounding whitespace. For any other
    pipeline the result is whatever extract_module_version() finds in the
    remaining name.
    """
    platform_label = get_platform_emoji_name(job)
    stripped_name = job["name"].replace(platform_label, "")

    if not bazelci.is_downstream_pipeline():
        # BCR compatibility test pipeline: the job name carries module name + version.
        return extract_module_version(stripped_name)

    # Downstream pipeline: keep only the leading pipeline name.
    before_dash, _, _ = stripped_name.partition("-")
    before_paren, _, _ = before_dash.partition("(")
    return before_paren.strip()
274+
275+
276+
def get_platform_emoji_name(job):
277+
# By search for the platform label in the job name.
227278
name = job["name"]
228-
platform = ""
229279
for p in bazelci.PLATFORMS.values():
230280
platform_label = p.get("emoji-name")
231281
if platform_label in name:
232-
platform = platform_label
233-
name = name.replace(platform_label, "")
234-
break
235-
236-
name = name.partition("-")[0].partition("(")[0].strip()
237-
return name, platform
282+
return platform_label
283+
raise bazelci.BuildkiteException("Cannot detect platform name for: " + job["web_url"])
238284

239285

240286
def print_info(context, style, info):
@@ -264,8 +310,8 @@ def analyze_logs(build_number, client):
264310

265311
threads = []
266312
for job in build_info["jobs"]:
267-
# Some irrelevant job has no "state" field
268-
if "state" in job:
313+
# Some irrelevant job has no "state" or "raw_log_url" field
314+
if "state" in job and "raw_log_url" in job:
269315
thread = LogFetcher(job, client)
270316
threads.append(thread)
271317
thread.start()

buildkite/bazelci.py

Lines changed: 52 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -699,16 +699,28 @@ def _get_buildkite_token(self):
699699
project=("bazel-public" if THIS_IS_TRUSTED else "bazel-untrusted"),
700700
)
701701

702-
def _open_url(self, url, params=[]):
703-
try:
704-
params_str = "".join("&{}={}".format(k, v) for k, v in params)
705-
return (
706-
urllib.request.urlopen("{}?access_token={}{}".format(url, self._token, params_str))
707-
.read()
708-
.decode("utf-8", "ignore")
709-
)
710-
except urllib.error.HTTPError as ex:
711-
raise BuildkiteException("Failed to open {}: {} - {}".format(url, ex.code, ex.reason))
702+
def _open_url(self, url, params=[], retries=5):
    """Fetch ``url`` with the Buildkite access token and return the body as text.

    Args:
        url: Endpoint to request; the access token and ``params`` are appended
            as a query string.
        params: Iterable of ``(key, value)`` pairs added to the query string.
        retries: Maximum number of attempts when the server answers 429.

    Returns:
        The response body decoded as UTF-8, ignoring undecodable bytes.

    Raises:
        BuildkiteException: on any HTTP error other than 429, or when every
            attempt was answered with 429.
    """
    params_str = "".join("&{}={}".format(k, v) for k, v in params)
    full_url = "{}?access_token={}{}".format(url, self._token, params_str)

    for attempt in range(retries):
        try:
            # Use the response as a context manager so the connection is
            # closed even if read() or decode() fails.
            with urllib.request.urlopen(full_url) as response:
                return response.read().decode("utf-8", "ignore")
        except urllib.error.HTTPError as ex:
            if ex.code != 429:
                raise BuildkiteException(
                    "Failed to open {}: {} - {}".format(url, ex.code, ex.reason)
                )

            # 429 Too Many Requests: nothing to wait for after the final attempt.
            if attempt == retries - 1:
                break

            # The RateLimit-Reset header value is used as the number of seconds
            # to wait. Fall back to exponential backoff when it is missing,
            # empty or not an integer.
            retry_after = ex.headers.get("RateLimit-Reset")
            try:
                wait_time = int(retry_after)
            except (TypeError, ValueError):
                wait_time = 2 ** attempt

            time.sleep(wait_time)

    raise BuildkiteException(f"Failed to open {url} after {retries} retries.")
712724

713725
def get_pipeline_info(self):
714726
"""Get details for a pipeline given its organization slug
@@ -984,7 +996,7 @@ def get_expanded_task(task, combination):
984996
return expanded_task
985997

986998

987-
def fetch_configs(http_url, file_config):
999+
def fetch_configs(http_url, file_config, bazel_version=None):
9881000
"""
9891001
If specified fetches the build configuration from file_config or http_url, else tries to
9901002
read it from .bazelci/presubmit.yml.
@@ -993,7 +1005,7 @@ def fetch_configs(http_url, file_config):
9931005
if file_config is not None and http_url is not None:
9941006
raise BuildkiteException("file_config and http_url cannot be set at the same time")
9951007

996-
return load_config(http_url, file_config)
1008+
return load_config(http_url, file_config, bazel_version=bazel_version)
9971009

9981010

9991011
def expand_task_config(config):
@@ -1023,7 +1035,15 @@ def expand_task_config(config):
10231035
config["tasks"].update(expanded_tasks)
10241036

10251037

1026-
def load_config(http_url, file_config, allow_imports=True):
1038+
def maybe_overwrite_bazel_version(bazel_version, config):
    """Force every task in ``config`` to use ``bazel_version``.

    Does nothing when ``bazel_version`` is empty or None. Otherwise each task
    dict is modified in place: its previous ``bazel`` value (None if it had
    none) is saved under ``old_bazel`` and ``bazel`` is set to
    ``bazel_version``.

    Args:
        bazel_version: Version string to apply, or a falsy value for no-op.
        config: Loaded configuration dict; its ``tasks`` entry maps task
            names to task dicts and may be missing or None.
    """
    if not bazel_version:
        return
    # `or {}` also covers a "tasks" key whose value is None, which
    # config.get("tasks", {}) would return as-is and fail to iterate.
    tasks = config.get("tasks") or {}
    for task_config in tasks.values():
        task_config["old_bazel"] = task_config.get("bazel")
        task_config["bazel"] = bazel_version
1044+
1045+
1046+
def load_config(http_url, file_config, allow_imports=True, bazel_version=None):
10271047
if http_url:
10281048
config = load_remote_yaml_file(http_url)
10291049
else:
@@ -1041,6 +1061,7 @@ def load_config(http_url, file_config, allow_imports=True):
10411061
if "tasks" not in config:
10421062
config["tasks"] = {}
10431063

1064+
maybe_overwrite_bazel_version(bazel_version, config)
10441065
expand_task_config(config)
10451066

10461067
imports = config.pop("imports", None)
@@ -1049,7 +1070,7 @@ def load_config(http_url, file_config, allow_imports=True):
10491070
raise BuildkiteException("Nested imports are not allowed")
10501071

10511072
for i in imports:
1052-
imported_tasks = load_imported_tasks(i, http_url, file_config)
1073+
imported_tasks = load_imported_tasks(i, http_url, file_config, bazel_version)
10531074
config["tasks"].update(imported_tasks)
10541075

10551076
if len(config["tasks"]) > MAX_TASK_NUMBER:
@@ -1066,7 +1087,7 @@ def load_remote_yaml_file(http_url):
10661087
return yaml.safe_load(reader(resp))
10671088

10681089

1069-
def load_imported_tasks(import_name, http_url, file_config):
1090+
def load_imported_tasks(import_name, http_url, file_config, bazel_version):
10701091
if "/" in import_name:
10711092
raise BuildkiteException("Invalid import '%s'" % import_name)
10721093

@@ -1077,7 +1098,7 @@ def load_imported_tasks(import_name, http_url, file_config):
10771098
else:
10781099
file_config = new_path
10791100

1080-
imported_config = load_config(http_url=http_url, file_config=file_config, allow_imports=False)
1101+
imported_config = load_config(http_url=http_url, file_config=file_config, allow_imports=False, bazel_version=bazel_version)
10811102

10821103
namespace = import_name.partition(".")[0]
10831104
tasks = {}
@@ -2777,7 +2798,7 @@ def terminate_background_process(process):
27772798
process.kill()
27782799

27792800

2780-
def create_step(label, commands, platform, shards=1, soft_fail=None):
2801+
def create_step(label, commands, platform, shards=1, soft_fail=None, concurrency=None, concurrency_group=None):
27812802
if "docker-image" in PLATFORMS[platform]:
27822803
step = create_docker_step(
27832804
label,
@@ -2823,6 +2844,10 @@ def create_step(label, commands, platform, shards=1, soft_fail=None):
28232844
step["retry"]["automatic"].append({"exit_status": 128, "limit": 1})
28242845
step["retry"]["automatic"].append({"exit_status": 1, "limit": 1})
28252846

2847+
if concurrency and concurrency_group:
2848+
step["concurrency"] = concurrency
2849+
step["concurrency_group"] = concurrency_group
2850+
28262851
return step
28272852

28282853

@@ -4455,6 +4480,7 @@ def main(argv=None):
44554480
runner.add_argument("--task", action="store", type=str, default="")
44564481
runner.add_argument("--file_config", type=str)
44574482
runner.add_argument("--http_config", type=str)
4483+
runner.add_argument("--overwrite_bazel_version", type=str, help="Overwrite the bazel version in the config file.")
44584484
runner.add_argument("--git_repository", type=str)
44594485
runner.add_argument(
44604486
"--git_commit", type=str, help="Reset the git repository to this commit after cloning it"
@@ -4533,7 +4559,9 @@ def main(argv=None):
45334559
elif args.git_repository:
45344560
clone_git_repository(args.git_repository, args.git_commit)
45354561

4536-
configs = fetch_configs(args.http_config, args.file_config)
4562+
# Maybe overwrite the bazel version for each task, we have to do it before the config expansion.
4563+
bazel_version = args.overwrite_bazel_version
4564+
configs = fetch_configs(args.http_config, args.file_config, bazel_version)
45374565
tasks = configs.get("tasks", {})
45384566
task_config = tasks.get(args.task)
45394567
if not task_config:
@@ -4553,6 +4581,12 @@ def main(argv=None):
45534581
if "BUILDKITE_MESSAGE" in os.environ:
45544582
os.environ["BUILDKITE_MESSAGE"] = os.environ["BUILDKITE_MESSAGE"][:1000]
45554583

4584+
# Give user a warning that the bazel version in the config file has been overridden.
4585+
old_bazel = task_config.get("old_bazel")
4586+
if old_bazel:
4587+
new_bazel = task_config.get("bazel")
4588+
print_collapsed_group(f":bazel: Bazel version overridden from {old_bazel} to {new_bazel}")
4589+
45564590
execute_commands(
45574591
task_config=task_config,
45584592
platform=platform,

0 commit comments

Comments
 (0)