Skip to content

Commit 91c4171

Browse files
committed
Improve and document pull_circleci_artifacts
1 parent 36460bf commit 91c4171

File tree

1 file changed

+62
-11
lines changed

1 file changed

+62
-11
lines changed

buildconfig/ci/circleci/pull_circleci_artifacts.py

Lines changed: 62 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,28 @@
1+
"""
2+
A script to automate downloading CircleCI artifacts.
3+
4+
Usage: python3 pull_circleci_artifacts.py <TOKEN> <PIPELINE_ID> <SAVE_DIR>
5+
TOKEN:
6+
CircleCI "personal access token" of a github (preferably machine) user.
7+
This is secret!
8+
9+
PIPELINE_ID:
10+
A unique string id that represents the CircleCI pipeline, whose artifacts this
11+
script pulls.
12+
This pipeline must have exactly one workflow and that workflow must have exactly
13+
one job. This script waits for the pipeline to finish, and pulls artifacts from
14+
this job. If the pipeline isn't successful on finish, this script exits with an
15+
error.
16+
17+
SAVE_DIR:
18+
The downloaded artifacts are saved to this directory
19+
20+
CircleCI API docs: https://circleci.com/docs/api/v2/index.html (useful for understanding
21+
this code)
22+
"""
23+
24+
# yup, all these are stdlib modules in case you are wondering
25+
import concurrent.futures
126
import http.client
227
import json
328
import sys
@@ -18,6 +43,12 @@
1843

1944

2045
def paginate_get_items_and_next(url, next_page=""):
46+
"""
47+
Helper to get "items" and "next_page_token" from CircleCI API, used to handle
48+
pagination.
49+
"""
50+
51+
# page-token is used for pagination. Initially, it is unspecified.
2152
url_query = f"{url}?page-token={next_page}" if next_page else url
2253
cci_api.request("GET", f"/api/v2/{url_query}", headers=headers)
2354
response = cci_api.getresponse()
@@ -36,6 +67,9 @@ def paginate_get_items_and_next(url, next_page=""):
3667

3768

3869
def paginate_get_single_item(url):
70+
"""
71+
Helper to get exactly one item from CircleCI paginated APIs
72+
"""
3973
items, _ = paginate_get_items_and_next(url)
4074
if len(items) != 1:
4175
raise RuntimeError(f"Expected one item, got {len(items)}")
@@ -44,22 +78,41 @@ def paginate_get_single_item(url):
4478

4579

4680
def paginate_get_all_items(url):
    """
    Yield every "item" from a paginated CircleCI API endpoint, following
    page tokens until the API reports no further pages (or returns no items).
    """
    items, token = paginate_get_items_and_next(url)
    while items:
        yield from items
        if not token:
            # no further pages to fetch, we are done
            return
        items, token = paginate_get_items_and_next(url, token)
5695

96+
97+
def download_artifact(artifact):
    """
    Fetch a single artifact (described by its CircleCI "artifact dict") and
    save it under the module-level save_dir. Safe to invoke from several
    threads at once, which is how the main loop parallelises downloads.
    """
    name = Path(artifact["path"]).name
    destination = save_dir / name
    print(f"Downloading {name}")
    request.urlretrieve(artifact["url"], destination)
    print(f"Done with saving {name}")
108+
57109
cnt = 1
58110
while True:
59111
print(f"\nAttempt {cnt}")
60112
workflow = paginate_get_single_item(f"/pipeline/{pipeline_id}/workflow")
61113
if workflow["status"] != "running":
62114
if workflow["status"] != "success":
115+
# workflow failed
63116
raise RuntimeError(f"The workflow has status '{workflow['status']}'")
64117

65118
# successfully finished workflow at this point
@@ -69,19 +122,17 @@ def paginate_get_all_items(url):
69122
if job["status"] != "success":
    # The job ended in a non-success state; report *its* status and abort.
    # Fixed: the message previously interpolated workflow["status"] even
    # though it claims to describe the job's status.
    raise RuntimeError(f"The job has status '{job['status']}'")
71124

72-
print("Handling artifacts...")
73-
for i, artifact in enumerate(
74-
paginate_get_all_items(
75-
f"/project/{job['project_slug']}/{job['job_number']}/artifacts"
76-
),
77-
start=1,
78-
):
79-
path = Path(artifact["path"])
80-
save_path = save_dir / path.name
81-
print(f"{i}) Downloading {path.name} ({path}) and saving it in {save_path}")
82-
request.urlretrieve(artifact["url"], save_path)
125+
print(f"Downloading artifacts (they will all be saved in {str(save_dir)})")
126+
with concurrent.futures.ThreadPoolExecutor() as pool:
127+
pool.map(
128+
download_artifact,
129+
paginate_get_all_items(
130+
f"/project/{job['project_slug']}/{job['job_number']}/artifacts"
131+
),
132+
)
83133

84134
break
85135

86136
cnt += 1
137+
print("Job is still running (now sleeping for 30s before retrying)")
87138
time.sleep(30)

0 commit comments

Comments
 (0)