1+ """
2+ A script to automate downloading CircleCI artifacts.
3+
4+ Usage: python3 pull_circleci_artifacts.py <TOKEN> <PIPELINE_ID> <SAVE_DIR>
5+ TOKEN:
6+ CircleCI "personal access token" of a github (preferably machine) user.
7+ This is secret!
8+
9+ PIPELINE_ID:
10+ A unique string id that represents the CircleCI pipeline, whose artifacts this
11+ script pulls.
12+ This pipeline must have exactly one workflow and that workflow must have exactly
13+ one job. This script waits for the pipeline to finish, and pulls artifacts from
14+ this job. If the pipeline isn't successful on finish, this script exits with an
15+ error.
16+
17+ SAVE_DIR:
18+ The downloaded artifacts are saved to this directory
19+
20+ CircleCI API docs: https://circleci.com/docs/api/v2/index.html (useful for understanding
21+ this code)
22+ """
23+
24+ # yup, all these are stdlib modules incase you are wondering
25+ import concurrent .futures
126import http .client
227import json
328import sys
@@ -18,6 +43,12 @@
 
 
 def paginate_get_items_and_next(url, next_page=""):
+    """
+    Helper to get "items" and "next_page_token" from the CircleCI API, used to
+    handle pagination.
+    """
+
+    # page-token is used for pagination. Initially, it is unspecified.
     url_query = f"{url}?page-token={next_page}" if next_page else url
     cci_api.request("GET", f"/api/v2/{url_query}", headers=headers)
     response = cci_api.getresponse()
@@ -36,6 +67,9 @@ def paginate_get_items_and_next(url, next_page=""):
 
 
 def paginate_get_single_item(url):
+    """
+    Helper to get exactly one item from CircleCI paginated APIs.
+    """
     items, _ = paginate_get_items_and_next(url)
     if len(items) != 1:
         raise RuntimeError(f"Expected one item, got {len(items)}")
@@ -44,22 +78,41 @@ def paginate_get_single_item(url):
 
 
 def paginate_get_all_items(url):
+    """
+    Helper to get all "items" from CircleCI paginated APIs.
+    """
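+    # note that this is a generator: each page is fetched lazily as the caller iterates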
     prev_page_tag = ""
     while True:
         items, prev_page_tag = paginate_get_items_and_next(url, prev_page_tag)
         if not items:
+            # the current page is empty, so there is nothing more to yield
             break
 
         yield from items
         if not prev_page_tag:
+            # done with pagination, exit
             break
 
+
+def download_artifact(artifact):
+    """
+    Helper to download an artifact given an "artifact dict". This can be called
+    concurrently from multiple threads to speed up downloads.
+    """
+    path = Path(artifact["path"])
+    save_path = save_dir / path.name
+    print(f"Downloading {path.name}")
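+    # urlretrieve saves the file at artifact["url"] directly to save_path on disk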
+    request.urlretrieve(artifact["url"], save_path)
+    print(f"Done with saving {path.name}")
+
+
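+# Poll the pipeline's single workflow until it finishes, then download the job's artifacts.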
 cnt = 1
 while True:
     print(f"\nAttempt {cnt}")
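+    # per the docstring, the pipeline is expected to have exactly one workflow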
     workflow = paginate_get_single_item(f"/pipeline/{pipeline_id}/workflow")
     if workflow["status"] != "running":
         if workflow["status"] != "success":
+            # workflow failed
             raise RuntimeError(f"The workflow has status '{workflow['status']}'")
 
         # successfully finished workflow at this point
@@ -69,19 +122,17 @@ def paginate_get_all_items(url):
         if job["status"] != "success":
             raise RuntimeError(f"The job has status '{job['status']}'")
 
-        print("Handling artifacts...")
-        for i, artifact in enumerate(
-            paginate_get_all_items(
-                f"/project/{job['project_slug']}/{job['job_number']}/artifacts"
-            ),
-            start=1,
-        ):
-            path = Path(artifact["path"])
-            save_path = save_dir / path.name
-            print(f"{i}) Downloading {path.name} ({path}) and saving it in {save_path}")
-            request.urlretrieve(artifact["url"], save_path)
+        print(f"Downloading artifacts (they will all be saved in {str(save_dir)})")
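+        # downloads run in parallel; the with-block only exits once they have all finished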
+        with concurrent.futures.ThreadPoolExecutor() as pool:
+            pool.map(
+                download_artifact,
+                paginate_get_all_items(
+                    f"/project/{job['project_slug']}/{job['job_number']}/artifacts"
+                ),
+            )
 
         break
 
     cnt += 1
+    print("Job is still running (now sleeping for 30s before retrying)")
     time.sleep(30)