Skip to content

Commit f4bca02

Browse files
authored
Merge pull request #12 from mloncode/download-notebook
Improve download notebook
2 parents 4d66d44 + 3c231c5 commit f4bca02

File tree

1 file changed

+26
-27
lines changed

1 file changed

+26
-27
lines changed

notebooks/Download repositories.ipynb

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@
2727
"outputs": [],
2828
"source": [
2929
"from logging import getLogger\n",
30-
"from os import makedirs\n",
31-
"from os.path import join as path_join\n",
30+
"from pathlib import Path\n",
3231
"\n",
3332
"from coloredlogs import install as coloredlogs_install\n",
3433
"\n",
@@ -37,16 +36,17 @@
3736
"logger = getLogger(\"downloader\")\n",
3837
"\n",
3938
"\n",
40-
"git_data_dir = path_join(\"/devfest\", \"repos\", \"git-data\")\n",
41-
"makedirs(git_data_dir, exist_ok=True)\n",
42-
"repos_json = path_join(git_data_dir, \"repos.json\")"
39+
"repos_dir = Path(\"repos\")\n",
40+
"git_data_dir = repos_dir / \"git-data\"\n",
41+
"git_data_dir.mkdir(parents=True, exist_ok=True)\n",
42+
"repos_json = repos_dir / \"repos.json\""
4343
]
4444
},
4545
{
4646
"cell_type": "markdown",
4747
"metadata": {},
4848
"source": [
49-
"To use GitHub API, we need a token. Please create one in your [GitHub account settings](https://github.com/settings/tokens) (the basic permissions are fine), and fill it here:"
49+
"To use the GitHub API, we need a token. It should normally be available through the `GITHUB_TOKEN` environment variable. Let's check!"
5050
]
5151
},
5252
{
@@ -55,7 +55,10 @@
5555
"metadata": {},
5656
"outputs": [],
5757
"source": [
58-
"TOKEN = "
58+
"from os import environ\n",
59+
"TOKEN = environ.get(\"GITHUB_TOKEN\")\n",
60+
"if TOKEN is None:\n",
61+
" logger.critical(\"Could not find GITHUB_TOKEN\")"
5962
]
6063
},
6164
{
@@ -66,32 +69,31 @@
6669
"source": [
6770
"from json import dump as json_dump\n",
6871
"from operator import itemgetter\n",
69-
"from re import compile as re_compile\n",
72+
"from re import search as re_search\n",
7073
"from typing import Any, Dict, List, Optional\n",
7174
"\n",
7275
"import requests\n",
7376
"from tqdm import tqdm_notebook as tqdm\n",
7477
"\n",
7578
"\n",
76-
"next_pattern = re_compile('<(https://api.github.com/user/[^/]+/repos\\?[^>]*page=\\d+[^>]*)>; rel=\"next\"')\n",
77-
"last_pattern = re_compile('<https://api.github.com/user/[^/]+/repos\\?[^>]*page=(\\d+)[^>]*>; rel=\"last\"')\n",
78-
"\n",
79-
"\n",
80-
"def parse_next(link_header: str) -> Optional[str]:\n",
81-
" match = next_pattern.search(link_header)\n",
82-
" return match.group(1) if match is not None else None\n",
83-
"\n",
84-
"\n",
85-
"def parse_last(link_header: str) -> Optional[int]:\n",
86-
" match = last_pattern.search(link_header)\n",
87-
" return int(match.group(1)) if match is not None else None\n",
88-
"\n",
89-
"\n",
9079
"def list_repositories(user: str,\n",
9180
" token: str,\n",
9281
" max_size_mb: int,\n",
9382
" repos_number: int\n",
9483
" ) -> List[Dict[str, Any]]:\n",
84+
"\n",
85+
" def parse_last(link_header: str) -> Optional[int]:\n",
86+
" match = re_search(\n",
87+
" r'<'\n",
88+
" r'https://api.github.com/user/'\n",
89+
" r'[^/]+/repos\\?[^>]*page='\n",
90+
" r'(\\d+)'\n",
91+
" r'[^>]*>; rel=\"last\"',\n",
92+
" link_header)\n",
93+
" if match is None:\n",
94+
" return None\n",
95+
" return int(match.group(1))\n",
96+
"\n",
9597
" repos_list_headers = dict(Authorization=\"token %s\" % token)\n",
9698
" repos_url = \"https://api.github.com/users/%s/repos\" % user\n",
9799
"\n",
@@ -100,13 +102,10 @@
100102
" total_pages = parse_last(request_total.headers[\"Link\"])\n",
101103
" assert total_pages is not None\n",
102104
"\n",
103-
" def get_page_url(page: int):\n",
104-
" return \"%s?page=%d\" % (repos_url, page)\n",
105-
"\n",
106105
" logger.info(\"Retrieving repos list for user %s\" % user)\n",
107106
" repos = []\n",
108107
" for page in tqdm(range(1, total_pages + 1)):\n",
109-
" request = requests.get(get_page_url(page),\n",
108+
" request = requests.get(\"%s?page=%d\" % (repos_url, page),\n",
110109
" headers=repos_list_headers)\n",
111110
" request.raise_for_status()\n",
112111
" for repo in request.json():\n",
@@ -222,7 +221,7 @@
222221
"name": "python",
223222
"nbconvert_exporter": "python",
224223
"pygments_lexer": "ipython3",
225-
"version": "3.6.8"
224+
"version": "3.7.6"
226225
}
227226
},
228227
"nbformat": 4,

0 commit comments

Comments
 (0)