|
27 | 27 | "outputs": [], |
28 | 28 | "source": [ |
29 | 29 | "from logging import getLogger\n", |
30 | | - "from os import makedirs\n", |
31 | | - "from os.path import join as path_join\n", |
| 30 | + "from pathlib import Path\n", |
32 | 31 | "\n", |
33 | 32 | "from coloredlogs import install as coloredlogs_install\n", |
34 | 33 | "\n", |
|
37 | 36 | "logger = getLogger(\"downloader\")\n", |
38 | 37 | "\n", |
39 | 38 | "\n", |
40 | | - "git_data_dir = path_join(\"/devfest\", \"repos\", \"git-data\")\n", |
41 | | - "makedirs(git_data_dir, exist_ok=True)\n", |
42 | | - "repos_json = path_join(git_data_dir, \"repos.json\")" |
| 39 | + "repos_dir = Path(\"repos\")\n", |
| 40 | + "git_data_dir = repos_dir / \"git-data\"\n", |
| 41 | + "git_data_dir.mkdir(parents=True, exist_ok=True)\n", |
| 42 | + "repos_json = repos_dir / \"repos.json\"" |
43 | 43 | ] |
44 | 44 | }, |
45 | 45 | { |
46 | 46 | "cell_type": "markdown", |
47 | 47 | "metadata": {}, |
48 | 48 | "source": [ |
49 | | - "To use GitHub API, we need a token. Please create one in your [GitHub account settings](https://github.com/settings/tokens) (the basic permissions are fine), and fill it here:" |
| 49 | + "To use the GitHub API, we need a token. It should normally be available through the `GITHUB_TOKEN` environment variable. Let's check!" |
50 | 50 | ] |
51 | 51 | }, |
52 | 52 | { |
|
55 | 55 | "metadata": {}, |
56 | 56 | "outputs": [], |
57 | 57 | "source": [ |
58 | | - "TOKEN = " |
| 58 | + "from os import environ\n", |
| 59 | + "TOKEN = environ.get(\"GITHUB_TOKEN\")\n", |
| 60 | + "if TOKEN is None:\n", |
| 61 | + " logger.critical(\"Could not find GITHUB_TOKEN\")" |
59 | 62 | ] |
60 | 63 | }, |
61 | 64 | { |
|
66 | 69 | "source": [ |
67 | 70 | "from json import dump as json_dump\n", |
68 | 71 | "from operator import itemgetter\n", |
69 | | - "from re import compile as re_compile\n", |
| 72 | + "from re import search as re_search\n", |
70 | 73 | "from typing import Any, Dict, List, Optional\n", |
71 | 74 | "\n", |
72 | 75 | "import requests\n", |
73 | 76 | "from tqdm import tqdm_notebook as tqdm\n", |
74 | 77 | "\n", |
75 | 78 | "\n", |
76 | | - "next_pattern = re_compile('<(https://api.github.com/user/[^/]+/repos\\?[^>]*page=\\d+[^>]*)>; rel=\"next\"')\n", |
77 | | - "last_pattern = re_compile('<https://api.github.com/user/[^/]+/repos\\?[^>]*page=(\\d+)[^>]*>; rel=\"last\"')\n", |
78 | | - "\n", |
79 | | - "\n", |
80 | | - "def parse_next(link_header: str) -> Optional[str]:\n", |
81 | | - " match = next_pattern.search(link_header)\n", |
82 | | - " return match.group(1) if match is not None else None\n", |
83 | | - "\n", |
84 | | - "\n", |
85 | | - "def parse_last(link_header: str) -> Optional[int]:\n", |
86 | | - " match = last_pattern.search(link_header)\n", |
87 | | - " return int(match.group(1)) if match is not None else None\n", |
88 | | - "\n", |
89 | | - "\n", |
90 | 79 | "def list_repositories(user: str,\n", |
91 | 80 | " token: str,\n", |
92 | 81 | " max_size_mb: int,\n", |
93 | 82 | " repos_number: int\n", |
94 | 83 | " ) -> List[Dict[str, Any]]:\n", |
| 84 | + "\n", |
| 85 | + " def parse_last(link_header: str) -> Optional[int]:\n", |
| 86 | + " match = re_search(\n", |
| 87 | + " r'<'\n", |
| 88 | + " r'https://api.github.com/user/'\n", |
| 89 | + " r'[^/]+/repos\\?[^>]*page='\n", |
| 90 | + " r'(\\d+)'\n", |
| 91 | + " r'[^>]*>; rel=\"last\"',\n", |
| 92 | + " link_header)\n", |
| 93 | + " if match is None:\n", |
| 94 | + " return None\n", |
| 95 | + " return int(match.group(1))\n", |
| 96 | + "\n", |
95 | 97 | " repos_list_headers = dict(Authorization=\"token %s\" % token)\n", |
96 | 98 | " repos_url = \"https://api.github.com/users/%s/repos\" % user\n", |
97 | 99 | "\n", |
|
100 | 102 | " total_pages = parse_last(request_total.headers[\"Link\"])\n", |
101 | 103 | " assert total_pages is not None\n", |
102 | 104 | "\n", |
103 | | - " def get_page_url(page: int):\n", |
104 | | - " return \"%s?page=%d\" % (repos_url, page)\n", |
105 | | - "\n", |
106 | 105 | " logger.info(\"Retrieving repos list for user %s\" % user)\n", |
107 | 106 | " repos = []\n", |
108 | 107 | " for page in tqdm(range(1, total_pages + 1)):\n", |
109 | | - " request = requests.get(get_page_url(page),\n", |
| 108 | + " request = requests.get(\"%s?page=%d\" % (repos_url, page),\n", |
110 | 109 | " headers=repos_list_headers)\n", |
111 | 110 | " request.raise_for_status()\n", |
112 | 111 | " for repo in request.json():\n", |
|
222 | 221 | "name": "python", |
223 | 222 | "nbconvert_exporter": "python", |
224 | 223 | "pygments_lexer": "ipython3", |
225 | | - "version": "3.6.8" |
| 224 | + "version": "3.7.6" |
226 | 225 | } |
227 | 226 | }, |
228 | 227 | "nbformat": 4, |
|
0 commit comments