|
1 | | -import requests |
| 1 | +import base64 |
2 | 2 |
|
3 | | -import fsspec |
| 3 | +import requests |
4 | 4 |
|
5 | 5 | from ..spec import AbstractFileSystem |
6 | 6 | from ..utils import infer_storage_options |
@@ -36,7 +36,7 @@ class GithubFileSystem(AbstractFileSystem): |
36 | 36 | """ |
37 | 37 |
|
38 | 38 | url = "https://api.github.com/repos/{org}/{repo}/git/trees/{sha}" |
39 | | - rurl = "https://raw.githubusercontent.com/{org}/{repo}/{sha}/{path}" |
| 39 | + content_url = "https://api.github.com/repos/{org}/{repo}/contents/{path}?ref={sha}" |
40 | 40 | protocol = "github" |
41 | 41 | timeout = (60, 60) # connect, read timeouts |
42 | 42 |
|
@@ -219,21 +219,35 @@ def _open( |
219 | 219 | ): |
220 | 220 | if mode != "rb": |
221 | 221 | raise NotImplementedError |
222 | | - url = self.rurl.format( |
| 222 | + |
| 223 | + # construct a url to hit the GitHub API's repo contents API |
| 224 | + url = self.content_url.format( |
223 | 225 | org=self.org, repo=self.repo, path=path, sha=sha or self.root |
224 | 226 | ) |
| 227 | + |
| 228 | + # make a request to this API, and parse the response as JSON |
225 | 229 | r = requests.get(url, timeout=self.timeout, **self.kw) |
226 | 230 | if r.status_code == 404: |
227 | 231 | raise FileNotFoundError(path) |
228 | 232 | r.raise_for_status() |
| 233 | + content_json = r.json() |
| 234 | + |
| 235 | + # if the response's content key is not empty, try to parse it as base64 |
| 236 | + if content_json["content"]: |
| 237 | + content = base64.b64decode(content_json["content"]) |
| 238 | + |
| 239 | + # as long as the content does not start with the string |
| 240 | + # "version https://git-lfs.github.com/" |
| 241 | + # then it is probably not a git-lfs pointer and we can just return |
| 242 | + # the content directly |
| 243 | + if not content.startswith(b"version https://git-lfs.github.com/"): |
| 244 | + return MemoryFile(None, None, content) |
| 245 | + |
| 246 | + # we land here if the first response carried no inline content
| 247 | + # (regular file over 1MB) or the content was a git-lfs pointer;
| 248 | + # in either case, we get the content from the download_url
| 249 | + r = requests.get(content_json["download_url"], timeout=self.timeout, **self.kw) |
| 250 | + if r.status_code == 404: |
| 251 | + raise FileNotFoundError(path) |
| 252 | + r.raise_for_status() |
229 | 253 | return MemoryFile(None, None, r.content) |
230 | | - |
231 | | - def cat(self, path, recursive=False, on_error="raise", **kwargs): |
232 | | - paths = self.expand_path(path, recursive=recursive) |
233 | | - urls = [ |
234 | | - self.rurl.format(org=self.org, repo=self.repo, path=u, sha=self.root) |
235 | | - for u, sh in paths |
236 | | - ] |
237 | | - fs = fsspec.filesystem("http") |
238 | | - data = fs.cat(urls, on_error="return") |
239 | | - return {u: v for ((k, v), u) in zip(data.items(), urls)} |
|
0 commit comments