|
1 | | -from .unixfs_pb2 import Data as UnixFSData |
2 | | -import cid |
| 1 | +from .utils import get_default_gateways |
3 | 2 |
|
4 | 3 | from fsspec.spec import AbstractFileSystem, AbstractBufferedFile |
5 | 4 | import requests |
6 | 5 | from requests.exceptions import HTTPError |
7 | 6 | import hashlib |
8 | | -import base64 |
9 | 7 | import functools |
10 | 8 | import time |
11 | | -import json |
12 | | -import os |
13 | 9 |
|
14 | 10 | import logging |
15 | 11 |
|
@@ -59,6 +55,22 @@ def get(self, path): |
59 | 55 | res.raise_for_status() |
60 | 56 | return res.content |
61 | 57 |
|
| 58 | + def head(self, path, headers=None): |
| 59 | + logger.debug("head %s via %s", path, self.url, headers=headers or {}) |
| 60 | + try: |
| 61 | + res = self.session.get(self.url + "/ipfs/" + path) |
| 62 | + except requests.ConnectionError as e: |
| 63 | + logger.debug("Connection Error: %s", e) |
| 64 | + self._backoff() |
| 65 | + return None |
| 66 | + if res.status_code == 429: # too many requests |
| 67 | + self._backoff() |
| 68 | + return None |
| 69 | + elif res.status_code == 200: |
| 70 | + self._speedup() |
| 71 | + res.raise_for_status() |
| 72 | + return res.headers |
| 73 | + |
62 | 74 | def apipost(self, call, **kwargs): |
63 | 75 | logger.debug("post %s via %s", call, self.url) |
64 | 76 | try: |
@@ -108,22 +120,6 @@ def get_state(self): |
108 | 120 | return (self.state, None) |
109 | 121 |
|
110 | 122 |
|
111 | | -GATEWAYS = [ |
112 | | - "http://127.0.0.1:8080", |
113 | | - "https://ipfs.io", |
114 | | - "https://gateway.pinata.cloud", |
115 | | - "https://cloudflare-ipfs.com", |
116 | | - "https://dweb.link", |
117 | | -] |
118 | | - |
119 | | - |
120 | | -def get_default_gateways(): |
121 | | - try: |
122 | | - return os.environ["IPFSSPEC_GATEWAYS"].split() |
123 | | - except KeyError: |
124 | | - return GATEWAYS |
125 | | - |
126 | | - |
127 | 123 | class IPFSFileSystem(AbstractFileSystem): |
128 | 124 | protocol = "ipfs" |
129 | 125 |
|
@@ -160,6 +156,9 @@ def _run_on_any_gateway(self, f): |
160 | 156 | def _gw_get(self, path): |
161 | 157 | return self._run_on_any_gateway(lambda gw: gw.get(path)) |
162 | 158 |
|
| 159 | + def _gw_head(self, path, headers=None): |
| 160 | + return self._run_on_any_gateway(lambda gw: gw.head(path, headers)) |
| 161 | + |
163 | 162 | def _gw_apipost(self, call, **kwargs): |
164 | 163 | return self._run_on_any_gateway(lambda gw: gw.apipost(call, **kwargs)) |
165 | 164 |
|
@@ -213,43 +212,28 @@ def _open( |
213 | 212 | ) |
214 | 213 |
|
215 | 214 | def info(self, path, **kwargs): |
| 215 | + path = self._strip_protocol(path) |
216 | 216 | logger.debug("info on %s", path) |
217 | 217 |
|
218 | | - def req(endpoint): |
219 | | - try: |
220 | | - return self._gw_apipost(endpoint, arg=path) |
221 | | - except HTTPError as e: |
222 | | - try: |
223 | | - msg = e.response.json() |
224 | | - except json.JSONDecodeError: |
225 | | - raise IOError("unknown error") from e |
226 | | - else: |
227 | | - if "Message" in msg: |
228 | | - raise FileNotFoundError(msg["Message"]) from e |
229 | | - else: |
230 | | - raise IOError(msg) from e |
231 | | - |
232 | | - stat = req("object/stat") |
233 | | - c = cid.from_string(stat["Hash"]) |
234 | | - if c.codec == "raw": |
235 | | - size = stat["DataSize"] |
236 | | - ftype = "file" |
237 | | - else: |
238 | | - dag = req("dag/get") |
239 | | - data = UnixFSData() |
240 | | - if "data" in dag: |
241 | | - data.ParseFromString(base64.b64decode(dag["data"])) |
| 218 | + headers = {"Accept-Encoding": "identity"} # this ensures correct file size |
| 219 | + response_headers = self._gw_head(path, headers) |
| 220 | + |
| 221 | + info = {"name": path} |
| 222 | + if "Content-Length" in response_headers: |
| 223 | + info["size"] = int(response_headers["Content-Length"]) |
| 224 | + elif "Content-Range" in response_headers: |
| 225 | + info["size"] = int(response_headers["Content-Range"].split("/")[1]) |
| 226 | + |
| 227 | + if "ETag" in response_headers: |
| 228 | + etag = response_headers["ETag"].strip("\"") |
| 229 | + info["ETag"] = etag |
| 230 | + if etag.startswith("DirIndex"): |
| 231 | + info["type"] = "directory" |
| 232 | + info["CID"] = etag.split("-")[-1] |
242 | 233 | else: |
243 | | - rawdata = dag["Data"]["/"]["bytes"] |
244 | | - data.ParseFromString(base64.b64decode(rawdata + "=" * (-len(rawdata) % 4))) |
245 | | - |
246 | | - size = data.filesize |
247 | | - if data.Type == data.File: |
248 | | - ftype = "file" |
249 | | - else: |
250 | | - ftype = "directory" |
251 | | - |
252 | | - return {"name": path, "size": size, "type": ftype} |
| 234 | + info["type"] = "file" |
| 235 | + info["CID"] = etag |
| 236 | + return info |
253 | 237 |
|
254 | 238 |
|
255 | 239 | class IPFSBufferedFile(AbstractBufferedFile): |
|
0 commit comments