diff --git a/mapillary_tools/api_v4.py b/mapillary_tools/api_v4.py index a664ca993..fb6606243 100644 --- a/mapillary_tools/api_v4.py +++ b/mapillary_tools/api_v4.py @@ -2,6 +2,7 @@ import os import ssl import typing as T +from json import dumps import requests from requests.adapters import HTTPAdapter @@ -46,6 +47,106 @@ def cert_verify(self, *args, **kwargs): conn.ca_certs = None +@T.overload +def _truncate(s: bytes, limit: int = 512) -> bytes: ... + + +@T.overload +def _truncate(s: str, limit: int = 512) -> str: ... + + +def _truncate(s, limit=512): + if limit < len(s): + remaining = len(s) - limit + if isinstance(s, bytes): + return ( + s[:limit] + + b"..." + + f"({remaining} more bytes truncated)".encode("utf-8") + ) + else: + return str(s[:limit]) + f"...({remaining} more chars truncated)" + else: + return s + + +def _sanitize(headers: T.Dict): + new_headers = {} + + for k, v in headers.items(): + if k.lower() in [ + "authorization", + "cookie", + "x-fb-access-token", + "access-token", + "access_token", + "password", + ]: + new_headers[k] = "[REDACTED]" + else: + new_headers[k] = _truncate(v) + + return new_headers + + +def _log_debug_request( + method: str, + url: str, + json: T.Optional[T.Dict] = None, + params: T.Optional[T.Dict] = None, + headers: T.Optional[T.Dict] = None, + timeout: T.Any = None, +): + if logging.getLogger().getEffectiveLevel() <= logging.DEBUG: + return + + msg = f"HTTP {method} {url}" + + if USE_SYSTEM_CERTS: + msg += " (w/sys_certs)" + + if json: + t = _truncate(dumps(_sanitize(json))) + msg += f" JSON={t}" + + if params: + msg += f" PARAMS={_sanitize(params)}" + + if headers: + msg += f" HEADERS={_sanitize(headers)}" + + if timeout is not None: + msg += f" TIMEOUT={timeout}" + + LOG.debug(msg) + + +def _log_debug_response(resp: requests.Response): + if logging.getLogger().getEffectiveLevel() <= logging.DEBUG: + return + + data: T.Union[str, bytes] + try: + data = _truncate(dumps(_sanitize(resp.json()))) + except Exception: + data = _truncate(resp.content) + + LOG.debug(f"HTTP {resp.status_code} ({resp.reason}): %s", data) + + +def readable_http_error(ex: requests.HTTPError) -> str: + req = ex.request + resp = ex.response + + data: T.Union[str, bytes] + try: + data = _truncate(dumps(_sanitize(resp.json()))) + except Exception: + data = _truncate(resp.content) + + return f"{req.method} {resp.url} => {resp.status_code} ({resp.reason}): {str(data)}" + + def request_post( url: str, data: T.Optional[T.Any] = None, @@ -54,14 +155,23 @@ def request_post( ) -> requests.Response: global USE_SYSTEM_CERTS + _log_debug_request( + "POST", + url, + json=json, + params=kwargs.get("params"), + headers=kwargs.get("headers"), + timeout=kwargs.get("timeout"), + ) + if USE_SYSTEM_CERTS: with requests.Session() as session: session.mount("https://", HTTPSystemCertsAdapter()) - return session.post(url, data=data, json=json, **kwargs) + resp = session.post(url, data=data, json=json, **kwargs) else: try: - return requests.post(url, data=data, json=json, **kwargs) + resp = requests.post(url, data=data, json=json, **kwargs) except requests.exceptions.SSLError as ex: if "SSLCertVerificationError" not in str(ex): raise ex @@ -70,9 +180,11 @@ def request_post( LOG.warning( "SSL error occurred, falling back to system SSL certificates: %s", ex ) - with requests.Session() as session: - session.mount("https://", HTTPSystemCertsAdapter()) - return session.post(url, data=data, json=json, **kwargs) + return request_post(url, data=data, json=json, **kwargs) + + _log_debug_response(resp) + + return resp def request_get( @@ -82,13 +194,21 @@ def request_get( ) -> requests.Response: global USE_SYSTEM_CERTS + _log_debug_request( + "GET", + url, + params=kwargs.get("params"), + headers=kwargs.get("headers"), + timeout=kwargs.get("timeout"), + ) + if USE_SYSTEM_CERTS: with requests.Session() as session: session.mount("https://", HTTPSystemCertsAdapter()) - return session.get(url, params=params, **kwargs) + resp = session.get(url, params=params, **kwargs) else: try: - return requests.get(url, params=params, **kwargs) + resp = requests.get(url, params=params, **kwargs) except requests.exceptions.SSLError as ex: if "SSLCertVerificationError" not in str(ex): raise ex @@ -97,15 +217,17 @@ def request_get( LOG.warning( "SSL error occurred, falling back to system SSL certificates: %s", ex ) - with requests.Session() as session: - session.mount("https://", HTTPSystemCertsAdapter()) - return session.get(url, params=params, **kwargs) + resp = request_get(url, params=params, **kwargs) + + _log_debug_response(resp) + + return resp def get_upload_token(email: str, password: str) -> requests.Response: resp = request_post( f"{MAPILLARY_GRAPH_API_ENDPOINT}/login", - params={"access_token": MAPILLARY_CLIENT_TOKEN}, + headers={"Authorization": f"OAuth {MAPILLARY_CLIENT_TOKEN}"}, json={"email": email, "password": password, "locale": "en_US"}, timeout=REQUESTS_TIMEOUT, ) diff --git a/mapillary_tools/commands/__main__.py b/mapillary_tools/commands/__main__.py index b9f0ca226..0e3652686 100644 --- a/mapillary_tools/commands/__main__.py +++ b/mapillary_tools/commands/__main__.py @@ -5,7 +5,9 @@ import typing as T from pathlib import Path -from .. import constants, exceptions, VERSION +import requests + +from .. import api_v4, constants, exceptions, VERSION from . import ( authenticate, process, @@ -160,11 +162,14 @@ def main(): try: args.func(argvars) - except exceptions.MapillaryUserError as exc: + except requests.HTTPError as ex: + LOG.error("%s: %s", ex.__class__.__name__, api_v4.readable_http_error(ex)) + + except exceptions.MapillaryUserError as ex: LOG.error( - "%s: %s", exc.__class__.__name__, exc, exc_info=log_level == logging.DEBUG + "%s: %s", ex.__class__.__name__, ex, exc_info=log_level == logging.DEBUG ) - sys.exit(exc.exit_code) + sys.exit(ex.exit_code) if __name__ == "__main__": diff --git a/mapillary_tools/commands/process_and_upload.py b/mapillary_tools/commands/process_and_upload.py index ccb239a07..c7ffadd3f 100644 --- a/mapillary_tools/commands/process_and_upload.py +++ b/mapillary_tools/commands/process_and_upload.py @@ -15,5 +15,6 @@ def run(self, args: dict): # \x00 is a special path similiar to /dev/null # it tells process command do not write anything args["desc_path"] = "\x00" + ProcessCommand().run(args) UploadCommand().run(args) diff --git a/mapillary_tools/upload.py b/mapillary_tools/upload.py index b22b649a4..6f6d39f3d 100644 --- a/mapillary_tools/upload.py +++ b/mapillary_tools/upload.py @@ -47,25 +47,6 @@ def __init__(self, inner_ex) -> None: super().__init__(str(inner_ex)) -class UploadHTTPError(Exception): - pass - - -def wrap_http_exception(ex: requests.HTTPError): - req = ex.request - resp = ex.response - if isinstance(resp, requests.Response) and isinstance(req, requests.Request): - lines = [ - f"{req.method} {resp.url}", - f"> HTTP Status: {resp.status_code}", - str(resp.content), - ] - else: - lines = [] - - return UploadHTTPError("\n".join(lines)) - - def _load_validate_metadatas_from_desc_path( desc_path: T.Optional[str], import_paths: T.Sequence[Path] ) -> T.List[types.Metadata]: @@ -175,18 +156,12 @@ def fetch_user_items( "Found multiple Mapillary accounts. Please specify one with --user_name" ) else: - try: - user_items = authenticate.authenticate_user(user_name) - except requests.HTTPError as exc: - raise wrap_http_exception(exc) from exc + user_items = authenticate.authenticate_user(user_name) if organization_key is not None: - try: - resp = api_v4.fetch_organization( - user_items["user_upload_token"], organization_key - ) - except requests.HTTPError as ex: - raise wrap_http_exception(ex) from ex + resp = api_v4.fetch_organization( + user_items["user_upload_token"], organization_key + ) org = resp.json() LOG.info("Uploading to organization: %s", json.dumps(org)) user_items = T.cast( @@ -430,15 +405,12 @@ def _api_logging_finished(summary: T.Dict): action: api_v4.ActionType = "upload_finished_upload" LOG.debug("API Logging for action %s: %s", action, summary) try: - api_v4.log_event( - action, - summary, - ) + api_v4.log_event(action, summary) except requests.HTTPError as exc: LOG.warning( - "Error from API Logging for action %s", + "HTTPError from API Logging for action %s: %s", action, - exc_info=wrap_http_exception(exc), + api_v4.readable_http_error(exc), ) except Exception: LOG.warning("Error from API Logging for action %s", action, exc_info=True) @@ -452,16 +424,12 @@ def _api_logging_failed(payload: T.Dict, exc: Exception): action: api_v4.ActionType = "upload_failed_upload" LOG.debug("API Logging for action %s: %s", action, payload) try: - api_v4.log_event( - action, - payload_with_reason, - ) + api_v4.log_event(action, payload_with_reason) except requests.HTTPError as exc: - wrapped_exc = wrap_http_exception(exc) LOG.warning( - "Error from API Logging for action %s", + "HTTPError from API Logging for action %s: %s", action, - exc_info=wrapped_exc, + api_v4.readable_http_error(exc), ) except Exception: LOG.warning("Error from API Logging for action %s", action, exc_info=True) @@ -678,7 +646,7 @@ def upload( raise exceptions.MapillaryUploadUnauthorizedError( debug_info.get("message") ) from inner_ex - raise wrap_http_exception(inner_ex) from inner_ex + raise inner_ex raise inner_ex diff --git a/mapillary_tools/upload_api_v4.py b/mapillary_tools/upload_api_v4.py index 3b66d83de..0e5cdf8c7 100644 --- a/mapillary_tools/upload_api_v4.py +++ b/mapillary_tools/upload_api_v4.py @@ -1,7 +1,5 @@ import enum import io -import json -import logging import os import random import typing as T @@ -9,9 +7,13 @@ import requests -from .api_v4 import MAPILLARY_GRAPH_API_ENDPOINT, request_get, request_post +from .api_v4 import ( + MAPILLARY_GRAPH_API_ENDPOINT, + request_get, + request_post, + REQUESTS_TIMEOUT, +) -LOG = logging.getLogger(__name__) MAPILLARY_UPLOAD_ENDPOINT = os.getenv( "MAPILLARY_UPLOAD_ENDPOINT", "https://rupload.facebook.com/mapillary_public_uploads" ) @@ -22,7 +24,6 @@ # i.e. if your the server does not respond within this timeout, it will throw: # ConnectionError: ('Connection aborted.', timeout('The write operation timed out')) # So let us make sure the largest possible chunks can be uploaded before this timeout for now, -REQUESTS_TIMEOUT = (20, 20) # 20 seconds UPLOAD_REQUESTS_TIMEOUT = (30 * 60, 30 * 60) # 30 minutes @@ -32,28 +33,6 @@ class ClusterFileType(enum.Enum): CAMM = "mly_camm_video" -def _sanitize_headers(headers: T.Dict): - return { - k: v - for k, v in headers.items() - if k.lower() not in ["authorization", "cookie", "x-fb-access-token"] - } - - -_S = T.TypeVar("_S", str, bytes) - - -def _truncate_end(s: _S) -> _S: - MAX_LENGTH = 512 - if MAX_LENGTH < len(s): - if isinstance(s, bytes): - return s[:MAX_LENGTH] + b"..." - else: - return str(s[:MAX_LENGTH]) + "..." - else: - return s - - class UploadService: user_access_token: str session_key: str @@ -92,13 +71,11 @@ def fetch_offset(self) -> int: "Authorization": f"OAuth {self.user_access_token}", } url = f"{MAPILLARY_UPLOAD_ENDPOINT}/{self.session_key}" - LOG.debug("GET %s", url) resp = request_get( url, headers=headers, timeout=REQUESTS_TIMEOUT, ) - LOG.debug("HTTP response %s: %s", resp.status_code, resp.content) resp.raise_for_status() data = resp.json() return data["offset"] @@ -160,14 +137,12 @@ def upload_chunks( "X-Entity-Type": self.MIME_BY_CLUSTER_TYPE[self.cluster_filetype], } url = f"{MAPILLARY_UPLOAD_ENDPOINT}/{self.session_key}" - LOG.debug("POST %s HEADERS %s", url, json.dumps(_sanitize_headers(headers))) resp = request_post( url, headers=headers, data=chunks, timeout=UPLOAD_REQUESTS_TIMEOUT, ) - LOG.debug("HTTP response %s: %s", resp.status_code, _truncate_end(resp.content)) payload = resp.json() try: @@ -190,14 +165,12 @@ def finish(self, file_handle: str) -> str: url = f"{MAPILLARY_GRAPH_API_ENDPOINT}/finish_upload" - LOG.debug("POST %s HEADERS %s", url, json.dumps(_sanitize_headers(headers))) resp = request_post( url, headers=headers, json=data, timeout=REQUESTS_TIMEOUT, ) - LOG.debug("HTTP response %s: %s", resp.status_code, _truncate_end(resp.content)) resp.raise_for_status() diff --git a/tests/unit/test_upload_api_v4.py b/tests/unit/test_upload_api_v4.py index 6d8ebb3a3..83e47ad0f 100644 --- a/tests/unit/test_upload_api_v4.py +++ b/tests/unit/test_upload_api_v4.py @@ -1,4 +1,5 @@ import io + import py from mapillary_tools import upload_api_v4