@@ -5,6 +5,7 @@
 import os
 import random
 import typing as T
+import uuid

 import requests

@@ -55,31 +56,31 @@ def _truncate_end(s: _S) -> _S:

 class UploadService:
     user_access_token: str
-    entity_size: int
     session_key: str
     callbacks: T.List[T.Callable[[bytes, T.Optional[requests.Response]], None]]
     cluster_filetype: ClusterFileType
     organization_id: T.Optional[T.Union[str, int]]
     chunk_size: int

+    MIME_BY_CLUSTER_TYPE: T.Dict[ClusterFileType, str] = {
+        ClusterFileType.ZIP: "application/zip",
+        ClusterFileType.BLACKVUE: "video/mp4",
+        ClusterFileType.CAMM: "video/mp4",
+    }
+
     def __init__(
         self,
         user_access_token: str,
         session_key: str,
-        entity_size: int,
         organization_id: T.Optional[T.Union[str, int]] = None,
         cluster_filetype: ClusterFileType = ClusterFileType.ZIP,
         chunk_size: int = DEFAULT_CHUNK_SIZE,
     ):
-        if entity_size <= 0:
-            raise ValueError(f"Expect positive entity size but got {entity_size}")
-
         if chunk_size <= 0:
             raise ValueError("Expect positive chunk size")

         self.user_access_token = user_access_token
         self.session_key = session_key
-        self.entity_size = entity_size
         self.organization_id = organization_id
         # validate the input
         self.cluster_filetype = ClusterFileType(cluster_filetype)
@@ -107,55 +108,66 @@ def upload(
         data: T.IO[bytes],
         offset: T.Optional[int] = None,
     ) -> str:
-        if offset is None:
-            offset = self.fetch_offset()
-
-        entity_type_map: T.Dict[ClusterFileType, str] = {
-            ClusterFileType.ZIP: "application/zip",
-            ClusterFileType.BLACKVUE: "video/mp4",
-            ClusterFileType.CAMM: "video/mp4",
-        }
-
-        entity_type = entity_type_map[self.cluster_filetype]
-
-        data.seek(offset, io.SEEK_CUR)
+        chunks = self._chunkize_byte_stream(data)
+        return self.upload_chunks(chunks, offset=offset)

+    def _chunkize_byte_stream(
+        self, stream: T.IO[bytes]
+    ) -> T.Generator[bytes, None, None]:
         while True:
-            chunk = data.read(self.chunk_size)
-            # it is possible to upload an empty chunk here
-            # in order to return the handle
-            headers = {
-                "Authorization": f"OAuth {self.user_access_token}",
-                "Offset": f"{offset}",
-                "X-Entity-Length": str(self.entity_size),
-                "X-Entity-Name": self.session_key,
-                "X-Entity-Type": entity_type,
-            }
-            url = f"{MAPILLARY_UPLOAD_ENDPOINT}/{self.session_key}"
-            LOG.debug("POST %s HEADERS %s", url, json.dumps(_sanitize_headers(headers)))
-            resp = request_post(
-                url,
-                headers=headers,
-                data=chunk,
-                timeout=UPLOAD_REQUESTS_TIMEOUT,
-            )
-            LOG.debug(
-                "HTTP response %s: %s", resp.status_code, _truncate_end(resp.content)
-            )
-            resp.raise_for_status()
-            offset += len(chunk)
-            LOG.debug("The next offset will be: %s", offset)
+            data = stream.read(self.chunk_size)
+            if not data:
+                break
+            yield data
+
+    def _offset_chunks(
+        self, chunks: T.Iterable[bytes], offset: int
+    ) -> T.Generator[bytes, None, None]:
+        assert offset >= 0, f"Expect non-negative offset but got {offset}"
+
+        for chunk in chunks:
+            if offset:
+                if offset < len(chunk):
+                    yield chunk[offset:]
+                    offset = 0
+                else:
+                    offset -= len(chunk)
+            else:
+                yield chunk
+
+    def _attach_callbacks(
+        self, chunks: T.Iterable[bytes]
+    ) -> T.Generator[bytes, None, None]:
+        for chunk in chunks:
+            yield chunk
             for callback in self.callbacks:
-                callback(chunk, resp)
-            # we can assert that offset == self.fetch_offset(session_key)
-            # otherwise, server will throw
+                callback(chunk, None)

-            if not chunk:
-                break
+    def upload_chunks(
+        self,
+        chunks: T.Iterable[bytes],
+        offset: T.Optional[int] = None,
+    ) -> str:
+        if offset is None:
+            offset = self.fetch_offset()

-        assert offset == self.entity_size, (
-            f"Offset ends at {offset} but the entity size is {self.entity_size}"
+        chunks = self._attach_callbacks(self._offset_chunks(chunks, offset))
+
+        headers = {
+            "Authorization": f"OAuth {self.user_access_token}",
+            "Offset": f"{offset}",
+            "X-Entity-Name": self.session_key,
+            "X-Entity-Type": self.MIME_BY_CLUSTER_TYPE[self.cluster_filetype],
+        }
+        url = f"{MAPILLARY_UPLOAD_ENDPOINT}/{self.session_key}"
+        LOG.debug("POST %s HEADERS %s", url, json.dumps(_sanitize_headers(headers)))
+        resp = request_post(
+            url,
+            headers=headers,
+            data=chunks,
+            timeout=UPLOAD_REQUESTS_TIMEOUT,
         )
+        LOG.debug("HTTP response %s: %s", resp.status_code, _truncate_end(resp.content))

         payload = resp.json()
         try:
@@ -209,35 +221,30 @@ def __init__(self, *args, **kwargs):
         )
         self._error_ratio = 0.1

-    def upload(
+    def upload_chunks(
         self,
-        data: T.IO[bytes],
+        chunks: T.Iterable[bytes],
         offset: T.Optional[int] = None,
     ) -> str:
         if offset is None:
             offset = self.fetch_offset()
+
+        chunks = self._attach_callbacks(self._offset_chunks(chunks, offset))
+
         os.makedirs(self._upload_path, exist_ok=True)
         filename = os.path.join(self._upload_path, self.session_key)
         with open(filename, "ab") as fp:
-            data.seek(offset, io.SEEK_CUR)
-            while True:
-                chunk = data.read(self.chunk_size)
-                if not chunk:
-                    break
-                # fail here means nothing uploaded
+            for chunk in chunks:
                 if random.random() <= self._error_ratio:
                     raise requests.ConnectionError(
                         f"TEST ONLY: Failed to upload with error ratio {self._error_ratio}"
                     )
                 fp.write(chunk)
-                # fail here means patially uploaded
                 if random.random() <= self._error_ratio:
                     raise requests.ConnectionError(
                         f"TEST ONLY: Partially uploaded with error ratio {self._error_ratio}"
                     )
-                for callback in self.callbacks:
-                    callback(chunk, None)
-        return self.session_key
+        return uuid.uuid4().hex

     def finish(self, _: str) -> str:
         return "0"
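
For context beyond the diff itself: the refactor replaces the seek-and-loop upload with a lazy generator pipeline (_chunkize_byte_stream -> _offset_chunks -> _attach_callbacks) that requests can stream as the request body. Below is a minimal standalone sketch of how the chunking and offset-skipping stages compose; chunkize and skip_offset are illustrative stand-ins for the private methods in the patch, not part of it.

import io
import typing as T

def chunkize(stream: T.IO[bytes], chunk_size: int) -> T.Generator[bytes, None, None]:
    # Read fixed-size chunks until EOF, mirroring _chunkize_byte_stream.
    while True:
        data = stream.read(chunk_size)
        if not data:
            break
        yield data

def skip_offset(chunks: T.Iterable[bytes], offset: int) -> T.Generator[bytes, None, None]:
    # Drop the first `offset` bytes, then pass chunks through, mirroring _offset_chunks.
    assert offset >= 0, f"Expect non-negative offset but got {offset}"
    for chunk in chunks:
        if offset:
            if offset < len(chunk):
                yield chunk[offset:]
                offset = 0
            else:
                offset -= len(chunk)
        else:
            yield chunk

stream = io.BytesIO(b"abcdefghij")
print(list(skip_offset(chunkize(stream, 4), 6)))
# prints [b'gh', b'ij']: the 6 already-uploaded bytes are skipped lazily,
# which is how a resumed upload continues from the server-reported offset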
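One property worth noting: requests sends any iterable of bytes passed as data= using Transfer-Encoding: chunked, so no Content-Length is needed up front. That is presumably what allows the X-Entity-Length header, and the entity_size field backing it, to be dropped. A hedged sketch against a placeholder URL, not the real Mapillary endpoint:

import requests

def chunks():
    # Placeholder generator standing in for the offset/callback pipeline.
    yield b"hello "
    yield b"world"

# An iterable body is streamed with Transfer-Encoding: chunked;
# the URL, header, and timeout values here are illustrative only.
resp = requests.post(
    "https://example.com/upload/session-key",
    headers={"Offset": "0"},
    data=chunks(),
    timeout=30,
)
print(resp.status_code)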