Skip to content

Commit 8fad5fa

Browse files
authored
fix downloading larger files with onedrive (#364)
1 parent e3a2930 commit 8fad5fa

File tree

4 files changed

+17
-4
lines changed

4 files changed

+17
-4
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,9 @@
1+
## 0.4.5
2+
3+
### Fixes
4+
5+
* **Fix downloading large files for OneDrive**
6+
17
## 0.4.4
28

39
### Fixes

unstructured_ingest/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "0.4.4" # pragma: no cover
1+
__version__ = "0.4.5" # pragma: no cover

unstructured_ingest/v2/pipeline/pipeline.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -203,7 +203,14 @@ def apply_filter(self, records: list[dict]) -> list[dict]:
203203

204204
def get_indices(self) -> list[dict]:
205205
if self.indexer_step.process.is_async():
206-
indices = asyncio.run(self.indexer_step.run_async())
206+
207+
async def run_async():
208+
output = []
209+
async for i in self.indexer_step.run_async():
210+
output.append(i)
211+
return output
212+
213+
indices = asyncio.run(run_async())
207214
else:
208215
indices = self.indexer_step.run()
209216
indices_inputs = [{"file_data_path": i} for i in indices]

unstructured_ingest/v2/processes/connectors/onedrive.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -223,7 +223,7 @@ class OnedriveDownloader(Downloader):
223223
download_config: OnedriveDownloaderConfig
224224

225225
@SourceConnectionNetworkError.wrap
226-
def _fetch_file(self, file_data: FileData):
226+
def _fetch_file(self, file_data: FileData) -> DriveItem:
227227
if file_data.source_identifiers is None or not file_data.source_identifiers.fullpath:
228228
raise ValueError(
229229
f"file data doesn't have enough information to get "
@@ -257,7 +257,7 @@ def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
257257
file.download_session(f, chunk_size=1024 * 1024 * 100).execute_query()
258258
else:
259259
with download_path.open(mode="wb") as f:
260-
file.download(f).execute_query()
260+
file.download_session(f).execute_query()
261261
return self.generate_download_response(file_data=file_data, download_path=download_path)
262262
except Exception as e:
263263
logger.error(f"[{CONNECTOR_TYPE}] Exception during downloading: {e}", exc_info=True)

0 commit comments

Comments
 (0)