Skip to content

Commit 66c89c1

Browse files
committed
use dask futures for files download instead of multiprocessing module
1 parent dd38b8c commit 66c89c1

File tree

3 files changed

+15
-8
lines changed

3 files changed

+15
-8
lines changed

jupyter_scheduler/extension.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,9 @@ def initialize_settings(self):
8383
dask_client_future=dask_client_future,
8484
)
8585

86-
job_files_manager = self.job_files_manager_class(scheduler=scheduler)
86+
job_files_manager = self.job_files_manager_class(
87+
scheduler=scheduler, dask_client_future=dask_client_future
88+
)
8789

8890
self.settings.update(
8991
environments_manager=environments_manager,

jupyter_scheduler/handlers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ def job_files_manager(self):
402402
if not self._job_files_manager:
403403
self._job_files_manager = self.settings.get("job_files_manager", None)
404404

405-
return self._job_files_manager
405+
return self._job_files_manager
406406

407407
@authenticated
408408
async def get(self, job_id):

jupyter_scheduler/job_files_manager.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import os
22
import random
33
import tarfile
4-
from multiprocessing import Process
5-
from typing import Dict, List, Optional, Type
4+
from typing import Awaitable, Dict, List, Optional, Type
65

76
import fsspec
7+
from dask.distributed import Client as DaskClient
88
from jupyter_server.utils import ensure_async
99

1010
from jupyter_scheduler.exceptions import SchedulerError
@@ -14,17 +14,23 @@
1414
class JobFilesManager:
1515
scheduler = None
1616

17-
def __init__(self, scheduler: Type[BaseScheduler]):
17+
def __init__(
18+
self,
19+
scheduler: Type[BaseScheduler],
20+
dask_client_future: Awaitable[DaskClient],
21+
):
1822
self.scheduler = scheduler
23+
self.dask_client_future = dask_client_future
1924

2025
async def copy_from_staging(self, job_id: str, redownload: Optional[bool] = False):
2126
job = await ensure_async(self.scheduler.get_job(job_id, False))
2227
staging_paths = await ensure_async(self.scheduler.get_staging_paths(job))
2328
output_filenames = self.scheduler.get_job_filenames(job)
2429
output_dir = self.scheduler.get_local_output_path(model=job, root_dir_relative=True)
2530

26-
p = Process(
27-
target=Downloader(
31+
dask_client: DaskClient = await self.dask_client_future
32+
dask_client.submit(
33+
Downloader(
2834
output_formats=job.output_formats,
2935
output_filenames=output_filenames,
3036
staging_paths=staging_paths,
@@ -33,7 +39,6 @@ async def copy_from_staging(self, job_id: str, redownload: Optional[bool] = Fals
3339
include_staging_files=job.package_input_folder,
3440
).download
3541
)
36-
p.start()
3742

3843

3944
class Downloader:

0 commit comments

Comments
 (0)