40
40
)
41
41
from jupyter_scheduler .orm import Job , JobDefinition , create_session
42
42
from jupyter_scheduler .utils import (
43
- copy_directory ,
43
+ copy_input_file ,
44
+ copy_input_folder ,
44
45
create_output_directory ,
45
46
create_output_filename ,
46
47
)
@@ -427,22 +428,6 @@ def db_session(self):
427
428
428
429
return self ._db_session
429
430
430
- def copy_input_file (self , input_uri : str , copy_to_path : str ):
431
- """Copies the input file to the staging directory"""
432
- input_filepath = os .path .join (self .root_dir , input_uri )
433
- with fsspec .open (input_filepath ) as input_file :
434
- with fsspec .open (copy_to_path , "wb" ) as output_file :
435
- output_file .write (input_file .read ())
436
-
437
- def copy_input_folder (self , input_uri : str , nb_copy_to_path : str ) -> List [str ]:
438
- """Copies the input file along with the input directory to the staging directory, returns the list of copied files relative to the staging directory"""
439
- input_dir_path = os .path .dirname (os .path .join (self .root_dir , input_uri ))
440
- staging_dir = os .path .dirname (nb_copy_to_path )
441
- return copy_directory (
442
- source_dir = input_dir_path ,
443
- destination_dir = staging_dir ,
444
- )
445
-
446
431
async def create_job (self , model : CreateJob ) -> str :
447
432
if not model .job_definition_id and not self .file_exists (model .input_uri ):
448
433
raise InputUriError (model .input_uri )
@@ -473,19 +458,26 @@ async def create_job(self, model: CreateJob) -> str:
473
458
session .add (job )
474
459
session .commit ()
475
460
461
+ dask : DaskClient = await self .dask_client_future
462
+
476
463
staging_paths = self .get_staging_paths (DescribeJob .from_orm (job ))
477
464
if model .package_input_folder :
478
- copied_files = self .copy_input_folder (model .input_uri , staging_paths ["input" ])
465
+ copy_future = dask .submit (
466
+ copy_input_folder , self .root_dir , model .input_uri , staging_paths ["input" ]
467
+ )
468
+ copied_files = await dask .gather (copy_future )
479
469
input_notebook_filename = os .path .basename (model .input_uri )
480
470
job .packaged_files = [
481
471
file for file in copied_files if file != input_notebook_filename
482
472
]
483
473
session .commit ()
484
474
else :
485
- self .copy_input_file (model .input_uri , staging_paths ["input" ])
475
+ copy_future = dask .submit (
476
+ copy_input_file , self .root_dir , model .input_uri , staging_paths ["input" ]
477
+ )
478
+ await dask .gather (copy_future )
486
479
487
- dask_client : DaskClient = await self .dask_client_future
488
- future = dask_client .submit (
480
+ process_future = dask .submit (
489
481
self .execution_manager_class (
490
482
job_id = job .job_id ,
491
483
staging_paths = staging_paths ,
@@ -494,7 +486,7 @@ async def create_job(self, model: CreateJob) -> str:
494
486
).process
495
487
)
496
488
497
- job .pid = future .key
489
+ job .pid = process_future .key
498
490
session .commit ()
499
491
500
492
job_id = job .job_id
0 commit comments