 import os
 import random
 import shutil
-import signal
-import subprocess
 from typing import Dict, List, Optional, Type, Union
-import sys
-from uuid import uuid4
+import subprocess

 import fsspec
-import mlflow
 import psutil
 from jupyter_core.paths import jupyter_data_dir
 from jupyter_server.transutils import _i18n
@@ -50,10 +46,6 @@
     create_output_filename,
 )

-MLFLOW_SERVER_HOST = "127.0.0.1"
-MLFLOW_SERVER_PORT = "5000"
-MLFLOW_SERVER_URI = f"http://{MLFLOW_SERVER_HOST}:{MLFLOW_SERVER_PORT}"
-

 class BaseScheduler(LoggingConfigurable):
     """Base class for schedulers. A default implementation
@@ -409,31 +401,20 @@ class Scheduler(BaseScheduler):
     task_runner = Instance(allow_none=True, klass="jupyter_scheduler.task_runner.BaseTaskRunner")

     def start_mlflow_server(self):
-        mlflow_process = subprocess.Popen(
+        subprocess.Popen(
             [
                 "mlflow",
                 "server",
+                "--backend-store-uri",
+                "./mlruns",
+                "--default-artifact-root",
+                "./mlartifacts",
                 "--host",
-                MLFLOW_SERVER_HOST,
+                "0.0.0.0",
                 "--port",
-                MLFLOW_SERVER_PORT,
-            ],
-            preexec_fn=os.setsid,
+                "5000",
+            ]
         )
-        mlflow.set_tracking_uri(MLFLOW_SERVER_URI)
-        return mlflow_process
-
-    def stop_mlflow_server(self):
-        if self.mlflow_process is not None:
-            os.killpg(os.getpgid(self.mlflow_process.pid), signal.SIGTERM)
-            self.mlflow_process.wait()
-            self.mlflow_process = None
-            print("MLFlow server stopped")
-
-    def mlflow_signal_handler(self, signum, frame):
-        print("Shutting down MLFlow server")
-        self.stop_mlflow_server()
-        sys.exit(0)

     def __init__(
         self,
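For context, a rough standalone sketch of launching the tracking server with the same arguments the new start_mlflow_server() uses, plus a readiness check; the polling loop, timeout, and localhost URL are illustrative additions and not part of this change (the method itself fires and forgets the process):

import subprocess
import time
import urllib.request


def launch_mlflow_server(port: int = 5000) -> subprocess.Popen:
    # Start a standalone MLflow tracking server, mirroring the arguments
    # used in start_mlflow_server() above.
    proc = subprocess.Popen(
        [
            "mlflow",
            "server",
            "--backend-store-uri",
            "./mlruns",
            "--default-artifact-root",
            "./mlartifacts",
            "--host",
            "0.0.0.0",
            "--port",
            str(port),
        ]
    )
    # Poll until the server answers (assumption: it is reachable on localhost);
    # start_mlflow_server() itself neither waits nor keeps the process handle.
    deadline = time.time() + 30
    while time.time() < deadline:
        try:
            if urllib.request.urlopen(f"http://127.0.0.1:{port}").status == 200:
                break
        except OSError:
            time.sleep(1)
    return proc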
@@ -450,9 +431,7 @@ def __init__(
         if self.task_runner_class:
             self.task_runner = self.task_runner_class(scheduler=self, config=config)

-        self.mlflow_process = self.start_mlflow_server()
-        signal.signal(signal.SIGINT, self.mlflow_signal_handler)
-        signal.signal(signal.SIGTERM, self.mlflow_signal_handler)
+        self.start_mlflow_server()

     @property
     def db_session(self):
@@ -502,21 +481,6 @@ def create_job(self, model: CreateJob) -> str:
             if not model.output_formats:
                 model.output_formats = []

-            mlflow_client = mlflow.MlflowClient()
-
-            if model.job_definition_id and model.mlflow_experiment_id:
-                experiment_id = model.mlflow_experiment_id
-            else:
-                experiment_id = mlflow_client.create_experiment(f"{model.input_filename}-{uuid4()}")
-                model.mlflow_experiment_id = experiment_id
-                input_file_path = os.path.join(self.root_dir, model.input_uri)
-                mlflow.log_artifact(input_file_path, "input")
-
-            mlflow_run = mlflow_client.create_run(
-                experiment_id=experiment_id, run_name=f"{model.input_filename}-{uuid4()}"
-            )
-            model.mlflow_run_id = mlflow_run.info.run_id
-
             job = Job(**model.dict(exclude_none=True, exclude={"input_uri"}))

             session.add(job)
@@ -664,12 +628,6 @@ def create_job_definition(self, model: CreateJobDefinition) -> str:
             if not self.file_exists(model.input_uri):
                 raise InputUriError(model.input_uri)

-            mlflow_client = mlflow.MlflowClient()
-            experiment_id = mlflow_client.create_experiment(f"{model.input_filename}-{uuid4()}")
-            model.mlflow_experiment_id = experiment_id
-            input_file_path = os.path.join(self.root_dir, model.input_uri)
-            mlflow.log_artifact(input_file_path, "input")
-
             job_definition = JobDefinition(**model.dict(exclude_none=True, exclude={"input_uri"}))
             session.add(job_definition)
             session.commit()
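Since the scheduler no longer calls mlflow.set_tracking_uri() or creates experiments and runs on the job's behalf, notebook or job code that wants to log to the relaunched server has to point MLflow at it explicitly. A minimal sketch, assuming the server started by start_mlflow_server() is reachable on localhost:5000; the experiment and run names below are illustrative:

import mlflow

# Assumption: the standalone server started by start_mlflow_server() above
# is reachable locally; the URI, experiment, params, and metrics are examples.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("scheduled-notebook-runs")

with mlflow.start_run(run_name="example-run"):
    mlflow.log_param("input_filename", "notebook.ipynb")
    mlflow.log_metric("duration_seconds", 12.3)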