Skip to content

Commit 9994767

Browse files
committed
add workflow execution handlers and endpoints
1 parent 6466c48 commit 9994767

File tree

5 files changed

+206
-27
lines changed

5 files changed

+206
-27
lines changed

jupyter_scheduler/executors.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616
from prefect.futures import as_completed
1717

1818
from jupyter_scheduler.models import CreateJob, DescribeJob, JobFeature, Status
19-
from jupyter_scheduler.orm import Job, Workflow, create_session
19+
from jupyter_scheduler.orm import Job, Workflow, WorkflowDefinition, create_session
2020
from jupyter_scheduler.parameterize import add_parameters
2121
from jupyter_scheduler.scheduler import Scheduler
2222
from jupyter_scheduler.utils import get_utc_timestamp
23-
from jupyter_scheduler.workflows import DescribeWorkflow
23+
from jupyter_scheduler.workflows import DescribeWorkflow, DescribeWorkflowDefinition
2424

2525

2626
class ExecutionManager(ABC):
@@ -40,11 +40,13 @@ def __init__(
4040
db_url: str,
4141
job_id: str = None,
4242
workflow_id: str = None,
43+
workflow_definition_id: str = None,
4344
root_dir: str = None,
4445
staging_paths: Dict[str, str] = None,
4546
):
4647
self.job_id = job_id
4748
self.workflow_id = workflow_id
49+
self.workflow_definition_id = workflow_definition_id
4850
self.staging_paths = staging_paths
4951
self.root_dir = root_dir
5052
self.db_url = db_url
@@ -58,6 +60,17 @@ def model(self):
5860
)
5961
self._model = DescribeWorkflow.from_orm(workflow)
6062
return self._model
63+
if self.workflow_definition_id:
64+
with self.db_session() as session:
65+
workflow_definition = (
66+
session.query(WorkflowDefinition)
67+
.filter(
68+
WorkflowDefinition.workflow_definition_id == self.workflow_definition_id
69+
)
70+
.first()
71+
)
72+
self._model = DescribeWorkflowDefinition.from_orm(workflow_definition)
73+
return self._model
6174
if self._model is None:
6275
with self.db_session() as session:
6376
job = session.query(Job).filter(Job.job_id == self.job_id).first()
@@ -187,6 +200,23 @@ def on_complete_workflow(self):
187200
class DefaultExecutionManager(ExecutionManager):
188201
"""Default execution manager that executes notebooks"""
189202

203+
def activate_workflow_definition(self):
    """Mark this manager's workflow definition as active in the database.

    The definition id is read from ``self.model``; the matching
    ``WorkflowDefinition`` row has its ``active`` column set to ``True``
    and the change is committed.
    """
    definition_id = self.model.workflow_definition_id
    with self.db_session() as session:
        session.query(WorkflowDefinition).filter(
            WorkflowDefinition.workflow_definition_id == definition_id
        ).update({"active": True})
        session.commit()
        # The row used to be re-queried here, but the result was bound to a
        # local that was never read, so the extra SELECT has been dropped.
219+
190220
@task(name="Execute workflow task")
191221
def execute_task(self, job: Job):
192222
with self.db_session() as session:

jupyter_scheduler/extension.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from jupyter_scheduler.orm import create_tables
99
from jupyter_scheduler.workflows import (
10+
WorkflowDefinitionsActivationHandler,
1011
WorkflowDefinitionsHandler,
1112
WorkflowDefinitionsTasksHandler,
1213
WorkflowsHandler,
@@ -54,16 +55,17 @@ class SchedulerApp(ExtensionApp):
5455
rf"scheduler/worklows/{WORKFLOW_ID_REGEX}/tasks",
5556
WorkflowsTasksHandler,
5657
),
58+
(r"scheduler/worklow_definitions", WorkflowDefinitionsHandler),
5759
(
5860
rf"scheduler/worklow_definitions/{WORKFLOW_DEFINITION_ID_REGEX}",
5961
WorkflowDefinitionsHandler,
6062
),
6163
(
62-
rf"scheduler/worklows/{WORKFLOW_DEFINITION_ID_REGEX}/run",
63-
WorkflowDefinitionsHandler,
64+
rf"scheduler/worklow_definitions/{WORKFLOW_DEFINITION_ID_REGEX}/activate",
65+
WorkflowDefinitionsActivationHandler,
6466
),
6567
(
66-
rf"scheduler/worklows/{WORKFLOW_ID_REGEX}/tasks",
68+
rf"scheduler/worklow_definitions/{WORKFLOW_DEFINITION_ID_REGEX}/tasks",
6769
WorkflowDefinitionsTasksHandler,
6870
),
6971
]

jupyter_scheduler/orm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ class Workflow(Base):
123123
class WorkflowDefinition(Base):
124124
__tablename__ = "workflow_definitions"
125125
__table_args__ = {"extend_existing": True}
126-
workflow_id = Column(String(36), primary_key=True, default=generate_uuid)
126+
workflow_definition_id = Column(String(36), primary_key=True, default=generate_uuid)
127127
tasks = Column(JsonType)
128128
status = Column(String(64), default=Status.CREATED)
129129
active = Column(Boolean, default=False)

jupyter_scheduler/scheduler.py

Lines changed: 83 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,20 @@
3737
UpdateJob,
3838
UpdateJobDefinition,
3939
)
40-
from jupyter_scheduler.orm import Job, JobDefinition, Workflow, create_session
40+
from jupyter_scheduler.orm import Job, JobDefinition, Workflow, WorkflowDefinition, create_session
4141
from jupyter_scheduler.utils import (
4242
copy_directory,
4343
create_output_directory,
4444
create_output_filename,
4545
)
46-
from jupyter_scheduler.workflows import CreateWorkflow, DescribeWorkflow, UpdateWorkflow
46+
from jupyter_scheduler.workflows import (
47+
CreateWorkflow,
48+
CreateWorkflowDefinition,
49+
DescribeWorkflow,
50+
DescribeWorkflowDefinition,
51+
UpdateWorkflow,
52+
UpdateWorkflowDefinition,
53+
)
4754

4855

4956
class BaseScheduler(LoggingConfigurable):
@@ -117,6 +124,10 @@ def run_workflow(self, workflow_id: str) -> str:
117124
"""Triggers execution of the workflow."""
118125
raise NotImplementedError("must be implemented by subclass")
119126

127+
def activate_workflow_definition(self, workflow_definition_id: str) -> str:
    """Activate a workflow definition, marking it as ready for execution.

    Abstract hook: this base implementation always raises
    ``NotImplementedError``; subclasses are expected to override it.
    """
    raise NotImplementedError("must be implemented by subclass")
130+
120131
def get_workflow(self, workflow_id: str) -> DescribeWorkflow:
121132
"""Returns workflow record for a single workflow."""
122133
raise NotImplementedError("must be implemented by subclass")
@@ -125,6 +136,12 @@ def create_workflow_task(self, workflow_id: str, model: CreateJob) -> str:
125136
"""Adds a task to a workflow."""
126137
raise NotImplementedError("must be implemented by subclass")
127138

139+
def create_workflow_definition_task(
    self, workflow_definition_id: str, model: CreateJobDefinition
) -> str:
    """Add a task to the workflow definition with the given id.

    Abstract hook: this base implementation always raises
    ``NotImplementedError``; subclasses are expected to override it.
    """
    raise NotImplementedError("must be implemented by subclass")
144+
128145
def update_job(self, job_id: str, model: UpdateJob):
129146
"""Updates job metadata in the persistence store,
130147
for example name, status etc. In case of status
@@ -176,6 +193,13 @@ def create_job_definition(self, model: CreateJobDefinition) -> str:
176193
"""
177194
raise NotImplementedError("must be implemented by subclass")
178195

196+
def create_workflow_definition(self, model: CreateWorkflowDefinition) -> str:
    """Create a new workflow definition record.

    A workflow definition acts as the template from which
    recurring/scheduled workflows are created.

    Abstract hook: this base implementation always raises
    ``NotImplementedError``; subclasses are expected to override it.
    """
    raise NotImplementedError("must be implemented by subclass")
202+
179203
def update_job_definition(self, job_definition_id: str, model: UpdateJobDefinition):
180204
"""Updates job definition metadata in the persistence store,
181205
should only impact all future jobs.
@@ -192,6 +216,10 @@ def get_job_definition(self, job_definition_id: str) -> DescribeJobDefinition:
192216
"""Returns job definition record for a single job definition"""
193217
raise NotImplementedError("must be implemented by subclass")
194218

219+
def get_workflow_definition(self, workflow_definition_id: str) -> DescribeWorkflowDefinition:
    """Return the record describing a single workflow definition.

    Abstract hook: this base implementation always raises
    ``NotImplementedError``; subclasses are expected to override it.
    """
    raise NotImplementedError("must be implemented by subclass")
222+
195223
def list_job_definitions(self, query: ListJobDefinitionsQuery) -> ListJobDefinitionsResponse:
196224
"""Returns list of all job definitions filtered by query"""
197225
raise NotImplementedError("must be implemented by subclass")
@@ -524,6 +552,13 @@ def create_workflow(self, model: CreateWorkflow) -> str:
524552
session.commit()
525553
return workflow.workflow_id
526554

555+
def create_workflow_definition(self, model: CreateWorkflowDefinition) -> str:
    """Persist a new workflow definition and return its id.

    Only the fields of ``model`` that are not ``None`` are passed to the
    ``WorkflowDefinition`` constructor.
    """
    fields = model.dict(exclude_none=True)
    with self.db_session() as session:
        record = WorkflowDefinition(**fields)
        session.add(record)
        session.commit()
        return record.workflow_definition_id
561+
527562
def run_workflow(self, workflow_id: str) -> str:
528563
execution_manager = self.execution_manager_class(
529564
workflow_id=workflow_id,
@@ -533,6 +568,15 @@ def run_workflow(self, workflow_id: str) -> str:
533568
execution_manager.process_workflow()
534569
return workflow_id
535570

571+
def activate_workflow_definition(self, workflow_definition_id: str) -> str:
    """Activate a workflow definition through a fresh execution manager.

    Builds an instance of ``self.execution_manager_class`` bound to the
    given definition id, this scheduler's ``root_dir`` and ``db_url``, and
    calls its ``activate_workflow_definition`` method.

    Returns the ``workflow_definition_id`` that was passed in.
    """
    manager = self.execution_manager_class(
        workflow_definition_id=workflow_definition_id,
        root_dir=self.root_dir,
        db_url=self.db_url,
    )
    manager.activate_workflow_definition()
    return workflow_definition_id
579+
536580
def get_workflow(self, workflow_id: str) -> DescribeWorkflow:
537581
with self.db_session() as session:
538582
workflow_record = (
@@ -541,6 +585,16 @@ def get_workflow(self, workflow_id: str) -> DescribeWorkflow:
541585
model = DescribeWorkflow.from_orm(workflow_record)
542586
return model
543587

588+
def get_workflow_definition(self, workflow_definition_id: str) -> DescribeWorkflowDefinition:
    """Load one workflow definition by id as a ``DescribeWorkflowDefinition``.

    The row is fetched with ``.one()`` and converted with ``from_orm``.
    NOTE(review): ``.one()`` presumably raises when the id matches no row —
    confirm how callers are expected to handle a missing definition.
    """
    with self.db_session() as session:
        id_matches = WorkflowDefinition.workflow_definition_id == workflow_definition_id
        record = session.query(WorkflowDefinition).filter(id_matches).one()
        return DescribeWorkflowDefinition.from_orm(record)
597+
544598
def create_workflow_task(self, workflow_id: str, model: CreateJob) -> str:
545599
job_id = self.create_job(model, run=False)
546600
workflow: DescribeWorkflow = self.get_workflow(workflow_id)
@@ -549,13 +603,36 @@ def create_workflow_task(self, workflow_id: str, model: CreateJob) -> str:
549603
self.update_workflow(workflow_id, UpdateWorkflow(tasks=updated_tasks))
550604
return job_id
551605

606+
def create_workflow_definition_task(
    self, workflow_definition_id: str, model: CreateJobDefinition
) -> str:
    """Create a job definition and append it to a workflow definition's tasks.

    The job definition is created with ``add_to_task_runner=False``. Its id
    is then appended to a copy of the workflow definition's current task
    list (an empty list when none is set), and the workflow definition is
    updated with the new list.

    Returns the id of the newly created job definition.
    """
    job_definition_id = self.create_job_definition(model, add_to_task_runner=False)
    definition = self.get_workflow_definition(workflow_definition_id)

    # Copy before appending so the fetched model's own list is left untouched.
    current_tasks = definition.tasks or []
    updated_tasks = current_tasks[:]
    updated_tasks.append(job_definition_id)

    changes = UpdateWorkflowDefinition(tasks=updated_tasks)
    self.update_workflow_definition(workflow_definition_id, changes)
    return job_definition_id
619+
552620
def update_workflow(self, workflow_id: str, model: UpdateWorkflow):
    """Apply the non-``None`` fields of ``model`` to the workflow with this id.

    The update is issued against ``Workflow`` rows whose ``workflow_id``
    matches, and is committed before the session is released.
    """
    changes = model.dict(exclude_none=True)
    with self.db_session() as session:
        matching = session.query(Workflow).filter(Workflow.workflow_id == workflow_id)
        matching.update(changes)
        session.commit()
558626

627+
def update_workflow_definition(
    self, workflow_definition_id: str, model: UpdateWorkflowDefinition
):
    """Apply the non-``None`` fields of ``model`` to a workflow definition.

    The update is issued against ``WorkflowDefinition`` rows whose
    ``workflow_definition_id`` matches, and is committed before the session
    is released.
    """
    changes = model.dict(exclude_none=True)
    with self.db_session() as session:
        id_matches = WorkflowDefinition.workflow_definition_id == workflow_definition_id
        session.query(WorkflowDefinition).filter(id_matches).update(changes)
        session.commit()
635+
559636
def update_job(self, job_id: str, model: UpdateJob):
560637
with self.db_session() as session:
561638
session.query(Job).filter(Job.job_id == job_id).update(model.dict(exclude_none=True))
@@ -657,7 +734,9 @@ def stop_job(self, job_id):
657734
session.commit()
658735
break
659736

660-
def create_job_definition(self, model: CreateJobDefinition) -> str:
737+
def create_job_definition(
738+
self, model: CreateJobDefinition, add_to_task_runner: bool = True
739+
) -> str:
661740
with self.db_session() as session:
662741
if not self.file_exists(model.input_uri):
663742
raise InputUriError(model.input_uri)
@@ -681,7 +760,7 @@ def create_job_definition(self, model: CreateJobDefinition) -> str:
681760
else:
682761
self.copy_input_file(model.input_uri, staging_paths["input"])
683762

684-
if self.task_runner and job_definition_schedule:
763+
if add_to_task_runner and self.task_runner and job_definition_schedule:
685764
self.task_runner.add_job_definition(job_definition_id)
686765

687766
return job_definition_id

0 commit comments

Comments
 (0)