Commit 758ca22
R1 475 deeploy api fixes (#290)
* fix: update the update pipeline logic to remove and re-create the pipeline
* fix: add comments
* chore: inc version
1 parent 45fd62f commit 758ca22
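
In short, updating a Deeploy job no longer patches plugin instances in place: the API now validates that the request targets the running node set, tears the pipeline down, and re-creates it with the preserved specs. A minimal sketch of that control flow, using hypothetical stand-in names (`gather_context`, `delete_pipeline`, `deploy_pipeline`) for the methods changed below:

```python
def update_job(owner, app_id, job_id, inputs):
  # Discover the live deployment: running instances, their nodes, stored specs.
  ctx = gather_context(owner=owner, app_id=app_id, job_id=job_id)

  # Updates must stay on the node set that already runs the job.
  requested = set(inputs.get("target_nodes") or ctx["nodes"])
  if requested != set(ctx["nodes"]):
    raise ValueError("update must target the existing nodes")

  # Remove the old pipeline, then re-create it reusing the stored specs.
  delete_pipeline(owner=owner, app_id=app_id, job_id=job_id,
                  discovered_instances=ctx["discovered_instances"])
  return deploy_pipeline(inputs, new_nodes=ctx["nodes"],
                         deeploy_specs=ctx["deeploy_specs"])
```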

File tree

3 files changed: +152 −62 lines changed

extensions/business/deeploy/deeploy_manager_api.py

Lines changed: 77 additions & 44 deletions
@@ -208,58 +208,90 @@ def _process_pipeline_request(
 
     # Get nodes based on operation type
     discovered_plugin_instances = []
+    deployment_nodes = []
+    confirmation_nodes = []
+    deeploy_specs_for_update = None
     if is_create:
-      nodes = self._check_nodes_availability(inputs)
+      deployment_nodes = self._check_nodes_availability(inputs)
+      confirmation_nodes = list(deployment_nodes)
     else:
-      discovered_plugin_instances = self._discover_plugin_instances(app_id=app_id, job_id=job_id, owner=sender)
-
+      # Discover the live deployment so we can validate node affinity and reuse existing specs.
+      pipeline_context = self._gather_running_pipeline_context(
+        owner=sender,
+        app_id=app_id,
+        job_id=job_id,
+      )
+      discovered_plugin_instances = pipeline_context["discovered_instances"]
+      current_nodes = pipeline_context["nodes"]
+      deeploy_specs_for_update = pipeline_context["deeploy_specs"]
       self.P(f"Discovered plugin instances: {self.json_dumps(discovered_plugin_instances)}")
-      deeploy_specs_for_update = None
-      if job_app_type in (JOB_APP_TYPES.NATIVE, JOB_APP_TYPES.GENERIC, JOB_APP_TYPES.SERVICE):
-        discovered_plugin_instances = self._ensure_plugin_instance_ids(
-          inputs=inputs,
-          discovered_plugin_instances=discovered_plugin_instances,
-          owner=sender,
-          app_id=app_id,
-          job_id=job_id,
-        )
-        deeploy_specs_for_update = self._prepare_updated_deeploy_specs(
-          owner=sender,
-          app_id=app_id,
-          job_id=job_id,
-          discovered_plugin_instances=discovered_plugin_instances,
+
+      requested_nodes = inputs.get(DEEPLOY_KEYS.TARGET_NODES, None) or []
+      normalized_requested_nodes = [
+        self._check_and_maybe_convert_address(node) for node in requested_nodes
+      ] if requested_nodes else []
+
+      if normalized_requested_nodes:
+        # Reject updates that request a different node set than the one currently running.
+        if set(normalized_requested_nodes) != set(current_nodes):
+          msg = (
+            f"{DEEPLOY_ERRORS.NODES2}: Update request must target existing nodes {current_nodes}. "
+            f"Received {normalized_requested_nodes}."
+          )
+          raise ValueError(msg)
+
+      requested_nodes_count = inputs.get(DEEPLOY_KEYS.TARGET_NODES_COUNT, 0)
+      if requested_nodes_count and requested_nodes_count != len(current_nodes):
+        msg = (
+          f"{DEEPLOY_ERRORS.NODES2}: Update request must keep the original number of nodes "
+          f"({len(current_nodes)}). Received {requested_nodes_count}."
         )
-      nodes = [instance[DEEPLOY_PLUGIN_DATA.NODE] for instance in discovered_plugin_instances]
+        raise ValueError(msg)
 
-    if is_create:
-      dct_status, str_status = self.check_and_deploy_pipelines(
-        sender=sender,
-        inputs=inputs,
-        app_id=app_id,
-        app_alias=app_alias,
-        app_type=app_type,
-        new_nodes=nodes,
-        update_nodes=[],
-        discovered_plugin_instances=discovered_plugin_instances,
-        job_app_type=job_app_type,
-      )
-    else:
-      dct_status, str_status = self.check_and_deploy_pipelines(
-        sender=sender,
-        inputs=inputs,
+      inputs[DEEPLOY_KEYS.TARGET_NODES] = current_nodes
+      inputs.target_nodes = current_nodes
+      inputs[DEEPLOY_KEYS.TARGET_NODES_COUNT] = len(current_nodes)
+      inputs.target_nodes_count = len(current_nodes)
+
+      # TODO: Assess whether removing the running pipeline before redeploying is safe when the new launch fails.
+      self.delete_pipeline_from_nodes(
         app_id=app_id,
-        app_alias=app_alias,
-        app_type=app_type,
-        new_nodes=[],
-        update_nodes=nodes,
-        discovered_plugin_instances=discovered_plugin_instances,
-        dct_deeploy_specs=deeploy_specs_for_update,
-        job_app_type=job_app_type,
+        job_id=job_id,
+        owner=sender,
+        discovered_instances=discovered_plugin_instances,
       )
+
+      deployment_nodes = self._check_nodes_availability(inputs)
+      if set(deployment_nodes) != set(current_nodes):
+        msg = (
+          f"{DEEPLOY_ERRORS.NODES2}: Failed to validate that update runs on existing nodes. "
+          f"Expected {current_nodes}, validated {deployment_nodes}."
+        )
+        raise ValueError(msg)
+      confirmation_nodes = list(deployment_nodes)
+      discovered_plugin_instances = []
+
+    inputs[DEEPLOY_KEYS.TARGET_NODES] = deployment_nodes
+    inputs.target_nodes = deployment_nodes
+    inputs[DEEPLOY_KEYS.TARGET_NODES_COUNT] = len(deployment_nodes)
+    inputs.target_nodes_count = len(deployment_nodes)
+
+    dct_status, str_status = self.check_and_deploy_pipelines(
+      sender=sender,
+      inputs=inputs,
+      app_id=app_id,
+      app_alias=app_alias,
+      app_type=app_type,
+      new_nodes=deployment_nodes,
+      update_nodes=[],
+      discovered_plugin_instances=discovered_plugin_instances,
+      dct_deeploy_specs_create=deeploy_specs_for_update,
+      job_app_type=job_app_type,
+    )
 
-    if str_status in [DEEPLOY_STATUS.SUCCESS, DEEPLOY_STATUS.COMMAND_DELIVERED]:
-      if (dct_status is not None and is_confirmable_job and len(nodes) == len(dct_status)) or not is_confirmable_job:
-        eth_nodes = [self.bc.node_addr_to_eth_addr(node) for node in nodes]
+    if is_create and str_status in [DEEPLOY_STATUS.SUCCESS, DEEPLOY_STATUS.COMMAND_DELIVERED]:
+      if (dct_status is not None and is_confirmable_job and len(confirmation_nodes) == len(dct_status)) or not is_confirmable_job:
+        eth_nodes = [self.bc.node_addr_to_eth_addr(node) for node in confirmation_nodes]
         eth_nodes = sorted(eth_nodes)
         self.bc.submit_node_update(
           job_id=job_id,
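
Both validations in this hunk compare node sets rather than ordered lists, so an update may list the same nodes in any order but may not add or drop one. A small illustration (node addresses are made up):

```python
current_nodes = ["0xai_node_A", "0xai_node_B"]

# Same nodes, different order: accepted.
assert set(["0xai_node_B", "0xai_node_A"]) == set(current_nodes)

# A third node sneaks in: rejected with a DEEPLOY_ERRORS.NODES2 ValueError.
assert set(["0xai_node_A", "0xai_node_C"]) != set(current_nodes)
```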
@@ -483,6 +515,7 @@ def update_pipeline(
 
     Notes
     -----
+    - Existing pipelines are stopped and redeployed in place; requests must reference the active node set.
     - Updates are applied to existing plugin instances on the same nodes
     - For multi-plugin pipelines, all plugins are updated with new configurations
     - Resource validation applies the same as create operations
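
Per these notes, an update request that supplies target nodes must echo the set the job already runs on. A sketch of the relevant request fields, assuming the lowercase key casing implied by the `inputs.target_nodes` attribute access above (payload shape is an assumption; other required fields elided):

```python
update_request = {
  "app_id": "my_app",        # hypothetical identifier of the running deployment
  "target_nodes": [          # must equal the currently active node set, any order
    "0xai_node_A",
    "0xai_node_B",
  ],
  "target_nodes_count": 2,   # optional; if given, must match the current count
}
```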

extensions/business/deeploy/deeploy_mixin.py

Lines changed: 74 additions & 17 deletions
@@ -110,7 +110,7 @@ def __check_is_oracle(self, inputs):
       raise ValueError("Sender {} is not an oracle".format(sender))
     return True
 
-  def __create_pipeline_on_nodes(self, nodes, inputs, app_id, app_alias, app_type, sender, job_app_type=None):
+  def __create_pipeline_on_nodes(self, nodes, inputs, app_id, app_alias, app_type, sender, job_app_type=None, dct_deeploy_specs=None):
     """
     Create new pipelines on each node and set CSTORE `response_key` for the "callback" action
     """
@@ -124,18 +124,34 @@ def __create_pipeline_on_nodes(self, nodes, inputs, app_id, app_alias, app_type,
     response_keys = self.defaultdict(list)
 
     ts = self.time()
-    dct_deeploy_specs = {
-      DEEPLOY_KEYS.JOB_ID: job_id,
-      DEEPLOY_KEYS.PROJECT_ID: project_id,
-      DEEPLOY_KEYS.PROJECT_NAME: project_name,
-      DEEPLOY_KEYS.NR_TARGET_NODES: len(nodes),
-      DEEPLOY_KEYS.CURRENT_TARGET_NODES: nodes,
-      DEEPLOY_KEYS.JOB_TAGS: job_tags,
-      DEEPLOY_KEYS.DATE_CREATED: ts,
-      DEEPLOY_KEYS.DATE_UPDATED: ts,
-      DEEPLOY_KEYS.SPARE_NODES: spare_nodes,
-      DEEPLOY_KEYS.ALLOW_REPLICATION_IN_THE_WILD: allow_replication_in_the_wild,
-    }
+    if dct_deeploy_specs:
+      dct_deeploy_specs = self.deepcopy(dct_deeploy_specs)
+      dct_deeploy_specs[DEEPLOY_KEYS.DATE_UPDATED] = ts
+      if DEEPLOY_KEYS.DATE_CREATED not in dct_deeploy_specs:
+        dct_deeploy_specs[DEEPLOY_KEYS.DATE_CREATED] = ts
+    else:
+      dct_deeploy_specs = {
+        DEEPLOY_KEYS.NR_TARGET_NODES: len(nodes),
+        DEEPLOY_KEYS.CURRENT_TARGET_NODES: nodes,
+        DEEPLOY_KEYS.JOB_TAGS: job_tags,
+        DEEPLOY_KEYS.DATE_CREATED: ts,
+        DEEPLOY_KEYS.DATE_UPDATED: ts,
+        DEEPLOY_KEYS.SPARE_NODES: spare_nodes,
+        DEEPLOY_KEYS.ALLOW_REPLICATION_IN_THE_WILD: allow_replication_in_the_wild,
+      }
+
+    if job_id is not None or DEEPLOY_KEYS.JOB_ID not in dct_deeploy_specs:
+      dct_deeploy_specs[DEEPLOY_KEYS.JOB_ID] = job_id
+    if project_id is not None or DEEPLOY_KEYS.PROJECT_ID not in dct_deeploy_specs:
+      dct_deeploy_specs[DEEPLOY_KEYS.PROJECT_ID] = project_id
+    if project_name is not None or DEEPLOY_KEYS.PROJECT_NAME not in dct_deeploy_specs:
+      dct_deeploy_specs[DEEPLOY_KEYS.PROJECT_NAME] = project_name
+    dct_deeploy_specs[DEEPLOY_KEYS.NR_TARGET_NODES] = len(nodes)
+    dct_deeploy_specs[DEEPLOY_KEYS.CURRENT_TARGET_NODES] = nodes
+    dct_deeploy_specs[DEEPLOY_KEYS.JOB_TAGS] = job_tags
+    dct_deeploy_specs[DEEPLOY_KEYS.SPARE_NODES] = spare_nodes
+    dct_deeploy_specs[DEEPLOY_KEYS.ALLOW_REPLICATION_IN_THE_WILD] = allow_replication_in_the_wild
+
     detected_job_app_type = job_app_type or self.deeploy_detect_job_app_type(plugins)
     if detected_job_app_type in JOB_APP_TYPES_ALL:
       dct_deeploy_specs[DEEPLOY_KEYS.JOB_APP_TYPE] = detected_job_app_type
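
The branch above implements a merge policy: passed-in specs are deep-copied with `DATE_UPDATED` refreshed and `DATE_CREATED` preserved, identity keys (`JOB_ID`, `PROJECT_ID`, `PROJECT_NAME`) are only overwritten by non-None arguments, and node-derived keys are always recomputed. A condensed, self-contained sketch of the same policy, using plain dict keys instead of the `DEEPLOY_KEYS` constants:

```python
import copy
import time

def merge_specs(existing, job_id=None, nodes=()):
  ts = time.time()
  # Reuse existing specs when given; otherwise start a fresh record.
  specs = copy.deepcopy(existing) if existing else {"date_created": ts}
  specs["date_updated"] = ts
  specs.setdefault("date_created", ts)
  # Identity keys: keep the stored value unless the caller supplies a new one.
  if job_id is not None or "job_id" not in specs:
    specs["job_id"] = job_id
  # Node-derived keys are always refreshed from the current request.
  specs["nr_target_nodes"] = len(nodes)
  specs["current_target_nodes"] = list(nodes)
  return specs

# Re-deploying with preserved specs keeps the original creation date and job id.
old = {"job_id": "j-1", "date_created": 1000.0, "date_updated": 1000.0}
new = merge_specs(old, nodes=["0xai_node_A"])
assert new["date_created"] == 1000.0 and new["job_id"] == "j-1"
```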
@@ -419,6 +435,46 @@ def _prepare_updated_deeploy_specs(self, owner, app_id, job_id, discovered_plugi
     refreshed_specs[DEEPLOY_KEYS.DATE_UPDATED] = self.time()
     return refreshed_specs
 
+  def _gather_running_pipeline_context(self, owner, app_id=None, job_id=None):
+    """
+    Collect information about currently running pipeline instances for a job/app.
+
+    Ensures follow-up operations keep parity with the active deployment state.
+
+    Returns
+    -------
+    dict
+      {
+        'discovered_instances': list,
+        'nodes': list[str],
+        'deeploy_specs': dict | None,
+      }
+    """
+    discovered_instances = self._discover_plugin_instances(app_id=app_id, job_id=job_id, owner=owner)
+    nodes = []
+    for instance in discovered_instances:
+      node_addr = instance.get(DEEPLOY_PLUGIN_DATA.NODE)
+      if node_addr and node_addr not in nodes:
+        nodes.append(node_addr)
+
+    if not nodes:
+      msg = f"{DEEPLOY_ERRORS.NODES3}: No running workers found for provided "
+      msg += f"{f'app_id {app_id}' if app_id else f'job_id {job_id}'} and owner '{owner}'."
+      raise ValueError(msg)
+
+    deeploy_specs = self._prepare_updated_deeploy_specs(
+      owner=owner,
+      app_id=app_id,
+      job_id=job_id,
+      discovered_plugin_instances=discovered_instances,
+    )
+
+    return {
+      "discovered_instances": discovered_instances,
+      "nodes": nodes,
+      "deeploy_specs": deeploy_specs,
+    }
+
   def __prepare_plugins_for_update(self, inputs, discovered_plugin_instances):
     """
     Prepare plugins for update using discovered instances instead of creating new ones
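
Returning a dict rather than a tuple lets call sites pick fields by name, as `_process_pipeline_request` does above. A usage sketch (assuming `self` is an object mixing in this helper):

```python
ctx = self._gather_running_pipeline_context(owner=sender, app_id=app_id, job_id=job_id)
discovered = ctx["discovered_instances"]  # live plugin instance records
current_nodes = ctx["nodes"]              # de-duplicated node addresses, discovery order
specs = ctx["deeploy_specs"]              # refreshed deeploy specs for re-creation
```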
@@ -1333,7 +1389,7 @@ def deeploy_prepare_plugins(self, inputs):
       plugins = [plugin]
     return plugins
 
-  def check_and_deploy_pipelines(self, sender, inputs, app_id, app_alias, app_type, update_nodes, new_nodes, discovered_plugin_instances=[], dct_deeploy_specs=None, job_app_type=None):
+  def check_and_deploy_pipelines(self, sender, inputs, app_id, app_alias, app_type, update_nodes, new_nodes, discovered_plugin_instances=[], dct_deeploy_specs=None, job_app_type=None, dct_deeploy_specs_create=None):
     """
     Validate the inputs and deploy the pipeline on the target nodes.
     """
@@ -1349,7 +1405,7 @@ def check_and_deploy_pipelines(self, sender, inputs, app_id, app_alias, app_type
       update_response_keys = self.__update_pipeline_on_nodes(update_nodes, inputs, app_id, app_alias, app_type, sender, discovered_plugin_instances, dct_deeploy_specs, job_app_type=job_app_type)
       response_keys.update(update_response_keys)
     if len(new_nodes) > 0:
-      new_response_keys = self.__create_pipeline_on_nodes(new_nodes, inputs, app_id, app_alias, app_type, sender, job_app_type=job_app_type)
+      new_response_keys = self.__create_pipeline_on_nodes(new_nodes, inputs, app_id, app_alias, app_type, sender, job_app_type=job_app_type, dct_deeploy_specs=dct_deeploy_specs_create)
       response_keys.update(new_response_keys)
 
     # Phase 3: Wait until all the responses are received via CSTORE and compose status response
@@ -1873,8 +1929,9 @@ def check_running_pipelines_and_add_to_r1fs(self):
 
     return netmon_job_ids
 
-  def delete_pipeline_from_nodes(self, app_id=None, job_id=None, owner=None, allow_missing=False):
-    discovered_instances = self._discover_plugin_instances(app_id=app_id, job_id=job_id, owner=owner)
+  def delete_pipeline_from_nodes(self, app_id=None, job_id=None, owner=None, allow_missing=False, discovered_instances=None):
+    if discovered_instances is None:
+      discovered_instances = self._discover_plugin_instances(app_id=app_id, job_id=job_id, owner=owner)
 
     if len(discovered_instances) == 0:
       if allow_missing:
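
Defaulting `discovered_instances` to `None` keeps existing call sites working while letting the update path hand over instances it has already discovered, avoiding a second lookup against state it just validated. Hypothetical callers:

```python
# Existing call sites are unchanged: discovery still happens inside the method.
self.delete_pipeline_from_nodes(app_id=app_id, owner=owner)

# The update path reuses its earlier discovery instead of querying twice.
self.delete_pipeline_from_nodes(
  app_id=app_id, job_id=job_id, owner=sender,
  discovered_instances=discovered_plugin_instances,
)
```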

ver.py

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-__VER__ = '2.9.813'
+__VER__ = '2.9.814'
