Skip to content
This repository was archived by the owner on Mar 24, 2025. It is now read-only.

Commit 3c614c7

Browse files
author
Denes Bodo
committed
OOZIE-3717 When fork actions parallel submit, becasue ForkedActionStartXCommand and ActionStartXCommand has the same name, so ForkedActionStartXCommand would be lost, and cause deadlock (chenhd via dionusos)
1 parent 6039007 commit 3c614c7

File tree

4 files changed

+175
-7
lines changed

4 files changed

+175
-7
lines changed

core/src/main/java/org/apache/oozie/command/wf/ActionStartXCommand.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@
5656
import org.apache.oozie.service.JPAService;
5757
import org.apache.oozie.service.Services;
5858
import org.apache.oozie.service.UUIDService;
59-
import org.apache.oozie.util.DateUtils;
6059
import org.apache.oozie.util.ELEvaluationException;
6160
import org.apache.oozie.util.Instrumentation;
6261
import org.apache.oozie.util.JobUtils;
@@ -96,6 +95,19 @@ public ActionStartXCommand(WorkflowJobBean job, String actionId, String type) {
9695
this.jobId = wfJob.getId();
9796
}
9897

98+
public ActionStartXCommand(String actionId, String type, String name) {
99+
super(name, type, 0);
100+
this.actionId = actionId;
101+
this.jobId = Services.get().get(UUIDService.class).getId(actionId);
102+
}
103+
104+
public ActionStartXCommand(WorkflowJobBean job, String actionId, String type, String name) {
105+
super(name, type, 0);
106+
this.actionId = actionId;
107+
this.wfJob = job;
108+
this.jobId = wfJob.getId();
109+
}
110+
99111
@Override
100112
protected void setLogInfo() {
101113
LogUtils.setLogInfo(actionId);

core/src/main/java/org/apache/oozie/command/wf/ForkedActionStartXCommand.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,17 @@
2525
import org.apache.oozie.client.WorkflowAction;
2626
import org.apache.oozie.command.CommandException;
2727
import org.apache.oozie.command.XCommand;
28-
import org.apache.oozie.command.wf.ActionXCommand.ActionExecutorContext;
2928

3029
public class ForkedActionStartXCommand extends ActionStartXCommand {
3130

31+
private final static String FORKED_ACTION_START_NAME = "action.forkedstart";
32+
3233
public ForkedActionStartXCommand(String actionId, String type) {
33-
super(actionId, type);
34+
super(actionId, type, FORKED_ACTION_START_NAME);
3435
}
3536

3637
public ForkedActionStartXCommand(WorkflowJobBean wfJob, String id, String type) {
37-
super(wfJob, id, type);
38+
super(wfJob, id, type, FORKED_ACTION_START_NAME);
3839
}
3940

4041
protected ActionExecutorContext execute() throws CommandException {

core/src/test/java/org/apache/oozie/command/wf/TestSignalXCommand.java

Lines changed: 157 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,8 @@
4545
import org.apache.oozie.client.OozieClient;
4646
import org.apache.oozie.client.WorkflowAction;
4747
import org.apache.oozie.client.WorkflowJob;
48-
import org.apache.oozie.executor.jpa.WorkflowActionQueryExecutor;
4948
import org.apache.oozie.executor.jpa.WorkflowActionsGetForJobJPAExecutor;
5049
import org.apache.oozie.executor.jpa.WorkflowJobQueryExecutor;
51-
import org.apache.oozie.executor.jpa.WorkflowJobsGetForPurgeJPAExecutor;
5250
import org.apache.oozie.local.LocalOozie;
5351
import org.apache.oozie.service.CallableQueueService;
5452
import org.apache.oozie.service.ConfigurationService;
@@ -62,8 +60,15 @@
6260
import org.apache.oozie.util.IOUtils;
6361
import org.apache.oozie.util.XConfiguration;
6462
import org.apache.oozie.workflow.lite.LiteWorkflowAppParser;
63+
import java.lang.reflect.InvocationTargetException;
64+
import java.lang.reflect.Method;
65+
import java.util.concurrent.Future;
66+
import java.util.concurrent.TimeUnit;
67+
import org.apache.oozie.util.XLog;
68+
import org.apache.oozie.util.XCallable;
69+
import org.apache.oozie.service.RecoveryService;
70+
6571

66-
import javax.persistence.EntityManager;
6772

6873
public class TestSignalXCommand extends XDataTestCase {
6974

@@ -554,4 +559,153 @@ public boolean evaluate() throws Exception {
554559
}
555560
}
556561
}
562+
563+
/**
564+
* for test {@link #testDeadlockForForkParallelSubmit()}
565+
*/
566+
public static class TestRecoverForkStartActionCallableQueueService extends CallableQueueService{
567+
private final XLog log = XLog.getLog(getClass());
568+
public static TestSignalXCommand testSignalXCommand;
569+
570+
/**
571+
* Overwrite for test the same action's ActionStartXCommand in queue and the ForkedActionStartXCommand wouldn't lose,
572+
* if ActionStartXCommand and ForkedActionStartXCommand has the same name, ForkedActionStartXCommand couldn't enqueue
573+
* after a ActionStartXCommand in queue waiting for running.
574+
*/
575+
public class CallableWrapper<E> extends CallableQueueService.CallableWrapper<E> {
576+
577+
boolean forkedActionStartXCommandFirstEnter;
578+
public CallableWrapper(XCallable<E> callable, long delay) {
579+
super(callable,delay);
580+
forkedActionStartXCommandFirstEnter = callable instanceof ForkedActionStartXCommand;
581+
}
582+
583+
584+
public void run() {
585+
XCallable<?> callable = getElement();
586+
if (forkedActionStartXCommandFirstEnter && callable instanceof ForkedActionStartXCommand){
587+
// make sure there has a ActionStartXCommand in the queue wait to run,
588+
// and then ForkedActionStartXCommand enqueue
589+
testSignalXCommand.waitFor(15 * 1000, new Predicate() {
590+
@Override
591+
public boolean evaluate() throws Exception {
592+
return !filterDuplicates();
593+
}
594+
},200);
595+
596+
log.warn("max concurrency for callable [{0}] exceeded, enqueueing with [{1}]ms delay", callable
597+
.getType(), CONCURRENCY_DELAY);
598+
setDelay(CONCURRENCY_DELAY, TimeUnit.MILLISECONDS);
599+
600+
try {
601+
Method queue = CallableQueueService.class.getDeclaredMethod("queue",
602+
CallableQueueService.CallableWrapper.class, boolean.class);
603+
queue.setAccessible(true);
604+
queue.invoke(TestRecoverForkStartActionCallableQueueService.this,this,true);
605+
} catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) {
606+
throw new RuntimeException(e);
607+
}
608+
forkedActionStartXCommandFirstEnter = false;
609+
}else {
610+
super.run();
611+
}
612+
}
613+
}
614+
615+
/**
616+
* Replace CallableQueueService.CallableWrapper to TestRecoverForkStartActionCallableQueueService.CallableWrapper
617+
* for text, in order to call TestRecoverForkStartActionCallableQueueService.CallableWrapper for wait
618+
* ActionStartXCommand enqueue before.
619+
*
620+
*/
621+
public <T> Future<T> submit(CallableQueueService.CallableWrapper<T> task) throws InterruptedException {
622+
return super.submit(new TestRecoverForkStartActionCallableQueueService.CallableWrapper<T>(task.getElement(),
623+
task.getInitialDelay()));
624+
}
625+
}
626+
627+
628+
/**
629+
* Test : fork parallel submit, the action has the same XCommand in queue, there will skip enqueue by
630+
* {@link CallableQueueService.CallableWrapper#filterDuplicates()} so if the ActionStartXCommand and
631+
* ForkedActionStartXCommand has the same name, it would be lost.
632+
*
633+
* Note : RecoveryService will check the pending action and try to start it. So if the action's ForkedActionStartXCommand
634+
* wait for run, there may be a ActionStartXCommand add for the same action.
635+
*
636+
*/
637+
public void testDeadlockForForkParallelSubmit() throws Exception {
638+
setSystemProperty(Services.CONF_SERVICE_EXT_CLASSES, TestRecoverForkStartActionCallableQueueService.class.getName());
639+
TestRecoverForkStartActionCallableQueueService.testSignalXCommand = this;
640+
641+
services = new Services();
642+
Configuration servicesConf = services.getConf();
643+
servicesConf.setInt(RecoveryService.CONF_WF_ACTIONS_OLDER_THAN, 0);
644+
servicesConf.setInt(RecoveryService.CONF_SERVICE_INTERVAL, 10);
645+
services.init();
646+
647+
ConfigurationService.setBoolean(SignalXCommand.FORK_PARALLEL_JOBSUBMISSION, true);
648+
649+
Configuration conf = new XConfiguration();
650+
String workflowUri = getTestCaseFileUri("workflow.xml");
651+
//@formatter:off
652+
String appXml = "<workflow-app xmlns=\"uri:oozie:workflow:1.0\" name=\"wf-fork\">"
653+
+ "<start to=\"fork1\"/>"
654+
+ "<fork name=\"fork1\">"
655+
+ "<path start=\"action1\"/>"
656+
+ "<path start=\"action2\"/>"
657+
+ "<path start=\"action3\"/>"
658+
+ "<path start=\"action4\"/>"
659+
+ "<path start=\"action5\"/>"
660+
+ "</fork>"
661+
+ "<action name=\"action1\">"
662+
+ "<fs></fs>"
663+
+ "<ok to=\"join1\"/>"
664+
+ "<error to=\"kill\"/>"
665+
+ "</action>"
666+
+ "<action name=\"action2\">"
667+
+ "<fs></fs><ok to=\"join1\"/>"
668+
+ "<error to=\"kill\"/>"
669+
+ "</action>"
670+
+ "<action name=\"action3\">"
671+
+ "<fs></fs><ok to=\"join1\"/>"
672+
+ "<error to=\"kill\"/>"
673+
+ "</action>"
674+
+ "<action name=\"action4\">"
675+
+ "<fs></fs><ok to=\"join1\"/>"
676+
+ "<error to=\"kill\"/>"
677+
+ "</action>"
678+
+ "<action name=\"action5\">"
679+
+ "<fs></fs><ok to=\"join1\"/>"
680+
+ "<error to=\"kill\"/>"
681+
+ "</action>"
682+
+ "<join name=\"join1\" to=\"end\"/>"
683+
+ "<kill name=\"kill\"><message>killed</message>"
684+
+ "</kill><"
685+
+ "end name=\"end\"/>"
686+
+ "</workflow-app>";
687+
//@Formatter:on
688+
689+
writeToFile(appXml, workflowUri);
690+
conf.set(OozieClient.APP_PATH, workflowUri);
691+
conf.set(OozieClient.USER_NAME, getTestUser());
692+
693+
SubmitXCommand sc = new SubmitXCommand(conf);
694+
final String jobId = sc.call();
695+
new StartXCommand(jobId).call();
696+
697+
waitFor(30 * 1000, new Predicate() {
698+
@Override
699+
public boolean evaluate() throws Exception {
700+
return WorkflowJobQueryExecutor.getInstance()
701+
.get(WorkflowJobQueryExecutor.WorkflowJobQuery.GET_WORKFLOW, jobId)
702+
.getStatus() == WorkflowJob.Status.SUCCEEDED;
703+
}
704+
});
705+
706+
assertEquals(WorkflowJobQueryExecutor.getInstance()
707+
.get(WorkflowJobQueryExecutor.WorkflowJobQuery.GET_WORKFLOW, jobId)
708+
.getStatus(),
709+
WorkflowJob.Status.SUCCEEDED);
710+
}
557711
}

release-log.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
-- Oozie 5.3.0 release (trunk - unreleased)
22

3+
OOZIE-3717 When fork actions parallel submit, becasue ForkedActionStartXCommand and ActionStartXCommand has the same name, so ForkedActionStartXCommand would be lost, and cause deadlock (chenhd via dionusos)
34
OOZIE-3715 Fix fork out more than one transitions submit , one transition submit fail can't execute KillXCommand (chenhd via dionusos)
45
OOZIE-3716 Invocation of Main class completed Message is skipped when LauncherSecurityManager calls system exit (khr9603 via dionusos)
56
OOZIE-3695 [sharelib-hive2] Fix current SpotBugs discovered issues in Oozie's sharelib-hive2 module (jmakai via dionusos)

0 commit comments

Comments
 (0)