Skip to content

Commit 717ee05

Browse files
Release WF slot on UnableToAcquireLockException (#1871)
1 parent aa5cb41 commit 717ee05

File tree

2 files changed

+29
-28
lines changed

2 files changed

+29
-28
lines changed

temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowWorker.java

Lines changed: 28 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -281,37 +281,38 @@ public void handle(WorkflowTask task) throws Exception {
281281
MDC.put(LoggerTag.RUN_ID, runId);
282282

283283
boolean locked = false;
284-
if (!Strings.isNullOrEmpty(stickyTaskQueueName)) {
285-
// Serialize workflow task processing for a particular workflow run.
286-
// This is used to make sure that query tasks and real workflow tasks
287-
// are serialized when sticky is on.
288-
//
289-
// Acquiring a lock with a timeout to avoid having lots of workflow tasks for the same run
290-
// id waiting for a lock and consuming threads in case if lock is unavailable.
291-
//
292-
// Throws interrupted exception which is propagated. It's a correct way to handle it here.
293-
//
294-
// TODO 1: 5 seconds is chosen as a half of normal workflow task timeout.
295-
// This value should be dynamically configured.
296-
// TODO 2: Does "consider increasing workflow task timeout" advice in this exception makes
297-
// any sense?
298-
// This MAYBE makes sense only if a previous workflow task timed out, it's still in
299-
// progress on the worker and the next workflow task got picked up by the same exact
300-
// worker from the general non-sticky task queue.
301-
// Even in this case, this advice looks misleading, something else is going on
302-
// (like an extreme network latency).
303-
locked = runLocks.tryLock(runId, 5, TimeUnit.SECONDS);
304-
305-
if (!locked) {
306-
throw new UnableToAcquireLockException(
307-
"Workflow lock for the run id hasn't been released by one of previous execution attempts, "
308-
+ "consider increasing workflow task timeout.");
309-
}
310-
}
311284

312285
Stopwatch swTotal =
313286
workflowTypeScope.timer(MetricsType.WORKFLOW_TASK_EXECUTION_TOTAL_LATENCY).start();
314287
try {
288+
if (!Strings.isNullOrEmpty(stickyTaskQueueName)) {
289+
// Serialize workflow task processing for a particular workflow run.
290+
// This is used to make sure that query tasks and real workflow tasks
291+
// are serialized when sticky is on.
292+
//
293+
// Acquiring a lock with a timeout to avoid having lots of workflow tasks for the same run
294+
// id waiting for a lock and consuming threads in case if lock is unavailable.
295+
//
296+
// Throws interrupted exception which is propagated. It's a correct way to handle it here.
297+
//
298+
// TODO 1: 5 seconds is chosen as a half of normal workflow task timeout.
299+
// This value should be dynamically configured.
300+
// TODO 2: Does "consider increasing workflow task timeout" advice in this exception makes
301+
// any sense?
302+
// This MAYBE makes sense only if a previous workflow task timed out, it's still in
303+
// progress on the worker and the next workflow task got picked up by the same exact
304+
// worker from the general non-sticky task queue.
305+
// Even in this case, this advice looks misleading, something else is going on
306+
// (like an extreme network latency).
307+
locked = runLocks.tryLock(runId, 5, TimeUnit.SECONDS);
308+
309+
if (!locked) {
310+
throw new UnableToAcquireLockException(
311+
"Workflow lock for the run id hasn't been released by one of previous execution attempts, "
312+
+ "consider increasing workflow task timeout.");
313+
}
314+
}
315+
315316
Optional<PollWorkflowTaskQueueResponse> nextWFTResponse = Optional.of(workflowTaskResponse);
316317
do {
317318
PollWorkflowTaskQueueResponse currentTask = nextWFTResponse.get();

temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowWorkerTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ public void concurrentPollRequestLockTest() throws Exception {
189189
ImmutableMap.of("worker_type", "WorkflowWorker"),
190190
100.0);
191191
// Cleanup
192-
worker.shutdown(new ShutdownManager(), true).get();
192+
worker.shutdown(new ShutdownManager(), false).get();
193193
// Verify we only handled two tasks
194194
verify(taskHandler, times(2)).handleWorkflowTask(any());
195195
}

0 commit comments

Comments
 (0)