Skip to content

Commit 12aacfe

Browse files
authored
Fix context propagation bug that would link two parents in some cases (#906)
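What changed? The ExecuteWorkflow, ExecuteActivity, and ExecuteLocalActivity spans now ignore the active span, and spanByServiceMethod no longer attaches the active span as a parent. Why? Because workflow threads are coroutines, the active span seen when a span is built may belong to a different workflow, so two workflows could end up referencing each other as parents.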
1 parent 93104af commit 12aacfe

File tree

2 files changed

+99
-4
lines changed

2 files changed

+99
-4
lines changed

src/main/java/com/uber/cadence/internal/tracing/TracingPropagator.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ public TracingPropagator(Tracer tracer) {
5454
}
5555

5656
public Span spanByServiceMethod(String serviceMethod) {
57-
return tracer.buildSpan(serviceMethod).asChildOf(tracer.activeSpan()).start();
57+
return tracer.buildSpan(serviceMethod).start();
5858
}
5959

6060
public Span spanForExecuteWorkflow(DecisionContext context) {
@@ -64,6 +64,8 @@ public Span spanForExecuteWorkflow(DecisionContext context) {
6464

6565
return tracer
6666
.buildSpan(EXECUTE_WORKFLOW)
67+
.ignoreActiveSpan() // ignore active span to start a new trace that ONLY links the start
68+
// workflow context
6769
.addReference(
6870
References.FOLLOWS_FROM, parent != NoopSpan.INSTANCE.context() ? parent : null)
6971
.withTag(TAG_WORKFLOW_TYPE, context.getWorkflowType().getName())
@@ -76,6 +78,8 @@ public Span spanForExecuteActivity(PollForActivityTaskResponse task) {
7678
SpanContext parent = extract(task.getHeader());
7779
return tracer
7880
.buildSpan(EXECUTE_ACTIVITY)
81+
.ignoreActiveSpan() // ignore active span to start a new trace that ONLY links the execute
82+
// workflow context
7983
.addReference(
8084
References.FOLLOWS_FROM, parent != NoopSpan.INSTANCE.context() ? parent : null)
8185
.withTag(
@@ -100,6 +104,7 @@ public Span spanForExecuteLocalActivity(Task task) {
100104
Span span =
101105
tracer
102106
.buildSpan(EXECUTE_LOCAL_ACTIVITY)
107+
.ignoreActiveSpan()
103108
.addReference(References.FOLLOWS_FROM, parent)
104109
.withTag(TAG_WORKFLOW_ID, params.getWorkflowExecution().getWorkflowId())
105110
.withTag(TAG_WORKFLOW_RUN_ID, params.getWorkflowExecution().getRunId())

src/test/java/com/uber/cadence/internal/tracing/StartWorkflowTest.java

Lines changed: 93 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
import com.uber.cadence.DomainAlreadyExistsError;
2323
import com.uber.cadence.RegisterDomainRequest;
2424
import com.uber.cadence.activity.ActivityMethod;
25+
import com.uber.cadence.activity.ActivityOptions;
2526
import com.uber.cadence.client.*;
27+
import com.uber.cadence.common.RetryOptions;
2628
import com.uber.cadence.internal.compatibility.Thrift2ProtoAdapter;
2729
import com.uber.cadence.internal.compatibility.proto.serviceclient.IGrpcServiceStubs;
2830
import com.uber.cadence.serviceclient.ClientOptions;
@@ -41,8 +43,8 @@
4143
import io.opentracing.mock.MockSpan;
4244
import io.opentracing.mock.MockTracer;
4345
import java.time.Duration;
44-
import java.util.List;
45-
import java.util.Objects;
46+
import java.util.*;
47+
import java.util.concurrent.CompletableFuture;
4648
import java.util.stream.Collectors;
4749
import org.junit.Assume;
4850
import org.junit.Test;
@@ -108,7 +110,16 @@ public Integer Double(Integer i) {
108110
}
109111

110112
public static class TestWorkflowImpl implements TestWorkflow {
111-
private final TestActivity activities = Workflow.newActivityStub(TestActivity.class);
113+
private final TestActivity activities =
114+
Workflow.newActivityStub(
115+
TestActivity.class,
116+
new ActivityOptions.Builder()
117+
.setRetryOptions(
118+
new RetryOptions.Builder()
119+
.setInitialInterval(Duration.ofSeconds(10))
120+
.setMaximumAttempts(2)
121+
.build())
122+
.build());
112123

113124
@Override
114125
public Integer AddOneThenDouble(Integer n) {
@@ -153,6 +164,85 @@ public void testStartWorkflowGRPC() {
153164
testStartWorkflowHelper(service, mockTracer, true);
154165
}
155166

167+
@Test
168+
public void testStartMultipleWorkflowGRPC() {
169+
Assume.assumeTrue(useDockerService);
170+
MockTracer mockTracer = new MockTracer();
171+
IWorkflowService service =
172+
new Thrift2ProtoAdapter(
173+
IGrpcServiceStubs.newInstance(
174+
ClientOptions.newBuilder().setTracer(mockTracer).setPort(7833).build()));
175+
try {
176+
service.RegisterDomain(new RegisterDomainRequest().setName(DOMAIN));
177+
} catch (DomainAlreadyExistsError e) {
178+
logger.info("domain already registered");
179+
} catch (Exception e) {
180+
fail("fail to register domain: " + e);
181+
}
182+
183+
WorkflowClient client =
184+
WorkflowClient.newInstance(
185+
service, WorkflowClientOptions.newBuilder().setDomain(DOMAIN).build());
186+
187+
WorkerFactory workerFactory =
188+
WorkerFactory.newInstance(
189+
client, WorkerFactoryOptions.newBuilder().setMaxWorkflowThreadCount(2).build());
190+
Worker worker;
191+
worker =
192+
workerFactory.newWorker(
193+
TASK_LIST, WorkerOptions.newBuilder().setMaxConcurrentWorkflowExecutionSize(2).build());
194+
worker.registerActivitiesImplementations(new TestActivityImpl(mockTracer, true));
195+
worker.registerWorkflowImplementationTypes(TestWorkflowImpl.class, DoubleWorkflowImpl.class);
196+
workerFactory.start();
197+
198+
List<CompletableFuture<Void>> futures = new ArrayList<>();
199+
200+
for (int i = 0; i < 100; i++) {
201+
int finalI = i;
202+
futures.add(
203+
CompletableFuture.runAsync(
204+
() -> {
205+
Span rootSpan = mockTracer.buildSpan("workflow=" + finalI).start();
206+
rootSpan.setBaggageItem(CONTEXT_KEY, CONTEXT_VALUE);
207+
mockTracer.activateSpan(rootSpan);
208+
client.newWorkflowStub(TestWorkflow.class).AddOneThenDouble(finalI);
209+
rootSpan.finish();
210+
}));
211+
}
212+
try {
213+
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).get();
214+
} catch (Exception e) {
215+
fail("workflow failure: " + e);
216+
} finally {
217+
// test debug log
218+
StringBuilder sb = new StringBuilder();
219+
220+
List<MockSpan> spans = mockTracer.finishedSpans();
221+
spans.forEach(
222+
span -> {
223+
sb.append(span.toString()).append("\n");
224+
});
225+
logger.info("spans: " + sb);
226+
workerFactory.shutdown();
227+
228+
// assert activity span should have only 1 parent
229+
List<MockSpan> filtered =
230+
spans
231+
.stream()
232+
.filter(
233+
s ->
234+
s.operationName().contains("ExecuteActivity")
235+
|| s.operationName().contains("ExecuteLocalActivity")
236+
|| s.operationName().contains("ExecuteWorkflow"))
237+
.collect(Collectors.toList());
238+
assertFalse(filtered.isEmpty());
239+
filtered.forEach(
240+
s -> {
241+
assertEquals(1, s.references().size());
242+
});
243+
}
244+
}
245+
156246
@Test
157247
public void testSignalWithStartWorkflowTchannel() {
158248
Assume.assumeTrue(useDockerService);

0 commit comments

Comments
 (0)