@@ -28,18 +28,19 @@ import (
28
28
"fmt"
29
29
"reflect"
30
30
"strings"
31
+ "sync"
31
32
"testing"
32
33
"time"
33
34
35
+ "github.com/golang/mock/gomock"
34
36
"github.com/opentracing/opentracing-go"
35
37
"github.com/pborman/uuid"
36
- "github.com/stretchr/testify/suite"
37
-
38
- "github.com/golang/mock/gomock"
39
38
"github.com/stretchr/testify/require"
39
+ "github.com/stretchr/testify/suite"
40
40
"go.uber.org/cadence/.gen/go/cadence/workflowservicetest"
41
41
s "go.uber.org/cadence/.gen/go/shared"
42
42
"go.uber.org/cadence/internal/common"
43
+ "go.uber.org/goleak"
43
44
"go.uber.org/zap"
44
45
)
45
46
@@ -416,7 +417,7 @@ func (t *TaskHandlersTestSuite) TestWorkflowTask_ActivityTaskScheduled() {
416
417
t .NotNil (response .Decisions [0 ].CompleteWorkflowExecutionDecisionAttributes )
417
418
}
418
419
419
- func (t * TaskHandlersTestSuite ) TestWorkflowTask_QueryWorkflow_Sticky () {
420
+ func (t * TaskHandlersTestSuite ) TestWorkflowTask_QueryWorkflow () {
420
421
// Schedule an activity and see if we complete workflow.
421
422
taskList := "sticky-tl"
422
423
execution := & s.WorkflowExecution {
@@ -463,8 +464,12 @@ func (t *TaskHandlersTestSuite) TestWorkflowTask_QueryWorkflow_Sticky() {
463
464
t .verifyQueryResult (queryResp , "waiting-activity-result" )
464
465
}
465
466
466
- func (t * TaskHandlersTestSuite ) TestWorkflowTask_QueryWorkflow_NonSticky () {
467
+ func (t * TaskHandlersTestSuite ) TestWorkflowTask_QueryWorkflow_2 () {
467
468
// Schedule an activity and see if we complete workflow.
469
+
470
+ // This test appears to be just a finer-grained version of TestWorkflowTask_QueryWorkflow, though the older names
471
+ // for them implied entirely different purposes. Likely it can be combined with TestWorkflowTask_QueryWorkflow
472
+ // without losing anything useful.
468
473
taskList := "tl1"
469
474
testEvents := []* s.HistoryEvent {
470
475
createTestEventWorkflowExecutionStarted (1 , & s.WorkflowExecutionStartedEventAttributes {TaskList : & s.TaskList {Name : & taskList }}),
@@ -478,6 +483,8 @@ func (t *TaskHandlersTestSuite) TestWorkflowTask_QueryWorkflow_NonSticky() {
478
483
}),
479
484
createTestEventActivityTaskStarted (6 , & s.ActivityTaskStartedEventAttributes {}),
480
485
createTestEventActivityTaskCompleted (7 , & s.ActivityTaskCompletedEventAttributes {ScheduledEventId : common .Int64Ptr (5 )}),
486
+ // TODO: the events below look inconsistent: the decision task is started without a preceding schedule event, and this workflow does not respond to signals.
487
+ // aside from this, the list of tasks is the same as TestWorkflowTask_QueryWorkflow
481
488
createTestEventDecisionTaskStarted (8 ),
482
489
createTestEventWorkflowExecutionSignaled (9 , "test-signal" ),
483
490
}
@@ -1449,6 +1456,104 @@ func (t *TaskHandlersTestSuite) TestActivityExecutionWorkerStop() {
1449
1456
t .NotNil (r )
1450
1457
}
1451
1458
1459
// countLeaks reports how many leaking goroutines are described by an error
// such as the one returned from goleak.Find. A nil error means no leaks and
// yields 0.
//
// This is a regrettably hacky approach, as it relies on parsing the error
// text. Ideally there will be a structured way to do this in the future.
//
// Leak messages look something like:
//
//	Goroutine 23 in state chan receive, with go.uber.org/cadence/internal.(*coroutineState).initialYield on top of the stack:
//	... stacktrace ...
//
//	Goroutine 28 ... on top of the stack:
//	... stacktrace ...
//
// so every occurrence of "on top of the stack" is counted as one goroutine.
func countLeaks(leaks error) int {
	if leaks == nil {
		return 0
	}
	return strings.Count(leaks.Error(), "on top of the stack")
}
1473
+
1474
+ func (t * TaskHandlersTestSuite ) TestRegression_QueriesDoNotLeakGoroutines () {
1475
+ // this test must not be run in parallel with most other tests, as it mutates global vars
1476
+ var ridsToCleanUp []string
1477
+ originalLeaks := goleak .Find ()
1478
+ defer func (size int ) {
1479
+ // empty the cache to clear out any newly-introduced leaks
1480
+ current := getWorkflowCache ()
1481
+ for _ , rid := range ridsToCleanUp {
1482
+ current .Delete (rid )
1483
+ }
1484
+ // check the cleanup
1485
+ currentLeaks := goleak .Find ()
1486
+ if countLeaks (currentLeaks ) != countLeaks (originalLeaks ) {
1487
+ t .T ().Errorf ("failed to clean up goroutines.\n Original state:\n %v\n \n Current state:\n %v" , originalLeaks , currentLeaks )
1488
+ }
1489
+
1490
+ // reset everything to make it "normal".
1491
+ // this does NOT restore the original workflow cache - that cannot be done correctly, initCacheOnce is not safe to copy (thus restore).
1492
+ stickyCacheSize = size
1493
+ workflowCache = nil
1494
+ initCacheOnce = sync.Once {}
1495
+ }(stickyCacheSize )
1496
+ workflowCache = nil
1497
+ initCacheOnce = sync.Once {}
1498
+ // cache is intentionally not *disabled*, as that would go down no-cache code paths.
1499
+ // also, there is an LRU-cache bug where the size allows N to enter, but then removes until N-1 remain,
1500
+ // so a size of 2 actually means a size of 1.
1501
+ SetStickyWorkflowCacheSize (2 )
1502
+
1503
+ taskList := "tl1"
1504
+ params := workerExecutionParameters {
1505
+ TaskList : taskList ,
1506
+ Identity : "test-id-1" ,
1507
+ Logger : t .logger ,
1508
+ DisableStickyExecution : false ,
1509
+ }
1510
+ taskHandler := newWorkflowTaskHandler (testDomain , params , nil , t .registry )
1511
+
1512
+ // process a throw-away workflow to fill the cache. this is copied from TestWorkflowTask_QueryWorkflow since it's
1513
+ // relatively simple, but any should work fine, as long as it can be queried.
1514
+ testEvents := []* s.HistoryEvent {
1515
+ createTestEventWorkflowExecutionStarted (1 , & s.WorkflowExecutionStartedEventAttributes {TaskList : & s.TaskList {Name : & taskList }}),
1516
+ createTestEventDecisionTaskScheduled (2 , & s.DecisionTaskScheduledEventAttributes {TaskList : & s.TaskList {Name : & taskList }}),
1517
+ createTestEventDecisionTaskStarted (3 ),
1518
+ createTestEventDecisionTaskCompleted (4 , & s.DecisionTaskCompletedEventAttributes {ScheduledEventId : common .Int64Ptr (2 )}),
1519
+ createTestEventActivityTaskScheduled (5 , & s.ActivityTaskScheduledEventAttributes {
1520
+ ActivityId : common .StringPtr ("0" ),
1521
+ ActivityType : & s.ActivityType {Name : common .StringPtr ("Greeter_Activity" )},
1522
+ TaskList : & s.TaskList {Name : & taskList },
1523
+ }),
1524
+ }
1525
+ cachedTask := createWorkflowTask (testEvents [0 :1 ], 1 , "HelloWorld_Workflow" )
1526
+ cachedTask .WorkflowExecution .WorkflowId = common .StringPtr ("cache-filling workflow id" )
1527
+ ridsToCleanUp = append (ridsToCleanUp , * cachedTask .WorkflowExecution .RunId )
1528
+ _ , err := taskHandler .ProcessWorkflowTask (& workflowTask {task : cachedTask }, nil )
1529
+
1530
+ // sanity check that the cache was indeed filled, and that it has created a goroutine
1531
+ require .NoError (t .T (), err , "cache-filling must succeed" )
1532
+ require .Equal (t .T (), 1 , getWorkflowCache ().Size (), "workflow should be cached, but was not" )
1533
+ oneCachedLeak := goleak .Find ()
1534
+ require .Error (t .T (), oneCachedLeak , "expected at least one leaking goroutine" )
1535
+ require .Equal (t .T (), countLeaks (originalLeaks )+ 1 , countLeaks (oneCachedLeak ), // ideally == 1, but currently there are other leaks
1536
+ "expected the cached workflow to leak one goroutine. original leaks:\n %v\n \n leaks after one workflow:\n %v" , originalLeaks , oneCachedLeak )
1537
+
1538
+ // now query a different workflow ID / run ID
1539
+ uncachedTask := createQueryTask (testEvents , 5 , "HelloWorld_Workflow" , queryType )
1540
+ uncachedTask .WorkflowExecution .WorkflowId = common .StringPtr ("should not leak this workflow id" )
1541
+ ridsToCleanUp = append (ridsToCleanUp , * uncachedTask .WorkflowExecution .RunId ) // only necessary if the test fails
1542
+ result , err := taskHandler .ProcessWorkflowTask (& workflowTask {task : uncachedTask }, nil )
1543
+ require .NoError (t .T (), err )
1544
+ t .verifyQueryResult (result , "waiting-activity-result" ) // largely a sanity check
1545
+
1546
+ // and finally the purpose of this test:
1547
+ // verify that the cache has not been modified, and that there is no new leak
1548
+ t .Equal (1 , getWorkflowCache ().Size (), "workflow cache should be the same size" )
1549
+ t .True (getWorkflowCache ().Exist (cachedTask .WorkflowExecution .GetRunId ()), "originally-cached workflow should still be cached" )
1550
+ t .False (getWorkflowCache ().Exist (uncachedTask .WorkflowExecution .GetRunId ()), "queried workflow should not be cached" )
1551
+ newLeaks := goleak .Find ()
1552
+ t .Error (newLeaks , "expected at least one leaking goroutine" )
1553
+ t .Equal (countLeaks (oneCachedLeak ), countLeaks (newLeaks ),
1554
+ "expected the query to leak no new goroutines. before query:\n %v\n \n after query:\n %v" , oneCachedLeak , newLeaks )
1555
+ }
1556
+
1452
1557
func Test_NonDeterministicCheck (t * testing.T ) {
1453
1558
decisionTypes := s .DecisionType_Values ()
1454
1559
require .Equal (t , 13 , len (decisionTypes ), "If you see this error, you are adding new decision type. " +
0 commit comments