1
1
package ca .uhn .fhir .jpa .batch2 ;
2
2
3
- import static org .junit .jupiter .api .Assertions .assertEquals ;
4
- import static org .junit .jupiter .api .Assertions .assertFalse ;
5
- import static org .junit .jupiter .api .Assertions .assertTrue ;
6
- import static org .junit .jupiter .api .Assertions .assertNotNull ;
7
3
import ca .uhn .fhir .batch2 .api .ChunkExecutionDetails ;
8
4
import ca .uhn .fhir .batch2 .api .IJobCompletionHandler ;
9
5
import ca .uhn .fhir .batch2 .api .IJobCoordinator ;
25
21
import ca .uhn .fhir .batch2 .model .JobInstanceStartRequest ;
26
22
import ca .uhn .fhir .batch2 .model .JobWorkNotificationJsonMessage ;
27
23
import ca .uhn .fhir .batch2 .model .StatusEnum ;
24
+ import ca .uhn .fhir .batch2 .model .WorkChunk ;
25
+ import ca .uhn .fhir .batch2 .model .WorkChunkStatusEnum ;
28
26
import ca .uhn .fhir .batch2 .models .JobInstanceFetchRequest ;
29
27
import ca .uhn .fhir .jpa .batch .models .Batch2JobStartResponse ;
30
28
import ca .uhn .fhir .jpa .subscription .channel .api .ChannelConsumerSettings ;
31
29
import ca .uhn .fhir .jpa .subscription .channel .api .IChannelFactory ;
32
30
import ca .uhn .fhir .jpa .subscription .channel .impl .LinkedBlockingChannel ;
31
+ import ca .uhn .fhir .jpa .subscription .channel .impl .RetryPolicyProvider ;
33
32
import ca .uhn .fhir .jpa .test .BaseJpaR4Test ;
34
33
import ca .uhn .fhir .jpa .test .Batch2JobHelper ;
35
34
import ca .uhn .fhir .jpa .test .config .Batch2FastSchedulerConfig ;
36
35
import ca .uhn .fhir .jpa .test .config .TestR4Config ;
36
+ import ca .uhn .fhir .jpa .util .RandomTextUtils ;
37
37
import ca .uhn .fhir .model .api .IModelJson ;
38
38
import ca .uhn .fhir .rest .api .server .SystemRequestDetails ;
39
39
import ca .uhn .fhir .test .utilities .UnregisterScheduledProcessor ;
51
51
import org .slf4j .Logger ;
52
52
import org .slf4j .LoggerFactory ;
53
53
import org .springframework .beans .factory .annotation .Autowired ;
54
+ import org .springframework .context .annotation .Bean ;
55
+ import org .springframework .context .annotation .Configuration ;
56
+ import org .springframework .context .annotation .Primary ;
54
57
import org .springframework .data .domain .Page ;
55
58
import org .springframework .data .domain .Sort ;
56
59
import org .springframework .messaging .MessageHandler ;
60
+ import org .springframework .retry .RetryPolicy ;
61
+ import org .springframework .retry .backoff .BackOffPolicy ;
62
+ import org .springframework .retry .backoff .NoBackOffPolicy ;
63
+ import org .springframework .retry .policy .MaxAttemptsRetryPolicy ;
57
64
import org .springframework .test .context .ContextConfiguration ;
58
65
import org .springframework .test .context .TestPropertySource ;
59
66
import org .testcontainers .shaded .org .awaitility .Awaitility ;
73
80
74
81
import static ca .uhn .fhir .batch2 .config .BaseBatch2Config .CHANNEL_NAME ;
75
82
import static ca .uhn .fhir .batch2 .coordinator .WorkChunkProcessor .MAX_CHUNK_ERROR_COUNT ;
83
+ import static ca .uhn .fhir .jpa .entity .Batch2WorkChunkEntity .ERROR_MSG_MAX_LENGTH ;
76
84
import static org .assertj .core .api .Assertions .assertThat ;
77
- import static org .junit .jupiter .api .Assertions .fail ;
85
+ import static org .awaitility .Awaitility .await ;
86
+ import static org .junit .jupiter .api .Assertions .assertEquals ;
87
+ import static org .junit .jupiter .api .Assertions .assertFalse ;
88
+ import static org .junit .jupiter .api .Assertions .assertNotNull ;
89
+ import static org .junit .jupiter .api .Assertions .assertTrue ;
78
90
import static org .junit .jupiter .api .Assertions .fail ;
79
91
80
92
81
93
@ ContextConfiguration (classes = {
82
- Batch2FastSchedulerConfig .class
94
+ Batch2FastSchedulerConfig .class ,
95
+ Batch2CoordinatorIT .RPConfig .class
83
96
})
84
97
@ TestPropertySource (properties = {
85
98
// These tests require scheduling to work
86
99
UnregisterScheduledProcessor .SCHEDULING_DISABLED_EQUALS_FALSE
87
100
})
88
101
public class Batch2CoordinatorIT extends BaseJpaR4Test {
102
+
103
+ /***
104
+ * Our internal configuration of Retry Mechanism is
105
+ * with exponential backoff, and infinite retries.
106
+ *
107
+ * This isn't ideal for tests; so we will override
108
+ * the retry mechanism for tests that require it to
109
+ * make them run faster and more 'predictably'
110
+ */
111
+ public static class RetryProviderOverride extends RetryPolicyProvider {
112
+
113
+ private RetryPolicy myRetryPolicy ;
114
+
115
+ private BackOffPolicy myBackOffPolicy ;
116
+
117
+ public void setPolicies (RetryPolicy theRetryPolicy , BackOffPolicy theBackOffPolicy ) {
118
+ myRetryPolicy = theRetryPolicy ;
119
+ myBackOffPolicy = theBackOffPolicy ;
120
+ }
121
+
122
+ @ Override
123
+ protected RetryPolicy retryPolicy () {
124
+ if (myRetryPolicy != null ) {
125
+ return myRetryPolicy ;
126
+ }
127
+ return super .retryPolicy ();
128
+ }
129
+
130
+ @ Override
131
+ protected BackOffPolicy backOffPolicy () {
132
+ if (myBackOffPolicy != null ) {
133
+ return myBackOffPolicy ;
134
+ }
135
+ return super .backOffPolicy ();
136
+ }
137
+ }
138
+
139
+ @ Configuration
140
+ public static class RPConfig {
141
+ @ Primary
142
+ @ Bean
143
+ public RetryPolicyProvider retryPolicyProvider () {
144
+ return new RetryProviderOverride ();
145
+ }
146
+ }
147
+
89
148
private static final Logger ourLog = LoggerFactory .getLogger (Batch2CoordinatorIT .class );
90
149
91
150
public static final int TEST_JOB_VERSION = 1 ;
@@ -106,6 +165,9 @@ public class Batch2CoordinatorIT extends BaseJpaR4Test {
106
165
@ Autowired
107
166
IJobPersistence myJobPersistence ;
108
167
168
+ @ Autowired
169
+ private RetryPolicyProvider myRetryPolicyProvider ;
170
+
109
171
@ RegisterExtension
110
172
LogbackTestExtension myLogbackTestExtension = new LogbackTestExtension ();
111
173
@@ -132,6 +194,11 @@ public void before() throws Exception {
132
194
};
133
195
myWorkChannel = (LinkedBlockingChannel ) myChannelFactory .getOrCreateReceiver (CHANNEL_NAME , JobWorkNotificationJsonMessage .class , new ChannelConsumerSettings ());
134
196
myStorageSettings .setJobFastTrackingEnabled (true );
197
+
198
+ // reset
199
+ if (myRetryPolicyProvider instanceof RetryProviderOverride rp ) {
200
+ rp .setPolicies (null , null );
201
+ }
135
202
}
136
203
137
204
@ AfterEach
@@ -589,6 +656,84 @@ private void complete(
589
656
assertEquals (1.0 , jobInstance .getProgress ());
590
657
}
591
658
659
+ @ Test
660
+ public void failingWorkChunks_withLargeErrorMsgs_shouldNotErrorOutTheJob () {
661
+ // setup
662
+ assertTrue (myRetryPolicyProvider instanceof RetryProviderOverride );
663
+
664
+ String jobId = getMethodNameForJobId ();
665
+ AtomicInteger counter = new AtomicInteger ();
666
+
667
+ // we want an error message larger than can be contained in the db
668
+ String errorMsg = RandomTextUtils .newSecureRandomAlphaNumericString (ERROR_MSG_MAX_LENGTH + 100 );
669
+
670
+ // we want 1 more error than the allowed maximum
671
+ // otherwise we won't be updating the error chunk to have
672
+ // "Too many errors" error
673
+ int errorCount = MAX_CHUNK_ERROR_COUNT + 1 ;
674
+
675
+ MaxAttemptsRetryPolicy retryPolicy = new MaxAttemptsRetryPolicy ();
676
+ retryPolicy .setMaxAttempts (errorCount );
677
+ RetryProviderOverride overrideRetryProvider = (RetryProviderOverride ) myRetryPolicyProvider ;
678
+ overrideRetryProvider .setPolicies (retryPolicy , new NoBackOffPolicy ());
679
+
680
+ // create a job that fails and throws a large error
681
+ IJobStepWorker <TestJobParameters , VoidModel , FirstStepOutput > first = (step , sink ) -> {
682
+ counter .getAndIncrement ();
683
+ throw new RuntimeException (errorMsg );
684
+ };
685
+ IJobStepWorker <TestJobParameters , FirstStepOutput , VoidModel > last = (step , sink ) -> {
686
+ // we don't care; we'll never get here
687
+ return RunOutcome .SUCCESS ;
688
+ };
689
+
690
+ JobDefinition <? extends IModelJson > jd = JobDefinition .newBuilder ()
691
+ .setJobDefinitionId (jobId )
692
+ .setJobDescription ("test job" )
693
+ .setJobDefinitionVersion (TEST_JOB_VERSION )
694
+ .setParametersType (TestJobParameters .class )
695
+ .gatedExecution ()
696
+ .addFirstStep (
697
+ FIRST_STEP_ID ,
698
+ "First step" ,
699
+ FirstStepOutput .class ,
700
+ first
701
+ )
702
+ .addLastStep (
703
+ LAST_STEP_ID ,
704
+ "Final step" ,
705
+ last
706
+ )
707
+ .completionHandler (myCompletionHandler )
708
+ .build ();
709
+ myJobDefinitionRegistry .addJobDefinition (jd );
710
+
711
+ // test
712
+ JobInstanceStartRequest request = buildRequest (jobId );
713
+ Batch2JobStartResponse startResponse = myJobCoordinator .startInstance (new SystemRequestDetails (), request );
714
+ String instanceId = startResponse .getInstanceId ();
715
+
716
+ // waiting for the multitude of failures
717
+ await ().until (() -> {
718
+ myJobMaintenanceService .runMaintenancePass ();
719
+ JobInstance instance = myJobCoordinator .getInstance (instanceId );
720
+ ourLog .info ("Attempt " + counter .get () + " for "
721
+ + instance .getInstanceId () + ". Status: " + instance .getStatus ());
722
+ return counter .get () > errorCount - 1 ;
723
+ });
724
+
725
+ // verify
726
+ Iterator <WorkChunk > iterator = myJobPersistence .fetchAllWorkChunksIterator (instanceId , true );
727
+ List <WorkChunk > listOfChunks = new ArrayList <>();
728
+ iterator .forEachRemaining (listOfChunks ::add );
729
+ assertEquals (1 , listOfChunks .size ());
730
+ WorkChunk workChunk = listOfChunks .get (0 );
731
+ assertEquals (WorkChunkStatusEnum .FAILED , workChunk .getStatus ());
732
+ // should contain some of the original error msg, but not all
733
+ assertTrue (workChunk .getErrorMessage ().contains (errorMsg .substring (0 , 100 )));
734
+ assertTrue (workChunk .getErrorMessage ().startsWith ("Too many errors" ));
735
+ }
736
+
592
737
@ Test
593
738
public void testJobWithLongPollingStep () throws InterruptedException {
594
739
// create job definition
@@ -745,7 +890,7 @@ public void testUnknownException_KeepsInProgress_CanCancelManually() throws Inte
745
890
callLatch (myFirstStepLatch , step );
746
891
throw new RuntimeException ("Expected Test Exception" );
747
892
};
748
- IJobStepWorker <TestJobParameters , FirstStepOutput , VoidModel > lastStep = (step , sink ) -> fail ();
893
+ IJobStepWorker <TestJobParameters , FirstStepOutput , VoidModel > lastStep = (step , sink ) -> fail ();
749
894
750
895
String jobDefId = getMethodNameForJobId ();
751
896
JobDefinition <? extends IModelJson > definition = buildGatedJobDefinition (jobDefId , firstStep , lastStep );
0 commit comments