Skip to content

Commit b5b710a

Browse files
Use retry logic and real file system in file settings ITs (#116392) (#116710)
Several file-settings ITs fail (rarely) with exceptions like:

```
java.nio.file.AccessDeniedException: C:\Users\jenkins\workspace\platform-support\14\server\build\testrun\internalClusterTest\temp\org.elasticsearch.reservedstate.service.SnaphotsAndFileSettingsIT_5733F2A737542BE-001\tempFile-001.tmp -> C:\Users\jenkins\workspace\platform-support\14\server\build\testrun\internalClusterTest\temp\org.elasticsearch.reservedstate.service.SnaphotsAndFileSettingsIT_5733F2A737542BE-001\tempDir-002\config\operator\settings.json
    at sun.nio.fs.WindowsException.translateToIOException(WindowsException.java:89)
    at sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:103)
    at sun.nio.fs.WindowsFileCopy.move(WindowsFileCopy.java:317)
    at sun.nio.fs.WindowsFileSystemProvider.move(WindowsFileSystemProvider.java:293)
    at org.apache.lucene.tests.mockfile.FilterFileSystemProvider.move(FilterFileSystemProvider.java:144)
    at org.apache.lucene.tests.mockfile.FilterFileSystemProvider.move(FilterFileSystemProvider.java:144)
    at org.apache.lucene.tests.mockfile.FilterFileSystemProvider.move(FilterFileSystemProvider.java:144)
    at org.apache.lucene.tests.mockfile.FilterFileSystemProvider.move(FilterFileSystemProvider.java:144)
    at java.nio.file.Files.move(Files.java:1430)
    at org.elasticsearch.reservedstate.service.SnaphotsAndFileSettingsIT.writeJSONFile(SnaphotsAndFileSettingsIT.java:86)
    at org.elasticsearch.reservedstate.service.SnaphotsAndFileSettingsIT.testRestoreWithPersistedFileSettings(SnaphotsAndFileSettingsIT.java:321)
```

This happens on Windows file systems, due to a race condition where the file settings service is reading the settings file concurrently with the test trying to modify it (a no-go on Windows).
It turns out we have already addressed this with a retry for one test suite (#91863), and separately addressed a related issue around mock Windows file systems misbehaving (#92653). This PR extends the above fixes to all file-settings-related ITs.

(cherry picked from commit 91559da)

Co-authored-by: Elastic Machine <[email protected]>
1 parent d40c2cc commit b5b710a

File tree

7 files changed

+101
-128
lines changed

7 files changed

+101
-128
lines changed

server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/ComponentTemplatesFileSettingsIT.java

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
package org.elasticsearch.reservedstate.service;
1111

12+
import org.apache.lucene.tests.util.LuceneTestCase;
1213
import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest;
1314
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
1415
import org.elasticsearch.action.admin.indices.template.get.GetComponentTemplateAction;
@@ -26,16 +27,12 @@
2627
import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
2728
import org.elasticsearch.cluster.service.ClusterService;
2829
import org.elasticsearch.common.settings.Settings;
29-
import org.elasticsearch.core.Strings;
3030
import org.elasticsearch.core.Tuple;
3131
import org.elasticsearch.test.ESIntegTestCase;
3232
import org.elasticsearch.xcontent.XContentParserConfiguration;
3333

3434
import java.io.ByteArrayInputStream;
3535
import java.nio.charset.StandardCharsets;
36-
import java.nio.file.Files;
37-
import java.nio.file.Path;
38-
import java.nio.file.StandardCopyOption;
3936
import java.util.Map;
4037
import java.util.concurrent.CountDownLatch;
4138
import java.util.concurrent.ExecutionException;
@@ -54,6 +51,7 @@
5451
import static org.hamcrest.Matchers.notNullValue;
5552

5653
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
54+
@LuceneTestCase.SuppressFileSystems("*")
5755
public class ComponentTemplatesFileSettingsIT extends ESIntegTestCase {
5856

5957
private static AtomicLong versionCounter = new AtomicLong(1);
@@ -359,15 +357,7 @@ private void assertMasterNode(Client client, String node) throws ExecutionExcept
359357
}
360358

361359
private void writeJSONFile(String node, String json) throws Exception {
362-
long version = versionCounter.incrementAndGet();
363-
364-
FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node);
365-
366-
Files.createDirectories(fileSettingsService.watchedFileDir());
367-
Path tempFilePath = createTempFile();
368-
369-
Files.write(tempFilePath, Strings.format(json, version).getBytes(StandardCharsets.UTF_8));
370-
Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE);
360+
FileSettingsServiceIT.writeJSONFile(node, json, logger, versionCounter.incrementAndGet());
371361
}
372362

373363
private Tuple<CountDownLatch, AtomicLong> setupClusterStateListener(String node) {

server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/FileSettingsServiceIT.java

Lines changed: 39 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
package org.elasticsearch.reservedstate.service;
1111

1212
import org.apache.logging.log4j.Logger;
13+
import org.apache.lucene.tests.util.LuceneTestCase;
1314
import org.elasticsearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest;
1415
import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest;
1516
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
@@ -20,14 +21,15 @@
2021
import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata;
2122
import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
2223
import org.elasticsearch.cluster.service.ClusterService;
24+
import org.elasticsearch.common.Randomness;
2325
import org.elasticsearch.common.settings.Settings;
2426
import org.elasticsearch.core.Strings;
2527
import org.elasticsearch.core.Tuple;
2628
import org.elasticsearch.reservedstate.action.ReservedClusterSettingsAction;
2729
import org.elasticsearch.test.ESIntegTestCase;
2830
import org.junit.Before;
2931

30-
import java.nio.charset.StandardCharsets;
32+
import java.io.IOException;
3133
import java.nio.file.Files;
3234
import java.nio.file.Path;
3335
import java.nio.file.StandardCopyOption;
@@ -50,6 +52,7 @@
5052
import static org.hamcrest.Matchers.nullValue;
5153

5254
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
55+
@LuceneTestCase.SuppressFileSystems("*")
5356
public class FileSettingsServiceIT extends ESIntegTestCase {
5457

5558
private final AtomicLong versionCounter = new AtomicLong(1);
@@ -129,29 +132,37 @@ private void assertMasterNode(Client client, String node) {
129132
);
130133
}
131134

132-
public static void writeJSONFile(String node, String json, AtomicLong versionCounter, Logger logger, boolean incrementVersion)
133-
throws Exception {
134-
long version = incrementVersion ? versionCounter.incrementAndGet() : versionCounter.get();
135-
135+
public static void writeJSONFile(String node, String json, Logger logger, Long version) throws Exception {
136136
FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node);
137137

138138
Files.createDirectories(fileSettingsService.watchedFileDir());
139139
Path tempFilePath = createTempFile();
140140

141-
String settingsFileContent = Strings.format(json, version);
142-
Files.write(tempFilePath, settingsFileContent.getBytes(StandardCharsets.UTF_8));
143-
logger.info("--> Before writing new settings file with version [{}]", version);
144-
Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE);
145-
logger.info("--> After writing new settings file: [{}]", settingsFileContent);
146-
}
147-
148-
public static void writeJSONFile(String node, String json, AtomicLong versionCounter, Logger logger) throws Exception {
149-
writeJSONFile(node, json, versionCounter, logger, true);
141+
String jsonWithVersion = Strings.format(json, version);
142+
logger.info("--> before writing JSON config to node {} with path {}", node, tempFilePath);
143+
logger.info(jsonWithVersion);
144+
145+
Files.writeString(tempFilePath, jsonWithVersion);
146+
int retryCount = 0;
147+
do {
148+
try {
149+
// this can fail on Windows because of timing
150+
Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE);
151+
logger.info("--> after writing JSON config to node {} with path {}", node, tempFilePath);
152+
return;
153+
} catch (IOException e) {
154+
logger.info("--> retrying writing a settings file [{}]", retryCount);
155+
if (retryCount == 4) { // retry 5 times
156+
throw e;
157+
}
158+
Thread.sleep(retryDelay(retryCount));
159+
retryCount++;
160+
}
161+
} while (true);
150162
}
151163

152-
public static void writeJSONFileWithoutVersionIncrement(String node, String json, AtomicLong versionCounter, Logger logger)
153-
throws Exception {
154-
writeJSONFile(node, json, versionCounter, logger, false);
164+
private static long retryDelay(int retryCount) {
165+
return 100 * (1 << retryCount) + Randomness.get().nextInt(10);
155166
}
156167

157168
private Tuple<CountDownLatch, AtomicLong> setupCleanupClusterStateListener(String node) {
@@ -245,7 +256,7 @@ public void testSettingsApplied() throws Exception {
245256
assertTrue(masterFileSettingsService.watching());
246257
assertFalse(dataFileSettingsService.watching());
247258

248-
writeJSONFile(masterNode, testJSON, versionCounter, logger);
259+
writeJSONFile(masterNode, testJSON, logger, versionCounter.incrementAndGet());
249260
assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "50mb");
250261
}
251262

@@ -260,7 +271,7 @@ public void testSettingsAppliedOnStart() throws Exception {
260271

261272
// In internal cluster tests, the nodes share the config directory, so when we write with the data node path
262273
// the master will pick it up on start
263-
writeJSONFile(dataNode, testJSON, versionCounter, logger);
274+
writeJSONFile(dataNode, testJSON, logger, versionCounter.incrementAndGet());
264275

265276
logger.info("--> start master node");
266277
final String masterNode = internalCluster().startMasterOnlyNode();
@@ -288,7 +299,7 @@ public void testReservedStatePersistsOnRestart() throws Exception {
288299
assertBusy(() -> assertTrue(masterFileSettingsService.watching()));
289300

290301
logger.info("--> write some settings");
291-
writeJSONFile(masterNode, testJSON, versionCounter, logger);
302+
writeJSONFile(masterNode, testJSON, logger, versionCounter.incrementAndGet());
292303
assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "50mb");
293304

294305
logger.info("--> restart master");
@@ -366,7 +377,7 @@ public void testErrorSaved() throws Exception {
366377
assertTrue(masterFileSettingsService.watching());
367378
assertFalse(dataFileSettingsService.watching());
368379

369-
writeJSONFile(masterNode, testErrorJSON, versionCounter, logger);
380+
writeJSONFile(masterNode, testErrorJSON, logger, versionCounter.incrementAndGet());
370381
assertClusterStateNotSaved(savedClusterState.v1(), savedClusterState.v2());
371382
}
372383

@@ -390,14 +401,14 @@ public void testErrorCanRecoverOnRestart() throws Exception {
390401
assertTrue(masterFileSettingsService.watching());
391402
assertFalse(dataFileSettingsService.watching());
392403

393-
writeJSONFile(masterNode, testErrorJSON, versionCounter, logger);
404+
writeJSONFile(masterNode, testErrorJSON, logger, versionCounter.incrementAndGet());
394405
AtomicLong metadataVersion = savedClusterState.v2();
395406
assertClusterStateNotSaved(savedClusterState.v1(), metadataVersion);
396407
assertHasErrors(metadataVersion, "not_cluster_settings");
397408

398409
// write valid json without version increment to simulate ES being able to process settings after a restart (usually, this would be
399410
// due to a code change)
400-
writeJSONFileWithoutVersionIncrement(masterNode, testJSON, versionCounter, logger);
411+
writeJSONFile(masterNode, testJSON, logger, versionCounter.get());
401412
internalCluster().restartNode(masterNode);
402413
ensureGreen();
403414

@@ -426,14 +437,14 @@ public void testNewErrorOnRestartReprocessing() throws Exception {
426437
assertTrue(masterFileSettingsService.watching());
427438
assertFalse(dataFileSettingsService.watching());
428439

429-
writeJSONFile(masterNode, testErrorJSON, versionCounter, logger);
440+
writeJSONFile(masterNode, testErrorJSON, logger, versionCounter.incrementAndGet());
430441
AtomicLong metadataVersion = savedClusterState.v2();
431442
assertClusterStateNotSaved(savedClusterState.v1(), metadataVersion);
432443
assertHasErrors(metadataVersion, "not_cluster_settings");
433444

434445
// write json with new error without version increment to simulate ES failing to process settings after a restart for a new reason
435446
// (usually, this would be due to a code change)
436-
writeJSONFileWithoutVersionIncrement(masterNode, testOtherErrorJSON, versionCounter, logger);
447+
writeJSONFile(masterNode, testOtherErrorJSON, logger, versionCounter.get());
437448
assertHasErrors(metadataVersion, "not_cluster_settings");
438449
internalCluster().restartNode(masterNode);
439450
ensureGreen();
@@ -461,7 +472,7 @@ public void testSettingsAppliedOnMasterReElection() throws Exception {
461472

462473
assertTrue(masterFileSettingsService.watching());
463474

464-
writeJSONFile(masterNode, testJSON, versionCounter, logger);
475+
writeJSONFile(masterNode, testJSON, logger, versionCounter.incrementAndGet());
465476
assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "50mb");
466477

467478
internalCluster().stopCurrentMasterNode();
@@ -476,13 +487,13 @@ public void testSettingsAppliedOnMasterReElection() throws Exception {
476487
ensureStableCluster(3);
477488

478489
savedClusterState = setupCleanupClusterStateListener(internalCluster().getMasterName());
479-
writeJSONFile(internalCluster().getMasterName(), testCleanupJSON, versionCounter, logger);
490+
writeJSONFile(internalCluster().getMasterName(), testCleanupJSON, logger, versionCounter.incrementAndGet());
480491

481492
boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS);
482493
assertTrue(awaitSuccessful);
483494

484495
savedClusterState = setupClusterStateListener(internalCluster().getMasterName());
485-
writeJSONFile(internalCluster().getMasterName(), testJSON43mb, versionCounter, logger);
496+
writeJSONFile(internalCluster().getMasterName(), testJSON43mb, logger, versionCounter.incrementAndGet());
486497

487498
assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "43mb");
488499
}

server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/RepositoriesFileSettingsIT.java

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
package org.elasticsearch.reservedstate.service;
1111

12+
import org.apache.lucene.tests.util.LuceneTestCase;
1213
import org.elasticsearch.action.admin.cluster.repositories.get.GetRepositoriesAction;
1314
import org.elasticsearch.action.admin.cluster.repositories.get.GetRepositoriesRequest;
1415
import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryRequest;
@@ -22,17 +23,13 @@
2223
import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
2324
import org.elasticsearch.cluster.service.ClusterService;
2425
import org.elasticsearch.common.settings.Settings;
25-
import org.elasticsearch.core.Strings;
2626
import org.elasticsearch.core.Tuple;
2727
import org.elasticsearch.repositories.RepositoryMissingException;
2828
import org.elasticsearch.test.ESIntegTestCase;
2929
import org.elasticsearch.xcontent.XContentParserConfiguration;
3030

3131
import java.io.ByteArrayInputStream;
3232
import java.nio.charset.StandardCharsets;
33-
import java.nio.file.Files;
34-
import java.nio.file.Path;
35-
import java.nio.file.StandardCopyOption;
3633
import java.util.concurrent.CountDownLatch;
3734
import java.util.concurrent.ExecutionException;
3835
import java.util.concurrent.TimeUnit;
@@ -49,6 +46,7 @@
4946
import static org.hamcrest.Matchers.notNullValue;
5047

5148
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
49+
@LuceneTestCase.SuppressFileSystems("*")
5250
public class RepositoriesFileSettingsIT extends ESIntegTestCase {
5351
private static AtomicLong versionCounter = new AtomicLong(1);
5452

@@ -102,15 +100,7 @@ private void assertMasterNode(Client client, String node) throws ExecutionExcept
102100
}
103101

104102
private void writeJSONFile(String node, String json) throws Exception {
105-
long version = versionCounter.incrementAndGet();
106-
107-
FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node);
108-
109-
Files.createDirectories(fileSettingsService.watchedFileDir());
110-
Path tempFilePath = createTempFile();
111-
112-
Files.write(tempFilePath, Strings.format(json, version).getBytes(StandardCharsets.UTF_8));
113-
Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE);
103+
FileSettingsServiceIT.writeJSONFile(node, json, logger, versionCounter.incrementAndGet());
114104
}
115105

116106
private Tuple<CountDownLatch, AtomicLong> setupClusterStateListener(String node) {

server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/SnapshotsAndFileSettingsIT.java

Lines changed: 1 addition & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,15 @@
1919
import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata;
2020
import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
2121
import org.elasticsearch.cluster.service.ClusterService;
22-
import org.elasticsearch.common.Randomness;
2322
import org.elasticsearch.common.settings.Settings;
24-
import org.elasticsearch.core.Strings;
2523
import org.elasticsearch.core.TimeValue;
2624
import org.elasticsearch.core.Tuple;
2725
import org.elasticsearch.reservedstate.action.ReservedClusterSettingsAction;
2826
import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase;
2927
import org.elasticsearch.snapshots.SnapshotState;
3028
import org.junit.After;
3129

32-
import java.io.IOException;
33-
import java.nio.charset.StandardCharsets;
3430
import java.nio.file.Files;
35-
import java.nio.file.Path;
36-
import java.nio.file.StandardCopyOption;
3731
import java.util.concurrent.CountDownLatch;
3832
import java.util.concurrent.TimeUnit;
3933
import java.util.concurrent.atomic.AtomicLong;
@@ -78,34 +72,8 @@ public void cleanUp() throws Exception {
7872
awaitNoMoreRunningOperations();
7973
}
8074

81-
private long retryDelay(int retryCount) {
82-
return 100 * (1 << retryCount) + Randomness.get().nextInt(10);
83-
}
84-
8575
private void writeJSONFile(String node, String json) throws Exception {
86-
long version = versionCounter.incrementAndGet();
87-
88-
FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node);
89-
90-
Files.createDirectories(fileSettingsService.watchedFileDir());
91-
Path tempFilePath = createTempFile();
92-
93-
Files.write(tempFilePath, Strings.format(json, version).getBytes(StandardCharsets.UTF_8));
94-
int retryCount = 0;
95-
do {
96-
try {
97-
// this can fail on Windows because of timing
98-
Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE);
99-
return;
100-
} catch (IOException e) {
101-
logger.info("--> retrying writing a settings file [" + retryCount + "]");
102-
if (retryCount == 4) { // retry 5 times
103-
throw e;
104-
}
105-
Thread.sleep(retryDelay(retryCount));
106-
retryCount++;
107-
}
108-
} while (true);
76+
FileSettingsServiceIT.writeJSONFile(node, json, logger, versionCounter.incrementAndGet());
10977
}
11078

11179
private Tuple<CountDownLatch, AtomicLong> setupClusterStateListener(String node) {

0 commit comments

Comments
 (0)