Skip to content

Commit 4abe673

Browse files
committed
[fix JENKINS-66898] make the cache thread safe
there are several conditions where we have race conditions in the shared library cache : - when multiple builds start at the same time and try to use an expired cached shared library. The first one will start deleting the cache dir. The second one will see that the cachedir was modified and consider it up-to-date while the first is still deleting - when multiple builds start at the same time with no cache entry available. The first has created the cachedir but not yet the lock file, the second will not see the lockfile but the cachedir - the background cleaner is about to clean when a new build starts - an administrator decides to clear the cache right before a new build wants to use the cache library All these can lead to the situtation that the library for a build is copied only partially and fails the build This PR fixes the race conditions by using a ReadWriteLock to ensure nobody reads when a cache entry is created/updated/deleted
1 parent bab4aa8 commit 4abe673

File tree

3 files changed

+106
-34
lines changed

3 files changed

+106
-34
lines changed

src/main/java/org/jenkinsci/plugins/workflow/libs/LibraryAdder.java

Lines changed: 82 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,9 @@
4343
import java.util.LinkedHashMap;
4444
import java.util.List;
4545
import java.util.Map;
46-
import java.util.Set;
4746
import java.util.TreeMap;
47+
import java.util.concurrent.ConcurrentHashMap;
48+
import java.util.concurrent.locks.ReentrantReadWriteLock;
4849
import java.util.logging.Level;
4950
import java.util.logging.Logger;
5051
import edu.umd.cs.findbugs.annotations.CheckForNull;
@@ -64,7 +65,13 @@
6465
@Extension public class LibraryAdder extends ClasspathAdder {
6566

6667
private static final Logger LOGGER = Logger.getLogger(LibraryAdder.class.getName());
68+
69+
private static ConcurrentHashMap<String, ReentrantReadWriteLock> cacheRetrieveLock = new ConcurrentHashMap<>();
6770

71+
static @NonNull ReentrantReadWriteLock getReadWriteLockFor(@NonNull String name) {
72+
return cacheRetrieveLock.computeIfAbsent(name, s -> new ReentrantReadWriteLock(true));
73+
}
74+
6875
@Override public List<Addition> add(CpsFlowExecution execution, List<String> libraries, HashMap<String, Boolean> changelogs) throws Exception {
6976
Queue.Executable executable = execution.getOwner().getExecutable();
7077
Run<?,?> build;
@@ -155,6 +162,36 @@
155162
}
156163
}
157164

165+
private enum CacheStatus {
166+
VALID,
167+
DOES_NOT_EXIST,
168+
EXPIRED;
169+
}
170+
171+
private static CacheStatus getCacheStatus(@NonNull LibraryCachingConfiguration cachingConfiguration, @NonNull final FilePath versionCacheDir)
172+
throws IOException, InterruptedException
173+
{
174+
if (cachingConfiguration.isRefreshEnabled()) {
175+
final long cachingMilliseconds = cachingConfiguration.getRefreshTimeMilliseconds();
176+
177+
if(versionCacheDir.exists()) {
178+
if ((versionCacheDir.lastModified() + cachingMilliseconds) > System.currentTimeMillis()) {
179+
return CacheStatus.VALID;
180+
} else {
181+
return CacheStatus.EXPIRED;
182+
}
183+
} else {
184+
return CacheStatus.DOES_NOT_EXIST;
185+
}
186+
} else {
187+
if (versionCacheDir.exists()) {
188+
return CacheStatus.VALID;
189+
} else {
190+
return CacheStatus.DOES_NOT_EXIST;
191+
}
192+
}
193+
}
194+
158195
/** Retrieve library files. */
159196
static List<URL> retrieve(@NonNull LibraryRecord record, @NonNull LibraryRetriever retriever, @NonNull TaskListener listener, @NonNull Run<?,?> run, @NonNull CpsFlowExecution execution) throws Exception {
160197
String name = record.name;
@@ -164,42 +201,60 @@ static List<URL> retrieve(@NonNull LibraryRecord record, @NonNull LibraryRetriev
164201
FilePath libDir = new FilePath(execution.getOwner().getRootDir()).child("libs/" + record.getDirectoryName());
165202
Boolean shouldCache = cachingConfiguration != null;
166203
final FilePath versionCacheDir = new FilePath(LibraryCachingConfiguration.getGlobalLibrariesCacheDir(), record.getDirectoryName());
167-
final FilePath retrieveLockFile = new FilePath(versionCacheDir, LibraryCachingConfiguration.RETRIEVE_LOCK_FILE);
204+
ReentrantReadWriteLock retrieveLock = getReadWriteLockFor(record.getDirectoryName());
168205
final FilePath lastReadFile = new FilePath(versionCacheDir, LibraryCachingConfiguration.LAST_READ_FILE);
169206

170207
if(shouldCache && cachingConfiguration.isExcluded(version)) {
171208
listener.getLogger().println("Library " + name + "@" + version + " is excluded from caching.");
172209
shouldCache = false;
173210
}
174211

175-
if(shouldCache && retrieveLockFile.exists()) {
176-
listener.getLogger().println("Library " + name + "@" + version + " is currently being cached by another job, retrieving without cache.");
177-
shouldCache = false;
178-
}
179-
180212
if(shouldCache) {
181-
if (cachingConfiguration.isRefreshEnabled()) {
182-
final long cachingMinutes = cachingConfiguration.getRefreshTimeMinutes();
183-
final long cachingMilliseconds = cachingConfiguration.getRefreshTimeMilliseconds();
184-
185-
if(versionCacheDir.exists() && (versionCacheDir.lastModified() + cachingMilliseconds) < System.currentTimeMillis()) {
186-
listener.getLogger().println("Library " + name + "@" + version + " is due for a refresh after " + cachingMinutes + " minutes, clearing.");
187-
versionCacheDir.deleteRecursive();
213+
retrieveLock.readLock().lockInterruptibly();
214+
try {
215+
CacheStatus cacheStatus = getCacheStatus(cachingConfiguration, versionCacheDir);
216+
if (cacheStatus == CacheStatus.DOES_NOT_EXIST || cacheStatus == CacheStatus.EXPIRED) {
217+
retrieveLock.readLock().unlock();
218+
retrieveLock.writeLock().lockInterruptibly();
219+
try {
220+
boolean retrieve = false;
221+
switch (getCacheStatus(cachingConfiguration, versionCacheDir)) {
222+
case VALID:
223+
listener.getLogger().println("Library " + name + "@" + version + " is cached. Copying from home.");
224+
break;
225+
case DOES_NOT_EXIST:
226+
retrieve = true;
227+
break;
228+
case EXPIRED:
229+
long cachingMinutes = cachingConfiguration.getRefreshTimeMinutes();
230+
listener.getLogger().println("Library " + name + "@" + version + " is due for a refresh after " + cachingMinutes + " minutes, clearing.");
231+
if (versionCacheDir.exists()) {
232+
versionCacheDir.deleteRecursive();
233+
versionCacheDir.withSuffix("-name.txt").delete();
234+
}
235+
retrieve = true;
236+
break;
237+
}
238+
239+
if (retrieve) {
240+
listener.getLogger().println("Caching library " + name + "@" + version);
241+
versionCacheDir.mkdirs();
242+
retriever.retrieve(name, version, changelog, versionCacheDir, run, listener);
243+
}
244+
retrieveLock.readLock().lock();
245+
} finally {
246+
retrieveLock.writeLock().unlock();
247+
}
248+
} else {
249+
listener.getLogger().println("Library " + name + "@" + version + " is cached. Copying from home.");
188250
}
251+
252+
lastReadFile.touch(System.currentTimeMillis());
253+
versionCacheDir.withSuffix("-name.txt").write(name, "UTF-8");
254+
versionCacheDir.copyRecursiveTo(libDir);
255+
} finally {
256+
retrieveLock.readLock().unlock();
189257
}
190-
191-
if(versionCacheDir.exists()) {
192-
listener.getLogger().println("Library " + name + "@" + version + " is cached. Copying from home.");
193-
} else {
194-
listener.getLogger().println("Caching library " + name + "@" + version);
195-
versionCacheDir.mkdirs();
196-
retrieveLockFile.touch(System.currentTimeMillis());
197-
retriever.retrieve(name, version, changelog, versionCacheDir, run, listener);
198-
retrieveLockFile.delete();
199-
}
200-
lastReadFile.touch(System.currentTimeMillis());
201-
versionCacheDir.withSuffix("-name.txt").write(name, "UTF-8");
202-
versionCacheDir.copyRecursiveTo(libDir);
203258
} else {
204259
retriever.retrieve(name, version, changelog, libDir, run, listener);
205260
}

src/main/java/org/jenkinsci/plugins/workflow/libs/LibraryCachingCleanup.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import java.io.IOException;
1010
import java.util.concurrent.TimeUnit;
11+
import java.util.concurrent.locks.ReentrantReadWriteLock;
1112

1213
import jenkins.util.SystemProperties;
1314

@@ -48,9 +49,16 @@ public LibraryCachingCleanup() {
4849
private boolean removeIfExpiredCacheDirectory(FilePath library) throws IOException, InterruptedException {
4950
final FilePath lastReadFile = new FilePath(library, LibraryCachingConfiguration.LAST_READ_FILE);
5051
if (lastReadFile.exists()) {
51-
if (System.currentTimeMillis() - lastReadFile.lastModified() > TimeUnit.DAYS.toMillis(EXPIRE_AFTER_READ_DAYS)) {
52-
library.deleteRecursive();
53-
library.withSuffix("-name.txt").delete();
52+
ReentrantReadWriteLock retrieveLock = LibraryAdder.getReadWriteLockFor(library.getName());
53+
retrieveLock.writeLock().lockInterruptibly();
54+
try {
55+
if (System.currentTimeMillis() - lastReadFile.lastModified() > TimeUnit.DAYS.toMillis(EXPIRE_AFTER_READ_DAYS)) {
56+
57+
library.deleteRecursive();
58+
library.withSuffix("-name.txt").delete();
59+
}
60+
} finally {
61+
retrieveLock.writeLock().unlock();
5462
}
5563
return true;
5664
}

src/main/java/org/jenkinsci/plugins/workflow/libs/LibraryCachingConfiguration.java

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,14 @@
99
import org.kohsuke.stapler.DataBoundConstructor;
1010
import org.kohsuke.stapler.QueryParameter;
1111

12-
import java.io.File;
1312
import java.io.IOException;
1413
import java.io.InputStream;
1514
import java.nio.charset.StandardCharsets;
1615
import java.util.Arrays;
1716
import java.util.Collections;
1817
import java.util.List;
18+
import java.util.concurrent.TimeUnit;
19+
import java.util.concurrent.locks.ReentrantReadWriteLock;
1920
import org.apache.commons.io.IOUtils;
2021
import org.apache.commons.lang.StringUtils;
2122

@@ -26,7 +27,6 @@ public final class LibraryCachingConfiguration extends AbstractDescribableImpl<L
2627
private static final String VERSIONS_SEPARATOR = " ";
2728
public static final String GLOBAL_LIBRARIES_DIR = "global-libraries-cache";
2829
public static final String LAST_READ_FILE = "last_read";
29-
public static final String RETRIEVE_LOCK_FILE = "retrieve.lock";
3030

3131
@DataBoundConstructor public LibraryCachingConfiguration(int refreshTimeMinutes, String excludedVersionsStr) {
3232
this.refreshTimeMinutes = refreshTimeMinutes;
@@ -97,8 +97,17 @@ public FormValidation doClearCache(@QueryParameter String name) throws Interrupt
9797
if (libraryNamePath.readToString().equals(name)) {
9898
FilePath libraryCachePath = LibraryCachingConfiguration.getGlobalLibrariesCacheDir()
9999
.child(libraryNamePath.getName().replace("-name.txt", ""));
100-
libraryCachePath.deleteRecursive();
101-
libraryNamePath.delete();
100+
ReentrantReadWriteLock retrieveLock = LibraryAdder.getReadWriteLockFor(libraryCachePath.getName());
101+
if (retrieveLock.writeLock().tryLock(10, TimeUnit.SECONDS)) {
102+
try {
103+
libraryCachePath.deleteRecursive();
104+
libraryNamePath.delete();
105+
} finally {
106+
retrieveLock.writeLock().unlock();
107+
}
108+
} else {
109+
return FormValidation.error("The cache dir is currently used by another thread, so deletion was not possibly. Please try again");
110+
}
102111
}
103112
}
104113
}

0 commit comments

Comments
 (0)