Skip to content

Commit 78e1bc8

Browse files
committed
Improve data store handling with thread safety and resource management enhancements
This commit addresses several improvements in the data store processing implementation: **Thread Safety Improvements:** - DataStoreFactory: Changed dataStoreMap from LinkedHashMap to ConcurrentHashMap for thread-safe concurrent access - DataStoreFactory: Made dataStoreNames and lastLoadedTime volatile to ensure visibility across threads - DataStoreFactory: Made getDataStoreNames() synchronized to prevent race conditions during cache refresh - AbstractDataStore: Made alive field volatile to ensure proper visibility in multi-threaded scenarios - FileListIndexUpdateCallbackImpl: Changed deleteUrlList from ArrayList to CopyOnWriteArrayList for thread-safe access from executor threads **Resource Handling:** - DataStoreFactory: Added null check for jarFiles array to prevent NullPointerException - DataStoreFactory: Added existence check for XML configuration files before attempting to read them, reducing unnecessary exception handling **Code Quality:** - AbstractDataStore: Moved static logger declaration before instance fields following Java best practices - FileListIndexUpdateCallbackImpl: Fixed incorrect StringUtil import (was using Apache POI's StringUtil instead of Fess's) These improvements enhance the reliability and thread safety of the data store processing layer, particularly important for concurrent crawling operations.
1 parent 2f1b2e6 commit 78e1bc8

File tree

3 files changed

+29
-15
lines changed

3 files changed

+29
-15
lines changed

src/main/java/org/codelibs/fess/ds/AbstractDataStore.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,6 @@
4343
*/
4444
public abstract class AbstractDataStore implements DataStore {
4545

46-
/**
47-
* Default constructor.
48-
*/
49-
public AbstractDataStore() {
50-
// nothing
51-
}
52-
5346
private static final Logger logger = LogManager.getLogger(AbstractDataStore.class);
5447

5548
/**
@@ -64,8 +57,16 @@ public AbstractDataStore() {
6457

6558
/**
6659
* The flag to check if the data store is alive.
60+
* Volatile to ensure visibility across threads.
6761
*/
68-
protected boolean alive = true;
62+
protected volatile boolean alive = true;
63+
64+
/**
65+
* Default constructor.
66+
*/
67+
public AbstractDataStore() {
68+
// nothing
69+
}
6970

7071
/**
7172
* Register this data store.

src/main/java/org/codelibs/fess/ds/DataStoreFactory.java

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import java.util.Locale;
2828
import java.util.Map;
2929
import java.util.Set;
30+
import java.util.concurrent.ConcurrentHashMap;
3031
import java.util.stream.Collectors;
3132

3233
import javax.xml.parsers.DocumentBuilder;
@@ -64,20 +65,22 @@ public class DataStoreFactory {
6465
/**
6566
* Map containing registered data store instances indexed by their names and class simple names.
6667
* All keys are stored in lowercase for case-insensitive lookup.
68+
* Thread-safe implementation using ConcurrentHashMap for concurrent access.
6769
*/
68-
protected Map<String, DataStore> dataStoreMap = new LinkedHashMap<>();
70+
protected Map<String, DataStore> dataStoreMap = new ConcurrentHashMap<>();
6971

7072
/**
7173
* Cached array of available data store names discovered from plugin JAR files.
7274
* This cache is refreshed periodically based on the lastLoadedTime.
7375
*/
74-
protected String[] dataStoreNames = StringUtil.EMPTY_STRINGS;
76+
protected volatile String[] dataStoreNames = StringUtil.EMPTY_STRINGS;
7577

7678
/**
7779
* Timestamp of the last time data store names were loaded from plugin files.
7880
* Used to implement a time-based cache refresh mechanism.
81+
* Volatile to ensure visibility across threads.
7982
*/
80-
protected long lastLoadedTime = 0;
83+
protected volatile long lastLoadedTime = 0;
8184

8285
/**
8386
* Creates a new instance of DataStoreFactory.
@@ -130,7 +133,7 @@ public DataStore getDataStore(final String name) {
130133
*
131134
* @return array of data store names sorted alphabetically, never null
132135
*/
133-
public String[] getDataStoreNames() {
136+
public synchronized String[] getDataStoreNames() {
134137
final long now = ComponentUtil.getSystemHelper().getCurrentTimeAsLong();
135138
if (now - lastLoadedTime > 60000L) {
136139
final List<String> nameList = loadDataStoreNameList();
@@ -154,9 +157,18 @@ public String[] getDataStoreNames() {
154157
protected List<String> loadDataStoreNameList() {
155158
final Set<String> nameSet = new HashSet<>();
156159
final File[] jarFiles = ResourceUtil.getPluginJarFiles(PluginHelper.ArtifactType.DATA_STORE.getId());
160+
if (jarFiles == null) {
161+
return nameSet.stream().sorted().collect(Collectors.toList());
162+
}
157163
for (final File jarFile : jarFiles) {
158164
try (FileSystem fs = FileSystems.newFileSystem(jarFile.toPath(), ClassLoader.getSystemClassLoader())) {
159165
final Path xmlPath = fs.getPath("fess_ds++.xml");
166+
if (!Files.exists(xmlPath)) {
167+
if (logger.isDebugEnabled()) {
168+
logger.debug("Configuration file not found in {}", jarFile.getAbsolutePath());
169+
}
170+
continue;
171+
}
160172
try (InputStream is = Files.newInputStream(xmlPath)) {
161173
final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
162174
factory.setFeature(org.codelibs.fess.crawler.Constants.FEATURE_SECURE_PROCESSING, true);

src/main/java/org/codelibs/fess/ds/callback/FileListIndexUpdateCallbackImpl.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.util.Map;
2626
import java.util.Objects;
2727
import java.util.Set;
28+
import java.util.concurrent.CopyOnWriteArrayList;
2829
import java.util.concurrent.ExecutorService;
2930
import java.util.concurrent.LinkedBlockingQueue;
3031
import java.util.concurrent.ThreadPoolExecutor;
@@ -34,7 +35,7 @@
3435

3536
import org.apache.logging.log4j.LogManager;
3637
import org.apache.logging.log4j.Logger;
37-
import org.apache.poi.util.StringUtil;
38+
import org.codelibs.core.lang.StringUtil;
3839
import org.codelibs.fess.Constants;
3940
import org.codelibs.fess.crawler.builder.RequestDataBuilder;
4041
import org.codelibs.fess.crawler.client.CrawlerClient;
@@ -89,8 +90,8 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
8990
/** Factory for creating crawler clients to handle different URL schemes. */
9091
protected CrawlerClientFactory crawlerClientFactory;
9192

92-
/** List of URLs to be deleted, cached for batch processing. */
93-
protected List<String> deleteUrlList = new ArrayList<>(100);
93+
/** List of URLs to be deleted, cached for batch processing. Thread-safe using CopyOnWriteArrayList. */
94+
protected List<String> deleteUrlList = new CopyOnWriteArrayList<>();
9495

9596
/** Maximum size of the delete URL cache before batch deletion is triggered. */
9697
protected int maxDeleteDocumentCacheSize;

0 commit comments

Comments
 (0)