Skip to content

Commit 18ce934

Browse files
committed
(fix)(headless)Fix concurrent read/write search trie issue.
1 parent 6fe0ebc commit 18ce934

File tree

3 files changed

+132
-40
lines changed

3 files changed

+132
-40
lines changed

headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/KnowledgeBaseService.java

Lines changed: 130 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -8,43 +8,107 @@
88
import org.springframework.stereotype.Service;
99

1010
import java.util.ArrayList;
11+
import java.util.Collections;
1112
import java.util.HashMap;
1213
import java.util.List;
1314
import java.util.Map;
1415
import java.util.Set;
16+
import java.util.concurrent.ConcurrentHashMap;
17+
import java.util.concurrent.locks.ReentrantReadWriteLock;
1518
import java.util.stream.Collectors;
1619

1720
@Service
1821
@Slf4j
1922
public class KnowledgeBaseService {
20-
private static volatile Map<Long, List<DictWord>> dimValueAliasMap = new HashMap<>();
21-
23+
private static final Map<Long, List<DictWord>> dimValueAliasMap = new ConcurrentHashMap<>();
24+
private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
25+
26+
/**
27+
* Get dimension value alias map (read-only).
28+
*
29+
* @return unmodifiable view of the map; the value lists themselves remain mutable and shared
30+
*/
2231
public static Map<Long, List<DictWord>> getDimValueAlias() {
23-
return dimValueAliasMap;
32+
return Collections.unmodifiableMap(dimValueAliasMap);
2433
}
2534

35+
/**
36+
* Add dimension value aliases with deduplication. Thread-safe implementation using
37+
* a synchronized block on the shared alias map (a ConcurrentHashMap).
38+
*
39+
* @param dimId dimension ID
40+
* @param newWords new words to add
41+
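* @return the live alias list for the dimension; null when newWords is empty and dimId has no aliases (NOTE(review): a null dimId reaches ConcurrentHashMap.get(null), which throws NullPointerException)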
42+
*/
2643
public static List<DictWord> addDimValueAlias(Long dimId, List<DictWord> newWords) {
27-
List<DictWord> dimValueAlias =
28-
dimValueAliasMap.containsKey(dimId) ? dimValueAliasMap.get(dimId)
29-
: new ArrayList<>();
30-
Set<String> wordSet =
31-
dimValueAlias
32-
.stream().map(word -> String.format("%s_%s_%s",
33-
word.getNatureWithFrequency(), word.getWord(), word.getAlias()))
34-
.collect(Collectors.toSet());
35-
for (DictWord dictWord : newWords) {
36-
String key = String.format("%s_%s_%s", dictWord.getNatureWithFrequency(),
37-
dictWord.getWord(), dictWord.getAlias());
38-
if (!wordSet.contains(key)) {
39-
dimValueAlias.add(dictWord);
44+
if (dimId == null || CollectionUtils.isEmpty(newWords)) {
45+
return dimValueAliasMap.get(dimId);
46+
}
47+
48+
// Synchronize on the map so the read-deduplicate-append sequence on the shared ArrayList is atomic across callers of this method
49+
synchronized (dimValueAliasMap) {
50+
List<DictWord> dimValueAlias =
51+
dimValueAliasMap.computeIfAbsent(dimId, k -> new ArrayList<>());
52+
53+
// Build deduplication key set
54+
Set<String> existingKeys = dimValueAlias.stream().map(word -> buildDedupKey(word))
55+
.collect(Collectors.toSet());
56+
57+
// Add new words with deduplication
58+
for (DictWord dictWord : newWords) {
59+
String key = buildDedupKey(dictWord);
60+
if (!existingKeys.contains(key)) {
61+
dimValueAlias.add(dictWord);
62+
existingKeys.add(key);
63+
}
4064
}
65+
66+
return dimValueAlias;
67+
}
68+
}
69+
70+
/**
71+
* Remove dimension value aliases by dimension ID.
72+
*
73+
* @param dimId dimension ID to remove, or null to clear all
74+
*/
75+
public static void removeDimValueAlias(Long dimId) {
76+
if (dimId == null) {
77+
dimValueAliasMap.clear();
78+
log.info("Cleared all dimension value aliases");
79+
} else {
80+
dimValueAliasMap.remove(dimId);
81+
log.info("Removed dimension value alias for dimId: {}", dimId);
4182
}
42-
dimValueAliasMap.put(dimId, dimValueAlias);
43-
return dimValueAlias;
4483
}
4584

85+
/**
86+
* Build deduplication key for DictWord.
87+
*
88+
* @param word the DictWord object
89+
* @return deduplication key string
90+
*/
91+
private static String buildDedupKey(DictWord word) {
92+
return String.format("%s_%s_%s", word.getNatureWithFrequency(), word.getWord(),
93+
word.getAlias());
94+
}
95+
96+
/**
97+
* Update semantic knowledge (incremental add, no clearing). Use this method to add new words
98+
* without removing existing data.
99+
*
100+
* @param natures the words to add
101+
*/
46102
public void updateSemanticKnowledge(List<DictWord> natures) {
103+
lock.writeLock().lock();
104+
try {
105+
updateSemanticKnowledgeInternal(natures);
106+
} finally {
107+
lock.writeLock().unlock();
108+
}
109+
}
47110

111+
private void updateSemanticKnowledgeInternal(List<DictWord> natures) {
48112
List<DictWord> prefixes = natures.stream().filter(
49113
entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
50114
.collect(Collectors.toList());
@@ -60,52 +124,82 @@ public void updateSemanticKnowledge(List<DictWord> natures) {
60124
SearchService.loadSuffix(suffixes);
61125
}
62126

127+
/**
128+
* Reload all knowledge (full replacement with clearing). Use this method to rebuild the entire
129+
* knowledge base.
130+
*
131+
* @param natures all words to load
132+
*/
63133
public void reloadAllData(List<DictWord> natures) {
64-
// 1. reload custom knowledge
134+
// 1. reload custom knowledge (executed outside lock to avoid long blocking)
65135
try {
66136
HanlpHelper.reloadCustomDictionary();
67137
} catch (Exception e) {
68138
log.error("reloadCustomDictionary error", e);
69139
}
70140

71-
// 2. update online knowledge
72-
if (CollectionUtils.isNotEmpty(dimValueAliasMap)) {
73-
for (Long dimId : dimValueAliasMap.keySet()) {
74-
natures.addAll(dimValueAliasMap.get(dimId));
141+
// 2. acquire write lock, clear trie, append cached dim value aliases to natures (mutates the caller's list), then rebuild (short operation)
142+
lock.writeLock().lock();
143+
try {
144+
SearchService.clear();
145+
146+
if (CollectionUtils.isNotEmpty(dimValueAliasMap)) {
147+
for (Long dimId : dimValueAliasMap.keySet()) {
148+
natures.addAll(dimValueAliasMap.get(dimId));
149+
}
75150
}
151+
updateSemanticKnowledgeInternal(natures);
152+
} finally {
153+
lock.writeLock().unlock();
76154
}
77-
updateOnlineKnowledge(natures);
78155
}
79156

80-
private void updateOnlineKnowledge(List<DictWord> natures) {
157+
public List<S2Term> getTerms(String text, Map<Long, List<Long>> modelIdToDataSetIds) {
158+
lock.readLock().lock();
81159
try {
82-
updateSemanticKnowledge(natures);
83-
} catch (Exception e) {
84-
log.error("updateSemanticKnowledge error", e);
160+
return HanlpHelper.getTerms(text, modelIdToDataSetIds);
161+
} finally {
162+
lock.readLock().unlock();
85163
}
86164
}
87165

88-
public List<S2Term> getTerms(String text, Map<Long, List<Long>> modelIdToDataSetIds) {
89-
return HanlpHelper.getTerms(text, modelIdToDataSetIds);
90-
}
91-
92166
public List<HanlpMapResult> prefixSearch(String key, int limit,
93167
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
94-
return prefixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
168+
lock.readLock().lock();
169+
try {
170+
return prefixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
171+
} finally {
172+
lock.readLock().unlock();
173+
}
95174
}
96175

97176
public List<HanlpMapResult> prefixSearchByModel(String key, int limit,
98177
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
99-
return SearchService.prefixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
178+
lock.readLock().lock();
179+
try {
180+
return SearchService.prefixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
181+
} finally {
182+
lock.readLock().unlock();
183+
}
100184
}
101185

102186
public List<HanlpMapResult> suffixSearch(String key, int limit,
103187
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
104-
return suffixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
188+
lock.readLock().lock();
189+
try {
190+
return suffixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
191+
} finally {
192+
lock.readLock().unlock();
193+
}
105194
}
106195

107196
public List<HanlpMapResult> suffixSearchByModel(String key, int limit,
108197
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
109-
return SearchService.suffixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
198+
lock.readLock().lock();
199+
try {
200+
return SearchService.suffixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
201+
} finally {
202+
lock.readLock().unlock();
203+
}
110204
}
111205
}

headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/SearchService.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@
3131
public class SearchService {
3232

3333
public static final int SEARCH_SIZE = 200;
34-
private static BinTrie<List<String>> trie;
35-
private static BinTrie<List<String>> suffixTrie;
34+
private static volatile BinTrie<List<String>> trie;
35+
private static volatile BinTrie<List<String>> suffixTrie;
3636

3737
static {
3838
trie = new BinTrie<>();

headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/helper/HanlpHelper.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,6 @@ public static boolean reloadCustomDictionary() throws IOException {
100100
FileHelper.deleteCacheFile(HanLP.Config.CustomDictionaryPath);
101101
FileHelper.resetCustomPath(getDynamicCustomDictionary());
102102
}
103-
// 3.clear trie
104-
SearchService.clear();
105103

106104
boolean reload = getDynamicCustomDictionary().reload();
107105
if (reload) {

0 commit comments

Comments
 (0)