88import org .springframework .stereotype .Service ;
99
1010import java .util .ArrayList ;
11+ import java .util .Collections ;
1112import java .util .HashMap ;
1213import java .util .List ;
1314import java .util .Map ;
1415import java .util .Set ;
16+ import java .util .concurrent .ConcurrentHashMap ;
17+ import java .util .concurrent .locks .ReentrantReadWriteLock ;
1518import java .util .stream .Collectors ;
1619
1720@ Service
1821@ Slf4j
1922public class KnowledgeBaseService {
20- private static volatile Map <Long , List <DictWord >> dimValueAliasMap = new HashMap <>();
21-
23+ private static final Map <Long , List <DictWord >> dimValueAliasMap = new ConcurrentHashMap <>();
24+ private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock ();
25+
26+ /**
27+ * Get dimension value alias map (read-only).
28+ *
29+ * @return unmodifiable view of the map
30+ */
2231 public static Map <Long , List <DictWord >> getDimValueAlias () {
23- return dimValueAliasMap ;
32+ return Collections . unmodifiableMap ( dimValueAliasMap ) ;
2433 }
2534
35+ /**
36+ * Add dimension value aliases with deduplication. Thread-safe implementation using
37+ * ConcurrentHashMap.
38+ *
39+ * @param dimId dimension ID
40+ * @param newWords new words to add
41+ * @return updated list of aliases for the dimension
42+ */
2643 public static List <DictWord > addDimValueAlias (Long dimId , List <DictWord > newWords ) {
27- List <DictWord > dimValueAlias =
28- dimValueAliasMap .containsKey (dimId ) ? dimValueAliasMap .get (dimId )
29- : new ArrayList <>();
30- Set <String > wordSet =
31- dimValueAlias
32- .stream ().map (word -> String .format ("%s_%s_%s" ,
33- word .getNatureWithFrequency (), word .getWord (), word .getAlias ()))
34- .collect (Collectors .toSet ());
35- for (DictWord dictWord : newWords ) {
36- String key = String .format ("%s_%s_%s" , dictWord .getNatureWithFrequency (),
37- dictWord .getWord (), dictWord .getAlias ());
38- if (!wordSet .contains (key )) {
39- dimValueAlias .add (dictWord );
44+ if (dimId == null || CollectionUtils .isEmpty (newWords )) {
45+ return dimValueAliasMap .get (dimId );
46+ }
47+
48+ // Use computeIfAbsent and synchronized block for thread safety
49+ synchronized (dimValueAliasMap ) {
50+ List <DictWord > dimValueAlias =
51+ dimValueAliasMap .computeIfAbsent (dimId , k -> new ArrayList <>());
52+
53+ // Build deduplication key set
54+ Set <String > existingKeys = dimValueAlias .stream ().map (word -> buildDedupKey (word ))
55+ .collect (Collectors .toSet ());
56+
57+ // Add new words with deduplication
58+ for (DictWord dictWord : newWords ) {
59+ String key = buildDedupKey (dictWord );
60+ if (!existingKeys .contains (key )) {
61+ dimValueAlias .add (dictWord );
62+ existingKeys .add (key );
63+ }
4064 }
65+
66+ return dimValueAlias ;
67+ }
68+ }
69+
70+ /**
71+ * Remove dimension value aliases by dimension ID.
72+ *
73+ * @param dimId dimension ID to remove, or null to clear all
74+ */
75+ public static void removeDimValueAlias (Long dimId ) {
76+ if (dimId == null ) {
77+ dimValueAliasMap .clear ();
78+ log .info ("Cleared all dimension value aliases" );
79+ } else {
80+ dimValueAliasMap .remove (dimId );
81+ log .info ("Removed dimension value alias for dimId: {}" , dimId );
4182 }
42- dimValueAliasMap .put (dimId , dimValueAlias );
43- return dimValueAlias ;
4483 }
4584
85+ /**
86+ * Build deduplication key for DictWord.
87+ *
88+ * @param word the DictWord object
89+ * @return deduplication key string
90+ */
91+ private static String buildDedupKey (DictWord word ) {
92+ return String .format ("%s_%s_%s" , word .getNatureWithFrequency (), word .getWord (),
93+ word .getAlias ());
94+ }
95+
96+ /**
97+ * Update semantic knowledge (incremental add, no clearing). Use this method to add new words
98+ * without removing existing data.
99+ *
100+ * @param natures the words to add
101+ */
46102 public void updateSemanticKnowledge (List <DictWord > natures ) {
103+ lock .writeLock ().lock ();
104+ try {
105+ updateSemanticKnowledgeInternal (natures );
106+ } finally {
107+ lock .writeLock ().unlock ();
108+ }
109+ }
47110
111+ private void updateSemanticKnowledgeInternal (List <DictWord > natures ) {
48112 List <DictWord > prefixes = natures .stream ().filter (
49113 entry -> !entry .getNatureWithFrequency ().contains (DictWordType .SUFFIX .getType ()))
50114 .collect (Collectors .toList ());
@@ -60,52 +124,82 @@ public void updateSemanticKnowledge(List<DictWord> natures) {
60124 SearchService .loadSuffix (suffixes );
61125 }
62126
127+ /**
128+ * Reload all knowledge (full replacement with clearing). Use this method to rebuild the entire
129+ * knowledge base.
130+ *
131+ * @param natures all words to load
132+ */
63133 public void reloadAllData (List <DictWord > natures ) {
64- // 1. reload custom knowledge
134+ // 1. reload custom knowledge (executed outside lock to avoid long blocking)
65135 try {
66136 HanlpHelper .reloadCustomDictionary ();
67137 } catch (Exception e ) {
68138 log .error ("reloadCustomDictionary error" , e );
69139 }
70140
71- // 2. update online knowledge
72- if (CollectionUtils .isNotEmpty (dimValueAliasMap )) {
73- for (Long dimId : dimValueAliasMap .keySet ()) {
74- natures .addAll (dimValueAliasMap .get (dimId ));
141+ // 2. acquire write lock, clear trie and rebuild (short operation)
142+ lock .writeLock ().lock ();
143+ try {
144+ SearchService .clear ();
145+
146+ if (CollectionUtils .isNotEmpty (dimValueAliasMap )) {
147+ for (Long dimId : dimValueAliasMap .keySet ()) {
148+ natures .addAll (dimValueAliasMap .get (dimId ));
149+ }
75150 }
151+ updateSemanticKnowledgeInternal (natures );
152+ } finally {
153+ lock .writeLock ().unlock ();
76154 }
77- updateOnlineKnowledge (natures );
78155 }
79156
80- private void updateOnlineKnowledge (List <DictWord > natures ) {
157+ public List <S2Term > getTerms (String text , Map <Long , List <Long >> modelIdToDataSetIds ) {
158+ lock .readLock ().lock ();
81159 try {
82- updateSemanticKnowledge ( natures );
83- } catch ( Exception e ) {
84- log . error ( "updateSemanticKnowledge error" , e );
160+ return HanlpHelper . getTerms ( text , modelIdToDataSetIds );
161+ } finally {
162+ lock . readLock (). unlock ( );
85163 }
86164 }
87165
88- public List <S2Term > getTerms (String text , Map <Long , List <Long >> modelIdToDataSetIds ) {
89- return HanlpHelper .getTerms (text , modelIdToDataSetIds );
90- }
91-
92166 public List <HanlpMapResult > prefixSearch (String key , int limit ,
93167 Map <Long , List <Long >> modelIdToDataSetIds , Set <Long > detectDataSetIds ) {
94- return prefixSearchByModel (key , limit , modelIdToDataSetIds , detectDataSetIds );
168+ lock .readLock ().lock ();
169+ try {
170+ return prefixSearchByModel (key , limit , modelIdToDataSetIds , detectDataSetIds );
171+ } finally {
172+ lock .readLock ().unlock ();
173+ }
95174 }
96175
97176 public List <HanlpMapResult > prefixSearchByModel (String key , int limit ,
98177 Map <Long , List <Long >> modelIdToDataSetIds , Set <Long > detectDataSetIds ) {
99- return SearchService .prefixSearch (key , limit , modelIdToDataSetIds , detectDataSetIds );
178+ lock .readLock ().lock ();
179+ try {
180+ return SearchService .prefixSearch (key , limit , modelIdToDataSetIds , detectDataSetIds );
181+ } finally {
182+ lock .readLock ().unlock ();
183+ }
100184 }
101185
102186 public List <HanlpMapResult > suffixSearch (String key , int limit ,
103187 Map <Long , List <Long >> modelIdToDataSetIds , Set <Long > detectDataSetIds ) {
104- return suffixSearchByModel (key , limit , modelIdToDataSetIds , detectDataSetIds );
188+ lock .readLock ().lock ();
189+ try {
190+ return suffixSearchByModel (key , limit , modelIdToDataSetIds , detectDataSetIds );
191+ } finally {
192+ lock .readLock ().unlock ();
193+ }
105194 }
106195
107196 public List <HanlpMapResult > suffixSearchByModel (String key , int limit ,
108197 Map <Long , List <Long >> modelIdToDataSetIds , Set <Long > detectDataSetIds ) {
109- return SearchService .suffixSearch (key , limit , modelIdToDataSetIds , detectDataSetIds );
198+ lock .readLock ().lock ();
199+ try {
200+ return SearchService .suffixSearch (key , limit , modelIdToDataSetIds , detectDataSetIds );
201+ } finally {
202+ lock .readLock ().unlock ();
203+ }
110204 }
111205}
0 commit comments