Skip to content

Commit 7ad70ea

Browse files
committed
feat: optimize semantic similarity calculation with cached base words
1 parent de2777f commit 7ad70ea

File tree

1 file changed

+108
-24
lines changed

1 file changed

+108
-24
lines changed

packages/core/src/suggest/community.ts

Lines changed: 108 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,57 @@ interface BuildWeightedGraphOptions {
115115
similarityThreshold?: number
116116
}
117117

118+
/**
 * Calculate the semantic similarity of two labels using base words that the
 * caller has already extracted (and typically cached) for each label.
 *
 * The checks run in this order and the first match wins:
 * 1. Identical labels score 1.
 * 2. If either lowercased label contains the other, the score is the length
 *    ratio `shorter / longer`. An empty label is contained in every string,
 *    so an empty label compared with a non-empty one scores 0 here.
 * 3. Otherwise the score is the Jaccard index of the two word sets:
 *    shared distinct words divided by total distinct words.
 *
 * @param labelA - First label to compare.
 * @param labelB - Second label to compare.
 * @param wordsA - Base words for `labelA`; duplicates are ignored. Not mutated.
 * @param wordsB - Base words for `labelB`; duplicates are ignored. Not mutated.
 * @returns A similarity score between 0 and 1 (inclusive).
 */
function calculateSemanticSimilarityCached(
  labelA: string,
  labelB: string,
  wordsA: readonly string[],
  wordsB: readonly string[],
): number {
  if (labelA === labelB) {
    return 1;
  }

  const lowerA = labelA.toLowerCase();
  const lowerB = labelB.toLowerCase();

  // Case-insensitive containment: score by how much of the longer label the
  // shorter one covers. Labels differing only by case score 1 here.
  if (lowerA.includes(lowerB) || lowerB.includes(lowerA)) {
    const aIsShorter = lowerA.length < lowerB.length;
    const shorter = aIsShorter ? lowerA : lowerB;
    const longer = aIsShorter ? lowerB : lowerA;
    return shorter.length / longer.length;
  }

  // Without words on both sides there is nothing to overlap.
  if (wordsA.length === 0 || wordsB.length === 0) {
    return 0;
  }

  const setA = new Set(wordsA);
  const setB = new Set(wordsB);

  let sharedCount = 0;
  for (const word of setA) {
    if (setB.has(word)) {
      sharedCount++;
    }
  }

  if (sharedCount === 0) {
    return 0;
  }

  // Jaccard index: |A ∩ B| / |A ∪ B|, with |A ∪ B| = |A| + |B| - |A ∩ B|.
  return sharedCount / (setA.size + setB.size - sharedCount);
}
161+
118162
/**
119163
* Build a weighted graph that combines structural connections with semantic similarity.
164+
*
165+
* Optimized algorithm:
166+
* 1. Cache extractBaseWords results to avoid repeated computation
167+
* 2. Build word-to-nodes bucket map, only compare nodes within same bucket
168+
* This reduces O(N²) to O(B × K²) where B = number of buckets, K = avg nodes per bucket
120169
*/
121170
function buildWeightedGraph(
122171
graph: Map<TypedNode, Set<{ node: TypedNode, type: RelationType }>>,
@@ -155,30 +204,65 @@ function buildWeightedGraph(
155204
}
156205

157206
if (semanticWeight > 0) {
158-
const nodeArray = Array.from(allNodes);
159-
for (let i = 0; i < nodeArray.length; i++) {
160-
for (let j = i + 1; j < nodeArray.length; j++) {
161-
const nodeA = nodeArray[i];
162-
const nodeB = nodeArray[j];
163-
164-
const pairKey = [nodeA.label, nodeB.label].sort().join('|');
165-
const isConnected = connectedPairs.has(pairKey);
166-
167-
const similarity = calculateSemanticSimilarity(nodeA.label, nodeB.label);
168-
if (similarity > similarityThreshold) {
169-
const semanticEdgeWeight = similarity * semanticWeight;
170-
171-
const currentAB = weighted.get(nodeA)!.get(nodeB) || 0;
172-
const newWeightAB = isConnected
173-
? Math.max(currentAB, semanticEdgeWeight)
174-
: currentAB + semanticEdgeWeight;
175-
weighted.get(nodeA)!.set(nodeB, Math.min(newWeightAB, 2.0));
176-
177-
const currentBA = weighted.get(nodeB)!.get(nodeA) || 0;
178-
const newWeightBA = isConnected
179-
? Math.max(currentBA, semanticEdgeWeight)
180-
: currentBA + semanticEdgeWeight;
181-
weighted.get(nodeB)!.set(nodeA, Math.min(newWeightBA, 2.0));
207+
const nodeWordsCache = new Map<TypedNode, string[]>();
208+
const wordToBucket = new Map<string, Set<TypedNode>>();
209+
210+
for (const node of allNodes) {
211+
const words = extractBaseWords(node.label);
212+
nodeWordsCache.set(node, words);
213+
214+
for (const word of words) {
215+
if (!wordToBucket.has(word)) {
216+
wordToBucket.set(word, new Set());
217+
}
218+
wordToBucket.get(word)!.add(node);
219+
}
220+
}
221+
222+
const comparedPairs = new Set<string>();
223+
224+
for (const [_, bucket] of wordToBucket) {
225+
if (bucket.size < 2) {
226+
continue;
227+
}
228+
229+
const bucketNodes = Array.from(bucket);
230+
for (let i = 0; i < bucketNodes.length; i++) {
231+
for (let j = i + 1; j < bucketNodes.length; j++) {
232+
const nodeA = bucketNodes[i];
233+
const nodeB = bucketNodes[j];
234+
235+
const comparedKey = [nodeA.label, nodeB.label].sort().join('|');
236+
if (comparedPairs.has(comparedKey)) {
237+
continue;
238+
}
239+
comparedPairs.add(comparedKey);
240+
241+
const wordsA = nodeWordsCache.get(nodeA)!;
242+
const wordsB = nodeWordsCache.get(nodeB)!;
243+
const similarity = calculateSemanticSimilarityCached(
244+
nodeA.label,
245+
nodeB.label,
246+
wordsA,
247+
wordsB,
248+
);
249+
250+
if (similarity > similarityThreshold) {
251+
const isConnected = connectedPairs.has(comparedKey);
252+
const semanticEdgeWeight = similarity * semanticWeight;
253+
254+
const currentAB = weighted.get(nodeA)!.get(nodeB) || 0;
255+
const newWeightAB = isConnected
256+
? Math.max(currentAB, semanticEdgeWeight)
257+
: currentAB + semanticEdgeWeight;
258+
weighted.get(nodeA)!.set(nodeB, Math.min(newWeightAB, 2.0));
259+
260+
const currentBA = weighted.get(nodeB)!.get(nodeA) || 0;
261+
const newWeightBA = isConnected
262+
? Math.max(currentBA, semanticEdgeWeight)
263+
: currentBA + semanticEdgeWeight;
264+
weighted.get(nodeB)!.set(nodeA, Math.min(newWeightBA, 2.0));
265+
}
182266
}
183267
}
184268
}

0 commit comments

Comments
 (0)