Skip to content

Commit bc5b797

Browse files
committed
Improve string distance measurement
Use the sift4 algorithm for code completions and move the completion-related functions into a StringUtils module.
1 parent 35e9e17 commit bc5b797

File tree

3 files changed

+101
-35
lines changed

3 files changed

+101
-35
lines changed

server/src/main/kotlin/org/javacs/kt/completion/Completions.kt

Lines changed: 6 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@ import org.eclipse.lsp4j.CompletionList
77
import org.javacs.kt.CompiledFile
88
import org.javacs.kt.LOG
99
import org.javacs.kt.CompletionConfiguration
10+
import org.javacs.kt.util.containsCharactersInOrder
1011
import org.javacs.kt.util.findParent
1112
import org.javacs.kt.util.noResult
13+
import org.javacs.kt.util.stringDistance
1214
import org.javacs.kt.util.toPath
1315
import org.javacs.kt.util.onEachIndexed
1416
import org.jetbrains.kotlin.container.get
@@ -43,6 +45,7 @@ import org.jetbrains.kotlin.types.checker.KotlinTypeChecker
4345
import java.util.concurrent.TimeUnit
4446

4547
private const val MAX_COMPLETION_ITEMS = 50
48+
private const val MAX_STRING_DISTANCE = 10
4649

4750
/** Finds completions at the specified position. */
4851
fun completions(file: CompiledFile, cursor: Int, config: CompletionConfiguration): CompletionList {
@@ -74,8 +77,7 @@ private fun elementCompletionItems(file: CompiledFile, cursor: Int, config: Comp
7477
val surroundingElement = completableElement(file, cursor) ?: return emptySequence()
7578
val completions = elementCompletions(file, cursor, surroundingElement)
7679

77-
val nameFilter = matchesPartialIdentifier(partial)
78-
val matchesName = completions.filter(nameFilter)
80+
val matchesName = completions.sortedBy { stringDistance(name(it), partial).takeIf { it < MAX_STRING_DISTANCE || partial.isEmpty() } }
7981
val visible = matchesName.filter(isVisible(file, cursor))
8082

8183
return visible.map { completionItem(it, surroundingElement, file, config) }
@@ -348,15 +350,8 @@ private fun implicitMembers(scope: HierarchicalScope): Sequence<DeclarationDescr
348350
return implicit.type.memberScope.getContributedDescriptors().asSequence()
349351
}
350352

351-
private fun equalsIdentifier(identifier: String): (DeclarationDescriptor) -> Boolean {
352-
return { name(it) == identifier }
353-
}
354-
355-
private fun matchesPartialIdentifier(partialIdentifier: String): (DeclarationDescriptor) -> Boolean {
356-
return {
357-
containsCharactersInOrder(name(it), partialIdentifier, false)
358-
}
359-
}
353+
/** Builds a predicate accepting exactly those descriptors for which `name(descriptor)` equals [identifier]. */
private fun equalsIdentifier(identifier: String): (DeclarationDescriptor) -> Boolean {
    return { descriptor -> name(descriptor) == identifier }
}
360355

361356
private fun name(d: DeclarationDescriptor): String {
362357
if (d is ConstructorDescriptor)
@@ -365,29 +360,6 @@ private fun name(d: DeclarationDescriptor): String {
365360
return d.name.identifier
366361
}
367362

368-
fun containsCharactersInOrder(
369-
candidate: CharSequence, pattern: CharSequence, caseSensitive: Boolean): Boolean {
370-
var iCandidate = 0
371-
var iPattern = 0
372-
373-
while (iCandidate < candidate.length && iPattern < pattern.length) {
374-
var patternChar = pattern[iPattern]
375-
var testChar = candidate[iCandidate]
376-
377-
if (!caseSensitive) {
378-
patternChar = Character.toLowerCase(patternChar)
379-
testChar = Character.toLowerCase(testChar)
380-
}
381-
382-
if (patternChar == testChar) {
383-
iPattern++
384-
iCandidate++
385-
} else iCandidate++
386-
}
387-
388-
return iPattern == pattern.length
389-
}
390-
391363
private fun isVisible(file: CompiledFile, cursor: Int): (DeclarationDescriptor) -> Boolean {
392364
val el = file.elementAtPoint(cursor) ?: return { true }
393365
val from = el.parentsWithSelf

server/src/main/kotlin/org/javacs/kt/symbols/Symbols.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ import org.eclipse.lsp4j.SymbolKind
77
import org.eclipse.lsp4j.DocumentSymbol
88
import org.eclipse.lsp4j.jsonrpc.messages.Either
99
import org.javacs.kt.SourcePath
10-
import org.javacs.kt.completion.containsCharactersInOrder
1110
import org.javacs.kt.position.range
11+
import org.javacs.kt.util.containsCharactersInOrder
1212
import org.javacs.kt.util.preOrderTraversal
1313
import org.javacs.kt.util.toPath
1414
import org.jetbrains.kotlin.psi.*
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
package org.javacs.kt.util
2+
3+
/**
 * Computes a string distance using a slightly modified
 * variant of the SIFT4 algorithm in linear time.
 * Note that the function is asymmetric with respect to
 * its two input strings and thus is not a metric in the
 * mathematical sense.
 *
 * Based on the JavaScript implementation from
 * https://siderite.dev/blog/super-fast-and-accurate-string-distance.html/
 *
 * If either input is empty, the distance is the length of the other input.
 * Otherwise the result is the length of the longer input minus the number
 * of characters that were matched while scanning both strings in parallel.
 *
 * @param candidate The first string
 * @param pattern The second string
 * @param maxOffset The number of characters to search for matching letters
 * @return The computed distance; `0` for identical strings
 */
fun stringDistance(candidate: CharSequence, pattern: CharSequence, maxOffset: Int = 4): Int = when {
    candidate.isEmpty() -> pattern.length
    pattern.isEmpty() -> candidate.length
    else -> {
        val candidateLength = candidate.length
        val patternLength = pattern.length
        var iCandidate = 0
        var iPattern = 0
        var longestCommonSubsequence = 0
        var localCommonSubstring = 0

        scan@
        while (iCandidate < candidateLength && iPattern < patternLength) {
            if (candidate[iCandidate] == pattern[iPattern]) {
                localCommonSubstring++
            } else {
                // The current run of matches ended: bank it and start a new one.
                longestCommonSubsequence += localCommonSubstring
                localCommonSubstring = 0

                if (iCandidate != iPattern) {
                    // Using max to bypass the need for computing transpositions ("ab" vs "ba")
                    val iMax = maxOf(iCandidate, iPattern)
                    iCandidate = iMax
                    iPattern = iMax

                    // Syncing both cursors may move the cursor of the shorter string
                    // past its end. Nothing more can match then, and the window search
                    // below would index out of bounds, so stop scanning here.
                    if (iCandidate >= candidateLength || iPattern >= patternLength) {
                        break@scan
                    }
                }

                // Look ahead up to maxOffset characters for a position where the
                // strings line up again. The pattern-side lookahead is only tried
                // once the candidate-side offset has run past the candidate's end.
                searchWindow@
                for (i in 0 until maxOffset) {
                    when {
                        (iCandidate + i) < candidateLength -> {
                            if (candidate[iCandidate + i] == pattern[iPattern]) {
                                iCandidate += i
                                localCommonSubstring++
                                break@searchWindow
                            }
                        }
                        (iPattern + i) < patternLength -> {
                            if (candidate[iCandidate] == pattern[iPattern + i]) {
                                iPattern += i
                                localCommonSubstring++
                                break@searchWindow
                            }
                        }
                        else -> break@searchWindow
                    }
                }
            }

            iCandidate++
            iPattern++
        }

        // Bank the final run of matches.
        longestCommonSubsequence += localCommonSubstring
        maxOf(candidateLength, patternLength) - longestCommonSubsequence
    }
}
72+
73+
/**
 * Tests whether every character of [pattern] occurs in [candidate] in the same
 * relative order (not necessarily adjacent). An empty [pattern] always matches.
 *
 * @param candidate The string that is searched
 * @param pattern The characters to look for, in order
 * @param caseSensitive If `false`, both characters are lower-cased before they are compared
 * @return `true` if the whole [pattern] was found in order, `false` otherwise
 */
fun containsCharactersInOrder(candidate: CharSequence, pattern: CharSequence, caseSensitive: Boolean): Boolean {
    fun normalize(c: Char): Char = if (caseSensitive) c else Character.toLowerCase(c)

    // Number of leading pattern characters that have been found so far.
    var matched = 0

    for (candidateChar in candidate) {
        if (matched >= pattern.length) break
        if (normalize(pattern[matched]) == normalize(candidateChar)) {
            matched += 1
        }
    }

    return matched == pattern.length
}

0 commit comments

Comments
 (0)