Skip to content

Commit 30a5a34

Browse files
committed
Add levenshtein scoring function
1 parent bd58606 commit 30a5a34

File tree

2 files changed

+38
-0
lines changed

2 files changed

+38
-0
lines changed

ghcide/ghcide.cabal

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ library
8383
, list-t
8484
, lsp ^>=2.7
8585
, lsp-types ^>=2.3
86+
, MemoTrie
8687
, mtl
8788
, opentelemetry >=0.6.1
8889
, optparse-applicative
@@ -196,6 +197,7 @@ library
196197
Development.IDE.Types.Shake
197198
Generics.SYB.GHC
198199
Text.Fuzzy.Parallel
200+
Text.Fuzzy.Levenshtein
199201

200202
other-modules:
201203
Development.IDE.Core.FileExists
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
module Text.Fuzzy.Levenshtein where
2+
3+
import Data.Function (fix)
4+
import Data.List (sortOn)
5+
import Data.MemoTrie
6+
import qualified Data.Text as T
7+
import qualified Data.Text.Array as T
8+
import Data.Text.Internal (Text (..))
9+
import Text.Fuzzy.Parallel
10+
11+
-- | Weighted edit distance between two 'Text' values.
--
-- Insertions and deletions cost 1, substitutions cost 2. The inputs are
-- compared element by element on their underlying arrays (not on decoded
-- characters), and the result is expressed in that same unit.
--
-- Same caveats apply w.r.t. ASCII as in 'Text.Fuzzy.Parallel'.
-- Might be worth optimizing this at some point, but it's good enoughᵗᵐ for now
levenshtein :: Text -> Text -> Int
levenshtein a@(Text aBuf aOff aLen) b@(Text bBuf bOff bLen)
  -- If one side is empty, the distance is the size of the other side. The
  -- array lengths are used (instead of 'T.length', which counts characters)
  -- so that these shortcuts agree with the unit used by the recursion below.
  | T.null a = bLen
  | T.null b = aLen
  | otherwise = go (aOff, bOff)
  where
    -- One-past-the-end indices of each input inside its array.
    aTot = aOff + aLen
    bTot = bOff + bLen

    -- Tie the recursive knot through 'memo' so that every pair of indices is
    -- computed at most once.
    go = fix (memo . go')

    -- One step of the recurrence; @f@ is the memoised recursive call.
    go' f (!aIx, !bIx)
      -- At least one side is exhausted: whatever is left on the other side
      -- has to be inserted or deleted.
      | aIx >= aTot || bIx >= bTot = max (aTot - aIx) (bTot - bIx)
      -- Equal elements are consumed for free.
      | T.unsafeIndex aBuf aIx == T.unsafeIndex bBuf bIx = f (aIx + 1, bIx + 1)
      | otherwise =
          minimum
            [ -- Give substitutions a heavier cost, so multiple typos cost more.
              -- Note that this makes a substitution cost the same as a
              -- deletion followed by an insertion.
              2 + f (aIx + 1, bIx + 1)
            , 1 + f (aIx + 1, bIx)
            , 1 + f (aIx, bIx + 1)
            ]
29+
30+
-- | Score the haystack against the needle using 'levenshtein' (via
-- 'matchPar') and return the results ordered by ascending score.
--
-- The chunk size is passed through to 'matchPar' unchanged.
levenshteinScored :: Int -> Text -> [Text] -> [Scored Text]
levenshteinScored chunkSize needle haystack = sortOn score scored
  where
    -- The distance is always defined, hence the unconditional 'Just'.
    distance x y = Just (levenshtein x y)
    scored = matchPar chunkSize needle haystack id distance

0 commit comments

Comments
 (0)