From 99948e59b5bceeb80516b233daf15cd3ea30306e Mon Sep 17 00:00:00 2001 From: Michael Peyton Jones Date: Sun, 5 Oct 2025 18:43:45 +0100 Subject: [PATCH] Support multiline and overlapping semantic tokens Fixes #542. --- lsp-test/lsp-test.cabal | 1 + lsp-test/test/DummyServer.hs | 3 +- lsp-types/lsp-types.cabal | 2 + .../LSP/Protocol/Types/SemanticTokens.hs | 116 +++++++++++++++++- lsp-types/test/SemanticTokensSpec.hs | 70 +++++++++++ 5 files changed, 186 insertions(+), 6 deletions(-) diff --git a/lsp-test/lsp-test.cabal b/lsp-test/lsp-test.cabal index 0a08dfa3..96e64370 100644 --- a/lsp-test/lsp-test.cabal +++ b/lsp-test/lsp-test.cabal @@ -116,6 +116,7 @@ test-suite tests , parser-combinators , process , text + , text-rope , unliftio test-suite func-test diff --git a/lsp-test/test/DummyServer.hs b/lsp-test/test/DummyServer.hs index bd36551e..9ae424ff 100644 --- a/lsp-test/test/DummyServer.hs +++ b/lsp-test/test/DummyServer.hs @@ -14,6 +14,7 @@ import Data.Map.Strict qualified as M import Data.Proxy import Data.String import Data.Text qualified as T +import Data.Text.Utf16.Rope.Mixed qualified as Rope import Language.LSP.Protocol.Message import Language.LSP.Protocol.Types import Language.LSP.Server @@ -253,7 +254,7 @@ handlers = InL [CallHierarchyOutgoingCall item [Range (Position 4 5) (Position 2 3)]] , requestHandler SMethod_TextDocumentSemanticTokensFull $ \_req resp -> do - let tokens = makeSemanticTokens defaultSemanticTokensLegend [SemanticTokenAbsolute 0 1 2 SemanticTokenTypes_Type []] + let tokens = makeSemanticTokens defaultSemanticTokensLegend Nothing (Rope.fromText "") [SemanticTokenAbsolute 0 1 2 SemanticTokenTypes_Type []] case tokens of Left t -> resp $ Left $ TResponseError (InR ErrorCodes_InternalError) t Nothing Right tokens -> resp $ Right $ InL tokens diff --git a/lsp-types/lsp-types.cabal b/lsp-types/lsp-types.cabal index 31d5c647..ebbfa30f 100644 --- a/lsp-types/lsp-types.cabal +++ b/lsp-types/lsp-types.cabal @@ -79,6 +79,7 @@ library , network-uri ^>=2.6 , prettyprinter ^>=1.7 , row-types ^>=1.0 + , text-rope ^>=0.3 , safe ^>=0.3 , some ^>=1.0 , template-haskell >=2.7 && <2.24 @@ -666,6 +667,7 @@ test-suite lsp-types-test , prettyprinter , QuickCheck , quickcheck-instances + , text-rope , text build-tool-depends: hspec-discover:hspec-discover diff --git a/lsp-types/src/Language/LSP/Protocol/Types/SemanticTokens.hs b/lsp-types/src/Language/LSP/Protocol/Types/SemanticTokens.hs index a66372b5..9c33ab6b 100644 --- a/lsp-types/src/Language/LSP/Protocol/Types/SemanticTokens.hs +++ b/lsp-types/src/Language/LSP/Protocol/Types/SemanticTokens.hs @@ -7,10 +7,13 @@ module Language.LSP.Protocol.Types.SemanticTokens where import Data.Text (Text) import Control.Monad.Except +import Data.Text.Utf16.Rope.Mixed (Rope) +import Data.Text.Utf16.Rope.Mixed qualified as Rope import Language.LSP.Protocol.Internal.Types.SemanticTokenModifiers import Language.LSP.Protocol.Internal.Types.SemanticTokenTypes import Language.LSP.Protocol.Internal.Types.SemanticTokens +import Language.LSP.Protocol.Internal.Types.SemanticTokensClientCapabilities import Language.LSP.Protocol.Internal.Types.SemanticTokensDelta import Language.LSP.Protocol.Internal.Types.SemanticTokensEdit import Language.LSP.Protocol.Internal.Types.SemanticTokensLegend @@ -18,6 +21,7 @@ import Language.LSP.Protocol.Types.Common import Language.LSP.Protocol.Types.LspEnum import Data.Algorithm.Diff qualified as Diff +import Data.List qualified import Data.Bits qualified as Bits import Data.DList qualified as DList import Data.Foldable hiding ( @@ -171,12 +175,114 @@ computeEdits l r = DList.toList $ go 0 Nothing (Diff.getGroupedDiff l r) mempty let bothCount = fromIntegral $ Prelude.length bs in go (ix + bothCount) Nothing rest (acc <> DList.fromList (maybeToList e)) --- | Convenience method for making a 'SemanticTokens' from a list of 'SemanticTokenAbsolute's. An error may be returned if +{- | Split tokens that span multiple lines into separate tokens for each line. +Uses the document rope to accurately determine line break positions. +Rope provides O(log n) line access, making this efficient even for large documents. +-} +splitMultilineTokens :: Rope -> [SemanticTokenAbsolute] -> [SemanticTokenAbsolute] +splitMultilineTokens docRope = concatMap splitSingleToken + where + -- Get the UTF-16 length of a specific line (O(log n) with Rope) + getLineLength :: UInt -> UInt + getLineLength lineNum = + case Rope.splitAtLine (fromIntegral lineNum) docRope of + (_, suffix) -> case Rope.splitAtLine 1 suffix of + (lineRope, _) -> fromIntegral $ Rope.utf16Length lineRope + + splitSingleToken :: SemanticTokenAbsolute -> [SemanticTokenAbsolute] + splitSingleToken token@(SemanticTokenAbsolute line startChar len tokenType tokenMods) = + if len == 0 then [token] + else + let endChar = startChar + len + lineLen = getLineLength line + extendsToNextLine = endChar > lineLen + + splitAcrossLines :: UInt -> UInt -> UInt -> [SemanticTokenAbsolute] + splitAcrossLines currentLine currentStartChar remainingLen + | remainingLen == 0 = [] + | otherwise = + let currentLineLen = getLineLength currentLine + -- If we're past the end of the document, stop + charsOnThisLine = if currentLineLen == 0 && currentStartChar == 0 + then remainingLen -- Include remaining length on this phantom line + else if currentStartChar >= currentLineLen + then 0 + else min remainingLen (currentLineLen - currentStartChar) + newToken = SemanticTokenAbsolute currentLine currentStartChar charsOnThisLine tokenType tokenMods + in if charsOnThisLine > 0 + then newToken : splitAcrossLines (currentLine + 1) 0 (remainingLen - charsOnThisLine) + else [] -- Stop if we can't make progress + + in if extendsToNextLine + then splitAcrossLines line startChar len + else [token] + +{- | Resolve overlapping tokens by splitting them into non-overlapping segments. +When tokens overlap, the second token's type is preserved in the overlap region. +-} +resolveOverlappingTokens :: [SemanticTokenAbsolute] -> [SemanticTokenAbsolute] +resolveOverlappingTokens tokens = + let sortedTokens = Data.List.sortBy compareTokenPosition tokens + in resolveOverlaps sortedTokens [] + where + compareTokenPosition :: SemanticTokenAbsolute -> SemanticTokenAbsolute -> Ordering + compareTokenPosition (SemanticTokenAbsolute l1 c1 _ _ _) (SemanticTokenAbsolute l2 c2 _ _ _) = + compare (l1, c1) (l2, c2) --- The resulting 'SemanticTokens' lacks a result ID, which must be set separately if you are using that. -makeSemanticTokens :: SemanticTokensLegend -> [SemanticTokenAbsolute] -> Either Text SemanticTokens -makeSemanticTokens legend sts = do - encoded <- encodeTokens legend $ relativizeTokens sts + tokenEnd :: SemanticTokenAbsolute -> (UInt, UInt) + tokenEnd (SemanticTokenAbsolute line startChar len _ _) = (line, startChar + len) + + resolveOverlaps :: [SemanticTokenAbsolute] -> [SemanticTokenAbsolute] -> [SemanticTokenAbsolute] + resolveOverlaps [] acc = reverse acc + resolveOverlaps [token] acc = reverse (token : acc) + resolveOverlaps (token1@(SemanticTokenAbsolute l1 c1 len1 ty1 mods1) : token2@(SemanticTokenAbsolute l2 c2 len2 ty2 mods2) : rest) acc = + let (endL1, endC1) = tokenEnd token1 + overlaps = l2 < endL1 || (l2 == endL1 && c2 < endC1) + in if overlaps + then + let (endL2, endC2) = tokenEnd token2 + beforeOverlap = if l1 < l2 || (l1 == l2 && c1 < c2) + then [SemanticTokenAbsolute l1 c1 (if l1 == l2 then c2 - c1 else len1) ty1 mods1] + else [] + overlapToken = SemanticTokenAbsolute l2 c2 len2 ty2 mods2 + afterOverlap = if endL1 > endL2 || (endL1 == endL2 && endC1 > endC2) + then [SemanticTokenAbsolute endL2 endC2 + (if endL1 == endL2 then endC1 - endC2 else len1) ty1 mods1] + else [] + in resolveOverlaps rest (afterOverlap ++ [overlapToken] ++ beforeOverlap ++ acc) + else resolveOverlaps (token2 : rest) (token1 : acc) + +{- | Transform tokens based on client capabilities. +If multiline tokens are not supported, split tokens that span multiple lines. +If overlapping tokens are not supported, resolve overlapping tokens. +-} +transformTokensForCapabilities :: Maybe SemanticTokensClientCapabilities -> Rope -> [SemanticTokenAbsolute] -> [SemanticTokenAbsolute] +transformTokensForCapabilities Nothing _ tokens = tokens +transformTokensForCapabilities (Just caps) docRope tokens = + let supportsMultiline = fromMaybe False (_multilineTokenSupport caps) + supportsOverlapping = fromMaybe False (_overlappingTokenSupport caps) + tokens' = if supportsMultiline then tokens else splitMultilineTokens docRope tokens + tokens'' = if supportsOverlapping then tokens' else resolveOverlappingTokens tokens' + in tokens'' + +{- | Convenience method for making a 'SemanticTokens' from a list of 'SemanticTokenAbsolute's. +Automatically transforms tokens based on client capabilities: +- Splits multiline tokens if client doesn't support them +- Resolves overlapping tokens if client doesn't support them + +The document rope is required to properly split multiline tokens at actual line boundaries. +Using Rope provides O(log n) line access for efficient processing of large documents. +The resulting 'SemanticTokens' lacks a result ID, which must be set separately if you are using that. +-} +makeSemanticTokens :: + SemanticTokensLegend -> + Maybe SemanticTokensClientCapabilities -> + Rope -> + [SemanticTokenAbsolute] -> + Either Text SemanticTokens +makeSemanticTokens legend caps docRope sts = do + let transformedTokens = transformTokensForCapabilities caps docRope sts + encoded <- encodeTokens legend $ relativizeTokens transformedTokens pure $ SemanticTokens Nothing encoded {- | Convenience function for making a 'SemanticTokensDelta' from a previous and current 'SemanticTokens'. diff --git a/lsp-types/test/SemanticTokensSpec.hs b/lsp-types/test/SemanticTokensSpec.hs index 37262ca0..04606d92 100644 --- a/lsp-types/test/SemanticTokensSpec.hs +++ b/lsp-types/test/SemanticTokensSpec.hs @@ -4,6 +4,7 @@ module SemanticTokensSpec where import Data.Either (isRight) import Data.List (unfoldr) +import Data.Text.Utf16.Rope.Mixed qualified as Rope import Language.LSP.Protocol.Types import Test.Hspec @@ -77,3 +78,72 @@ spec = do it "handles big tokens" $ -- It's a little hard to specify a useful predicate here, the main point is that it should not take too long computeEdits @UInt bigInts bigInts2 `shouldSatisfy` (not . null) + + describe "splitMultilineTokens" $ do + it "splits a token spanning two lines" $ do + let docRope = Rope.fromText "hello\nworld" + token = SemanticTokenAbsolute 0 3 7 SemanticTokenTypes_String [] + result = splitMultilineTokens docRope [token] + result `shouldBe` + -- Note: Rope.splitAtLine includes the newline in the first line, so line 0 has length 6 ("hello\n") and line 1 has length 5 ("world") + [ SemanticTokenAbsolute 0 3 3 SemanticTokenTypes_String [] -- "lo\n" on line 0 (chars 3-5, length 3) + , SemanticTokenAbsolute 1 0 4 SemanticTokenTypes_String [] -- "worl" on line 1 (remaining 4 chars of the token) + ] + + it "doesn't split single-line tokens" $ do + let docRope = Rope.fromText "hello world" + token = SemanticTokenAbsolute 0 0 5 SemanticTokenTypes_String [] + result = splitMultilineTokens docRope [token] + result `shouldBe` [token] + + it "handles empty tokens" $ do + let docRope = Rope.fromText "test" + token = SemanticTokenAbsolute 0 0 0 SemanticTokenTypes_String [] + result = splitMultilineTokens docRope [token] + result `shouldBe` [token] + + describe "resolveOverlappingTokens" $ do + it "splits overlapping tokens correctly" $ do + let token1 = SemanticTokenAbsolute 0 0 10 SemanticTokenTypes_String [] + token2 = SemanticTokenAbsolute 0 5 3 SemanticTokenTypes_Variable [] + result = resolveOverlappingTokens [token1, token2] + result `shouldBe` + [ SemanticTokenAbsolute 0 0 5 SemanticTokenTypes_String [] -- before overlap + , SemanticTokenAbsolute 0 5 3 SemanticTokenTypes_Variable [] -- overlap (token2 wins) + , SemanticTokenAbsolute 0 8 2 SemanticTokenTypes_String [] -- after overlap + ] + + it "doesn't modify non-overlapping tokens" $ do + let token1 = SemanticTokenAbsolute 0 0 5 SemanticTokenTypes_String [] + token2 = SemanticTokenAbsolute 0 10 3 SemanticTokenTypes_Variable [] + result = resolveOverlappingTokens [token1, token2] + result `shouldBe` [token1, token2] + + describe "makeSemanticTokens with capabilities" $ do + it "splits multiline when not supported" $ do + let legend = defaultSemanticTokensLegend + caps = Just $ SemanticTokensClientCapabilities + Nothing + (ClientSemanticTokensRequestOptions Nothing Nothing) + [] [] [] + Nothing + (Just False) -- multilineTokenSupport = False + Nothing Nothing + docRope = Rope.fromText "ab\ncd" + tokens = [SemanticTokenAbsolute 0 0 5 SemanticTokenTypes_String []] + result = makeSemanticTokens legend caps docRope tokens + isRight result `shouldBe` True + + it "preserves multiline when supported" $ do + let legend = defaultSemanticTokensLegend + caps = Just $ SemanticTokensClientCapabilities + Nothing + (ClientSemanticTokensRequestOptions Nothing Nothing) + [] [] [] + Nothing + (Just True) -- multilineTokenSupport = True + Nothing Nothing + docRope = Rope.fromText "ab\ncd" + tokens = [SemanticTokenAbsolute 0 0 5 SemanticTokenTypes_String []] + result = makeSemanticTokens legend caps docRope tokens + isRight result `shouldBe` True