@@ -74,10 +74,9 @@ import Data.Ord
74
74
import qualified Data.HashMap.Strict as HashMap
75
75
import qualified Data.Map.Strict as Map
76
76
import Data.Maybe
77
- import qualified Data.Text.Lines as ULines
77
+ import qualified Data.Text.Rope as URope
78
78
import Data.Text.Utf16.Rope ( Rope )
79
79
import qualified Data.Text.Utf16.Rope as Rope
80
- import qualified Data.Text.Utf16.Lines as Lines
81
80
import Data.Text.Prettyprint.Doc
82
81
import qualified Language.LSP.Types as J
83
82
import qualified Language.LSP.Types.Lens as J
@@ -363,32 +362,71 @@ data CodePointPosition =
363
362
, _character :: J. UInt
364
363
} deriving (Show , Read , Eq , Ord )
365
364
365
+ {- Note [Converting between code points and code units]
366
+ This is inherently a somewhat expensive operation, but we take some care to minimize the cost.
367
+ In particular, we use the good asymptotics of 'Rope' to our advantage:
368
+ - We extract the single line that we are interested in, which takes time logarithmic in the number of lines.
369
+ - We then split the line at the given position, and check how long the prefix is, which takes
370
+ linear time in the length of the (single) line.
371
+
372
+ We also may need to convert the line back and forth between ropes with different indexing. Again
373
+ this is linear time in the length of the line.
374
+
375
+ So the overall process is logarithmic in the number of lines, and linear in the length of the specific
376
+ line. This is okay-ish, so long as we don't have very long lines.
377
+ -}
378
+
379
+ -- | Extracts a specific line, selected by @l@, from a 'Rope.Rope'.
380
+ -- Logarithmic in the number of lines. Implemented with two 'Rope.splitAtLine' calls: the suffix of a split at @l@, then the prefix of a split of that suffix at 1.
381
+ extractLine :: Rope. Rope -> Word -> Rope. Rope
382
+ extractLine rope l =
383
+ let (_, suffix) = Rope. splitAtLine l rope
384
+ (prefix, _) = Rope. splitAtLine 1 suffix
385
+ in prefix
386
+
366
387
-- | Given a virtual file, translate a 'CodePointPosition' in that file into a 'J.Position' in that file.
367
388
--
389
+ -- If the position is out of bounds (i.e. beyond the last line or the last character in a line), then the
390
+ -- greatest valid position less than that will be returned. NOTE(review): the line number is returned as given, so only the character offset looks clamped; confirm.
391
+ --
368
392
-- We need the file itself because this requires translating between code points and code units.
369
393
codePointPositionToPosition :: VirtualFile -> CodePointPosition -> J. Position
370
- codePointPositionToPosition vFile (CodePointPosition cpl cpc) =
394
+ codePointPositionToPosition vFile (CodePointPosition l cpc) =
395
+ -- See Note [Converting between code points and code units]
371
396
let text = _file_text vFile
372
- lines = Rope. toTextLines text
397
+ utf16Line = extractLine text (fromIntegral l)
398
+
399
+ -- Convert the line to a rope using *code points*
400
+ utfLine = URope. fromText $ Rope. toText utf16Line
373
401
-- Split at the given position in *code points*
374
- (prefix, _) = ULines. splitAtPosition (ULines. Position (fromIntegral cpl) (fromIntegral cpc)) lines
402
+ (utfLinePrefix, _) = URope. splitAt (fromIntegral cpc) utfLine
403
+ -- Convert the prefix to a rope using *code units*
404
+ utf16LinePrefix = Rope. fromText $ URope. toText utfLinePrefix
375
405
-- Get the length of the prefix in *code units*
376
- ( Lines. Position cul cuc) = Lines. lengthAsPosition prefix
377
- in J. Position ( fromIntegral cul) (fromIntegral cuc)
406
+ cuc = Rope. length utf16LinePrefix
407
+ in J. Position l (fromIntegral cuc)
378
408
379
409
-- | Given a virtual file, translate a 'J.Position' in that file into a 'CodePointPosition' in that file.
410
+ --
380
411
-- May fail if the requested position lies inside a code point.
381
412
--
413
+ -- If the position is out of bounds (i.e. beyond the last line or the last character in a line), then the
414
+ -- greatest valid position less than that will be returned. NOTE(review): the line number is returned as given, so only the character offset looks clamped; confirm.
415
+ --
382
416
-- We need the file itself because this requires translating between code units and code points.
383
417
positionToCodePointPosition :: VirtualFile -> J. Position -> Maybe CodePointPosition
384
- positionToCodePointPosition vFile (J. Position cul cuc) = do
418
+ positionToCodePointPosition vFile (J. Position l cuc) = do
419
+ -- See Note [Converting between code points and code units]
385
420
let text = _file_text vFile
386
- lines = Rope. toTextLines text
387
- -- Split at the given location in *code units*
388
- (prefix, _) <- Lines. splitAtPosition (Lines. Position (fromIntegral cul) (fromIntegral cuc)) lines
389
- -- Get the length of the prefix in *code points*
390
- let (ULines. Position cpl cpc) = ULines. lengthAsPosition prefix
391
- pure $ CodePointPosition (fromIntegral cpl) (fromIntegral cpc)
421
+ utf16Line = extractLine text (fromIntegral l)
422
+
423
+ -- Split at the given position in *code units*
424
+ (utf16LinePrefix, _) <- Rope. splitAt (fromIntegral cuc) utf16Line
425
+ -- Convert the prefix to a rope using *code points*
426
+ let utfLinePrefix = URope. fromText $ Rope. toText utf16LinePrefix
427
+ -- Get the length of the prefix in *code points*
428
+ cpc = URope. length utfLinePrefix
429
+ pure $ CodePointPosition l (fromIntegral cpc)
392
430
393
431
-- ---------------------------------------------------------------------
394
432
0 commit comments