@@ -44,6 +44,18 @@ module Language.LSP.VFS
44
44
, persistFileVFS
45
45
, closeVFS
46
46
47
+ -- * Positions and transformations
48
+ , CodePointPosition (.. )
49
+ , line
50
+ , character
51
+ , codePointPositionToPosition
52
+ , positionToCodePointPosition
53
+ , CodePointRange (.. )
54
+ , start
55
+ , end
56
+ , codePointRangeToRange
57
+ , rangeToCodePointRange
58
+
47
59
-- * manipulating the file contents
48
60
, rangeLinesFromVfs
49
61
, PosPrefixInfo (.. )
@@ -69,9 +81,10 @@ import Data.Ord
69
81
import qualified Data.HashMap.Strict as HashMap
70
82
import qualified Data.Map.Strict as Map
71
83
import Data.Maybe
84
+ import qualified Data.Text.Rope as URope
72
85
import Data.Text.Utf16.Rope ( Rope )
73
86
import qualified Data.Text.Utf16.Rope as Rope
74
- import Data.Text.Prettyprint.Doc
87
+ import Data.Text.Prettyprint.Doc hiding ( line )
75
88
import qualified Language.LSP.Types as J
76
89
import qualified Language.LSP.Types.Lens as J
77
90
import System.FilePath
@@ -346,6 +359,133 @@ changeChars logger str start finish new = do
346
359
347
360
-- ---------------------------------------------------------------------
348
361
362
+ -- | A position, like a 'J.Position', but where the offsets in the line are measured in
363
+ -- Unicode code points instead of UTF-16 code units.
364
+ data CodePointPosition =
365
+ CodePointPosition
366
+ { -- | Line position in a document (zero-based).
367
+ _line :: J. UInt
368
+ -- | Character offset on a line in a document in *code points* (zero-based).
369
+ , _character :: J. UInt
370
+ } deriving (Show , Read , Eq , Ord )
371
+
372
+ -- | A range, like a 'J.Range', but where the offsets in the line are measured in
373
+ -- Unicode code points instead of UTF-16 code units.
374
+ data CodePointRange =
375
+ CodePointRange
376
+ { _start :: CodePointPosition -- ^ The range's start position.
377
+ , _end :: CodePointPosition -- ^ The range's end position.
378
+ } deriving (Show , Read , Eq , Ord )
379
+
380
+ makeFieldsNoPrefix ''CodePointPosition
381
+ makeFieldsNoPrefix ''CodePointRange
382
+
383
+ {- Note [Converting between code points and code units]
384
+ This is inherently a somewhat expensive operation, but we take some care to minimize the cost.
385
+ In particular, we use the good asymptotics of 'Rope' to our advantage:
386
+ - We extract the single line that we are interested in in time logarithmic in the number of lines.
387
+ - We then split the line at the given position, and check how long the prefix is, which takes
388
+ linear time in the length of the (single) line.
389
+
390
+ We also may need to convert the line back and forth between ropes with different indexing. Again
391
+ this is linear time in the length of the line.
392
+
393
+ So the overall process is logarithmic in the number of lines, and linear in the length of the specific
394
+ line. Which is okay-ish, so long as we don't have very long lines.
395
+ -}
396
+
397
+ -- | Extracts a specific line from a 'Rope.Rope'.
398
+ -- Logarithmic in the number of lines.
399
+ extractLine :: Rope. Rope -> Word -> Maybe Rope. Rope
400
+ extractLine rope l = do
401
+ -- Check for the line being out of bounds
402
+ let lastLine = Rope. posLine $ Rope. lengthAsPosition rope
403
+ guard $ l <= lastLine
404
+
405
+ let (_, suffix) = Rope. splitAtLine l rope
406
+ (prefix, _) = Rope. splitAtLine 1 suffix
407
+ pure prefix
408
+
409
+ -- | Translate a code-point offset into a code-unit offset.
410
+ -- Linear in the length of the rope.
411
+ codePointOffsetToCodeUnitOffset :: URope. Rope -> Word -> Maybe Word
412
+ codePointOffsetToCodeUnitOffset rope offset = do
413
+ -- Check for the position being out of bounds
414
+ guard $ offset <= URope. length rope
415
+ -- Split at the given position in *code points*
416
+ let (prefix, _) = URope. splitAt offset rope
417
+ -- Convert the prefix to a rope using *code units*
418
+ utf16Prefix = Rope. fromText $ URope. toText prefix
419
+ -- Get the length of the prefix in *code units*
420
+ pure $ Rope. length utf16Prefix
421
+
422
+ -- | Translate a UTF-16 code-unit offset into a code-point offset.
423
+ -- Linear in the length of the rope.
424
+ codeUnitOffsetToCodePointOffset :: Rope. Rope -> Word -> Maybe Word
425
+ codeUnitOffsetToCodePointOffset rope offset = do
426
+ -- Check for the position being out of bounds
427
+ guard $ offset <= Rope. length rope
428
+ -- Split at the given position in *code units*
429
+ (prefix, _) <- Rope. splitAt offset rope
430
+ -- Convert the prefixto a rope using *code points*
431
+ let utfPrefix = URope. fromText $ Rope. toText prefix
432
+ -- Get the length of the prefix in *code points*
433
+ pure $ URope. length utfPrefix
434
+
435
+ -- | Given a virtual file, translate a 'CodePointPosition' in that file into a 'J.Position' in that file.
436
+ --
437
+ -- Will return 'Nothing' if the requested position is out of bounds of the document.
438
+ --
439
+ -- Logarithmic in the number of lines in the document, and linear in the length of the line containing
440
+ -- the position.
441
+ codePointPositionToPosition :: VirtualFile -> CodePointPosition -> Maybe J. Position
442
+ codePointPositionToPosition vFile (CodePointPosition l cpc) = do
443
+ -- See Note [Converting between code points and code units]
444
+ let text = _file_text vFile
445
+ utf16Line <- extractLine text (fromIntegral l)
446
+ -- Convert the line a rope using *code points*
447
+ let utfLine = URope. fromText $ Rope. toText utf16Line
448
+
449
+ cuc <- codePointOffsetToCodeUnitOffset utfLine (fromIntegral cpc)
450
+ pure $ J. Position l (fromIntegral cuc)
451
+
452
+ -- | Given a virtual file, translate a 'CodePointRange' in that file into a 'J.Range' in that file.
453
+ --
454
+ -- Will return 'Nothing' if any of the positions are out of bounds of the document.
455
+ --
456
+ -- Logarithmic in the number of lines in the document, and linear in the length of the lines containing
457
+ -- the positions.
458
+ codePointRangeToRange :: VirtualFile -> CodePointRange -> Maybe J. Range
459
+ codePointRangeToRange vFile (CodePointRange b e) =
460
+ J. Range <$> codePointPositionToPosition vFile b <*> codePointPositionToPosition vFile e
461
+
462
+ -- | Given a virtual file, translate a 'J.Position' in that file into a 'CodePointPosition' in that file.
463
+ --
464
+ -- Will return 'Nothing' if the requested position lies inside a code point, or if it is out of bounds of the document.
465
+ --
466
+ -- Logarithmic in the number of lines in the document, and linear in the length of the line containing
467
+ -- the position.
468
+ positionToCodePointPosition :: VirtualFile -> J. Position -> Maybe CodePointPosition
469
+ positionToCodePointPosition vFile (J. Position l cuc) = do
470
+ -- See Note [Converting between code points and code units]
471
+ let text = _file_text vFile
472
+ utf16Line <- extractLine text (fromIntegral l)
473
+
474
+ cpc <- codeUnitOffsetToCodePointOffset utf16Line (fromIntegral cuc)
475
+ pure $ CodePointPosition l (fromIntegral cpc)
476
+
477
+ -- | Given a virtual file, translate a 'J.Range' in that file into a 'CodePointRange' in that file.
478
+ --
479
+ -- Will return 'Nothing' if any of the positions are out of bounds of the document.
480
+ --
481
+ -- Logarithmic in the number of lines in the document, and linear in the length of the lines containing
482
+ -- the positions.
483
+ rangeToCodePointRange :: VirtualFile -> J. Range -> Maybe CodePointRange
484
+ rangeToCodePointRange vFile (J. Range b e) =
485
+ CodePointRange <$> positionToCodePointPosition vFile b <*> positionToCodePointPosition vFile e
486
+
487
+ -- ---------------------------------------------------------------------
488
+
349
489
-- TODO:AZ:move this to somewhere sane
350
490
-- | Describes the line at the current cursor position
351
491
data PosPrefixInfo = PosPrefixInfo
0 commit comments