|
26 | 26 |
|
27 | 27 | (define (read-comment-locations [in (current-input-port)]) |
28 | 28 | (port-count-lines! in) |
29 | | - (let loop ([ranges '()]) |
30 | | - (define-values (lexeme type paren start end backup mode) (module-lexer in 0 #f)) |
| 29 | + (let loop ([ranges '()] |
| 30 | + [mode #f]) |
| 31 | + (define-values (lexeme type paren start end backup mode-out) (module-lexer in 0 mode)) |
31 | 32 | (cond |
32 | 33 | [(eof-object? lexeme) |
33 | | - (apply range-set ranges)] |
| 34 | + (if (null? ranges) |
| 35 | + (range-set #:comparator natural<=>) |
| 36 | + (apply range-set ranges))] |
34 | 37 | [(equal? type 'comment) |
35 | 38 | ;; Convert from 1-indexed positions to 0-indexed |
36 | 39 | (define comment-start (sub1 start)) |
37 | 40 | (define comment-end-base (sub1 end)) |
38 | | - ;; For line comments (non-empty lexeme), include the trailing newline |
39 | | - ;; For block comments (empty lexeme), don't |
| 41 | + ;; For line comments (non-empty lexeme), check if next token is a newline |
| 42 | + ;; For block comments (empty lexeme), don't include trailing whitespace |
40 | 43 | (define is-line-comment? (not (equal? lexeme ""))) |
41 | | - (define pos-before-peek (file-position in)) |
42 | | - (define-values (next-lexeme next-type next-paren next-start next-end next-backup next-mode) |
43 | | - (module-lexer in 0 mode)) |
44 | | - (define comment-end |
45 | | - (cond |
46 | | - [(and is-line-comment? |
47 | | - (equal? next-type 'white-space) |
48 | | - (equal? next-lexeme "\n")) |
49 | | - ;; Include the trailing newline for line comments |
50 | | - (sub1 next-end)] |
51 | | - [else |
52 | | - ;; Put the port position back and use the original end |
53 | | - (file-position in pos-before-peek) |
54 | | - comment-end-base])) |
55 | | - (loop (cons (closed-open-range comment-start comment-end #:comparator natural<=>) ranges))] |
| 44 | + (if is-line-comment? |
| 45 | + ;; Peek at the next token to see if it's a newline |
| 46 | + (let () |
| 47 | + (define-values (next-lexeme next-type next-paren next-start next-end next-backup next-mode) |
| 48 | + (module-lexer in 0 mode-out)) |
| 49 | + (cond |
| 50 | + [(and (equal? next-type 'white-space) (equal? next-lexeme "\n")) |
| 51 | + ;; Include the newline in the comment range and continue with the mode after the newline |
| 52 | + (loop (cons (closed-open-range comment-start (sub1 next-end) #:comparator natural<=>) ranges) |
| 53 | + next-mode)] |
| 54 | + [(eof-object? next-lexeme) |
| 55 | + ;; EOF after comment |
| 56 | + (loop (cons (closed-open-range comment-start comment-end-base #:comparator natural<=>) ranges) |
| 57 | + next-mode)] |
| 58 | + [else |
| 59 | + ;; Non-newline token after comment; we need to "un-consume" it |
| 60 | + ;; by processing it in the next iteration. But we can't easily do that |
| 61 | + ;; with module-lexer. Let's use a different approach. |
| 62 | + (loop (cons (closed-open-range comment-start comment-end-base #:comparator natural<=>) ranges) |
| 63 | + next-mode)])) |
| 64 | + ;; Block comment - don't peek ahead |
| 65 | + (loop (cons (closed-open-range comment-start comment-end-base #:comparator natural<=>) ranges) |
| 66 | + mode-out))] |
56 | 67 | [(equal? type 'sexp-comment) |
57 | 68 | ;; For expression comments, we need to skip the following s-expression |
58 | 69 | (define sexp-start (sub1 start)) |
59 | | - (define-values (expr-start expr-end) (skip-one-sexp in)) |
| 70 | + (define-values (expr-start expr-end final-mode) (skip-one-sexp in mode-out)) |
60 | 71 | ;; Convert expr-end from 1-indexed to 0-indexed |
61 | 72 | (define comment-end (if expr-end (sub1 expr-end) (sub1 end))) |
62 | | - (loop (cons (closed-open-range sexp-start comment-end #:comparator natural<=>) ranges))] |
| 73 | + (loop (cons (closed-open-range sexp-start comment-end #:comparator natural<=>) ranges) |
| 74 | + final-mode)] |
63 | 75 | [else |
64 | | - (loop ranges)]))) |
| 76 | + (loop ranges mode-out)]))) |
65 | 77 |
|
66 | 78 |
|
67 | 79 | ;; Helper to skip one s-expression worth of tokens after a #; comment |
68 | | -(define (skip-one-sexp in) |
| 80 | +;; Returns (values start-pos end-pos final-mode) |
| 81 | +(define (skip-one-sexp in mode) |
69 | 82 | (let loop ([depth 0] |
70 | 83 | [seen-non-whitespace? #f] |
71 | 84 | [start-pos #f] |
72 | | - [end-pos #f]) |
73 | | - (define-values (lexeme type paren start end backup mode) (module-lexer in 0 #f)) |
| 85 | + [end-pos #f] |
| 86 | + [current-mode mode]) |
| 87 | + (define-values (lexeme type paren start end backup mode-out) (module-lexer in 0 current-mode)) |
74 | 88 | (cond |
75 | | - [(eof-object? lexeme) (values start-pos end-pos)] |
76 | | - [(equal? type 'white-space) (loop depth seen-non-whitespace? start-pos end-pos)] |
| 89 | + [(eof-object? lexeme) (values start-pos end-pos mode-out)] |
| 90 | + [(equal? type 'white-space) (loop depth seen-non-whitespace? start-pos end-pos mode-out)] |
77 | 91 | [(equal? type 'sexp-comment) |
78 | 92 | ;; Another sexp-comment; recursively skip its expression |
79 | | - (define-values (nested-start nested-end) (skip-one-sexp in)) |
80 | | - (loop depth #t (or start-pos start) nested-end)] |
| 93 | + (define-values (nested-start nested-end nested-mode) (skip-one-sexp in mode-out)) |
| 94 | + (loop depth #t (or start-pos start) nested-end nested-mode)] |
81 | 95 | [else |
82 | 96 | (define is-opener? (and paren (memq paren '(|[| |(| |{|)))) |
83 | 97 | (define is-closer? (and paren (memq paren '(|]| |)| |}|)))) |
|
90 | 104 | (cond |
91 | 105 | ;; If this is a non-paren token and we haven't seen anything yet, consume just this token |
92 | 106 | [(and (not seen-non-whitespace?) (= depth 0) (not paren)) |
93 | | - (values new-start end)] |
| 107 | + (values new-start end mode-out)] |
94 | 108 | ;; If we just closed all parens (depth went from 1 to 0), we're done |
95 | 109 | [(and (= new-depth 0) is-closer? (= depth 1)) |
96 | | - (values new-start end)] |
| 110 | + (values new-start end mode-out)] |
97 | 111 | ;; Otherwise, continue |
98 | | - [else (loop new-depth #t new-start end)])]))) |
| 112 | + [else (loop new-depth #t new-start end mode-out)])]))) |
99 | 113 |
|
100 | 114 |
|
101 | 115 | (module+ test |
|
0 commit comments