|
9 | 9 | [read-comment-locations (->* () (input-port?) range-set?)])) |
10 | 10 |
|
11 | 11 |
|
12 | | -(require br-parser-tools/lex |
13 | | - racket/sequence |
| 12 | +(require racket/sequence |
14 | 13 | rebellion/base/comparator |
15 | 14 | rebellion/base/range |
16 | | - rebellion/collection/list |
17 | 15 | rebellion/collection/range-set |
18 | | - rebellion/streaming/reducer |
19 | | - rebellion/streaming/transducer |
20 | | - resyntax/private/syntax-traversal |
21 | | - (prefix-in : br-parser-tools/lex-sre)) |
| 16 | + syntax-color/module-lexer) |
22 | 17 |
|
23 | 18 |
|
24 | 19 | (module+ test |
|
31 | 26 |
|
32 | 27 | (define (read-comment-locations [in (current-input-port)]) |
33 | 28 | (port-count-lines! in) |
34 | | - (define (next!) |
35 | | - (comment-lexer in)) |
36 | | - (transduce (in-producer next! eof) |
37 | | - (mapping srcloc-token-srcloc) |
38 | | - (mapping srcloc-range) |
39 | | - #:into (into-range-set natural<=>))) |
40 | | - |
41 | | - |
42 | | -(define (srcloc-range srcloc) |
43 | | - (define start (sub1 (srcloc-position srcloc))) |
44 | | - (define end (+ start (srcloc-span srcloc))) |
45 | | - (closed-open-range start end #:comparator natural<=>)) |
46 | | - |
47 | | - |
48 | | -(define-tokens racket-tokens (LINE-COMMENT BLOCK-COMMENT)) |
49 | | - |
50 | | - |
51 | | -(define-lex-abbrev racket-line-comment |
52 | | - (concatenation ";" (complement (:: any-string "\n" any-string)) "\n")) |
53 | | - |
54 | | - |
55 | | -(define (build-racket-line-comment lexeme) |
56 | | - (token-LINE-COMMENT (string->immutable-string lexeme))) |
57 | | - |
58 | | - |
59 | | -;; Technically not correct because block comments can be nested. |
60 | | -(define-lex-abbrev racket-block-comment |
61 | | - (concatenation "#|" (complement (:: any-string (:or "#|" "#|") any-string)) "|#")) |
62 | | - |
63 | | - |
64 | | -(define (build-racket-block-comment lexeme) |
65 | | - (token-BLOCK-COMMENT (string->immutable-string lexeme))) |
66 | | - |
67 | | - |
68 | | -;; This lexer should also read string literals and discard them, so that comment-starting characters |
69 | | -;; inside string literals are ignored. |
70 | | -(define comment-lexer |
71 | | - (lexer-srcloc |
72 | | - [racket-line-comment (build-racket-line-comment lexeme)] |
73 | | - [racket-block-comment (build-racket-block-comment lexeme)] |
74 | | - [any-char (return-without-srcloc (comment-lexer input-port))])) |
| 29 | + (let loop ([ranges '()]) |
| 30 | + (define-values (lexeme type paren start end backup mode) (module-lexer in 0 #f)) |
| 31 | + (cond |
| 32 | + [(eof-object? lexeme) |
| 33 | + (apply range-set ranges)] |
| 34 | + [(equal? type 'comment) |
| 35 | + ;; Convert from 1-indexed positions to 0-indexed |
| 36 | + (define comment-start (sub1 start)) |
| 37 | + (define comment-end-base (sub1 end)) |
| 38 | + ;; For line comments (non-empty lexeme), include the trailing newline |
| 39 | + ;; For block comments (empty lexeme), don't |
| 40 | + (define is-line-comment? (not (equal? lexeme ""))) |
| 41 | + (define pos-before-peek (file-position in)) |
| 42 | + (define-values (next-lexeme next-type next-paren next-start next-end next-backup next-mode) |
| 43 | + (module-lexer in 0 mode)) |
| 44 | + (define comment-end |
| 45 | + (cond |
| 46 | + [(and is-line-comment? |
| 47 | + (equal? next-type 'white-space) |
| 48 | + (equal? next-lexeme "\n")) |
| 49 | + ;; Include the trailing newline for line comments |
| 50 | + (sub1 next-end)] |
| 51 | + [else |
| 52 | + ;; Put the port position back and use the original end |
| 53 | + (file-position in pos-before-peek) |
| 54 | + comment-end-base])) |
| 55 | + (loop (cons (closed-open-range comment-start comment-end #:comparator natural<=>) ranges))] |
| 56 | + [(equal? type 'sexp-comment) |
| 57 | + ;; For expression comments, we need to skip the following s-expression |
| 58 | + (define sexp-start (sub1 start)) |
| 59 | + (define-values (expr-start expr-end) (skip-one-sexp in)) |
| 60 | + ;; Convert expr-end from 1-indexed to 0-indexed |
| 61 | + (define comment-end (if expr-end (sub1 expr-end) (sub1 end))) |
| 62 | + (loop (cons (closed-open-range sexp-start comment-end #:comparator natural<=>) ranges))] |
| 63 | + [else |
| 64 | + (loop ranges)]))) |
| 65 | + |
| 66 | + |
| 67 | +;; Helper to skip one s-expression worth of tokens after a #; comment |
| 68 | +(define (skip-one-sexp in) |
| 69 | + (let loop ([depth 0] |
| 70 | + [seen-non-whitespace? #f] |
| 71 | + [start-pos #f] |
| 72 | + [end-pos #f]) |
| 73 | + (define-values (lexeme type paren start end backup mode) (module-lexer in 0 #f)) |
| 74 | + (cond |
| 75 | + [(eof-object? lexeme) (values start-pos end-pos)] |
| 76 | + [(equal? type 'white-space) (loop depth seen-non-whitespace? start-pos end-pos)] |
| 77 | + [(equal? type 'sexp-comment) |
| 78 | + ;; Another sexp-comment; recursively skip its expression |
| 79 | + (define-values (nested-start nested-end) (skip-one-sexp in)) |
| 80 | + (loop depth #t (or start-pos start) nested-end)] |
| 81 | + [else |
| 82 | + (define is-opener? (and paren (memq paren '(|[| |(| |{|)))) |
| 83 | + (define is-closer? (and paren (memq paren '(|]| |)| |}|)))) |
| 84 | + (define new-depth |
| 85 | + (cond |
| 86 | + [is-opener? (add1 depth)] |
| 87 | + [is-closer? (sub1 depth)] |
| 88 | + [else depth])) |
| 89 | + (define new-start (or start-pos start)) |
| 90 | + (cond |
| 91 | + ;; If this is a non-paren token and we haven't seen anything yet, consume just this token |
| 92 | + [(and (not seen-non-whitespace?) (= depth 0) (not paren)) |
| 93 | + (values new-start end)] |
| 94 | + ;; If we just closed all parens (depth went from 1 to 0), we're done |
| 95 | + [(and (= new-depth 0) is-closer? (= depth 1)) |
| 96 | + (values new-start end)] |
| 97 | + ;; Otherwise, continue |
| 98 | + [else (loop new-depth #t new-start end)])]))) |
75 | 99 |
|
76 | 100 |
|
77 | 101 | (module+ test |
|
126 | 150 | (test-case "multiple line comments" |
127 | 151 | (define input "; Line 1\n; Line 2\n; Line 3\n") |
128 | 152 | (define expected (range-set (natural-range 0 27))) |
| 153 | + (check-equal? (read-comments-for-test input) expected)) |
| 154 | + |
| 155 | + (test-case "expression comments - simple" |
| 156 | + (define input "#;(foo)\n") |
| 157 | + (define expected (range-set (natural-range 0 7))) |
| 158 | + (check-equal? (read-comments-for-test input) expected)) |
| 159 | + |
| 160 | + (test-case "expression comments - nested" |
| 161 | + (define input "#;(foo (bar baz))\n") |
| 162 | + (define expected (range-set (natural-range 0 17))) |
| 163 | + (check-equal? (read-comments-for-test input) expected)) |
| 164 | + |
| 165 | + (test-case "expression comments - atom" |
| 166 | + (define input "#;atom\n") |
| 167 | + (define expected (range-set (natural-range 0 6))) |
| 168 | + (check-equal? (read-comments-for-test input) expected)) |
| 169 | + |
| 170 | + (test-case "expression comments - double" |
| 171 | + (define input "#; #; (foo) (bar)\n") |
| 172 | + (define expected (range-set (natural-range 0 17))) |
| 173 | + (check-equal? (read-comments-for-test input) expected)) |
| 174 | + |
| 175 | + (test-case "expression comments with code" |
| 176 | + (define input "(define x 1) #;(unused) (define y 2)\n") |
| 177 | + (define expected (range-set (natural-range 13 23))) |
129 | 178 | (check-equal? (read-comments-for-test input) expected)))) |
0 commit comments