Skip to content

Commit 20e8b57

Browse files
Copilotjackfirth
andcommitted
Replace custom comment lexer with module-lexer
- Use syntax-color/module-lexer instead of br-parser-tools/lex - Support expression comments (#;) - Add tests for expression comments - All existing tests pass Co-authored-by: jackfirth <[email protected]>
1 parent 08e1e5b commit 20e8b57

File tree

1 file changed

+97
-48
lines changed

1 file changed

+97
-48
lines changed

private/comment-reader.rkt

Lines changed: 97 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,11 @@
99
[read-comment-locations (->* () (input-port?) range-set?)]))
1010

1111

12-
(require br-parser-tools/lex
13-
racket/sequence
12+
(require racket/sequence
1413
rebellion/base/comparator
1514
rebellion/base/range
16-
rebellion/collection/list
1715
rebellion/collection/range-set
18-
rebellion/streaming/reducer
19-
rebellion/streaming/transducer
20-
resyntax/private/syntax-traversal
21-
(prefix-in : br-parser-tools/lex-sre))
16+
syntax-color/module-lexer)
2217

2318

2419
(module+ test
@@ -31,47 +26,76 @@
3126

3227
(define (read-comment-locations [in (current-input-port)])
3328
(port-count-lines! in)
34-
(define (next!)
35-
(comment-lexer in))
36-
(transduce (in-producer next! eof)
37-
(mapping srcloc-token-srcloc)
38-
(mapping srcloc-range)
39-
#:into (into-range-set natural<=>)))
40-
41-
42-
(define (srcloc-range srcloc)
43-
(define start (sub1 (srcloc-position srcloc)))
44-
(define end (+ start (srcloc-span srcloc)))
45-
(closed-open-range start end #:comparator natural<=>))
46-
47-
48-
(define-tokens racket-tokens (LINE-COMMENT BLOCK-COMMENT))
49-
50-
51-
(define-lex-abbrev racket-line-comment
52-
(concatenation ";" (complement (:: any-string "\n" any-string)) "\n"))
53-
54-
55-
(define (build-racket-line-comment lexeme)
56-
(token-LINE-COMMENT (string->immutable-string lexeme)))
57-
58-
59-
;; Technically not correct because block comments can be nested.
60-
(define-lex-abbrev racket-block-comment
61-
(concatenation "#|" (complement (:: any-string (:or "#|" "#|") any-string)) "|#"))
62-
63-
64-
(define (build-racket-block-comment lexeme)
65-
(token-BLOCK-COMMENT (string->immutable-string lexeme)))
66-
67-
68-
;; This lexer should also read string literals and discard them, so that comment-starting characters
69-
;; inside string literals are ignored.
70-
(define comment-lexer
71-
(lexer-srcloc
72-
[racket-line-comment (build-racket-line-comment lexeme)]
73-
[racket-block-comment (build-racket-block-comment lexeme)]
74-
[any-char (return-without-srcloc (comment-lexer input-port))]))
29+
(let loop ([ranges '()])
30+
(define-values (lexeme type paren start end backup mode) (module-lexer in 0 #f))
31+
(cond
32+
[(eof-object? lexeme)
33+
(apply range-set ranges)]
34+
[(equal? type 'comment)
35+
;; Convert from 1-indexed positions to 0-indexed
36+
(define comment-start (sub1 start))
37+
(define comment-end-base (sub1 end))
38+
;; For line comments (non-empty lexeme), include the trailing newline
39+
;; For block comments (empty lexeme), don't
40+
(define is-line-comment? (not (equal? lexeme "")))
41+
(define pos-before-peek (file-position in))
42+
(define-values (next-lexeme next-type next-paren next-start next-end next-backup next-mode)
43+
(module-lexer in 0 mode))
44+
(define comment-end
45+
(cond
46+
[(and is-line-comment?
47+
(equal? next-type 'white-space)
48+
(equal? next-lexeme "\n"))
49+
;; Include the trailing newline for line comments
50+
(sub1 next-end)]
51+
[else
52+
;; Put the port position back and use the original end
53+
(file-position in pos-before-peek)
54+
comment-end-base]))
55+
(loop (cons (closed-open-range comment-start comment-end #:comparator natural<=>) ranges))]
56+
[(equal? type 'sexp-comment)
57+
;; For expression comments, we need to skip the following s-expression
58+
(define sexp-start (sub1 start))
59+
(define-values (expr-start expr-end) (skip-one-sexp in))
60+
;; Convert expr-end from 1-indexed to 0-indexed
61+
(define comment-end (if expr-end (sub1 expr-end) (sub1 end)))
62+
(loop (cons (closed-open-range sexp-start comment-end #:comparator natural<=>) ranges))]
63+
[else
64+
(loop ranges)])))
65+
66+
67+
;; Helper to skip one s-expression worth of tokens after a #; comment
68+
(define (skip-one-sexp in)
69+
(let loop ([depth 0]
70+
[seen-non-whitespace? #f]
71+
[start-pos #f]
72+
[end-pos #f])
73+
(define-values (lexeme type paren start end backup mode) (module-lexer in 0 #f))
74+
(cond
75+
[(eof-object? lexeme) (values start-pos end-pos)]
76+
[(equal? type 'white-space) (loop depth seen-non-whitespace? start-pos end-pos)]
77+
[(equal? type 'sexp-comment)
78+
;; Another sexp-comment; recursively skip its expression
79+
(define-values (nested-start nested-end) (skip-one-sexp in))
80+
(loop depth #t (or start-pos start) nested-end)]
81+
[else
82+
(define is-opener? (and paren (memq paren '(|[| |(| |{|))))
83+
(define is-closer? (and paren (memq paren '(|]| |)| |}|))))
84+
(define new-depth
85+
(cond
86+
[is-opener? (add1 depth)]
87+
[is-closer? (sub1 depth)]
88+
[else depth]))
89+
(define new-start (or start-pos start))
90+
(cond
91+
;; If this is a non-paren token and we haven't seen anything yet, consume just this token
92+
[(and (not seen-non-whitespace?) (= depth 0) (not paren))
93+
(values new-start end)]
94+
;; If we just closed all parens (depth went from 1 to 0), we're done
95+
[(and (= new-depth 0) is-closer? (= depth 1))
96+
(values new-start end)]
97+
;; Otherwise, continue
98+
[else (loop new-depth #t new-start end)])])))
7599

76100

77101
(module+ test
@@ -126,4 +150,29 @@
126150
(test-case "multiple line comments"
127151
(define input "; Line 1\n; Line 2\n; Line 3\n")
128152
(define expected (range-set (natural-range 0 27)))
153+
(check-equal? (read-comments-for-test input) expected))
154+
155+
(test-case "expression comments - simple"
156+
(define input "#;(foo)\n")
157+
(define expected (range-set (natural-range 0 7)))
158+
(check-equal? (read-comments-for-test input) expected))
159+
160+
(test-case "expression comments - nested"
161+
(define input "#;(foo (bar baz))\n")
162+
(define expected (range-set (natural-range 0 17)))
163+
(check-equal? (read-comments-for-test input) expected))
164+
165+
(test-case "expression comments - atom"
166+
(define input "#;atom\n")
167+
(define expected (range-set (natural-range 0 6)))
168+
(check-equal? (read-comments-for-test input) expected))
169+
170+
(test-case "expression comments - double"
171+
(define input "#; #; (foo) (bar)\n")
172+
(define expected (range-set (natural-range 0 17)))
173+
(check-equal? (read-comments-for-test input) expected))
174+
175+
(test-case "expression comments with code"
176+
(define input "(define x 1) #;(unused) (define y 2)\n")
177+
(define expected (range-set (natural-range 13 23)))
129178
(check-equal? (read-comments-for-test input) expected))))

0 commit comments

Comments
 (0)