Skip to content

Commit 75da3b0

Browse files
committed
Merge pull request #89 from MicahChalmer/fix-slow-angle-bracket-matching
Fix slow angle bracket matching
2 parents ee564d5 + ec3855f commit 75da3b0

File tree

2 files changed

+137
-97
lines changed

2 files changed

+137
-97
lines changed

rust-mode-tests.el

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,6 +1110,41 @@ this_is_not_a_string();)"
11101110
(should (equal nil (get-text-property 28 'face))) ;; Semicolon--should not be part of the string
11111111
))
11121112

1113+
(ert-deftest font-lock-runaway-raw-string ()
1114+
(rust-test-font-lock
1115+
"const Z = r#\"my raw string\";\n// oops this is still in the string"
1116+
'("const" font-lock-keyword-face
1117+
"Z" font-lock-type-face
1118+
"r#\"my raw string\";\n// oops this is still in the string" font-lock-string-face))
1119+
)
1120+
1121+
(ert-deftest font-lock-recognize-closing-raw-string ()
1122+
(with-temp-buffer
1123+
(rust-mode)
1124+
(insert "const foo = r##\"
1125+
1...............................................50
1126+
1...............................................50
1127+
1...............................................50
1128+
1...............195-->\"; let ...................50
1129+
1...............................................50
1130+
1...............................................50
1131+
1...............................................50
1132+
1...............................................50
1133+
1...............................................50
1134+
1......................500......................50
1135+
\"#;
1136+
")
1137+
(font-lock-fontify-buffer)
1138+
(goto-char 530)
1139+
(insert "#")
1140+
;; We have now closed the raw string. Check that the whole string is
1141+
;; recognized after the change
1142+
(font-lock-after-change-function (1- (point)) (point) 0)
1143+
(should (equal 'font-lock-string-face (get-text-property 195 'face))) ;; The "let"
1144+
(should (equal 'font-lock-string-face (get-text-property 500 'face))) ;; The "500"
1145+
(should (equal nil (get-text-property 531 'face))) ;; The second ";"
1146+
))
1147+
11131148
;;; Documentation comments
11141149

11151150
(ert-deftest font-lock-doc-line-comment-parent ()

rust-mode.el

Lines changed: 102 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -428,43 +428,43 @@
428428
part of it. Adjusts to include the r[#] of a raw string as
429429
well."
430430

431-
(let ((orig-beg font-lock-beg)
432-
(orig-end font-lock-end))
433-
(cond
434-
;; If we are not syntactically fontified yet, we cannot correctly cover
435-
;; anything less than the full buffer. The syntactic fontification
436-
;; modifies the syntax, so until it's done we can't use the syntax to
437-
;; determine what to fontify.
438-
((< (or font-lock-syntactically-fontified 0) font-lock-end)
439-
(setq font-lock-beg 1)
440-
(setq font-lock-end (buffer-end 1)))
441-
442-
((let* ((beg-ppss (syntax-ppss font-lock-beg))
443-
(beg-in-cmnt (and (nth 4 beg-ppss) (nth 8 beg-ppss)))
444-
(beg-in-str (nth 3 beg-ppss))
445-
(end-ppss (syntax-ppss font-lock-end))
446-
(end-in-str (nth 3 end-ppss)))
447-
448-
(when (and beg-in-str (> font-lock-beg (nth 8 beg-ppss)))
431+
(save-excursion
432+
(let ((orig-beg font-lock-beg)
433+
(orig-end font-lock-end))
434+
435+
(let*
436+
;; It's safe to call `syntax-ppss' here on positions that are
437+
;; already syntactically fontified
438+
((beg-ppss (syntax-ppss font-lock-beg))
439+
(beg-in-cmnt (and beg-ppss (nth 4 beg-ppss) (nth 8 beg-ppss)))
440+
(beg-in-str (and beg-ppss (nth 3 beg-ppss) (nth 8 beg-ppss))))
441+
442+
(when (and beg-in-str (>= font-lock-beg beg-in-str))
449443
(setq font-lock-beg (nth 8 beg-ppss))
450444
(while (equal ?# (char-before font-lock-beg))
451445
(setq font-lock-beg (1- font-lock-beg)))
452446
(when (equal ?r (char-before font-lock-beg))
453447
(setq font-lock-beg (1- font-lock-beg))))
454448

455449
(when (and beg-in-cmnt (> font-lock-beg beg-in-cmnt))
456-
(setq font-lock-beg beg-in-cmnt))
457-
458-
(when end-in-str
459-
(save-excursion
460-
(goto-char (nth 8 end-ppss))
461-
(ignore-errors (forward-sexp))
462-
(setq font-lock-end (max font-lock-end (point)))))
463-
)))
464-
465-
(or (/= font-lock-beg orig-beg)
466-
(/= font-lock-end orig-end))
467-
))
450+
(setq font-lock-beg beg-in-cmnt)))
451+
452+
;; We need to make sure that if the region ends inside a raw string, we
453+
;; extend it out past the end of it. But we can't use `syntax-ppss' to
454+
;; detect that, because that depends on font-lock already being done, and we
455+
;; are trying to figure out how much to font-lock before that. So we use
456+
;; the regexp directly.
457+
(save-match-data
458+
(goto-char font-lock-beg)
459+
(while (and (< (point) font-lock-end)
460+
(re-search-forward rust-re-non-standard-string (buffer-end 1) t)
461+
(<= (match-beginning 0) font-lock-end))
462+
(setq font-lock-end (max font-lock-end (match-end 0)))
463+
(goto-char (1+ (match-beginning 0)))))
464+
465+
(or (/= font-lock-beg orig-beg)
466+
(/= font-lock-end orig-end))
467+
)))
468468

469469
(defun rust-conditional-re-search-forward (regexp bound condition)
470470
;; Search forward for regexp (with bound). If found, call condition and return the found
@@ -492,77 +492,82 @@
492492
(set-match-data (nth 1 ret-list))
493493
(nth 0 ret-list))))
494494

495+
(defconst rust-re-non-standard-string
496+
(rx
497+
(or
498+
;; Raw string: if it matches, it ends up with the starting character
499+
;; of the string as group 1, any ending backslashes as group 4, and
500+
;; the ending character as either group 5 or group 6.
501+
(seq
502+
;; The "r" starts the raw string. Capture it as group 1 to mark it as such syntactically:
503+
(group "r")
504+
505+
;; Then either:
506+
(or
507+
;; a sequence of at least one "#" (followed by quote). Capture all
508+
;; but the last "#" as group 2 for this case.
509+
(seq (group (* "#")) "#\"")
510+
511+
;; ...or a quote without any "#". Capture it as group 3. This is
512+
;; used later to match the opposite quote only if this capture
513+
;; occurred
514+
(group "\""))
515+
516+
;; The contents of the string:
517+
(*? anything)
518+
519+
;; If there are any backslashes at the end of the string, capture
520+
;; them as group 4 so we can suppress the normal escape syntax
521+
;; parsing:
522+
(group (* "\\"))
523+
524+
;; Then the end of the string--the backreferences ensure that we
525+
;; only match the kind of ending that corresponds to the beginning
526+
;; we had:
527+
(or
528+
;; There were "#"s - capture the last one as group 5 to mark it as
529+
;; the end of the string:
530+
(seq "\"" (backref 2) (group "#"))
531+
532+
;; No "#"s - capture the ending quote (using a backref to group 3,
533+
;; so that we can't match a quote if we had "#"s) as group 6
534+
(group (backref 3))
535+
536+
;; If the raw string wasn't actually closed, go all the way to the end
537+
string-end))
538+
539+
;; Character literal: match the beginning ' of a character literal
540+
;; as group 7, and the ending one as group 8
541+
(seq
542+
(group "'")
543+
(or
544+
(seq
545+
"\\"
546+
(or
547+
(: "U" (= 8 xdigit))
548+
(: "u" (= 4 xdigit))
549+
(: "x" (= 2 xdigit))
550+
(any "'nrt0\"\\")))
551+
(not (any "'\\"))
552+
)
553+
(group "'"))
554+
)
555+
))
556+
495557
(defun rust-look-for-non-standard-string (bound)
496558
;; Find a raw string or character literal, but only if it's not in the middle
497559
;; of another string or a comment.
498560

499-
(let* ((non-standard-str-regexp
500-
(rx
501-
(or
502-
;; Raw string: if it matches, it ends up with the starting character
503-
;; of the string as group 1, any ending backslashes as group 4, and
504-
;; the ending character as either group 5 or group 6.
505-
(seq
506-
;; The "r" starts the raw string. Capture it as group 1 to mark it as such syntactically:
507-
(group "r")
508-
509-
;; Then either:
510-
(or
511-
;; a sequence of at least one "#" (followed by quote). Capture all
512-
;; but the last "#" as group 2 for this case.
513-
(seq (group (* "#")) "#\"")
514-
515-
;; ...or a quote without any "#". Capture it as group 3. This is
516-
;; used later to match the opposite quote only if this capture
517-
;; occurred
518-
(group "\""))
519-
520-
;; The contents of the string:
521-
(*? anything)
522-
523-
;; If there are any backslashes at the end of the string, capture
524-
;; them as group 4 so we can suppress the normal escape syntax
525-
;; parsing:
526-
(group (* "\\"))
527-
528-
;; Then the end of the string--the backreferences ensure that we
529-
;; only match the kind of ending that corresponds to the beginning
530-
;; we had:
531-
(or
532-
;; There were "#"s - capture the last one as group 5 to mark it as
533-
;; the end of the string:
534-
(seq "\"" (backref 2) (group "#"))
535-
536-
;; No "#"s - capture the ending quote (using a backref to group 3,
537-
;; so that we can't match a quote if we had "#"s) as group 6
538-
(group (backref 3))))
539-
540-
;; Character literal: match the beginning ' of a character literal
541-
;; as group 7, and the ending one as group 8
542-
(seq
543-
(group "'")
544-
(or
545-
(seq
546-
"\\"
547-
(or
548-
(: "U" (= 8 xdigit))
549-
(: "u" (= 4 xdigit))
550-
(: "x" (= 2 xdigit))
551-
(any "'nrt0\"\\")))
552-
(not (any "'\\"))
553-
)
554-
(group "'"))
555-
)
556-
)))
557-
(rust-conditional-re-search-forward
558-
non-standard-str-regexp bound
559-
(lambda ()
560-
(let ((pstate (syntax-ppss (match-beginning 0))))
561-
(not
562-
(or
563-
(nth 4 pstate) ;; Skip if in a comment
564-
(and (nth 3 pstate) (wholenump (nth 8 pstate)) (< (nth 8 pstate) (match-beginning 0))) ;; Skip if in a string that isn't starting here
565-
)))))))
561+
(rust-conditional-re-search-forward
562+
rust-re-non-standard-string
563+
bound
564+
(lambda ()
565+
(let ((pstate (syntax-ppss (match-beginning 0))))
566+
(not
567+
(or
568+
(nth 4 pstate) ;; Skip if in a comment
569+
(and (nth 3 pstate) (wholenump (nth 8 pstate)) (< (nth 8 pstate) (match-beginning 0))) ;; Skip if in a string that isn't starting here
570+
))))))
566571

567572
(defun rust-syntax-class-before-point ()
568573
(when (> (point) 1)

0 commit comments

Comments
 (0)