Skip to content

Commit 99b128c

Browse files
committed
Fix slowness in angle bracket matching
The problem was caused by the logic that made it refontify the whole buffer when "font-lock-syntactically-fontified" was set to a position before the start of the region to be potentially fontified. Unfortunately that variable is not reliably set when fontifying a large buffer. Fortunately, the new logic is much simpler, and font-lock already takes care of ensuring that everything before font-lock-beg was syntactically fontified. The other problem was calling syntax-ppss on positions known not to be fontified yet. This fixes both of these issues, and the angle bracket matching now works on larger buffers without pausing on every keystroke.
1 parent ee564d5 commit 99b128c

File tree

1 file changed

+99
-97
lines changed

1 file changed

+99
-97
lines changed

rust-mode.el

Lines changed: 99 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -428,43 +428,43 @@
428428
part of it. Adjusts to include the r[#] of a raw string as
429429
well."
430430

431-
(let ((orig-beg font-lock-beg)
432-
(orig-end font-lock-end))
433-
(cond
434-
;; If we are not syntactically fontified yet, we cannot correctly cover
435-
;; anything less than the full buffer. The syntactic fontification
436-
;; modifies the syntax, so until it's done we can't use the syntax to
437-
;; determine what to fontify.
438-
((< (or font-lock-syntactically-fontified 0) font-lock-end)
439-
(setq font-lock-beg 1)
440-
(setq font-lock-end (buffer-end 1)))
441-
442-
((let* ((beg-ppss (syntax-ppss font-lock-beg))
443-
(beg-in-cmnt (and (nth 4 beg-ppss) (nth 8 beg-ppss)))
444-
(beg-in-str (nth 3 beg-ppss))
445-
(end-ppss (syntax-ppss font-lock-end))
446-
(end-in-str (nth 3 end-ppss)))
447-
448-
(when (and beg-in-str (> font-lock-beg (nth 8 beg-ppss)))
431+
(save-excursion
432+
(let ((orig-beg font-lock-beg)
433+
(orig-end font-lock-end))
434+
435+
(let*
436+
;; It's safe to call `syntax-ppss' here on positions that are
437+
;; already syntactically fontified
438+
((beg-ppss (syntax-ppss font-lock-beg))
439+
(beg-in-cmnt (and beg-ppss (nth 4 beg-ppss) (nth 8 beg-ppss)))
440+
(beg-in-str (and beg-ppss (nth 3 beg-ppss) (nth 8 beg-ppss))))
441+
442+
(when (and beg-in-str (>= font-lock-beg beg-in-str))
449443
(setq font-lock-beg (nth 8 beg-ppss))
450444
(while (equal ?# (char-before font-lock-beg))
451445
(setq font-lock-beg (1- font-lock-beg)))
452446
(when (equal ?r (char-before font-lock-beg))
453447
(setq font-lock-beg (1- font-lock-beg))))
454448

455449
(when (and beg-in-cmnt (> font-lock-beg beg-in-cmnt))
456-
(setq font-lock-beg beg-in-cmnt))
457-
458-
(when end-in-str
459-
(save-excursion
460-
(goto-char (nth 8 end-ppss))
461-
(ignore-errors (forward-sexp))
462-
(setq font-lock-end (max font-lock-end (point)))))
463-
)))
464-
465-
(or (/= font-lock-beg orig-beg)
466-
(/= font-lock-end orig-end))
467-
))
450+
(setq font-lock-beg beg-in-cmnt)))
451+
452+
;; We need to make sure that if the region ends inside a raw string, we
453+
;; extend it out past the end of it. But we can't use `syntax-ppss' to
454+
;; detect that, because that depends on font-lock already being done, and we
455+
;; are trying to figure out how much to font-lock before that. So we use
456+
;; the regexp directly.
457+
(save-match-data
458+
(goto-char font-lock-beg)
459+
(while (and (< (point) font-lock-end)
460+
(re-search-forward rust-re-non-standard-string (buffer-end 1) t)
461+
(<= (match-beginning 0) font-lock-end))
462+
(setq font-lock-end (max font-lock-end (match-end 0)))
463+
(goto-char (1+ (match-beginning 0)))))
464+
465+
(or (/= font-lock-beg orig-beg)
466+
(/= font-lock-end orig-end))
467+
)))
468468

469469
(defun rust-conditional-re-search-forward (regexp bound condition)
470470
;; Search forward for regexp (with bound). If found, call condition and return the found
@@ -492,77 +492,79 @@
492492
(set-match-data (nth 1 ret-list))
493493
(nth 0 ret-list))))
494494

495+
(defconst rust-re-non-standard-string
496+
(rx
497+
(or
498+
;; Raw string: if it matches, it ends up with the starting character
499+
;; of the string as group 1, any ending backslashes as group 4, and
500+
;; the ending character as either group 5 or group 6.
501+
(seq
502+
;; The "r" starts the raw string. Capture it as group 1 to mark it as such syntactically:
503+
(group "r")
504+
505+
;; Then either:
506+
(or
507+
;; a sequence of at least one "#" (followed by quote). Capture all
508+
;; but the last "#" as group 2 for this case.
509+
(seq (group (* "#")) "#\"")
510+
511+
;; ...or a quote without any "#". Capture it as group 3. This is
512+
;; used later to match the opposite quote only if this capture
513+
;; occurred
514+
(group "\""))
515+
516+
;; The contents of the string:
517+
(*? anything)
518+
519+
;; If there are any backslashes at the end of the string, capture
520+
;; them as group 4 so we can suppress the normal escape syntax
521+
;; parsing:
522+
(group (* "\\"))
523+
524+
;; Then the end of the string--the backreferences ensure that we
525+
;; only match the kind of ending that corresponds to the beginning
526+
;; we had:
527+
(or
528+
;; There were "#"s - capture the last one as group 5 to mark it as
529+
;; the end of the string:
530+
(seq "\"" (backref 2) (group "#"))
531+
532+
;; No "#"s - capture the ending quote (using a backref to group 3,
533+
;; so that we can't match a quote if we had "#"s) as group 6
534+
(group (backref 3))))
535+
536+
;; Character literal: match the beginning ' of a character literal
537+
;; as group 7, and the ending one as group 8
538+
(seq
539+
(group "'")
540+
(or
541+
(seq
542+
"\\"
543+
(or
544+
(: "U" (= 8 xdigit))
545+
(: "u" (= 4 xdigit))
546+
(: "x" (= 2 xdigit))
547+
(any "'nrt0\"\\")))
548+
(not (any "'\\"))
549+
)
550+
(group "'"))
551+
)
552+
))
553+
495554
(defun rust-look-for-non-standard-string (bound)
496555
;; Find a raw string or character literal, but only if it's not in the middle
497556
;; of another string or a comment.
498557

499-
(let* ((non-standard-str-regexp
500-
(rx
501-
(or
502-
;; Raw string: if it matches, it ends up with the starting character
503-
;; of the string as group 1, any ending backslashes as group 4, and
504-
;; the ending character as either group 5 or group 6.
505-
(seq
506-
;; The "r" starts the raw string. Capture it as group 1 to mark it as such syntactically:
507-
(group "r")
508-
509-
;; Then either:
510-
(or
511-
;; a sequence of at least one "#" (followed by quote). Capture all
512-
;; but the last "#" as group 2 for this case.
513-
(seq (group (* "#")) "#\"")
514-
515-
;; ...or a quote without any "#". Capture it as group 3. This is
516-
;; used later to match the opposite quote only if this capture
517-
;; occurred
518-
(group "\""))
519-
520-
;; The contents of the string:
521-
(*? anything)
522-
523-
;; If there are any backslashes at the end of the string, capture
524-
;; them as group 4 so we can suppress the normal escape syntax
525-
;; parsing:
526-
(group (* "\\"))
527-
528-
;; Then the end of the string--the backreferences ensure that we
529-
;; only match the kind of ending that corresponds to the beginning
530-
;; we had:
531-
(or
532-
;; There were "#"s - capture the last one as group 5 to mark it as
533-
;; the end of the string:
534-
(seq "\"" (backref 2) (group "#"))
535-
536-
;; No "#"s - capture the ending quote (using a backref to group 3,
537-
;; so that we can't match a quote if we had "#"s) as group 6
538-
(group (backref 3))))
539-
540-
;; Character literal: match the beginning ' of a character literal
541-
;; as group 7, and the ending one as group 8
542-
(seq
543-
(group "'")
544-
(or
545-
(seq
546-
"\\"
547-
(or
548-
(: "U" (= 8 xdigit))
549-
(: "u" (= 4 xdigit))
550-
(: "x" (= 2 xdigit))
551-
(any "'nrt0\"\\")))
552-
(not (any "'\\"))
553-
)
554-
(group "'"))
555-
)
556-
)))
557-
(rust-conditional-re-search-forward
558-
non-standard-str-regexp bound
559-
(lambda ()
560-
(let ((pstate (syntax-ppss (match-beginning 0))))
561-
(not
562-
(or
563-
(nth 4 pstate) ;; Skip if in a comment
564-
(and (nth 3 pstate) (wholenump (nth 8 pstate)) (< (nth 8 pstate) (match-beginning 0))) ;; Skip if in a string that isn't starting here
565-
)))))))
558+
(rust-conditional-re-search-forward
559+
rust-re-non-standard-string
560+
bound
561+
(lambda ()
562+
(let ((pstate (syntax-ppss (match-beginning 0))))
563+
(not
564+
(or
565+
(nth 4 pstate) ;; Skip if in a comment
566+
(and (nth 3 pstate) (wholenump (nth 8 pstate)) (< (nth 8 pstate) (match-beginning 0))) ;; Skip if in a string that isn't starting here
567+
))))))
566568

567569
(defun rust-syntax-class-before-point ()
568570
(when (> (point) 1)

0 commit comments

Comments
 (0)