Skip to content

Commit 6e72f64

Browse files
committed
Fix bugs in raw strings and character literals
1 parent 53c558c commit 6e72f64

File tree

2 files changed

+196
-86
lines changed

2 files changed

+196
-86
lines changed

rust-mode-tests.el

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1482,3 +1482,83 @@ la la\");
14821482
(test-indent
14831483
;; Needs to leave 1 space before "world"
14841484
"\"hello \\\n world\""))
1485+
1486+
(defun rust-test-matching-parens (content pairs &optional nonparen-positions)
1487+
"Assert that in rust-mode, given a buffer with the given `content',
1488+
Emacs's paren matching will find all of the pairs of positions
1489+
as matching braces. The list of nonparen-positions asserts
1490+
specific positions that should NOT be considered to be
1491+
parens/braces of any kind.
1492+
1493+
This does not assert that the `pairs' list is
1494+
comprehensive--there can be additional pairs that don't appear
1495+
in the list and the test still passes (as long as none of their
1496+
positions appear in `nonparen-positions'.)"
1497+
(with-temp-buffer
1498+
(rust-mode)
1499+
(insert content)
1500+
(font-lock-fontify-buffer)
1501+
(dolist (pair pairs)
1502+
(let* ((open-pos (nth 0 pair))
1503+
(close-pos (nth 1 pair)))
1504+
(should (equal 4 (syntax-class (syntax-after open-pos))))
1505+
(should (equal 5 (syntax-class (syntax-after close-pos))))
1506+
(should (equal (scan-sexps open-pos 1) (+ 1 close-pos)))
1507+
(should (equal (scan-sexps (+ 1 close-pos) -1) open-pos))))
1508+
(dolist (nonpar-pos nonparen-positions)
1509+
(let ((nonpar-syntax-class (syntax-class (syntax-after nonpar-pos))))
1510+
(should (not (equal 4 nonpar-syntax-class)))
1511+
(should (not (equal 5 nonpar-syntax-class)))))))
1512+
1513+
(ert-deftest rust-test-unmatched-single-quote-in-comment-paren-matching ()
1514+
;; This was a bug from the char quote handling that affected the paren
1515+
;; matching. An unmatched quote char in a comment caused the problems.
1516+
(rust-test-matching-parens
1517+
"// If this appeared first in the file...
1518+
\"\\
1519+
{\";
1520+
1521+
// And the { was not the on the first column:
1522+
{
1523+
// This then messed up the paren matching: '\\'
1524+
}
1525+
1526+
"
1527+
'((97 150) ;; The { and } at the bottom
1528+
)))
1529+
1530+
(ert-deftest rust-test-two-character-quotes-in-a-row ()
1531+
(with-temp-buffer
1532+
(rust-mode)
1533+
(font-lock-fontify-buffer)
1534+
(insert "'\\n','a', fn")
1535+
(font-lock-after-change-function 1 12 0)
1536+
1537+
(should (equal 'font-lock-string-face (get-text-property 3 'face)))
1538+
(should (equal nil (get-text-property 5 'face)))
1539+
(should (equal 'font-lock-string-face (get-text-property 7 'face)))
1540+
(should (equal nil (get-text-property 9 'face)))
1541+
(should (equal 'font-lock-keyword-face (get-text-property 12 'face)))
1542+
)
1543+
)
1544+
1545+
(ert-deftest single-quote-null-char ()
1546+
(rust-test-font-lock
1547+
"'\\0' 'a' fn"
1548+
'("'\\0'" font-lock-string-face
1549+
"'a'" font-lock-string-face
1550+
"fn" font-lock-keyword-face)))
1551+
1552+
(ert-deftest r-in-string-after-single-quoted-double-quote ()
1553+
(rust-test-font-lock
1554+
"'\"';\n\"r\";\n\"oops\";"
1555+
'("'\"'" font-lock-string-face
1556+
"\"r\"" font-lock-string-face
1557+
"\"oops\"" font-lock-string-face
1558+
)))
1559+
1560+
(ert-deftest char-literal-after-quote-in-raw-string ()
1561+
(rust-test-font-lock
1562+
"r#\"\"\"#;\n'q'"
1563+
'("r#\"\"\"#" font-lock-string-face
1564+
"'q'" font-lock-string-face)))

rust-mode.el

Lines changed: 116 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -374,38 +374,53 @@
374374
("fn" . font-lock-function-name-face)
375375
("static" . font-lock-constant-face)))))
376376

377-
(defun rust-extend-region-raw-string ()
377+
(defun rust-font-lock-extend-region ()
378378
"Extend the region given by `font-lock-beg' and `font-lock-end'
379-
to include the beginning of a string if it includes part of it.
380-
Adjusts to include the r[#] of a raw string as well."
381-
382-
(let* ((orig-beg font-lock-beg)
383-
(orig-end font-lock-end)
384-
(beg-ppss (syntax-ppss font-lock-beg))
385-
(beg-in-str (nth 3 beg-ppss))
386-
(end-ppss (syntax-ppss font-lock-end))
387-
(end-in-str (nth 3 end-ppss)))
388-
389-
(when (and beg-in-str (> font-lock-beg (nth 8 beg-ppss)))
390-
(setq font-lock-beg str-beg)
391-
(while (equal ?# (char-before font-lock-beg))
392-
(setq font-lock-beg (1- font-lock-beg)))
393-
(when (equal ?r (char-before font-lock-beg))
394-
(setq font-lock-beg (1- font-lock-beg))))
395-
396-
(when end-in-str
397-
(save-excursion
398-
(goto-char (nth 8 end-ppss))
399-
(ignore-errors (forward-sexp))
400-
(setq font-lock-end (max font-lock-end (point)))))
401-
402-
;; If we have the beginning of a raw string in the region, make sure we have the end of
403-
;; it.
404-
(when (or beg-in-str end-in-str)
405-
(save-excursion
406-
(goto-char font-lock-beg)
407-
(while (and (< (point) font-lock-end) (ignore-errors (rust-look-for-raw-string (buffer-end 1)))))
408-
(setq font-lock-end (max font-lock-end (point)))))
379+
to include the beginning of a string or comment if it includes
380+
part of it. Adjusts to include the r[#] of a raw string as
381+
well."
382+
383+
(let ((orig-beg font-lock-beg)
384+
(orig-end font-lock-end))
385+
(cond
386+
;; If we are not syntactically fontified yet, we cannot correctly cover
387+
;; anything less than the full buffer. The syntactic fontification
388+
;; modifies the syntax, so until it's done we can't use the syntax to
389+
;; determine what to fontify.
390+
((< (or font-lock-syntactically-fontified 0) font-lock-end)
391+
(setq font-lock-beg 1)
392+
(setq font-lock-end (buffer-end 1)))
393+
394+
((let* ((beg-ppss (syntax-ppss font-lock-beg))
395+
(beg-in-cmnt (and (nth 4 beg-ppss) (nth 8 beg-ppss)))
396+
(beg-in-str (nth 3 beg-ppss))
397+
(end-ppss (syntax-ppss font-lock-end))
398+
(end-in-str (nth 3 end-ppss)))
399+
400+
(when (and beg-in-str (> font-lock-beg (nth 8 beg-ppss)))
401+
(setq font-lock-beg (nth 8 beg-ppss))
402+
(while (equal ?# (char-before font-lock-beg))
403+
(setq font-lock-beg (1- font-lock-beg)))
404+
(when (equal ?r (char-before font-lock-beg))
405+
(setq font-lock-beg (1- font-lock-beg))))
406+
407+
(when (and beg-in-cmnt (> font-lock-beg beg-in-cmnt))
408+
(setq font-lock-beg beg-in-cmnt))
409+
410+
(when end-in-str
411+
(save-excursion
412+
(goto-char (nth 8 end-ppss))
413+
(ignore-errors (forward-sexp))
414+
(setq font-lock-end (max font-lock-end (point)))))
415+
416+
;; If we have the beginning of a raw string in the region, make sure we have the end of
417+
;; it.
418+
(when (or beg-in-str end-in-str)
419+
(save-excursion
420+
(goto-char font-lock-beg)
421+
(while (and (< (point) font-lock-end) (ignore-errors (rust-look-for-raw-string (buffer-end 1)))))
422+
(setq font-lock-end (max font-lock-end (point)))))
423+
)))
409424

410425
(or (/= font-lock-beg orig-beg)
411426
(/= font-lock-end orig-end))
@@ -437,67 +452,82 @@
437452
(set-match-data (nth 1 ret-list))
438453
(nth 0 ret-list))))
439454

440-
(defun rust-look-for-raw-string (bound)
441-
;; Find a raw string, but only if it's not in the middle of another string or
442-
;; a comment
455+
(defun rust-look-for-non-standard-string (bound)
456+
;; Find a raw string or character literal, but only if it's not in the middle
457+
;; of another string or a comment.
443458

444-
(let* ((raw-str-regexp
459+
(let* ((non-standard-str-regexp
445460
(rx
446-
(seq
447-
;; The "r" starts the raw string. Capture it as group 1 to mark it as such syntactically:
448-
(group "r")
449-
450-
;; Then either:
451-
(or
452-
;; a sequence at least one "#" (followed by quote). Capture all
453-
;; but the last "#" as group 2 for this case.
454-
(seq (group (* "#")) "#\"")
455-
456-
;; ...or a quote without any "#". Capture it as group 3. This is
457-
;; used later to match the opposite quote only if this capture
458-
;; occurred
459-
(group "\""))
460-
461-
;; The contents of the string:
462-
(*? anything)
463-
464-
;; If there are any backslashes at the end of the string, capture
465-
;; them as group 4 so we can suppress the normal escape syntax
466-
;; parsing:
467-
(group (* "\\"))
468-
469-
;; Then the end of the string--the backreferences ensure that we
470-
;; only match the kind of ending that corresponds to the beginning
471-
;; we had:
472-
(or
473-
;; There were "#"s - capture the last one as group 5 to mark it as
474-
;; the end of the string:
475-
(seq "\"" (backref 2) (group "#"))
476-
477-
;; No "#"s - capture the ending quote (using a backref to group 3,
478-
;; so that we can't match a quote if we had "#"s) as group 6
479-
(group (backref 3))))
480-
;; If it matches, it ends up with the starting character of the string
481-
;; as group 1, any ending backslashes as group 4, and the ending
482-
;; character as either group 5 or group 6.
461+
(or
462+
;; Raw string: if it matches, it ends up with the starting character
463+
;; of the string as group 1, any ending backslashes as group 4, and
464+
;; the ending character as either group 5 or group 6.
465+
(seq
466+
;; The "r" starts the raw string. Capture it as group 1 to mark it as such syntactically:
467+
(group "r")
468+
469+
;; Then either:
470+
(or
471+
;; a sequence of at least one "#" (followed by quote). Capture all
472+
;; but the last "#" as group 2 for this case.
473+
(seq (group (* "#")) "#\"")
474+
475+
;; ...or a quote without any "#". Capture it as group 3. This is
476+
;; used later to match the opposite quote only if this capture
477+
;; occurred
478+
(group "\""))
479+
480+
;; The contents of the string:
481+
(*? anything)
482+
483+
;; If there are any backslashes at the end of the string, capture
484+
;; them as group 4 so we can suppress the normal escape syntax
485+
;; parsing:
486+
(group (* "\\"))
487+
488+
;; Then the end of the string--the backreferences ensure that we
489+
;; only match the kind of ending that corresponds to the beginning
490+
;; we had:
491+
(or
492+
;; There were "#"s - capture the last one as group 5 to mark it as
493+
;; the end of the string:
494+
(seq "\"" (backref 2) (group "#"))
495+
496+
;; No "#"s - capture the ending quote (using a backref to group 3,
497+
;; so that we can't match a quote if we had "#"s) as group 6
498+
(group (backref 3))))
499+
500+
;; Character literal: match the beginning ' of a character literal
501+
;; as group 7, and the ending one as group 8
502+
(seq
503+
(group "'")
504+
(or
505+
(seq
506+
"\\"
507+
(or
508+
(: "U" (= 8 xdigit))
509+
(: "u" (= 4 xdigit))
510+
(: "x" (= 2 xdigit))
511+
(any "'nrt0\"\\")))
512+
(not (any "'\\"))
513+
)
514+
(group "'"))
515+
)
483516
)))
484517
(rust-conditional-re-search-forward
485-
raw-str-regexp bound
486-
(lambda () (save-excursion
487-
(goto-char (match-beginning 0))
488-
(not (rust-in-str-or-cmnt)))))))
518+
non-standard-str-regexp bound
519+
(lambda ()
520+
(let ((pstate (syntax-ppss (match-beginning 0))))
521+
(not
522+
(or
523+
(nth 4 pstate) ;; Skip if in a comment
524+
(and (nth 3 pstate) (wholenump (nth 8 pstate)) (< (nth 8 pstate) (match-beginning 0))) ;; Skip if in a string that isn't starting here
525+
)))))))
489526

490527
(defvar rust-mode-font-lock-syntactic-keywords
491528
(append
492-
;; Handle single quoted character literals:
493-
(mapcar (lambda (re) (list re '(1 "\"") '(2 "\"")))
494-
'("\\('\\)[^']\\('\\)"
495-
"\\('\\)\\\\['nrt\"\\]\\('\\)"
496-
"\\('\\)\\\\x[[:xdigit:]]\\{2\\}\\('\\)"
497-
"\\('\\)\\\\u[[:xdigit:]]\\{4\\}\\('\\)"
498-
"\\('\\)\\\\U[[:xdigit:]]\\{8\\}\\('\\)"))
499-
;; Handle raw strings:
500-
`((rust-look-for-raw-string (1 "|") (4 "_" nil t) (5 "|" nil t) (6 "|" nil t)))))
529+
;; Handle raw strings and character literals:
530+
`((rust-look-for-non-standard-string (1 "|" nil t) (4 "_" nil t) (5 "|" nil t) (6 "|" nil t) (7 "\"" nil t) (8 "\"" nil t)))))
501531

502532
(defun rust-mode-syntactic-face-function (state)
503533
"Syntactic face function to distinguish doc comments from other comments."
@@ -768,7 +798,7 @@ This is written mainly to be used as `end-of-defun-function' for Rust."
768798
(setq-local indent-line-function 'rust-mode-indent-line)
769799

770800
;; Fonts
771-
(add-to-list 'font-lock-extend-region-functions 'rust-extend-region-raw-string)
801+
(add-to-list 'font-lock-extend-region-functions 'rust-font-lock-extend-region)
772802
(setq-local font-lock-defaults '(rust-mode-font-lock-keywords
773803
nil nil nil nil
774804
(font-lock-syntactic-keywords . rust-mode-font-lock-syntactic-keywords)

0 commit comments

Comments
 (0)