|
428 | 428 | part of it. Adjusts to include the r[#] of a raw string as
|
429 | 429 | well."
|
430 | 430 |
|
431 |
| - (let ((orig-beg font-lock-beg) |
432 |
| - (orig-end font-lock-end)) |
433 |
| - (cond |
434 |
| - ;; If we are not syntactically fontified yet, we cannot correctly cover |
435 |
| - ;; anything less than the full buffer. The syntactic fontification |
436 |
| - ;; modifies the syntax, so until it's done we can't use the syntax to |
437 |
| - ;; determine what to fontify. |
438 |
| - ((< (or font-lock-syntactically-fontified 0) font-lock-end) |
439 |
| - (setq font-lock-beg 1) |
440 |
| - (setq font-lock-end (buffer-end 1))) |
441 |
| - |
442 |
| - ((let* ((beg-ppss (syntax-ppss font-lock-beg)) |
443 |
| - (beg-in-cmnt (and (nth 4 beg-ppss) (nth 8 beg-ppss))) |
444 |
| - (beg-in-str (nth 3 beg-ppss)) |
445 |
| - (end-ppss (syntax-ppss font-lock-end)) |
446 |
| - (end-in-str (nth 3 end-ppss))) |
447 |
| - |
448 |
| - (when (and beg-in-str (> font-lock-beg (nth 8 beg-ppss))) |
| 431 | + (save-excursion |
| 432 | + (let ((orig-beg font-lock-beg) |
| 433 | + (orig-end font-lock-end)) |
| 434 | + |
| 435 | + (let* |
| 436 | + ;; It's safe to call `syntax-ppss' here on positions that are |
| 437 | + ;; already syntactically fontified |
| 438 | + ((beg-ppss (syntax-ppss font-lock-beg)) |
| 439 | + (beg-in-cmnt (and beg-ppss (nth 4 beg-ppss) (nth 8 beg-ppss))) |
| 440 | + (beg-in-str (and beg-ppss (nth 3 beg-ppss) (nth 8 beg-ppss)))) |
| 441 | + |
| 442 | + (when (and beg-in-str (>= font-lock-beg beg-in-str)) |
449 | 443 | (setq font-lock-beg (nth 8 beg-ppss))
|
450 | 444 | (while (equal ?# (char-before font-lock-beg))
|
451 | 445 | (setq font-lock-beg (1- font-lock-beg)))
|
452 | 446 | (when (equal ?r (char-before font-lock-beg))
|
453 | 447 | (setq font-lock-beg (1- font-lock-beg))))
|
454 | 448 |
|
455 | 449 | (when (and beg-in-cmnt (> font-lock-beg beg-in-cmnt))
|
456 |
| - (setq font-lock-beg beg-in-cmnt)) |
457 |
| - |
458 |
| - (when end-in-str |
459 |
| - (save-excursion |
460 |
| - (goto-char (nth 8 end-ppss)) |
461 |
| - (ignore-errors (forward-sexp)) |
462 |
| - (setq font-lock-end (max font-lock-end (point))))) |
463 |
| - ))) |
464 |
| - |
465 |
| - (or (/= font-lock-beg orig-beg) |
466 |
| - (/= font-lock-end orig-end)) |
467 |
| - )) |
| 450 | + (setq font-lock-beg beg-in-cmnt))) |
| 451 | + |
| 452 | + ;; We need to make sure that if the region ends inside a raw string, we |
| 453 | + ;; extend it out past the end of it. But we can't use `syntax-ppss' to |
| 454 | + ;; detect that, because that depends on font-lock already being done, and we |
| 455 | + ;; are trying to figure out how much to font-lock before that. So we use |
| 456 | + ;; the regexp directly. |
| 457 | + (save-match-data |
| 458 | + (goto-char font-lock-beg) |
| 459 | + (while (and (< (point) font-lock-end) |
| 460 | + (re-search-forward rust-re-non-standard-string (buffer-end 1) t) |
| 461 | + (<= (match-beginning 0) font-lock-end)) |
| 462 | + (setq font-lock-end (max font-lock-end (match-end 0))) |
| 463 | + (goto-char (1+ (match-beginning 0))))) |
| 464 | + |
| 465 | + (or (/= font-lock-beg orig-beg) |
| 466 | + (/= font-lock-end orig-end)) |
| 467 | + ))) |
468 | 468 |
|
469 | 469 | (defun rust-conditional-re-search-forward (regexp bound condition)
|
470 | 470 | ;; Search forward for regexp (with bound). If found, call condition and return the found
|
|
492 | 492 | (set-match-data (nth 1 ret-list))
|
493 | 493 | (nth 0 ret-list))))
|
494 | 494 |
|
| 495 | +(defconst rust-re-non-standard-string |
| 496 | + (rx |
| 497 | + (or |
| 498 | + ;; Raw string: if it matches, it ends up with the starting character |
| 499 | + ;; of the string as group 1, any ending backslashes as group 4, and |
| 500 | + ;; the ending character as either group 5 or group 6. |
| 501 | + (seq |
| 502 | + ;; The "r" starts the raw string. Capture it as group 1 to mark it as such syntactically: |
| 503 | + (group "r") |
| 504 | + |
| 505 | + ;; Then either: |
| 506 | + (or |
| 507 | + ;; a sequence of at least one "#" (followed by quote). Capture all |
| 508 | + ;; but the last "#" as group 2 for this case. |
| 509 | + (seq (group (* "#")) "#\"") |
| 510 | + |
| 511 | + ;; ...or a quote without any "#". Capture it as group 3. This is |
| 512 | + ;; used later to match the opposite quote only if this capture |
| 513 | + ;; occurred |
| 514 | + (group "\"")) |
| 515 | + |
| 516 | + ;; The contents of the string: |
| 517 | + (*? anything) |
| 518 | + |
| 519 | + ;; If there are any backslashes at the end of the string, capture |
| 520 | + ;; them as group 4 so we can suppress the normal escape syntax |
| 521 | + ;; parsing: |
| 522 | + (group (* "\\")) |
| 523 | + |
| 524 | + ;; Then the end of the string--the backreferences ensure that we |
| 525 | + ;; only match the kind of ending that corresponds to the beginning |
| 526 | + ;; we had: |
| 527 | + (or |
| 528 | + ;; There were "#"s - capture the last one as group 5 to mark it as |
| 529 | + ;; the end of the string: |
| 530 | + (seq "\"" (backref 2) (group "#")) |
| 531 | + |
| 532 | + ;; No "#"s - capture the ending quote (using a backref to group 3, |
| 533 | + ;; so that we can't match a quote if we had "#"s) as group 6 |
| 534 | + (group (backref 3)) |
| 535 | + |
| 536 | + ;; If the raw string wasn't actually closed, go all the way to the end |
| 537 | + string-end)) |
| 538 | + |
| 539 | + ;; Character literal: match the beginning ' of a character literal |
| 540 | + ;; as group 7, and the ending one as group 8 |
| 541 | + (seq |
| 542 | + (group "'") |
| 543 | + (or |
| 544 | + (seq |
| 545 | + "\\" |
| 546 | + (or |
| 547 | + (: "U" (= 8 xdigit)) |
| 548 | + (: "u" (= 4 xdigit)) |
| 549 | + (: "x" (= 2 xdigit)) |
| 550 | + (any "'nrt0\"\\"))) |
| 551 | + (not (any "'\\")) |
| 552 | + ) |
| 553 | + (group "'")) |
| 554 | + ) |
| 555 | + )) |
| 556 | + |
495 | 557 | (defun rust-look-for-non-standard-string (bound)
|
496 | 558 | ;; Find a raw string or character literal, but only if it's not in the middle
|
497 | 559 | ;; of another string or a comment.
|
498 | 560 |
|
499 |
| - (let* ((non-standard-str-regexp |
500 |
| - (rx |
501 |
| - (or |
502 |
| - ;; Raw string: if it matches, it ends up with the starting character |
503 |
| - ;; of the string as group 1, any ending backslashes as group 4, and |
504 |
| - ;; the ending character as either group 5 or group 6. |
505 |
| - (seq |
506 |
| - ;; The "r" starts the raw string. Capture it as group 1 to mark it as such syntactically: |
507 |
| - (group "r") |
508 |
| - |
509 |
| - ;; Then either: |
510 |
| - (or |
511 |
| - ;; a sequence at least one "#" (followed by quote). Capture all |
512 |
| - ;; but the last "#" as group 2 for this case. |
513 |
| - (seq (group (* "#")) "#\"") |
514 |
| - |
515 |
| - ;; ...or a quote without any "#". Capture it as group 3. This is |
516 |
| - ;; used later to match the opposite quote only if this capture |
517 |
| - ;; occurred |
518 |
| - (group "\"")) |
519 |
| - |
520 |
| - ;; The contents of the string: |
521 |
| - (*? anything) |
522 |
| - |
523 |
| - ;; If there are any backslashes at the end of the string, capture |
524 |
| - ;; them as group 4 so we can suppress the normal escape syntax |
525 |
| - ;; parsing: |
526 |
| - (group (* "\\")) |
527 |
| - |
528 |
| - ;; Then the end of the string--the backreferences ensure that we |
529 |
| - ;; only match the kind of ending that corresponds to the beginning |
530 |
| - ;; we had: |
531 |
| - (or |
532 |
| - ;; There were "#"s - capture the last one as group 5 to mark it as |
533 |
| - ;; the end of the string: |
534 |
| - (seq "\"" (backref 2) (group "#")) |
535 |
| - |
536 |
| - ;; No "#"s - capture the ending quote (using a backref to group 3, |
537 |
| - ;; so that we can't match a quote if we had "#"s) as group 6 |
538 |
| - (group (backref 3)))) |
539 |
| - |
540 |
| - ;; Character literal: match the beginning ' of a character literal |
541 |
| - ;; as group 7, and the ending one as group 8 |
542 |
| - (seq |
543 |
| - (group "'") |
544 |
| - (or |
545 |
| - (seq |
546 |
| - "\\" |
547 |
| - (or |
548 |
| - (: "U" (= 8 xdigit)) |
549 |
| - (: "u" (= 4 xdigit)) |
550 |
| - (: "x" (= 2 xdigit)) |
551 |
| - (any "'nrt0\"\\"))) |
552 |
| - (not (any "'\\")) |
553 |
| - ) |
554 |
| - (group "'")) |
555 |
| - ) |
556 |
| - ))) |
557 |
| - (rust-conditional-re-search-forward |
558 |
| - non-standard-str-regexp bound |
559 |
| - (lambda () |
560 |
| - (let ((pstate (syntax-ppss (match-beginning 0)))) |
561 |
| - (not |
562 |
| - (or |
563 |
| - (nth 4 pstate) ;; Skip if in a comment |
564 |
| - (and (nth 3 pstate) (wholenump (nth 8 pstate)) (< (nth 8 pstate) (match-beginning 0))) ;; Skip if in a string that isn't starting here |
565 |
| - ))))))) |
| 561 | + (rust-conditional-re-search-forward |
| 562 | + rust-re-non-standard-string |
| 563 | + bound |
| 564 | + (lambda () |
| 565 | + (let ((pstate (syntax-ppss (match-beginning 0)))) |
| 566 | + (not |
| 567 | + (or |
| 568 | + (nth 4 pstate) ;; Skip if in a comment |
| 569 | + (and (nth 3 pstate) (wholenump (nth 8 pstate)) (< (nth 8 pstate) (match-beginning 0))) ;; Skip if in a string that isn't starting here |
| 570 | + )))))) |
566 | 571 |
|
567 | 572 | (defun rust-syntax-class-before-point ()
|
568 | 573 | (when (> (point) 1)
|
|
0 commit comments