|
# Matches a single whitespace character or a comma.
whitespace_chars = re.compile(r'[\s,]')
# Matches one line terminator: CRLF, a bare CR, or a bare LF. The pattern is
# a plain (non-raw) string, so it contains the literal control characters.
newline_chars = re.compile('(\r\n|\r|\n)')
# Matches '%' optionally followed by '&' or a single decimal digit.
fn_macro_args = re.compile('(%)(&|[0-9])?')
# Matches 'u' followed by one or more word characters (captured as group 1).
# Raw string so that ``\w`` is not an invalid Python string escape.
unicode_char = re.compile(r'u(\w+)')
31 | 32 |
|
# Mapping from a string key to a symbol.Symbol.
# NOTE(review): presumably maps gensym names to their generated symbols --
# confirm against the code that populates it.
GenSymEnvironment = Dict[str, symbol.Symbol]
# Callable that takes a symbol.Symbol and returns a symbol.Symbol.
Resolver = Callable[[symbol.Symbol], symbol.Symbol]
|
@@ -719,6 +720,56 @@ def _read_deref(ctx: ReaderContext) -> LispForm:
|
719 | 720 | return llist.l(_DEREF, next_form)
|
720 | 721 |
|
721 | 722 |
|
| 723 | +_SPECIAL_CHARS = { |
| 724 | + 'newline': '\n', |
| 725 | + 'space': ' ', |
| 726 | + 'tab': '\t', |
| 727 | + 'formfeed': '\f', |
| 728 | + 'backspace': '\b', |
| 729 | + 'return': '\r' |
| 730 | +} |
| 731 | + |
| 732 | + |
def _read_character(ctx: ReaderContext) -> str:
    """Read a character literal from the input stream.

    Character literals may appear as:
      - \\a \\b \\c etc will yield 'a', 'b', and 'c' respectively

      - \\newline, \\space, \\tab, \\formfeed, \\backspace, \\return yield
        the named characters

      - \\uXXXX yield the Unicode character corresponding to the code
        point named by the hex digits XXXX

    The literal's text is every token after the backslash up to (and
    consuming) the first token that is empty or matches
    ``whitespace_chars`` (whitespace or a comma).

    Raises:
        SyntaxError: if no character follows the backslash, if the text
            after ``u`` is not a valid code point, or if the text is longer
            than one character and is neither a named character nor a
            ``u``-prefixed code point.
    """
    reader = ctx.reader
    start = reader.advance()
    # Internal invariant: the dispatcher only calls this on a backslash.
    assert start == "\\"

    s: List[str] = []
    while True:
        token = reader.advance()
        if token == '' or whitespace_chars.match(token):
            break
        s.append(token)

    char = ''.join(s)
    if not char:
        # Nothing followed the backslash; an empty string is not a character.
        raise SyntaxError("Expected a character after \\")

    special = _SPECIAL_CHARS.get(char)
    if special is not None:
        return special

    # fullmatch (not match) so trailing text after the code point, such as
    # ``u0041-x``, is rejected below instead of being silently dropped.
    match = unicode_char.fullmatch(char)
    if match is not None:
        try:
            return chr(int(f"0x{match.group(1)}", 16))
        except (ValueError, OverflowError):
            # ``char`` already starts with 'u', so it is not repeated here.
            raise SyntaxError(f"Unsupported character \\{char}") from None

    if len(char) > 1:
        raise SyntaxError(f"Unsupported character \\{char}")

    return char
| 771 | + |
| 772 | + |
722 | 773 | def _read_regex(ctx: ReaderContext) -> Pattern:
|
723 | 774 | """Read a regex reader macro from the input stream."""
|
724 | 775 | s = _read_str(ctx)
|
@@ -823,6 +874,8 @@ def _read_next(ctx: ReaderContext) -> LispForm: # noqa: C901
|
823 | 874 | return _read_str(ctx)
|
824 | 875 | elif token == "'":
|
825 | 876 | return _read_quoted(ctx)
|
| 877 | + elif token == '\\': |
| 878 | + return _read_character(ctx) |
826 | 879 | elif ns_name_chars.match(token):
|
827 | 880 | return _read_sym(ctx)
|
828 | 881 | elif token == '#':
|
|
0 commit comments