Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ pub enum LexError {
Generic,
UnmatchedStrInterpLParen,
UnmatchedStrInterpRParen,
UnmatchedRawStringRSharp,
}

/// Average number of bytes per token used for estimating the tokens buffer size.
Expand Down Expand Up @@ -279,6 +280,32 @@ pub fn lex(contents: &[u8], span_offset: usize) -> (Tokens, Result<(), Spanned<L
(tokens, Ok(()))
}

/// Lexer callback that finds the end of a raw string literal.
///
/// It is invoked once the opening delimiter (`r`, one or more `#`, then `'`)
/// has been matched. `remainder` is the input that follows that opener. The
/// literal ends at the first `'` that is immediately followed by as many `#`
/// characters as the opener contained.
///
/// On success the lexer is bumped past the closing delimiter and `Ok(())` is
/// returned. If no closing delimiter exists in `remainder`, the result is
/// `Err(LexError::UnmatchedRawStringRSharp)` and the lexer is left untouched.
fn match_rawstring(remainder: &[u8], lexer: &mut Lexer<Token>) -> Result<(), LexError> {
    // Everything between the leading `r` and the trailing `'` of the opener is sharps.
    let opener = lexer.slice();
    let sharps_needed = opener[1..opener.len() - 1].len();

    let mut cursor = 0;
    // Hop from one candidate quote to the next instead of stepping byte by byte.
    while let Some(offset) = remainder[cursor..].iter().position(|&byte| byte == b'\'') {
        let after_quote = cursor + offset + 1;
        let sharps_found = remainder[after_quote..]
            .iter()
            .take_while(|&&byte| byte == b'#')
            .count();

        // A closer needs at least one sharp; extra sharps beyond the required
        // count are not consumed as part of this token.
        if sharps_needed > 0 && sharps_found >= sharps_needed {
            lexer.bump(after_quote + sharps_needed);
            return Ok(());
        }

        // Too few sharps: resume right after them. If the next byte is another
        // quote it becomes the next candidate.
        cursor = after_quote + sharps_found;
    }

    Err(LexError::UnmatchedRawStringRSharp)
}

#[derive(Logos, Debug, Clone, Copy, PartialEq)]
#[logos(skip r"[ \t]+")]
#[logos(source = [u8], error = LexError)]
Expand All @@ -295,6 +322,8 @@ pub enum Token {
SingleQuotedString,
#[regex(r#"`[^`]*`"#)]
BacktickBareword,
#[regex("r#+'", |lex| match_rawstring(lex.remainder(), lex))]
RawString,
// #[regex(r#"[ \t]+"#)]
// HorizontalWhitespace,
#[regex(r#"[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?)?(Z|[\+-][0-9]{2}:[0-9]{2})?"#)]
Expand Down
14 changes: 14 additions & 0 deletions src/snapshots/new_nu_parser__test__lexer@raw_string.nu.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
source: src/test.rs
expression: evaluate_lexer(path)
input_file: tests/lex/raw_string.nu
snapshot_kind: text
---
==== TOKENS ====
Token3 0: RawString span: 0 .. 9 'r#'aabb'#'
Token3 1: Newline span: 9 .. 10 '\n'
Token3 2: RawString span: 10 .. 25 'r##'aa\n'#\nbb'##'
Token3 3: Newline span: 25 .. 26 '\n'
Token3 4: RawString span: 26 .. 58 'r####'aa\nbb\ncc'##dd\n###\nddd'####'
Token3 5: Newline span: 58 .. 59 '\n'
Token3 6: Eof span: 59 .. 59 ''
9 changes: 9 additions & 0 deletions tests/lex/raw_string.nu
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
r#'aabb'#
r##'aa
'#
bb'##
r####'aa
bb
cc'##dd
###
ddd'####