Skip to content

Commit fca3632

Browse files
authored
Add lexing of rawstring (#48)
As the title says, this PR makes nushell support lexing of raw strings that have the syntax: - `r#'<content>'#` - `r##'<content>'##` - `r###'<content>'###` - etc... To implement this, the lexer just needs to move forward through the input to find the ending mark of the raw string.
1 parent f91d922 commit fca3632

File tree

3 files changed

+52
-0
lines changed

3 files changed

+52
-0
lines changed

src/lexer.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ pub enum LexError {
77
Generic,
88
UnmatchedStrInterpLParen,
99
UnmatchedStrInterpRParen,
10+
UnmatchedRawStringRSharp,
1011
}
1112

1213
/// Average number of bytes per token used for estimating the tokens buffer size.
@@ -279,6 +280,32 @@ pub fn lex(contents: &[u8], span_offset: usize) -> (Tokens, Result<(), Spanned<L
279280
(tokens, Ok(()))
280281
}
281282

283+
fn match_rawstring(remainder: &[u8], lexer: &mut Lexer<Token>) -> Result<(), LexError> {
284+
let prefix = lexer.slice();
285+
let prefix_sharp_length = prefix[1..prefix.len() - 1].len(); // without first `r` and last `'`
286+
let mut pos = 0;
287+
288+
while pos < remainder.len() {
289+
if remainder[pos] == b'\'' {
290+
// might be ending of raw string like '##, move forward and check.
291+
pos += 1;
292+
let mut postfix_sharp_length = 0;
293+
while pos < remainder.len() && remainder[pos] == b'#' {
294+
pos += 1;
295+
postfix_sharp_length += 1;
296+
if postfix_sharp_length == prefix_sharp_length {
297+
// found a matched raw string.
298+
lexer.bump(pos);
299+
return Ok(());
300+
}
301+
}
302+
} else {
303+
pos += 1;
304+
}
305+
}
306+
Err(LexError::UnmatchedRawStringRSharp)
307+
}
308+
282309
#[derive(Logos, Debug, Clone, Copy, PartialEq)]
283310
#[logos(skip r"[ \t]+")]
284311
#[logos(source = [u8], error = LexError)]
@@ -295,6 +322,8 @@ pub enum Token {
295322
SingleQuotedString,
296323
#[regex(r#"`[^`]*`"#)]
297324
BacktickBareword,
325+
#[regex("r#+'", |lex| match_rawstring(lex.remainder(), lex))]
326+
RawString,
298327
// #[regex(r#"[ \t]+"#)]
299328
// HorizontalWhitespace,
300329
#[regex(r#"[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?)?(Z|[\+-][0-9]{2}:[0-9]{2})?"#)]
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
---
2+
source: src/test.rs
3+
expression: evaluate_lexer(path)
4+
input_file: tests/lex/raw_string.nu
5+
snapshot_kind: text
6+
---
7+
==== TOKENS ====
8+
Token3 0: RawString span: 0 .. 9 'r#'aabb'#'
9+
Token3 1: Newline span: 9 .. 10 '\n'
10+
Token3 2: RawString span: 10 .. 25 'r##'aa\n'#\nbb'##'
11+
Token3 3: Newline span: 25 .. 26 '\n'
12+
Token3 4: RawString span: 26 .. 58 'r####'aa\nbb\ncc'##dd\n###\nddd'####'
13+
Token3 5: Newline span: 58 .. 59 '\n'
14+
Token3 6: Eof span: 59 .. 59 ''

tests/lex/raw_string.nu

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
r#'aabb'#
2+
r##'aa
3+
'#
4+
bb'##
5+
r####'aa
6+
bb
7+
cc'##dd
8+
###
9+
ddd'####

0 commit comments

Comments
 (0)