Skip to content

Commit fcaf249

Browse files
Merge pull request #10 from LukasKalbertodt/arbitrary-suffixes
Implement arbitrary suffixes (for all literals)
2 parents 8c88a04 + 83816a8 commit fcaf249

File tree

21 files changed

+756
-429
lines changed

21 files changed

+756
-429
lines changed

.github/workflows/ci.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,10 @@ jobs:
4040
run: |
4141
cargo test --release --no-default-features --lib -- --include-ignored
4242
cargo test --doc --no-default-features
43+
44+
- name: Build with check_suffix
45+
run: cargo build --features=check_suffix
46+
- name: Run tests with check_suffix
47+
run: |
48+
cargo test --release --features=check_suffix --lib -- --include-ignored
49+
cargo test --doc --features=check_suffix

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ exclude = [".github"]
2626

2727
[features]
2828
default = ["proc-macro2"]
29+
check_suffix = ["unicode-xid"]
2930

3031
[dependencies]
3132
proc-macro2 = { version = "1", optional = true }
33+
unicode-xid = { version = "0.2.4", optional = true }

examples/procmacro/examples/main.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use procmacro_example::{concat, repeat};
1+
use procmacro_example::{concat, dbg_and_swallow, repeat};
22

33
const FOO: &str = concat!(r#"Hello "# '🦊' "\nHere is a friend: \u{1F427}");
44
// const FOO: &str = concat!(::);
@@ -8,6 +8,7 @@ const BAR: &str = repeat!(3 * "నా పిల్లి లావుగా ఉ
88
const BAZ: &str = repeat!(0b101 * "🦀");
99
// const BAZ: &str = repeat!(3.5 * "🦀");
1010

11+
dbg_and_swallow!(16px);
1112

1213
fn main() {
1314
println!("{}", FOO);

examples/procmacro/src/lib.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@ use proc_macro::{Spacing, TokenStream, TokenTree};
33
use litrs::{Literal, IntegerLit, StringLit};
44

55

6+
#[proc_macro]
7+
pub fn dbg_and_swallow(input: TokenStream) -> TokenStream {
8+
for token in input {
9+
println!("{} -> {:#?}", token, Literal::try_from(&token));
10+
}
11+
TokenStream::new()
12+
}
13+
614
/// Concatenates all input string and char literals into a single output string
715
/// literal.
816
#[proc_macro]

src/byte/mod.rs

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use crate::{
44
Buffer, ParseError,
55
err::{perr, ParseErrorKind::*},
66
escape::unescape,
7+
parse::check_suffix,
78
};
89

910

@@ -15,6 +16,8 @@ use crate::{
1516
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1617
pub struct ByteLit<B: Buffer> {
1718
raw: B,
19+
/// Start index of the suffix or `raw.len()` if there is no suffix.
20+
start_suffix: usize,
1821
value: u8,
1922
}
2023

@@ -29,15 +32,20 @@ impl<B: Buffer> ByteLit<B> {
2932
return Err(perr(None, InvalidByteLiteralStart));
3033
}
3134

32-
let value = parse_impl(&input)?;
33-
Ok(Self { raw: input, value })
35+
let (value, start_suffix) = parse_impl(&input)?;
36+
Ok(Self { raw: input, value, start_suffix })
3437
}
3538

3639
/// Returns the byte value that this literal represents.
3740
pub fn value(&self) -> u8 {
3841
self.value
3942
}
4043

44+
/// The optional suffix. Returns `""` if the suffix is empty/does not exist.
45+
pub fn suffix(&self) -> &str {
46+
&(*self.raw)[self.start_suffix..]
47+
}
48+
4149
/// Returns the raw input that was passed to `parse`.
4250
pub fn raw_input(&self) -> &str {
4351
&self.raw
@@ -56,6 +64,7 @@ impl ByteLit<&str> {
5664
pub fn to_owned(&self) -> ByteLit<String> {
5765
ByteLit {
5866
raw: self.raw.to_owned(),
67+
start_suffix: self.start_suffix,
5968
value: self.value,
6069
}
6170
}
@@ -69,32 +78,29 @@ impl<B: Buffer> fmt::Display for ByteLit<B> {
6978

7079
/// Precondition: must start with `b'`.
7180
#[inline(never)]
72-
pub(crate) fn parse_impl(input: &str) -> Result<u8, ParseError> {
73-
if input.len() == 2 {
74-
return Err(perr(None, UnterminatedByteLiteral));
75-
}
76-
if *input.as_bytes().last().unwrap() != b'\'' {
77-
return Err(perr(None, UnterminatedByteLiteral));
78-
}
79-
80-
let inner = &input[2..input.len() - 1];
81-
let first = inner.as_bytes().get(0).ok_or(perr(None, EmptyByteLiteral))?;
81+
pub(crate) fn parse_impl(input: &str) -> Result<(u8, usize), ParseError> {
82+
let input_bytes = input.as_bytes();
83+
let first = input_bytes.get(2).ok_or(perr(None, UnterminatedByteLiteral))?;
8284
let (c, len) = match first {
83-
b'\'' => return Err(perr(2, UnescapedSingleQuote)),
84-
b'\n' | b'\t' | b'\r'
85-
=> return Err(perr(2, UnescapedSpecialWhitespace)),
86-
87-
b'\\' => unescape::<u8>(inner, 2)?,
85+
b'\'' if input_bytes.get(3) == Some(&b'\'') => return Err(perr(2, UnescapedSingleQuote)),
86+
b'\'' => return Err(perr(None, EmptyByteLiteral)),
87+
b'\n' | b'\t' | b'\r' => return Err(perr(2, UnescapedSpecialWhitespace)),
88+
b'\\' => unescape::<u8>(&input[2..], 2)?,
8889
other if other.is_ascii() => (*other, 1),
8990
_ => return Err(perr(2, NonAsciiInByteLiteral)),
9091
};
91-
let rest = &inner[len..];
9292

93-
if !rest.is_empty() {
94-
return Err(perr(len + 2..input.len() - 1, OverlongByteLiteral));
93+
match input[2 + len..].find('\'') {
94+
Some(0) => {}
95+
Some(_) => return Err(perr(None, OverlongByteLiteral)),
96+
None => return Err(perr(None, UnterminatedByteLiteral)),
9597
}
9698

97-
Ok(c)
99+
let start_suffix = 2 + len + 1;
100+
let suffix = &input[start_suffix..];
101+
check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?;
102+
103+
Ok((c, start_suffix))
98104
}
99105

100106
#[cfg(test)]

src/byte/tests.rs

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,20 @@ use crate::{ByteLit, Literal, test_util::{assert_parse_ok_eq, assert_roundtrip}}
33
// ===== Utility functions =======================================================================
44

55
macro_rules! check {
6-
($lit:literal) => {
7-
let input = stringify!($lit);
6+
($lit:literal) => { check!($lit, stringify!($lit), "") };
7+
($lit:literal, $input:expr, $suffix:literal) => {
8+
let input = $input;
89
let expected = ByteLit {
910
raw: input,
11+
start_suffix: input.len() - $suffix.len(),
1012
value: $lit,
1113
};
1214

1315
assert_parse_ok_eq(input, ByteLit::parse(input), expected.clone(), "ByteLit::parse");
1416
assert_parse_ok_eq(input, Literal::parse(input), Literal::Byte(expected), "Literal::parse");
15-
assert_eq!(ByteLit::parse(input).unwrap().value(), $lit);
17+
let lit = ByteLit::parse(input).unwrap();
18+
assert_eq!(lit.value(), $lit);
19+
assert_eq!(lit.suffix(), $suffix);
1620
assert_roundtrip(expected.to_owned(), input);
1721
};
1822
}
@@ -113,13 +117,23 @@ fn byte_escapes() {
113117
check!(b'\xFF');
114118
}
115119

120+
#[test]
121+
fn suffixes() {
122+
check!(b'a', r##"b'a'peter"##, "peter");
123+
check!(b'#', r##"b'#'peter"##, "peter");
124+
check!(b'\n', r##"b'\n'peter"##, "peter");
125+
check!(b'\'', r##"b'\''peter"##, "peter");
126+
check!(b'\"', r##"b'\"'peter"##, "peter");
127+
check!(b'\xFF', r##"b'\xFF'peter"##, "peter");
128+
}
129+
116130
#[test]
117131
fn invald_escapes() {
118132
assert_err!(ByteLit, r"b'\a'", UnknownEscape, 2..4);
119133
assert_err!(ByteLit, r"b'\y'", UnknownEscape, 2..4);
120-
assert_err!(ByteLit, r"b'\", UnterminatedByteLiteral, None);
121-
assert_err!(ByteLit, r"b'\x'", UnterminatedEscape, 2..4);
122-
assert_err!(ByteLit, r"b'\x1'", UnterminatedEscape, 2..5);
134+
assert_err!(ByteLit, r"b'\", UnterminatedEscape, 2..3);
135+
assert_err!(ByteLit, r"b'\x'", UnterminatedEscape, 2..5);
136+
assert_err!(ByteLit, r"b'\x1'", InvalidXEscape, 2..6);
123137
assert_err!(ByteLit, r"b'\xaj'", InvalidXEscape, 2..6);
124138
assert_err!(ByteLit, r"b'\xjb'", InvalidXEscape, 2..6);
125139
}
@@ -148,16 +162,16 @@ fn unicode_escape_not_allowed() {
148162
#[test]
149163
fn parse_err() {
150164
assert_err!(ByteLit, r"b''", EmptyByteLiteral, None);
151-
assert_err!(ByteLit, r"b' ''", OverlongByteLiteral, 3..4);
165+
assert_err!(ByteLit, r"b' ''", UnexpectedChar, 4..5);
152166

153167
assert_err!(ByteLit, r"b'", UnterminatedByteLiteral, None);
154168
assert_err!(ByteLit, r"b'a", UnterminatedByteLiteral, None);
155169
assert_err!(ByteLit, r"b'\n", UnterminatedByteLiteral, None);
156170
assert_err!(ByteLit, r"b'\x35", UnterminatedByteLiteral, None);
157171

158-
assert_err!(ByteLit, r"b'ab'", OverlongByteLiteral, 3..4);
159-
assert_err!(ByteLit, r"b'a _'", OverlongByteLiteral, 3..5);
160-
assert_err!(ByteLit, r"b'\n3'", OverlongByteLiteral, 4..5);
172+
assert_err!(ByteLit, r"b'ab'", OverlongByteLiteral, None);
173+
assert_err!(ByteLit, r"b'a _'", OverlongByteLiteral, None);
174+
assert_err!(ByteLit, r"b'\n3'", OverlongByteLiteral, None);
161175

162176
assert_err!(ByteLit, r"", Empty, None);
163177

src/bytestr/mod.rs

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ pub struct ByteStringLit<B: Buffer> {
2424
/// The number of hash signs in case of a raw string literal, or `None` if
2525
/// it's not a raw string literal.
2626
num_hashes: Option<u32>,
27+
28+
/// Start index of the suffix or `raw.len()` if there is no suffix.
29+
start_suffix: usize,
2730
}
2831

2932
impl<B: Buffer> ByteStringLit<B> {
@@ -37,7 +40,8 @@ impl<B: Buffer> ByteStringLit<B> {
3740
return Err(perr(None, InvalidByteStringLiteralStart));
3841
}
3942

40-
Self::parse_impl(input)
43+
let (value, num_hashes, start_suffix) = parse_impl(&input)?;
44+
Ok(Self { raw: input, value, num_hashes, start_suffix })
4145
}
4246

4347
/// Returns the string value this literal represents (where all escapes have
@@ -56,6 +60,11 @@ impl<B: Buffer> ByteStringLit<B> {
5660
value.map(B::ByteCow::from).unwrap_or_else(|| raw.cut(inner_range).into_byte_cow())
5761
}
5862

63+
/// The optional suffix. Returns `""` if the suffix is empty/does not exist.
64+
pub fn suffix(&self) -> &str {
65+
&(*self.raw)[self.start_suffix..]
66+
}
67+
5968
/// Returns whether this literal is a raw string literal (starting with
6069
/// `r`).
6170
pub fn is_raw_byte_string(&self) -> bool {
@@ -75,27 +84,8 @@ impl<B: Buffer> ByteStringLit<B> {
7584
/// The range within `self.raw` that excludes the quotes and potential `r#`.
7685
fn inner_range(&self) -> Range<usize> {
7786
match self.num_hashes {
78-
None => 2..self.raw.len() - 1,
79-
Some(n) => 2 + n as usize + 1..self.raw.len() - n as usize - 1,
80-
}
81-
}
82-
83-
/// Precondition: input has to start with either `b"` or `br`.
84-
pub(crate) fn parse_impl(input: B) -> Result<Self, ParseError> {
85-
if input.starts_with(r"br") {
86-
let (value, num_hashes) = scan_raw_string::<u8>(&input, 2)?;
87-
Ok(Self {
88-
raw: input,
89-
value: value.map(|s| s.into_bytes()),
90-
num_hashes: Some(num_hashes),
91-
})
92-
} else {
93-
let value = unescape_string::<u8>(&input, 2)?.map(|s| s.into_bytes());
94-
Ok(Self {
95-
raw: input,
96-
value,
97-
num_hashes: None,
98-
})
87+
None => 2..self.start_suffix - 1,
88+
Some(n) => 2 + n as usize + 1..self.start_suffix - n as usize - 1,
9989
}
10090
}
10191
}
@@ -108,6 +98,7 @@ impl ByteStringLit<&str> {
10898
raw: self.raw.to_owned(),
10999
value: self.value,
110100
num_hashes: self.num_hashes,
101+
start_suffix: self.start_suffix,
111102
}
112103
}
113104
}
@@ -119,5 +110,17 @@ impl<B: Buffer> fmt::Display for ByteStringLit<B> {
119110
}
120111

121112

113+
/// Precondition: input has to start with either `b"` or `br`.
114+
#[inline(never)]
115+
fn parse_impl(input: &str) -> Result<(Option<Vec<u8>>, Option<u32>, usize), ParseError> {
116+
if input.starts_with("br") {
117+
scan_raw_string::<u8>(&input, 2)
118+
.map(|(v, num, start_suffix)| (v.map(String::into_bytes), Some(num), start_suffix))
119+
} else {
120+
unescape_string::<u8>(&input, 2)
121+
.map(|(v, start_suffix)| (v.map(String::into_bytes), None, start_suffix))
122+
}
123+
}
124+
122125
#[cfg(test)]
123126
mod tests;

src/bytestr/tests.rs

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,25 @@ use crate::{Literal, ByteStringLit, test_util::{assert_parse_ok_eq, assert_round
44

55
macro_rules! check {
66
($lit:literal, $has_escapes:expr, $num_hashes:expr) => {
7-
let input = stringify!($lit);
7+
check!($lit, stringify!($lit), $has_escapes, $num_hashes, "")
8+
};
9+
($lit:literal, $input:expr, $has_escapes:expr, $num_hashes:expr, $suffix:literal) => {
10+
let input = $input;
811
let expected = ByteStringLit {
912
raw: input,
1013
value: if $has_escapes { Some($lit.to_vec()) } else { None },
1114
num_hashes: $num_hashes,
15+
start_suffix: input.len() - $suffix.len(),
1216
};
1317

1418
assert_parse_ok_eq(
1519
input, ByteStringLit::parse(input), expected.clone(), "ByteStringLit::parse");
1620
assert_parse_ok_eq(
1721
input, Literal::parse(input), Literal::ByteString(expected.clone()), "Literal::parse");
18-
assert_eq!(ByteStringLit::parse(input).unwrap().value(), $lit);
19-
assert_eq!(ByteStringLit::parse(input).unwrap().into_value().as_ref(), $lit);
22+
let lit = ByteStringLit::parse(input).unwrap();
23+
assert_eq!(lit.value(), $lit);
24+
assert_eq!(lit.suffix(), $suffix);
25+
assert_eq!(lit.into_value().as_ref(), $lit);
2026
assert_roundtrip(expected.into_owned(), input);
2127
};
2228
}
@@ -43,6 +49,7 @@ fn special_whitespace() {
4349
raw: &*input,
4450
value: None,
4551
num_hashes,
52+
start_suffix: input.len(),
4653
};
4754
assert_parse_ok_eq(
4855
&input, ByteStringLit::parse(&*input), expected.clone(), "ByteStringLit::parse");
@@ -147,17 +154,23 @@ fn raw_byte_string() {
147154
check!(br#"cat\n\t\r\0\\x60\u{123}doggo"#, false, Some(1));
148155
}
149156

157+
#[test]
158+
fn suffixes() {
159+
check!(b"hello", r###"b"hello"suffix"###, false, None, "suffix");
160+
check!(b"fox", r#"b"fox"peter"#, false, None, "peter");
161+
check!(b"a\x0cb\\", r#"b"a\x0cb\\"_jürgen"#, true, None, "_jürgen");
162+
check!(br"a\x0cb\\", r###"br#"a\x0cb\\"#_jürgen"###, false, Some(1), "_jürgen");
163+
}
164+
150165
#[test]
151166
fn parse_err() {
152167
assert_err!(ByteStringLit, r#"b""#, UnterminatedString, None);
153168
assert_err!(ByteStringLit, r#"b"cat"#, UnterminatedString, None);
154169
assert_err!(ByteStringLit, r#"b"Jurgen"#, UnterminatedString, None);
155170
assert_err!(ByteStringLit, r#"b"foo bar baz"#, UnterminatedString, None);
156171

157-
assert_err!(ByteStringLit, r#"b"fox"peter"#, UnexpectedChar, 6..11);
158-
assert_err!(ByteStringLit, r#"b"fox"peter""#, UnexpectedChar, 6..12);
159-
assert_err!(ByteStringLit, r#"b"fox"bar"#, UnexpectedChar, 6..9);
160-
assert_err!(ByteStringLit, r###"br#"foo "# bar"#"###, UnexpectedChar, 10..16);
172+
assert_err!(ByteStringLit, r#"b"fox"peter""#, InvalidSuffix, 6);
173+
assert_err!(ByteStringLit, r###"br#"foo "# bar"#"###, UnexpectedChar, 10);
161174

162175
assert_err!(ByteStringLit, "b\"\r\"", IsolatedCr, 2);
163176
assert_err!(ByteStringLit, "b\"fo\rx\"", IsolatedCr, 4);
@@ -179,10 +192,10 @@ fn non_ascii() {
179192
}
180193

181194
#[test]
182-
fn invald_escapes() {
195+
fn invalid_escapes() {
183196
assert_err!(ByteStringLit, r#"b"\a""#, UnknownEscape, 2..4);
184197
assert_err!(ByteStringLit, r#"b"foo\y""#, UnknownEscape, 5..7);
185-
assert_err!(ByteStringLit, r#"b"\"#, UnterminatedString, None);
198+
assert_err!(ByteStringLit, r#"b"\"#, UnterminatedEscape, 2);
186199
assert_err!(ByteStringLit, r#"b"\x""#, UnterminatedEscape, 2..4);
187200
assert_err!(ByteStringLit, r#"b"foo\x1""#, UnterminatedEscape, 5..8);
188201
assert_err!(ByteStringLit, r#"b" \xaj""#, InvalidXEscape, 3..7);

0 commit comments

Comments
 (0)