Skip to content

Commit 68b47c2

Browse files
committed
Fix string literals lexing, parsing and displaying
1 parent 4b479f0 commit 68b47c2

File tree

7 files changed

+291
-182
lines changed

7 files changed

+291
-182
lines changed

juniper/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ All user visible changes to `juniper` crate will be documented in this file. Thi
1515
- [September 2025] GraphQL spec: ([#1347])
1616
- Made `includeDeprecated` argument of `__Type.fields`, `__Type.enumValues`, `__Type.inputFields`, `__Field.args` and `__Directive.args` fields non-`Null`. ([#1348], [graphql/graphql-spec#1142])
1717
- Made `@deprecated(reason:)` argument non-`Null`. ([#1348], [graphql/graphql-spec#1040])
18+
- Changed `ScalarToken::String` to contain a raw `StringLiteral`, with its quotes and escape sequences preserved (previously it held an unquoted, but still escaped, string). ([#1349])
1819

1920
### Added
2021

@@ -38,10 +39,12 @@ All user visible changes to `juniper` crate will be documented in this file. Thi
3839

3940
- Incorrect `__Type.specifiedByUrl` field to `__Type.specifiedByURL`. ([#1348])
4041
- Missing `@specifiedBy(url:)` directive in [SDL] generated by `RootNode::as_sdl()` and `RootNode::as_document()` methods. ([#1348])
42+
- Incorrect double escaping in `ScalarToken::String` `Display`ing. ([#1349])
4143

4244
[#864]: /../../issues/864
4345
[#1347]: /../../issues/1347
4446
[#1348]: /../../pull/1348
47+
[#1349]: /../../pull/1349
4548
[graphql/graphql-spec#525]: https://github.com/graphql/graphql-spec/pull/525
4649
[graphql/graphql-spec#805]: https://github.com/graphql/graphql-spec/pull/805
4750
[graphql/graphql-spec#825]: https://github.com/graphql/graphql-spec/pull/825

juniper/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ pub use crate::{
8585
},
8686
introspection::IntrospectionFormat,
8787
macros::helper::subscription::{ExtractTypeFromStream, IntoFieldResult},
88-
parser::{ParseError, ScalarToken, Span, Spanning},
88+
parser::{ParseError, ScalarToken, Span, Spanning, StringLiteral},
8989
schema::{
9090
meta,
9191
model::{RootNode, SchemaType},

juniper/src/parser/lexer.rs

Lines changed: 85 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::{char, iter::Peekable, ops::Deref, str::CharIndices};
22

33
use derive_more::with_trait::{Display, Error};
4+
//use itertools::Itertools as _;
45

56
use crate::parser::{SourcePosition, Spanning};
67

@@ -14,36 +15,34 @@ pub struct Lexer<'a> {
1415
has_reached_eof: bool,
1516
}
1617

17-
/// A single scalar value literal
18+
/// Representation of a raw unparsed scalar value literal.
1819
///
1920
/// This is only used for tagging how the lexer has interpreted a value literal.
2021
#[expect(missing_docs, reason = "self-explanatory")]
2122
#[derive(Clone, Copy, Debug, Display, Eq, PartialEq)]
2223
pub enum ScalarToken<'a> {
23-
String(StringValue<'a>),
24+
String(StringLiteral<'a>),
2425
Float(&'a str),
2526
Int(&'a str),
2627
}
2728

28-
/// Representation of a [String Value].
29+
/// Representation of a raw unparsed [String Value] literal (with quotes included).
2930
///
3031
/// [String Value]: https://spec.graphql.org/October2021#sec-String-Value
3132
#[derive(Clone, Copy, Debug, Display, Eq, PartialEq)]
32-
pub enum StringValue<'a> {
33-
/// [Quoted][0] string representation.
33+
pub enum StringLiteral<'a> {
34+
/// [Quoted][0] literal (delimited by a single pair of double quotes `"`).
3435
///
3536
/// [0]: https://spec.graphql.org/October2021#StringCharacter
36-
#[display(r#""{}""#, _0.replace('\\', r"\\").replace('"', r#"\""#))]
3737
Quoted(&'a str),
3838

39-
/// [Block][0] string representation.
39+
/// [Block][0] literal (denoted by triple quotes `"""`).
4040
///
4141
/// [0]: https://spec.graphql.org/October2021#BlockStringCharacter
42-
#[display(r#""""{}""""#, _0.replace(r#"""""#, r#"\""""#))]
4342
Block(&'a str),
4443
}
4544

46-
impl Deref for StringValue<'_> {
45+
impl Deref for StringLiteral<'_> {
4746
type Target = str;
4847

4948
fn deref(&self) -> &Self::Target {
@@ -115,6 +114,10 @@ pub enum LexerError {
115114
#[display("Unterminated string literal")]
116115
UnterminatedString,
117116

117+
/// An unterminated block string literal was found.
118+
#[display("Unterminated block string literal")]
119+
UnterminatedBlockString,
120+
118121
/// An unknown character in a string literal was found
119122
///
120123
/// This occurs when an invalid source character is found in a string
@@ -294,8 +297,8 @@ impl<'a> Lexer<'a> {
294297
return Ok(Spanning::start_end(
295298
&start_pos,
296299
&self.position,
297-
Token::Scalar(ScalarToken::String(StringValue::Quoted(
298-
&self.source[start_idx + 1..idx],
300+
Token::Scalar(ScalarToken::String(StringLiteral::Quoted(
301+
&self.source[start_idx..=idx],
299302
))),
300303
));
301304
}
@@ -322,6 +325,77 @@ impl<'a> Lexer<'a> {
322325
))
323326
}
324327

328+
/*
329+
fn scan_block_string(&mut self) -> LexerResult<'a> {
330+
let start_pos = self.position;
331+
let (start_idx, mut start_ch) = self
332+
.next_char()
333+
.ok_or_else(|| Spanning::zero_width(&self.position, LexerError::UnexpectedEndOfFile))?;
334+
if start_ch != '"' {
335+
return Err(Spanning::zero_width(
336+
&self.position,
337+
LexerError::UnterminatedString,
338+
));
339+
}
340+
for _ in 0..2 {
341+
(_, start_ch) = self.next_char().ok_or_else(|| {
342+
Spanning::zero_width(&self.position, LexerError::UnexpectedEndOfFile)
343+
})?;
344+
if start_ch != '"' {
345+
return Err(Spanning::zero_width(
346+
&self.position,
347+
LexerError::UnexpectedCharacter(start_ch),
348+
));
349+
}
350+
}
351+
352+
let mut quotes = 0;
353+
let mut escaped = false;
354+
let mut old_pos = self.position;
355+
while let Some((idx, ch)) = self.next_char() {
356+
match ch {
357+
'\\' => escaped = true,
358+
359+
360+
'b' | 'f' | 'n' | 'r' | 't' | '\\' | '/' | '"' if escaped => {
361+
escaped = false;
362+
}
363+
'u' if escaped => {
364+
self.scan_escaped_unicode(&old_pos)?;
365+
escaped = false;
366+
}
367+
c if escaped => {
368+
return Err(Spanning::zero_width(
369+
&old_pos,
370+
LexerError::UnknownEscapeSequence(format!("\\{c}")),
371+
));
372+
}
373+
374+
375+
376+
'"' if !escaped => {
377+
return Ok(Spanning::start_end(
378+
&start_pos,
379+
&self.position,
380+
Token::Scalar(ScalarToken::String(StringValue::Quoted(
381+
&self.source[start_idx + 1..idx],
382+
))),
383+
));
384+
}
385+
386+
_ => {}
387+
}
388+
old_pos = self.position;
389+
}
390+
391+
Err(Spanning::zero_width(
392+
&self.position,
393+
LexerError::UnterminatedBlockString,
394+
))
395+
}
396+
397+
*/
398+
325399
fn scan_escaped_unicode(
326400
&mut self,
327401
start_pos: &SourcePosition,

juniper/src/parser/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ mod tests;
1313
pub use self::document::parse_document_source;
1414

1515
pub use self::{
16-
lexer::{Lexer, LexerError, ScalarToken, StringValue, Token},
16+
lexer::{Lexer, LexerError, ScalarToken, StringLiteral, Token},
1717
parser::{OptionParseResult, ParseError, ParseResult, Parser, UnlocatedParseResult},
1818
utils::{SourcePosition, Span, Spanning},
1919
};

juniper/src/parser/parser.rs

Lines changed: 127 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
use std::fmt;
1+
use std::{borrow::Cow, fmt};
22

33
use compact_str::{CompactString, format_compact};
44
use derive_more::with_trait::{Display, Error};
55

6-
use crate::parser::{Lexer, LexerError, Spanning, Token};
6+
use crate::parser::{Lexer, LexerError, ScalarToken, Spanning, StringLiteral, Token};
77

88
/// Error while parsing a GraphQL query
99
#[derive(Clone, Debug, Display, Eq, Error, PartialEq)]
@@ -199,3 +199,128 @@ impl<'a> Parser<'a> {
199199
}
200200
}
201201
}
202+
203+
impl<'a> StringLiteral<'a> {
204+
/// Parses this [`StringLiteral`] returning an unescaped and unquoted string value.
205+
///
206+
/// # Errors
207+
///
208+
/// If this [`StringLiteral`] is invalid.
209+
pub fn parse(self) -> Result<Cow<'a, str>, ParseError> {
210+
match self {
211+
Self::Quoted(lit) => {
212+
if !lit.starts_with('"') {
213+
return Err(ParseError::unexpected_token(Token::Scalar(
214+
ScalarToken::String(self),
215+
)));
216+
}
217+
if !lit.ends_with('"') {
218+
return Err(ParseError::LexerError(LexerError::UnterminatedString));
219+
}
220+
221+
let unquoted = &lit[1..lit.len() - 1];
222+
if !unquoted.contains('\\') {
223+
return Ok(unquoted.into());
224+
}
225+
226+
let mut unescaped = String::with_capacity(unquoted.len());
227+
let mut char_iter = unquoted.chars();
228+
while let Some(ch) = char_iter.next() {
229+
match ch {
230+
'\\' => match char_iter.next() {
231+
Some('"') => {
232+
unescaped.push('"');
233+
}
234+
Some('/') => {
235+
unescaped.push('/');
236+
}
237+
Some('n') => {
238+
unescaped.push('\n');
239+
}
240+
Some('r') => {
241+
unescaped.push('\r');
242+
}
243+
Some('t') => {
244+
unescaped.push('\t');
245+
}
246+
Some('\\') => {
247+
unescaped.push('\\');
248+
}
249+
Some('f') => {
250+
unescaped.push('\u{000c}');
251+
}
252+
Some('b') => {
253+
unescaped.push('\u{0008}');
254+
}
255+
Some('u') => {
256+
unescaped.push(parse_unicode_codepoint(&mut char_iter)?);
257+
}
258+
Some(s) => {
259+
return Err(ParseError::LexerError(
260+
LexerError::UnknownEscapeSequence(format!(r"\{s}")),
261+
));
262+
}
263+
None => {
264+
return Err(ParseError::LexerError(LexerError::UnterminatedString));
265+
}
266+
},
267+
ch => {
268+
unescaped.push(ch);
269+
}
270+
}
271+
}
272+
Ok(unescaped.into())
273+
}
274+
Self::Block(_) => todo!(),
275+
}
276+
}
277+
}
278+
279+
fn parse_unicode_codepoint<I>(char_iter: &mut I) -> Result<char, ParseError>
280+
where
281+
I: Iterator<Item = char>,
282+
{
283+
let escaped_code_point = char_iter
284+
.next()
285+
.ok_or_else(|| ParseError::LexerError(LexerError::UnknownEscapeSequence(r"\u".into())))
286+
.and_then(|c1| {
287+
char_iter
288+
.next()
289+
.map(|c2| format!("{c1}{c2}"))
290+
.ok_or_else(|| {
291+
ParseError::LexerError(LexerError::UnknownEscapeSequence(format!(r"\u{c1}")))
292+
})
293+
})
294+
.and_then(|mut s| {
295+
char_iter
296+
.next()
297+
.ok_or_else(|| {
298+
ParseError::LexerError(LexerError::UnknownEscapeSequence(format!(r"\u{s}")))
299+
})
300+
.map(|c2| {
301+
s.push(c2);
302+
s
303+
})
304+
})
305+
.and_then(|mut s| {
306+
char_iter
307+
.next()
308+
.ok_or_else(|| {
309+
ParseError::LexerError(LexerError::UnknownEscapeSequence(format!(r"\u{s}")))
310+
})
311+
.map(|c2| {
312+
s.push(c2);
313+
s
314+
})
315+
})?;
316+
let code_point = u32::from_str_radix(&escaped_code_point, 16).map_err(|_| {
317+
ParseError::LexerError(LexerError::UnknownEscapeSequence(format!(
318+
r"\u{escaped_code_point}",
319+
)))
320+
})?;
321+
char::from_u32(code_point).ok_or_else(|| {
322+
ParseError::LexerError(LexerError::UnknownEscapeSequence(format!(
323+
r"\u{escaped_code_point}",
324+
)))
325+
})
326+
}

0 commit comments

Comments
 (0)