From 4b479f0099bbb67ce9adf11e78dab33df31de9c0 Mon Sep 17 00:00:00 2001 From: tyranron Date: Fri, 12 Sep 2025 19:29:37 +0300 Subject: [PATCH 01/13] Bootstrap block string in lexer --- juniper/src/ast.rs | 1 + juniper/src/parser/lexer.rs | 38 +++++++++++++++++++++++++++---- juniper/src/parser/mod.rs | 2 +- juniper/src/parser/tests/lexer.rs | 38 +++++++++++++++++++++++-------- juniper/src/types/scalars.rs | 23 ++++++++++++------- 5 files changed, 80 insertions(+), 22 deletions(-) diff --git a/juniper/src/ast.rs b/juniper/src/ast.rs index 0c2208cb2..4f22bcc65 100644 --- a/juniper/src/ast.rs +++ b/juniper/src/ast.rs @@ -384,6 +384,7 @@ pub enum OperationType { #[expect(missing_docs, reason = "self-explanatory")] #[derive(Clone, Debug, PartialEq)] pub struct Operation<'a, S> { + //pub description: Option>, pub operation_type: OperationType, pub name: Option>, pub variable_definitions: Option>>, diff --git a/juniper/src/parser/lexer.rs b/juniper/src/parser/lexer.rs index 56b1a61de..533dca382 100644 --- a/juniper/src/parser/lexer.rs +++ b/juniper/src/parser/lexer.rs @@ -1,4 +1,4 @@ -use std::{char, iter::Peekable, str::CharIndices}; +use std::{char, iter::Peekable, ops::Deref, str::CharIndices}; use derive_more::with_trait::{Display, Error}; @@ -20,12 +20,40 @@ pub struct Lexer<'a> { #[expect(missing_docs, reason = "self-explanatory")] #[derive(Clone, Copy, Debug, Display, Eq, PartialEq)] pub enum ScalarToken<'a> { - #[display("\"{}\"", _0.replace('\\', "\\\\").replace('"', "\\\""))] - String(&'a str), + String(StringValue<'a>), Float(&'a str), Int(&'a str), } +/// Representation of a [String Value]. +/// +/// [String Value]: https://spec.graphql.org/October2021#sec-String-Value +#[derive(Clone, Copy, Debug, Display, Eq, PartialEq)] +pub enum StringValue<'a> { + /// [Quoted][0] string representation. 
+ /// + /// [0]: https://spec.graphql.org/October2021#StringCharacter + #[display(r#""{}""#, _0.replace('\\', r"\\").replace('"', r#"\""#))] + Quoted(&'a str), + + /// [Block][0] string representation. + /// + /// [0]: https://spec.graphql.org/October2021#BlockStringCharacter + #[display(r#""""{}""""#, _0.replace(r#"""""#, r#"\""""#))] + Block(&'a str), +} + +impl Deref for StringValue<'_> { + type Target = str; + + fn deref(&self) -> &Self::Target { + match self { + Self::Quoted(s) => s, + Self::Block(s) => s, + } + } +} + /// A single token in the input source #[expect(missing_docs, reason = "self-explanatory")] #[derive(Clone, Copy, Debug, Display, Eq, PartialEq)] @@ -266,7 +294,9 @@ impl<'a> Lexer<'a> { return Ok(Spanning::start_end( &start_pos, &self.position, - Token::Scalar(ScalarToken::String(&self.source[start_idx + 1..idx])), + Token::Scalar(ScalarToken::String(StringValue::Quoted( + &self.source[start_idx + 1..idx], + ))), )); } '\n' | '\r' => { diff --git a/juniper/src/parser/mod.rs b/juniper/src/parser/mod.rs index eb2d0d3c1..ea41ff338 100644 --- a/juniper/src/parser/mod.rs +++ b/juniper/src/parser/mod.rs @@ -13,7 +13,7 @@ mod tests; pub use self::document::parse_document_source; pub use self::{ - lexer::{Lexer, LexerError, ScalarToken, Token}, + lexer::{Lexer, LexerError, ScalarToken, StringValue, Token}, parser::{OptionParseResult, ParseError, ParseResult, Parser, UnlocatedParseResult}, utils::{SourcePosition, Span, Spanning}, }; diff --git a/juniper/src/parser/tests/lexer.rs b/juniper/src/parser/tests/lexer.rs index 0eb62ae73..068f57673 100644 --- a/juniper/src/parser/tests/lexer.rs +++ b/juniper/src/parser/tests/lexer.rs @@ -1,4 +1,4 @@ -use crate::parser::{Lexer, LexerError, ScalarToken, SourcePosition, Spanning, Token}; +use crate::parser::{Lexer, LexerError, ScalarToken, SourcePosition, Spanning, StringValue, Token}; fn tokenize_to_vec(s: &str) -> Vec>> { let mut tokens = Vec::new(); @@ -21,6 +21,7 @@ fn tokenize_to_vec(s: &str) -> Vec>> { 
tokens } +#[track_caller] fn tokenize_single(s: &str) -> Spanning> { let mut tokens = tokenize_to_vec(s); @@ -151,7 +152,7 @@ fn strings() { Spanning::start_end( &SourcePosition::new(0, 0, 0), &SourcePosition::new(8, 0, 8), - Token::Scalar(ScalarToken::String("simple")) + Token::Scalar(ScalarToken::String(StringValue::Quoted("simple"))) ) ); @@ -160,7 +161,7 @@ fn strings() { Spanning::start_end( &SourcePosition::new(0, 0, 0), &SourcePosition::new(15, 0, 15), - Token::Scalar(ScalarToken::String(" white space ")) + Token::Scalar(ScalarToken::String(StringValue::Quoted(" white space "))) ) ); @@ -169,7 +170,7 @@ fn strings() { Spanning::start_end( &SourcePosition::new(0, 0, 0), &SourcePosition::new(10, 0, 10), - Token::Scalar(ScalarToken::String(r#"quote \""#)) + Token::Scalar(ScalarToken::String(StringValue::Quoted(r#"quote \""#))) ) ); @@ -178,7 +179,9 @@ fn strings() { Spanning::start_end( &SourcePosition::new(0, 0, 0), &SourcePosition::new(20, 0, 20), - Token::Scalar(ScalarToken::String(r"escaped \n\r\b\t\f")) + Token::Scalar(ScalarToken::String(StringValue::Quoted( + r"escaped \n\r\b\t\f" + ))) ) ); @@ -187,7 +190,7 @@ fn strings() { Spanning::start_end( &SourcePosition::new(0, 0, 0), &SourcePosition::new(15, 0, 15), - Token::Scalar(ScalarToken::String(r"slashes \\ \/")) + Token::Scalar(ScalarToken::String(StringValue::Quoted(r"slashes \\ \/"))) ) ); @@ -196,7 +199,21 @@ fn strings() { Spanning::start_end( &SourcePosition::new(0, 0, 0), &SourcePosition::new(34, 0, 34), - Token::Scalar(ScalarToken::String(r"unicode \u1234\u5678\u90AB\uCDEF")), + Token::Scalar(ScalarToken::String(StringValue::Quoted( + r"unicode \u1234\u5678\u90AB\uCDEF" + ))), + ) + ); +} + +#[test] +fn block_strings() { + assert_eq!( + tokenize_single(r#""""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(8, 0, 8), + Token::Scalar(ScalarToken::String(StringValue::Block("".into()))), ) ); } @@ -665,13 +682,16 @@ fn display() { 
(Token::Scalar(ScalarToken::Int("123")), "123"), (Token::Scalar(ScalarToken::Float("4.5")), "4.5"), ( - Token::Scalar(ScalarToken::String("some string")), + Token::Scalar(ScalarToken::String(StringValue::Quoted("some string"))), "\"some string\"", ), ( - Token::Scalar(ScalarToken::String("string with \\ escape and \" quote")), + Token::Scalar(ScalarToken::String(StringValue::Quoted( + "string with \\ escape and \" quote", + ))), "\"string with \\\\ escape and \\\" quote\"", ), + // TODO: tests for block string (Token::ExclamationMark, "!"), (Token::Dollar, "$"), (Token::ParenOpen, "("), diff --git a/juniper/src/types/scalars.rs b/juniper/src/types/scalars.rs index 8467e8c0b..a6e9a793d 100644 --- a/juniper/src/types/scalars.rs +++ b/juniper/src/types/scalars.rs @@ -68,6 +68,7 @@ type String = std::string::String; mod impl_string_scalar { use super::*; + use crate::parser::StringValue; impl<'s, S> FromScalarValue<'s, S> for String where @@ -81,7 +82,7 @@ mod impl_string_scalar { } pub(super) fn parse_token(value: ScalarToken<'_>) -> ParseScalarResult { - if let ScalarToken::String(value) = value { + if let ScalarToken::String(StringValue::Quoted(value)) = value { let mut ret = String::with_capacity(value.len()); let mut char_iter = value.chars(); while let Some(ch) = char_iter.next() { @@ -128,6 +129,7 @@ mod impl_string_scalar { } Ok(ret.into()) } else { + // TODO: Support block string too. 
Err(ParseError::unexpected_token(Token::Scalar(value))) } } @@ -571,7 +573,9 @@ mod tests { #[test] fn parse_strings() { - fn parse_string(s: &str, expected: &str) { + use crate::parser::StringValue::{self, Quoted}; + + fn parse_string(s: StringValue<'_>, expected: &str) { let s = >::from_str(ScalarToken::String(s)); assert!(s.is_ok(), "A parsing error occurred: {s:?}"); @@ -580,13 +584,16 @@ mod tests { assert_eq!(s.unwrap(), expected); } - parse_string("simple", "simple"); - parse_string(" white space ", " white space "); - parse_string(r#"quote \""#, "quote \""); - parse_string(r"escaped \n\r\b\t\f", "escaped \n\r\u{0008}\t\u{000c}"); - parse_string(r"slashes \\ \/", "slashes \\ /"); + parse_string(Quoted("simple"), "simple"); + parse_string(Quoted(" white space "), " white space "); + parse_string(Quoted(r#"quote \""#), "quote \""); + parse_string( + Quoted(r"escaped \n\r\b\t\f"), + "escaped \n\r\u{0008}\t\u{000c}", + ); + parse_string(Quoted(r"slashes \\ \/"), "slashes \\ /"); parse_string( - r"unicode \u1234\u5678\u90AB\uCDEF", + Quoted(r"unicode \u1234\u5678\u90AB\uCDEF"), "unicode \u{1234}\u{5678}\u{90ab}\u{cdef}", ); } From 68b47c2f51f701819fa790b7df85ba60ab89c0cd Mon Sep 17 00:00:00 2001 From: tyranron Date: Fri, 12 Sep 2025 23:15:20 +0300 Subject: [PATCH 02/13] Fix string literals lexing, parsing and displaying --- juniper/CHANGELOG.md | 3 + juniper/src/lib.rs | 2 +- juniper/src/parser/lexer.rs | 96 ++++++++++++++++--- juniper/src/parser/mod.rs | 2 +- juniper/src/parser/parser.rs | 129 +++++++++++++++++++++++++- juniper/src/parser/tests/lexer.rs | 94 ++++++++++--------- juniper/src/types/scalars.rs | 147 +++++------------------------- 7 files changed, 291 insertions(+), 182 deletions(-) diff --git a/juniper/CHANGELOG.md b/juniper/CHANGELOG.md index 4863ea4bf..03504ca2d 100644 --- a/juniper/CHANGELOG.md +++ b/juniper/CHANGELOG.md @@ -15,6 +15,7 @@ All user visible changes to `juniper` crate will be documented in this file. 
Thi - [September 2025] GraphQL spec: ([#1347]) - Made `includeDeprecated` argument of `__Type.fields`, `__Type.enumValues`, `__Type.inputFields`, `__Field.args` and `__Directive.args` fields non-`Null`. ([#1348], [graphql/graphql-spec#1142]) - Made `@deprecated(reason:)` argument non-`Null`. ([#1348], [graphql/graphql-spec#1040]) +- Changed `ScalarToken::String` to contain raw quoted and escaped `StringLiteral` (was unquoted but escaped string before). ([#1349]) ### Added @@ -38,10 +39,12 @@ All user visible changes to `juniper` crate will be documented in this file. Thi - Incorrect `__Type.specifiedByUrl` field to `__Type.specifiedByURL`. ([#1348]) - Missing `@specifiedBy(url:)` directive in [SDL] generated by `RootNode::as_sdl()` and `RootNode::as_document()` methods. ([#1348]) +- Incorrect double escaping in `ScalarToken::String` `Display`ing. ([#1349]) [#864]: /../../issues/864 [#1347]: /../../issues/1347 [#1348]: /../../pull/1348 +[#1349]: /../../pull/1349 [graphql/graphql-spec#525]: https://github.com/graphql/graphql-spec/pull/525 [graphql/graphql-spec#805]: https://github.com/graphql/graphql-spec/pull/805 [graphql/graphql-spec#825]: https://github.com/graphql/graphql-spec/pull/825 diff --git a/juniper/src/lib.rs b/juniper/src/lib.rs index e79c5290a..f0f563313 100644 --- a/juniper/src/lib.rs +++ b/juniper/src/lib.rs @@ -85,7 +85,7 @@ pub use crate::{ }, introspection::IntrospectionFormat, macros::helper::subscription::{ExtractTypeFromStream, IntoFieldResult}, - parser::{ParseError, ScalarToken, Span, Spanning}, + parser::{ParseError, ScalarToken, Span, Spanning, StringLiteral}, schema::{ meta, model::{RootNode, SchemaType}, diff --git a/juniper/src/parser/lexer.rs b/juniper/src/parser/lexer.rs index 533dca382..c678da927 100644 --- a/juniper/src/parser/lexer.rs +++ b/juniper/src/parser/lexer.rs @@ -1,6 +1,7 @@ use std::{char, iter::Peekable, ops::Deref, str::CharIndices}; use derive_more::with_trait::{Display, Error}; +//use itertools::Itertools as _; use 
crate::parser::{SourcePosition, Spanning}; @@ -14,36 +15,34 @@ pub struct Lexer<'a> { has_reached_eof: bool, } -/// A single scalar value literal +/// Representation of a raw unparsed scalar value literal. /// /// This is only used for tagging how the lexer has interpreted a value literal #[expect(missing_docs, reason = "self-explanatory")] #[derive(Clone, Copy, Debug, Display, Eq, PartialEq)] pub enum ScalarToken<'a> { - String(StringValue<'a>), + String(StringLiteral<'a>), Float(&'a str), Int(&'a str), } -/// Representation of a [String Value]. +/// Representation of a raw unparsed [String Value] literal (with quotes included). /// /// [String Value]: https://spec.graphql.org/October2021#sec-String-Value #[derive(Clone, Copy, Debug, Display, Eq, PartialEq)] -pub enum StringValue<'a> { - /// [Quoted][0] string representation. +pub enum StringLiteral<'a> { + /// [Quoted][0] literal (denoted by single quotes `"`). /// /// [0]: https://spec.graphql.org/October2021#StringCharacter - #[display(r#""{}""#, _0.replace('\\', r"\\").replace('"', r#"\""#))] Quoted(&'a str), - /// [Block][0] string representation. + /// [Block][0] literal (denoted by triple quotes `"""`). /// /// [0]: https://spec.graphql.org/October2021#BlockStringCharacter - #[display(r#""""{}""""#, _0.replace(r#"""""#, r#"\""""#))] Block(&'a str), } -impl Deref for StringValue<'_> { +impl Deref for StringLiteral<'_> { type Target = str; fn deref(&self) -> &Self::Target { @@ -115,6 +114,10 @@ pub enum LexerError { #[display("Unterminated string literal")] UnterminatedString, + /// An unterminated block string literal was found. 
+ #[display("Unterminated block string literal")] + UnterminatedBlockString, + /// An unknown character in a string literal was found /// /// This occurs when an invalid source character is found in a string @@ -294,8 +297,8 @@ impl<'a> Lexer<'a> { return Ok(Spanning::start_end( &start_pos, &self.position, - Token::Scalar(ScalarToken::String(StringValue::Quoted( - &self.source[start_idx + 1..idx], + Token::Scalar(ScalarToken::String(StringLiteral::Quoted( + &self.source[start_idx..=idx], ))), )); } @@ -322,6 +325,77 @@ impl<'a> Lexer<'a> { )) } + /* + fn scan_block_string(&mut self) -> LexerResult<'a> { + let start_pos = self.position; + let (start_idx, mut start_ch) = self + .next_char() + .ok_or_else(|| Spanning::zero_width(&self.position, LexerError::UnexpectedEndOfFile))?; + if start_ch != '"' { + return Err(Spanning::zero_width( + &self.position, + LexerError::UnterminatedString, + )); + } + for _ in 0..2 { + (_, start_ch) = self.next_char().ok_or_else(|| { + Spanning::zero_width(&self.position, LexerError::UnexpectedEndOfFile) + })?; + if start_ch != '"' { + return Err(Spanning::zero_width( + &self.position, + LexerError::UnexpectedCharacter(start_ch), + )); + } + } + + let mut quotes = 0; + let mut escaped = false; + let mut old_pos = self.position; + while let Some((idx, ch)) = self.next_char() { + match ch { + '\\' => escaped = true, + + + 'b' | 'f' | 'n' | 'r' | 't' | '\\' | '/' | '"' if escaped => { + escaped = false; + } + 'u' if escaped => { + self.scan_escaped_unicode(&old_pos)?; + escaped = false; + } + c if escaped => { + return Err(Spanning::zero_width( + &old_pos, + LexerError::UnknownEscapeSequence(format!("\\{c}")), + )); + } + + + + '"' if !escaped => { + return Ok(Spanning::start_end( + &start_pos, + &self.position, + Token::Scalar(ScalarToken::String(StringValue::Quoted( + &self.source[start_idx + 1..idx], + ))), + )); + } + + _ => {} + } + old_pos = self.position; + } + + Err(Spanning::zero_width( + &self.position, + 
LexerError::UnterminatedBlockString, + )) + } + + */ + fn scan_escaped_unicode( &mut self, start_pos: &SourcePosition, diff --git a/juniper/src/parser/mod.rs b/juniper/src/parser/mod.rs index ea41ff338..f7d98b114 100644 --- a/juniper/src/parser/mod.rs +++ b/juniper/src/parser/mod.rs @@ -13,7 +13,7 @@ mod tests; pub use self::document::parse_document_source; pub use self::{ - lexer::{Lexer, LexerError, ScalarToken, StringValue, Token}, + lexer::{Lexer, LexerError, ScalarToken, StringLiteral, Token}, parser::{OptionParseResult, ParseError, ParseResult, Parser, UnlocatedParseResult}, utils::{SourcePosition, Span, Spanning}, }; diff --git a/juniper/src/parser/parser.rs b/juniper/src/parser/parser.rs index 2459df205..3388987d1 100644 --- a/juniper/src/parser/parser.rs +++ b/juniper/src/parser/parser.rs @@ -1,9 +1,9 @@ -use std::fmt; +use std::{borrow::Cow, fmt}; use compact_str::{CompactString, format_compact}; use derive_more::with_trait::{Display, Error}; -use crate::parser::{Lexer, LexerError, Spanning, Token}; +use crate::parser::{Lexer, LexerError, ScalarToken, Spanning, StringLiteral, Token}; /// Error while parsing a GraphQL query #[derive(Clone, Debug, Display, Eq, Error, PartialEq)] @@ -199,3 +199,128 @@ impl<'a> Parser<'a> { } } } + +impl<'a> StringLiteral<'a> { + /// Parses this [`StringLiteral`] returning an unescaped and unquoted string value. + /// + /// # Errors + /// + /// If this [`StringLiteral`] is invalid. 
+ pub fn parse(self) -> Result, ParseError> { + match self { + Self::Quoted(lit) => { + if !lit.starts_with('"') { + return Err(ParseError::unexpected_token(Token::Scalar( + ScalarToken::String(self), + ))); + } + if !lit.ends_with('"') { + return Err(ParseError::LexerError(LexerError::UnterminatedString)); + } + + let unquoted = &lit[1..lit.len() - 1]; + if !unquoted.contains('\\') { + return Ok(unquoted.into()); + } + + let mut unescaped = String::with_capacity(unquoted.len()); + let mut char_iter = unquoted.chars(); + while let Some(ch) = char_iter.next() { + match ch { + '\\' => match char_iter.next() { + Some('"') => { + unescaped.push('"'); + } + Some('/') => { + unescaped.push('/'); + } + Some('n') => { + unescaped.push('\n'); + } + Some('r') => { + unescaped.push('\r'); + } + Some('t') => { + unescaped.push('\t'); + } + Some('\\') => { + unescaped.push('\\'); + } + Some('f') => { + unescaped.push('\u{000c}'); + } + Some('b') => { + unescaped.push('\u{0008}'); + } + Some('u') => { + unescaped.push(parse_unicode_codepoint(&mut char_iter)?); + } + Some(s) => { + return Err(ParseError::LexerError( + LexerError::UnknownEscapeSequence(format!(r"\{s}")), + )); + } + None => { + return Err(ParseError::LexerError(LexerError::UnterminatedString)); + } + }, + ch => { + unescaped.push(ch); + } + } + } + Ok(unescaped.into()) + } + Self::Block(_) => todo!(), + } + } +} + +fn parse_unicode_codepoint(char_iter: &mut I) -> Result +where + I: Iterator, +{ + let escaped_code_point = char_iter + .next() + .ok_or_else(|| ParseError::LexerError(LexerError::UnknownEscapeSequence(r"\u".into()))) + .and_then(|c1| { + char_iter + .next() + .map(|c2| format!("{c1}{c2}")) + .ok_or_else(|| { + ParseError::LexerError(LexerError::UnknownEscapeSequence(format!(r"\u{c1}"))) + }) + }) + .and_then(|mut s| { + char_iter + .next() + .ok_or_else(|| { + ParseError::LexerError(LexerError::UnknownEscapeSequence(format!(r"\u{s}"))) + }) + .map(|c2| { + s.push(c2); + s + }) + }) + .and_then(|mut s| 
{ + char_iter + .next() + .ok_or_else(|| { + ParseError::LexerError(LexerError::UnknownEscapeSequence(format!(r"\u{s}"))) + }) + .map(|c2| { + s.push(c2); + s + }) + })?; + let code_point = u32::from_str_radix(&escaped_code_point, 16).map_err(|_| { + ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( + r"\u{escaped_code_point}", + ))) + })?; + char::from_u32(code_point).ok_or_else(|| { + ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( + r"\u{escaped_code_point}", + ))) + }) +} diff --git a/juniper/src/parser/tests/lexer.rs b/juniper/src/parser/tests/lexer.rs index 068f57673..2d34f91d2 100644 --- a/juniper/src/parser/tests/lexer.rs +++ b/juniper/src/parser/tests/lexer.rs @@ -1,5 +1,10 @@ -use crate::parser::{Lexer, LexerError, ScalarToken, SourcePosition, Spanning, StringValue, Token}; +use crate::parser::{ + Lexer, LexerError, ScalarToken, SourcePosition, Spanning, + StringLiteral::{Block, Quoted}, + Token, +}; +#[track_caller] fn tokenize_to_vec(s: &str) -> Vec>> { let mut tokens = Vec::new(); let mut lexer = Lexer::new(s); @@ -152,8 +157,8 @@ fn strings() { Spanning::start_end( &SourcePosition::new(0, 0, 0), &SourcePosition::new(8, 0, 8), - Token::Scalar(ScalarToken::String(StringValue::Quoted("simple"))) - ) + Token::Scalar(ScalarToken::String(Quoted(r#""simple""#))), + ), ); assert_eq!( @@ -161,8 +166,8 @@ fn strings() { Spanning::start_end( &SourcePosition::new(0, 0, 0), &SourcePosition::new(15, 0, 15), - Token::Scalar(ScalarToken::String(StringValue::Quoted(" white space "))) - ) + Token::Scalar(ScalarToken::String(Quoted(r#"" white space ""#))), + ), ); assert_eq!( @@ -170,8 +175,8 @@ fn strings() { Spanning::start_end( &SourcePosition::new(0, 0, 0), &SourcePosition::new(10, 0, 10), - Token::Scalar(ScalarToken::String(StringValue::Quoted(r#"quote \""#))) - ) + Token::Scalar(ScalarToken::String(Quoted(r#""quote \"""#))), + ), ); assert_eq!( @@ -179,10 +184,8 @@ fn strings() { Spanning::start_end( &SourcePosition::new(0, 0, 0), 
&SourcePosition::new(20, 0, 20), - Token::Scalar(ScalarToken::String(StringValue::Quoted( - r"escaped \n\r\b\t\f" - ))) - ) + Token::Scalar(ScalarToken::String(Quoted(r#""escaped \n\r\b\t\f""#))), + ), ); assert_eq!( @@ -190,8 +193,8 @@ fn strings() { Spanning::start_end( &SourcePosition::new(0, 0, 0), &SourcePosition::new(15, 0, 15), - Token::Scalar(ScalarToken::String(StringValue::Quoted(r"slashes \\ \/"))) - ) + Token::Scalar(ScalarToken::String(Quoted(r#""slashes \\ \/""#))), + ), ); assert_eq!( @@ -199,41 +202,42 @@ fn strings() { Spanning::start_end( &SourcePosition::new(0, 0, 0), &SourcePosition::new(34, 0, 34), - Token::Scalar(ScalarToken::String(StringValue::Quoted( - r"unicode \u1234\u5678\u90AB\uCDEF" + Token::Scalar(ScalarToken::String(Quoted( + r#""unicode \u1234\u5678\u90AB\uCDEF""# ))), - ) + ), ); } #[test] +#[ignore] fn block_strings() { assert_eq!( tokenize_single(r#""""""#), Spanning::start_end( &SourcePosition::new(0, 0, 0), &SourcePosition::new(8, 0, 8), - Token::Scalar(ScalarToken::String(StringValue::Block("".into()))), - ) + Token::Scalar(ScalarToken::String(Block(r#""""""#))), + ), ); } #[test] fn string_errors() { assert_eq!( - tokenize_error("\""), + tokenize_error(r#"""#), Spanning::zero_width( &SourcePosition::new(1, 0, 1), LexerError::UnterminatedString, - ) + ), ); assert_eq!( - tokenize_error("\"no end quote"), + tokenize_error(r#""no end quote"#), Spanning::zero_width( &SourcePosition::new(13, 0, 13), LexerError::UnterminatedString, - ) + ), ); assert_eq!( @@ -241,7 +245,7 @@ fn string_errors() { Spanning::zero_width( &SourcePosition::new(20, 0, 20), LexerError::UnknownCharacterInString('\u{0007}'), - ) + ), ); assert_eq!( @@ -249,7 +253,7 @@ fn string_errors() { Spanning::zero_width( &SourcePosition::new(18, 0, 18), LexerError::UnknownCharacterInString('\u{0000}'), - ) + ), ); assert_eq!( @@ -257,7 +261,7 @@ fn string_errors() { Spanning::zero_width( &SourcePosition::new(6, 0, 6), LexerError::UnterminatedString, - ) + ), ); 
assert_eq!( @@ -265,7 +269,7 @@ fn string_errors() { Spanning::zero_width( &SourcePosition::new(6, 0, 6), LexerError::UnterminatedString, - ) + ), ); assert_eq!( @@ -273,7 +277,7 @@ fn string_errors() { Spanning::zero_width( &SourcePosition::new(6, 0, 6), LexerError::UnknownEscapeSequence("\\z".into()), - ) + ), ); assert_eq!( @@ -281,7 +285,7 @@ fn string_errors() { Spanning::zero_width( &SourcePosition::new(6, 0, 6), LexerError::UnknownEscapeSequence("\\x".into()), - ) + ), ); assert_eq!( @@ -289,7 +293,7 @@ fn string_errors() { Spanning::zero_width( &SourcePosition::new(6, 0, 6), LexerError::UnknownEscapeSequence("\\u1".into()), - ) + ), ); assert_eq!( @@ -297,7 +301,7 @@ fn string_errors() { Spanning::zero_width( &SourcePosition::new(6, 0, 6), LexerError::UnknownEscapeSequence("\\u0XX1".into()), - ) + ), ); assert_eq!( @@ -305,7 +309,7 @@ fn string_errors() { Spanning::zero_width( &SourcePosition::new(6, 0, 6), LexerError::UnknownEscapeSequence("\\uXXXX".into()), - ) + ), ); assert_eq!( @@ -313,7 +317,7 @@ fn string_errors() { Spanning::zero_width( &SourcePosition::new(6, 0, 6), LexerError::UnknownEscapeSequence("\\uFXXX".into()), - ) + ), ); assert_eq!( @@ -321,23 +325,23 @@ fn string_errors() { Spanning::zero_width( &SourcePosition::new(6, 0, 6), LexerError::UnknownEscapeSequence("\\uXXXF".into()), - ) + ), ); assert_eq!( tokenize_error(r#""unterminated in string \""#), Spanning::zero_width( &SourcePosition::new(26, 0, 26), - LexerError::UnterminatedString - ) + LexerError::UnterminatedString, + ), ); assert_eq!( tokenize_error(r#""unterminated \"#), Spanning::zero_width( &SourcePosition::new(15, 0, 15), - LexerError::UnterminatedString - ) + LexerError::UnterminatedString, + ), ); // Found by fuzzing. 
@@ -345,8 +349,8 @@ fn string_errors() { tokenize_error(r#""\uɠ^A"#), Spanning::zero_width( &SourcePosition::new(5, 0, 5), - LexerError::UnterminatedString - ) + LexerError::UnterminatedString, + ), ); } @@ -682,16 +686,16 @@ fn display() { (Token::Scalar(ScalarToken::Int("123")), "123"), (Token::Scalar(ScalarToken::Float("4.5")), "4.5"), ( - Token::Scalar(ScalarToken::String(StringValue::Quoted("some string"))), - "\"some string\"", + Token::Scalar(ScalarToken::String(Quoted(r#""some string""#))), + r#""some string""#, ), ( - Token::Scalar(ScalarToken::String(StringValue::Quoted( - "string with \\ escape and \" quote", + Token::Scalar(ScalarToken::String(Quoted( + r#""string with \\ escape and \" quote""#, ))), - "\"string with \\\\ escape and \\\" quote\"", + r#""string with \\ escape and \" quote""#, ), - // TODO: tests for block string + // TODO: Tests for `Block` string. (Token::ExclamationMark, "!"), (Token::Dollar, "$"), (Token::ParenOpen, "("), diff --git a/juniper/src/types/scalars.rs b/juniper/src/types/scalars.rs index a6e9a793d..612ff8036 100644 --- a/juniper/src/types/scalars.rs +++ b/juniper/src/types/scalars.rs @@ -1,4 +1,4 @@ -use std::{char, marker::PhantomData, rc::Rc, thread::JoinHandle}; +use std::{marker::PhantomData, rc::Rc, thread::JoinHandle}; use derive_more::with_trait::{Deref, Display, From, Into}; use serde::{Deserialize, Serialize}; @@ -9,7 +9,7 @@ use crate::{ executor::{ExecutionResult, Executor, Registry}, graphql_scalar, macros::reflect, - parser::{LexerError, ParseError, ScalarToken, Token}, + parser::{ParseError, ScalarToken, Token}, schema::meta::MetaType, types::{ async_await::GraphQLValueAsync, @@ -68,7 +68,6 @@ type String = std::string::String; mod impl_string_scalar { use super::*; - use crate::parser::StringValue; impl<'s, S> FromScalarValue<'s, S> for String where @@ -82,52 +81,10 @@ mod impl_string_scalar { } pub(super) fn parse_token(value: ScalarToken<'_>) -> ParseScalarResult { - if let 
ScalarToken::String(StringValue::Quoted(value)) = value { - let mut ret = String::with_capacity(value.len()); - let mut char_iter = value.chars(); - while let Some(ch) = char_iter.next() { - match ch { - '\\' => match char_iter.next() { - Some('"') => { - ret.push('"'); - } - Some('/') => { - ret.push('/'); - } - Some('n') => { - ret.push('\n'); - } - Some('r') => { - ret.push('\r'); - } - Some('t') => { - ret.push('\t'); - } - Some('\\') => { - ret.push('\\'); - } - Some('f') => { - ret.push('\u{000c}'); - } - Some('b') => { - ret.push('\u{0008}'); - } - Some('u') => { - ret.push(parse_unicode_codepoint(&mut char_iter)?); - } - Some(s) => { - return Err(ParseError::LexerError(LexerError::UnknownEscapeSequence( - format!("\\{s}"), - ))); - } - None => return Err(ParseError::LexerError(LexerError::UnterminatedString)), - }, - ch => { - ret.push(ch); - } - } - } - Ok(ret.into()) + if let ScalarToken::String(lit) = value { + let parsed = lit.parse()?; + // TODO: Allow cheaper from `Cow<'_, str>` conversions for `ScalarValue`. + Ok(parsed.into_owned().into()) } else { // TODO: Support block string too. 
Err(ParseError::unexpected_token(Token::Scalar(value))) @@ -135,57 +92,6 @@ mod impl_string_scalar { } } -fn parse_unicode_codepoint(char_iter: &mut I) -> Result -where - I: Iterator, -{ - let escaped_code_point = char_iter - .next() - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(String::from("\\u"))) - }) - .and_then(|c1| { - char_iter - .next() - .map(|c2| format!("{c1}{c2}")) - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!("\\u{c1}"))) - }) - }) - .and_then(|mut s| { - char_iter - .next() - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!("\\u{s}"))) - }) - .map(|c2| { - s.push(c2); - s - }) - }) - .and_then(|mut s| { - char_iter - .next() - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!("\\u{s}"))) - }) - .map(|c2| { - s.push(c2); - s - }) - })?; - let code_point = u32::from_str_radix(&escaped_code_point, 16).map_err(|_| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( - "\\u{escaped_code_point}", - ))) - })?; - char::from_u32(code_point).ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( - "\\u{escaped_code_point}", - ))) - }) -} - #[graphql_scalar] #[graphql( name = "String", @@ -539,7 +445,7 @@ impl Default for EmptySubscription { #[cfg(test)] mod tests { use crate::{ - parser::ScalarToken, + parser::{ScalarToken, StringLiteral}, value::{DefaultScalarValue, ParseScalarValue, ScalarValue as _}, }; @@ -573,29 +479,26 @@ mod tests { #[test] fn parse_strings() { - use crate::parser::StringValue::{self, Quoted}; - - fn parse_string(s: StringValue<'_>, expected: &str) { - let s = - >::from_str(ScalarToken::String(s)); - assert!(s.is_ok(), "A parsing error occurred: {s:?}"); - let s: Option = s.unwrap().try_to().ok(); - assert!(s.is_some(), "No string returned"); + for (input, expected) in [ + (r#""simple""#, "simple"), + (r#"" white space ""#, " white space "), + 
(r#""quote \"""#, r#"quote ""#), + (r#""escaped \n\r\b\t\f""#, "escaped \n\r\u{0008}\t\u{000c}"), + (r#""slashes \\ \/""#, r"slashes \ /"), + ( + r#""unicode \u1234\u5678\u90AB\uCDEF""#, + "unicode \u{1234}\u{5678}\u{90ab}\u{cdef}", + ), + ] { + let res = >::from_str( + ScalarToken::String(StringLiteral::Quoted(input)), + ); + assert!(res.is_ok(), "parsing error occurred: {}", res.unwrap_err()); + + let s: Option = res.unwrap().try_to().ok(); + assert!(s.is_some(), "no string returned"); assert_eq!(s.unwrap(), expected); } - - parse_string(Quoted("simple"), "simple"); - parse_string(Quoted(" white space "), " white space "); - parse_string(Quoted(r#"quote \""#), "quote \""); - parse_string( - Quoted(r"escaped \n\r\b\t\f"), - "escaped \n\r\u{0008}\t\u{000c}", - ); - parse_string(Quoted(r"slashes \\ \/"), "slashes \\ /"); - parse_string( - Quoted(r"unicode \u1234\u5678\u90AB\uCDEF"), - "unicode \u{1234}\u{5678}\u{90ab}\u{cdef}", - ); } #[test] From 0d1220062796c1559e2ae58a22cfa85257f0a1d2 Mon Sep 17 00:00:00 2001 From: tyranron Date: Sat, 13 Sep 2025 00:56:50 +0300 Subject: [PATCH 03/13] Lex block strings --- juniper/src/parser/lexer.rs | 56 ++--- juniper/src/parser/tests/lexer.rs | 197 ++++++++++++++++-- juniper/src/tests/fixtures/starwars/schema.rs | 3 +- 3 files changed, 199 insertions(+), 57 deletions(-) diff --git a/juniper/src/parser/lexer.rs b/juniper/src/parser/lexer.rs index c678da927..61fc07b35 100644 --- a/juniper/src/parser/lexer.rs +++ b/juniper/src/parser/lexer.rs @@ -1,14 +1,13 @@ -use std::{char, iter::Peekable, ops::Deref, str::CharIndices}; +use std::{char, ops::Deref, str::CharIndices}; use derive_more::with_trait::{Display, Error}; -//use itertools::Itertools as _; use crate::parser::{SourcePosition, Spanning}; #[doc(hidden)] #[derive(Debug)] pub struct Lexer<'a> { - iterator: Peekable>, + iterator: itertools::PeekNth>, source: &'a str, length: usize, position: SourcePosition, @@ -150,7 +149,7 @@ impl<'a> Lexer<'a> { #[doc(hidden)] pub fn 
new(source: &'a str) -> Lexer<'a> { Lexer { - iterator: source.char_indices().peekable(), + iterator: itertools::peek_nth(source.char_indices()), source, length: source.len(), position: SourcePosition::new_origin(), @@ -325,7 +324,6 @@ impl<'a> Lexer<'a> { )) } - /* fn scan_block_string(&mut self) -> LexerResult<'a> { let start_pos = self.position; let (start_idx, mut start_ch) = self @@ -349,43 +347,23 @@ impl<'a> Lexer<'a> { } } - let mut quotes = 0; - let mut escaped = false; - let mut old_pos = self.position; + let (mut quotes, mut escaped) = (0, false); while let Some((idx, ch)) = self.next_char() { match ch { - '\\' => escaped = true, - - - 'b' | 'f' | 'n' | 'r' | 't' | '\\' | '/' | '"' if escaped => { - escaped = false; - } - 'u' if escaped => { - self.scan_escaped_unicode(&old_pos)?; - escaped = false; - } - c if escaped => { - return Err(Spanning::zero_width( - &old_pos, - LexerError::UnknownEscapeSequence(format!("\\{c}")), - )); - } - - - - '"' if !escaped => { + '\\' => (quotes, escaped) = (0, true), + '"' if escaped => (quotes, escaped) = (0, false), + '"' if quotes < 2 => quotes += 1, + '"' if quotes == 2 => { return Ok(Spanning::start_end( &start_pos, &self.position, - Token::Scalar(ScalarToken::String(StringValue::Quoted( - &self.source[start_idx + 1..idx], + Token::Scalar(ScalarToken::String(StringLiteral::Block( + &self.source[start_idx..=idx], ))), )); } - - _ => {} + _ => (quotes, escaped) = (0, false), } - old_pos = self.position; } Err(Spanning::zero_width( @@ -394,8 +372,6 @@ impl<'a> Lexer<'a> { )) } - */ - fn scan_escaped_unicode( &mut self, start_pos: &SourcePosition, @@ -584,7 +560,15 @@ impl<'a> Iterator for Lexer<'a> { Some('@') => Ok(self.emit_single_char(Token::At)), Some('|') => Ok(self.emit_single_char(Token::Pipe)), Some('.') => self.scan_ellipsis(), - Some('"') => self.scan_string(), + Some('"') => { + if self.iterator.peek_nth(1).map(|&(_, ch)| ch) == Some('"') + && self.iterator.peek_nth(2).map(|&(_, ch)| ch) == Some('"') + { + 
self.scan_block_string() + } else { + self.scan_string() + } + } Some(ch) => { if is_number_start(ch) { self.scan_number() diff --git a/juniper/src/parser/tests/lexer.rs b/juniper/src/parser/tests/lexer.rs index 2d34f91d2..9bd5a4d01 100644 --- a/juniper/src/parser/tests/lexer.rs +++ b/juniper/src/parser/tests/lexer.rs @@ -18,8 +18,8 @@ fn tokenize_to_vec(s: &str) -> Vec>> { break; } } - Some(Err(e)) => panic!("Error in input stream: {e:#?} for {s:#?}"), - None => panic!("EOF before EndOfFile token in {s:#?}"), + Some(Err(e)) => panic!("error in input stream: {e} for {s:#?}"), + None => panic!("EOF before `Token::EndOfFile` in {s:#?}"), } } @@ -36,6 +36,7 @@ fn tokenize_single(s: &str) -> Spanning> { tokens.remove(0) } +#[track_caller] fn tokenize_error(s: &str) -> Spanning { let mut lexer = Lexer::new(s); @@ -43,13 +44,13 @@ fn tokenize_error(s: &str) -> Spanning { match lexer.next() { Some(Ok(t)) => { if t.item == Token::EndOfFile { - panic!("Tokenizer did not return error for {s:#?}"); + panic!("lexer did not return error for {s:#?}"); } } Some(Err(e)) => { return e; } - None => panic!("Tokenizer did not return error for {s:#?}"), + None => panic!("lexer did not return error for {s:#?}"), } } } @@ -140,13 +141,13 @@ fn error_positions() { ? 
- "# + "#, ) .next(), Some(Err(Spanning::zero_width( &SourcePosition::new(14, 2, 12), - LexerError::UnknownCharacter('?') - ))) + LexerError::UnknownCharacter('?'), + ))), ); } @@ -209,19 +210,6 @@ fn strings() { ); } -#[test] -#[ignore] -fn block_strings() { - assert_eq!( - tokenize_single(r#""""""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(8, 0, 8), - Token::Scalar(ScalarToken::String(Block(r#""""""#))), - ), - ); -} - #[test] fn string_errors() { assert_eq!( @@ -354,6 +342,175 @@ fn string_errors() { ); } +#[test] +fn block_strings() { + assert_eq!( + tokenize_single(r#""""""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(6, 0, 6), + Token::Scalar(ScalarToken::String(Block(r#""""""""#))), + ), + ); + assert_eq!( + tokenize_single(r#""""simple""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(12, 0, 12), + Token::Scalar(ScalarToken::String(Block(r#""""simple""""#))), + ), + ); + assert_eq!( + tokenize_single(r#"""" white space """"#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(19, 0, 19), + Token::Scalar(ScalarToken::String(Block(r#"""" white space """"#))), + ), + ); + assert_eq!( + tokenize_single(r#""""contains " quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(22, 0, 22), + Token::Scalar(ScalarToken::String(Block(r#""""contains " quote""""#))), + ), + ); + assert_eq!( + tokenize_single(r#""""contains \""" triple quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(32, 0, 32), + Token::Scalar(ScalarToken::String(Block( + r#""""contains \""" triple quote""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""contains \"" double quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(31, 0, 31), + Token::Scalar(ScalarToken::String(Block( + r#""""contains \"" double quote""""# + ))), + ), + ); + 
assert_eq!( + tokenize_single(r#""""contains \\""" triple quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(33, 0, 33), + Token::Scalar(ScalarToken::String(Block( + r#""""contains \\""" triple quote""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""\"""quote" """"#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(17, 0, 17), + Token::Scalar(ScalarToken::String(Block(r#""""\"""quote" """"#))), + ), + ); + assert_eq!( + tokenize_single(r#""""multi\nline""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(17, 0, 17), + Token::Scalar(ScalarToken::String(Block(r#""""multi\nline""""#))), + ), + ); + assert_eq!( + tokenize_single(r#""""multi\rline\r\nnormalized""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(31, 0, 31), + Token::Scalar(ScalarToken::String(Block( + r#""""multi\rline\r\nnormalized""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""unescaped \\n\\r\\b\\t\\f\\u1234""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(38, 0, 38), + Token::Scalar(ScalarToken::String(Block( + r#""""unescaped \\n\\r\\b\\t\\f\\u1234""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""unescaped unicode outside BMP \u{1f600}""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(45, 0, 45), + Token::Scalar(ScalarToken::String(Block( + r#""""unescaped unicode outside BMP \u{1f600}""""#, + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""slashes \\\\ \\/""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(22, 0, 22), + Token::Scalar(ScalarToken::String(Block(r#""""slashes \\\\ \\/""""#))), + ), + ); + assert_eq!( + tokenize_single( + r#"""" + + spans + multiple + lines + + """"#, + ), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(76, 6, 11), + 
Token::Scalar(ScalarToken::String(Block( + r#"""" + + spans + multiple + lines + + """"#, + ))), + ), + ); +} + +#[test] +fn block_string_errors() { + assert_eq!( + tokenize_error(r#""""""#), + Spanning::zero_width( + &SourcePosition::new(4, 0, 4), + LexerError::UnterminatedBlockString, + ), + ); + assert_eq!( + tokenize_error(r#"""""""#), + Spanning::zero_width( + &SourcePosition::new(5, 0, 5), + LexerError::UnterminatedBlockString, + ), + ); + assert_eq!( + tokenize_error(r#""""no end quote"#), + Spanning::zero_width( + &SourcePosition::new(15, 0, 15), + LexerError::UnterminatedBlockString, + ), + ); +} + #[test] fn numbers() { fn assert_float_token_eq( diff --git a/juniper/src/tests/fixtures/starwars/schema.rs b/juniper/src/tests/fixtures/starwars/schema.rs index 10d1f87a6..43ff46d9e 100644 --- a/juniper/src/tests/fixtures/starwars/schema.rs +++ b/juniper/src/tests/fixtures/starwars/schema.rs @@ -1,4 +1,5 @@ -#![expect(missing_docs, reason = "GraphQL schema testing")] +#![cfg_attr(test, expect(dead_code, reason = "GraphQL schema testing"))] +#![cfg_attr(not(test), expect(missing_docs, reason = "GraphQL schema testing"))] use std::{collections::HashMap, pin::Pin}; From 771a7afebf8dd61fcc002ccd8622c45aae047a09 Mon Sep 17 00:00:00 2001 From: tyranron Date: Sat, 13 Sep 2025 02:29:02 +0300 Subject: [PATCH 04/13] Parse block strings --- juniper/CHANGELOG.md | 3 ++ juniper/src/parser/parser.rs | 61 +++++++++++++++++++++- juniper/src/types/scalars.rs | 99 +++++++++++++++++++++++++++++++++++- 3 files changed, 161 insertions(+), 2 deletions(-) diff --git a/juniper/CHANGELOG.md b/juniper/CHANGELOG.md index 03504ca2d..ecb6f360d 100644 --- a/juniper/CHANGELOG.md +++ b/juniper/CHANGELOG.md @@ -16,6 +16,7 @@ All user visible changes to `juniper` crate will be documented in this file. Thi - Made `includeDeprecated` argument of `__Type.fields`, `__Type.enumValues`, `__Type.inputFields`, `__Field.args` and `__Directive.args` fields non-`Null`. 
([#1348], [graphql/graphql-spec#1142]) - Made `@deprecated(reason:)` argument non-`Null`. ([#1348], [graphql/graphql-spec#1040]) - Changed `ScalarToken::String` to contain raw quoted and escaped `StringLiteral` (was unquoted but escaped string before). ([#1349]) +- Added `LexerError::UnterminatedBlockString` variant. ([#1349]) ### Added @@ -29,6 +30,7 @@ All user visible changes to `juniper` crate will be documented in this file. Thi - `includeDeprecated` argument to `__Type.inputFields`, `__Field.args` and `__Directive.args` fields. - `__InputValue.isDeprecated` and `__InputValue.deprecationReason` fields. - `schema::meta::Argument::deprecation_status` field. +- Support for [block strings][0180-1]. ([#1349]) ### Changed @@ -50,6 +52,7 @@ All user visible changes to `juniper` crate will be documented in this file. Thi [graphql/graphql-spec#825]: https://github.com/graphql/graphql-spec/pull/825 [graphql/graphql-spec#1040]: https://github.com/graphql/graphql-spec/pull/1040 [graphql/graphql-spec#1142]: https://github.com/graphql/graphql-spec/pull/1142 +[0180-1]: https://spec.graphql.org/September2025/#sec-String-Value.Block-Strings diff --git a/juniper/src/parser/parser.rs b/juniper/src/parser/parser.rs index 3388987d1..ca7aa8eb9 100644 --- a/juniper/src/parser/parser.rs +++ b/juniper/src/parser/parser.rs @@ -271,7 +271,66 @@ impl<'a> StringLiteral<'a> { } Ok(unescaped.into()) } - Self::Block(_) => todo!(), + Self::Block(lit) => { + if !lit.starts_with(r#"""""#) { + return Err(ParseError::unexpected_token(Token::Scalar( + ScalarToken::String(self), + ))); + } + if !lit.ends_with(r#"""""#) { + return Err(ParseError::LexerError(LexerError::UnterminatedBlockString)); + } + + let unquoted = &lit[3..lit.len() - 3]; + + let (mut indent, mut total_lines) = (usize::MAX, 0); + let (mut first_text_line, mut last_text_line) = (None, 0); + for (n, line) in unquoted.lines().enumerate() { + total_lines += 1; + + let trimmed = line.trim_start(); + if trimmed.is_empty() { + continue; 
+ } + + _ = first_text_line.get_or_insert(n); + last_text_line = n; + + if n != 0 { + indent = indent.min(line.len() - trimmed.len()); + } + } + + let Some(first_text_line) = first_text_line else { + return Ok("".into()); // no text, only whitespaces + }; + if (indent == 0 || total_lines == 1) && !unquoted.contains(r#"\""""#) { + return Ok(unquoted.into()); // nothing to dedent or unescape + } + + let mut unescaped = String::with_capacity(unquoted.len()); + let mut lines = unquoted + .lines() + .enumerate() + .skip(first_text_line) + .take(last_text_line - first_text_line + 1) + .map(|(n, line)| { + if n != 0 && line.len() >= indent { + &line[indent..] + } else { + line + } + }) + .map(|x| x.replace(r#"\""""#, r#"""""#)); + if let Some(line) = lines.next() { + unescaped.push_str(&line); + for line in lines { + unescaped.push('\n'); + unescaped.push_str(&line); + } + } + Ok(unescaped.into()) + } } } } diff --git a/juniper/src/types/scalars.rs b/juniper/src/types/scalars.rs index 612ff8036..1934d41ca 100644 --- a/juniper/src/types/scalars.rs +++ b/juniper/src/types/scalars.rs @@ -86,7 +86,6 @@ mod impl_string_scalar { // TODO: Allow cheaper from `Cow<'_, str>` conversions for `ScalarValue`. Ok(parsed.into_owned().into()) } else { - // TODO: Support block string too. 
Err(ParseError::unexpected_token(Token::Scalar(value))) } } @@ -501,6 +500,104 @@ mod tests { } } + #[test] + fn parse_block_strings() { + for (input, expected) in [ + (r#""""""""#, ""), + (r#""""simple""""#, "simple"), + (r#"""" white space """"#, " white space "), + (r#""""contains " quote""""#, r#"contains " quote"#), + ( + r#""""contains \""" triple quote""""#, + r#"contains """ triple quote"#, + ), + ( + r#""""contains \"" double quote""""#, + r#"contains \"" double quote"#, + ), + ( + r#""""contains \\""" triple quote""""#, + r#"contains \""" triple quote"#, + ), + (r#""""\"""quote" """"#, r#""""quote" "#), + (r#""""multi\nline""""#, r"multi\nline"), + ( + r#""""multi\rline\r\nnormalized""""#, + r"multi\rline\r\nnormalized", + ), + ( + r#""""unescaped \\n\\r\\b\\t\\f\\u1234""""#, + r"unescaped \\n\\r\\b\\t\\f\\u1234", + ), + ( + r#""""unescaped unicode outside BMP \u{1f600}""""#, + r"unescaped unicode outside BMP \u{1f600}", + ), + (r#""""slashes \\\\ \\/""""#, r"slashes \\\\ \\/"), + ( + r#"""" + + spans + multiple + lines + + """"#, + "spans\n multiple\n lines", + ), + // removes uniform indentation + ( + r#"""" + Hello, + World! + + Yours, + GraphQL.""""#, + "Hello,\n World!\n\nYours,\n GraphQL.", + ), + // removes empty leading and trailing lines + ( + r#"""" + + Hello, + World! + + Yours, + GraphQL. + + """"#, + "Hello,\n World!\n\nYours,\n GraphQL.", + ), + // retains indentation from first line + ( + r#"""" Hello, + World! + + Yours, + GraphQL.""""#, + " Hello,\n World!\n\nYours,\n GraphQL.", + ), + // does not alter trailing spaces + ( + r#"""" + Hello, + World! + + Yours, + GraphQL. """"#, + "Hello,\n World!\n\nYours,\n GraphQL. 
", + ), + ] { + let res = >::from_str( + ScalarToken::String(StringLiteral::Block(input)), + ); + assert!(res.is_ok(), "parsing error occurred: {}", res.unwrap_err()); + + let s: Option = res.unwrap().try_to().ok(); + assert!(s.is_some(), "no string returned"); + assert_eq!(s.unwrap(), expected); + } + } + #[test] fn parse_f64_from_int() { for (v, expected) in [ From 1ed88606aaf75ae0a1736dabadc878838135c0ca Mon Sep 17 00:00:00 2001 From: tyranron Date: Tue, 16 Sep 2025 14:06:53 +0300 Subject: [PATCH 05/13] Support full Unicode in lexer --- juniper/CHANGELOG.md | 6 +- juniper/src/parser/lexer.rs | 1094 +++++++++++++++++++++++++++-- juniper/src/parser/tests/lexer.rs | 872 ----------------------- juniper/src/parser/tests/mod.rs | 1 - 4 files changed, 1044 insertions(+), 929 deletions(-) delete mode 100644 juniper/src/parser/tests/lexer.rs diff --git a/juniper/CHANGELOG.md b/juniper/CHANGELOG.md index ecb6f360d..bf3a67070 100644 --- a/juniper/CHANGELOG.md +++ b/juniper/CHANGELOG.md @@ -23,13 +23,15 @@ All user visible changes to `juniper` crate will be documented in this file. Thi - [September 2025] GraphQL spec: ([#1347]) - `__Type.isOneOf` field. ([#1348], [graphql/graphql-spec#825]) - `SCHEMA`, `OBJECT`, `ARGUMENT_DEFINITION`, `INTERFACE`, `UNION`, `ENUM`, `INPUT_OBJECT` and `INPUT_FIELD_DEFINITION` values to `__DirectiveLocation` enum. ([#1348]) - - Arguments and input object fields deprecation: ([#1348], [#864], [graphql/graphql-spec#525], [graphql/graphql-spec#805]) + - Arguments and input object fields deprecation: ([#1348], [#864], [graphql/graphql-spec#525], [graphql/graphql-spec#805]) - Placing `#[graphql(deprecated)]` and `#[deprecated]` attributes on struct fields in `#[derive(GraphQLInputObject)]` macro. - Placing `#[graphql(deprecated)]` attribute on method arguments in `#[graphql_object]` and `#[graphql_interface]` macros. - Placing `@deprecated` directive on arguments and input object fields. 
- `includeDeprecated` argument to `__Type.inputFields`, `__Field.args` and `__Directive.args` fields. - `__InputValue.isDeprecated` and `__InputValue.deprecationReason` fields. - `schema::meta::Argument::deprecation_status` field. + - Support for variable-length escaped Unicode characters (e.g. `\u{110000}`) in strings. ([#1349], [graphql/graphql-spec#849], [graphql/graphql-spec#687]) + - Support full Unicode range. ([#1349], [graphql/graphql-spec#849], [graphql/graphql-spec#687]) - Support for [block strings][0180-1]. ([#1349]) ### Changed @@ -48,8 +50,10 @@ All user visible changes to `juniper` crate will be documented in this file. Thi [#1348]: /../../pull/1348 [#1349]: /../../pull/1349 [graphql/graphql-spec#525]: https://github.com/graphql/graphql-spec/pull/525 +[graphql/graphql-spec#687]: https://github.com/graphql/graphql-spec/issues/687 [graphql/graphql-spec#805]: https://github.com/graphql/graphql-spec/pull/805 [graphql/graphql-spec#825]: https://github.com/graphql/graphql-spec/pull/825 +[graphql/graphql-spec#849]: https://github.com/graphql/graphql-spec/pull/849 [graphql/graphql-spec#1040]: https://github.com/graphql/graphql-spec/pull/1040 [graphql/graphql-spec#1142]: https://github.com/graphql/graphql-spec/pull/1142 [0180-1]: https://spec.graphql.org/September2025/#sec-String-Value.Block-Strings diff --git a/juniper/src/parser/lexer.rs b/juniper/src/parser/lexer.rs index 61fc07b35..f3ef1e206 100644 --- a/juniper/src/parser/lexer.rs +++ b/juniper/src/parser/lexer.rs @@ -117,13 +117,6 @@ pub enum LexerError { #[display("Unterminated block string literal")] UnterminatedBlockString, - /// An unknown character in a string literal was found - /// - /// This occurs when an invalid source character is found in a string - /// literal, such as ASCII control characters. 
- #[display("Unknown character \"{_0}\" in string literal")] - UnknownCharacterInString(#[error(not(source))] char), - /// An unknown escape sequence in a string literal was found /// /// Only a limited set of escape sequences are supported, this is emitted @@ -192,25 +185,51 @@ impl<'a> Lexer<'a> { Spanning::single_width(&start_pos, t) } + /// Advances this [`Lexer`] over any [ignored] character until a non-[ignored] is met. + /// + /// [ignored]: https://spec.graphql.org/September2025#Ignored fn scan_over_whitespace(&mut self) { while let Some((_, ch)) = self.peek_char() { - if ch == '\t' || ch == ' ' || ch == '\n' || ch == '\r' || ch == ',' { - self.next_char(); - } else if ch == '#' { - self.next_char(); - - while let Some((_, ch)) = self.peek_char() { - if is_source_char(ch) && (ch == '\n' || ch == '\r') { - self.next_char(); - break; - } else if is_source_char(ch) { - self.next_char(); - } else { - break; + // Ignored :: + // UnicodeBOM + // WhiteSpace + // LineTerminator + // Comment + // Comma + match ch { + // UnicodeBOM :: + // Byte Order Mark (U+FEFF) + // Whitespace :: + // Horizontal Tab (U+0009) + // Space (U+0020) + // LineTerminator :: + // New Line (U+000A) + // Carriage Return (U+000D) [lookahead != New Line (U+000A)] + // Carriage Return (U+000D) New Line (U+000A) + // Comma :: + // , + '\u{FEFF}' | '\t' | ' ' | '\n' | '\r' | ',' => _ = self.next_char(), + // Comment :: + // #CommentChar[list][opt] [lookahead != CommentChar] + // CommentChar :: + // SourceCharacter but not LineTerminator + '#' => { + _ = self.next_char(); + while let Some((_, ch)) = self.peek_char() { + _ = self.next_char(); + match ch { + '\r' if matches!(self.peek_char(), Some((_, '\n'))) => { + _ = self.next_char(); + break; + } + '\n' | '\r' => break, + // Continue scanning `Comment`. + _ => {} + } } } - } else { - break; + // Any other character is not `Ignored`. 
+ _ => break, } } } @@ -262,7 +281,16 @@ impl<'a> Lexer<'a> { )) } + /// Scans a [string] by this [`Lexer`], but not a [block string]. + /// + /// [string]: https://spec.graphql.org/September2025#StringValue + /// [block string]: https://spec.graphql.org/September2025#BlockString fn scan_string(&mut self) -> LexerResult<'a> { + // StringValue :: + // "" [lookahead != "] + // "StringCharacter[list]" + // BlockString + let start_pos = self.position; let (start_idx, start_ch) = self .next_char() @@ -277,10 +305,19 @@ impl<'a> Lexer<'a> { let mut escaped = false; let mut old_pos = self.position; while let Some((idx, ch)) = self.next_char() { + // StringCharacter :: + // SourceCharacter but not " or \ or LineTerminator + // \uEscapedUnicode + // \EscapedCharacter match ch { - 'b' | 'f' | 'n' | 'r' | 't' | '\\' | '/' | '"' if escaped => { + // EscapedCharacter :: one of + // " \ / b f n r t + '"' | '\\' | '/' | 'b' | 'f' | 'n' | 'r' | 't' if escaped => { escaped = false; } + // EscapedUnicode :: + // {HexDigit[list]} + // HexDigit HexDigit HexDigit HexDigit 'u' if escaped => { self.scan_escaped_unicode(&old_pos)?; escaped = false; @@ -307,12 +344,8 @@ impl<'a> Lexer<'a> { LexerError::UnterminatedString, )); } - c if !is_source_char(c) => { - return Err(Spanning::zero_width( - &old_pos, - LexerError::UnknownCharacterInString(ch), - )); - } + // Any other valid Unicode scalar value is a `SourceCharacter`: + // https://spec.graphql.org/September2025#SourceCharacter _ => {} } old_pos = self.position; @@ -324,7 +357,13 @@ impl<'a> Lexer<'a> { )) } + /// Scans a [block string] by this [`Lexer`]. 
+ /// + /// [block string]: https://spec.graphql.org/September2025#BlockString fn scan_block_string(&mut self) -> LexerResult<'a> { + // BlockString :: + // """BlockStringCharacter[list][opt]""" + let start_pos = self.position; let (start_idx, mut start_ch) = self .next_char() @@ -346,9 +385,11 @@ impl<'a> Lexer<'a> { )); } } - let (mut quotes, mut escaped) = (0, false); while let Some((idx, ch)) = self.next_char() { + // BlockStringCharacter :: + // SourceCharacter but not """ or \""" + // \""" match ch { '\\' => (quotes, escaped) = (0, true), '"' if escaped => (quotes, escaped) = (0, false), @@ -372,27 +413,49 @@ impl<'a> Lexer<'a> { )) } + /// Scans an [escaped unicode] character by this [`Lexer`]. + /// + /// [escaped unicode]: https://spec.graphql.org/September2025#EscapedUnicode fn scan_escaped_unicode( &mut self, start_pos: &SourcePosition, ) -> Result<(), Spanning> { - let (start_idx, _) = self + // EscapedUnicode :: + // {HexDigit[list]} + // HexDigit HexDigit HexDigit HexDigit + + let (start_idx, mut curr_ch) = self .peek_char() .ok_or_else(|| Spanning::zero_width(&self.position, LexerError::UnterminatedString))?; let mut end_idx = start_idx; let mut len = 0; - for _ in 0..4 { - let (idx, ch) = self.next_char().ok_or_else(|| { - Spanning::zero_width(&self.position, LexerError::UnterminatedString) - })?; - - if !ch.is_alphanumeric() { - break; + let is_variable_width = curr_ch == '{'; + if is_variable_width { + _ = self.next_char(); + loop { + let (idx, ch) = self.next_char().ok_or_else(|| { + Spanning::zero_width(&self.position, LexerError::UnterminatedString) + })?; + curr_ch = ch; + end_idx = idx; + len += 1; + if !curr_ch.is_alphanumeric() { + break; + } + } + } else { + for _ in 0..4 { + let (idx, ch) = self.next_char().ok_or_else(|| { + Spanning::zero_width(&self.position, LexerError::UnterminatedString) + })?; + curr_ch = ch; + if !curr_ch.is_alphanumeric() { + break; + } + end_idx = idx; + len += 1; } - - end_idx = idx; - len += 1; } // Make sure 
we are on a valid char boundary. @@ -401,17 +464,30 @@ impl<'a> Lexer<'a> { .get(start_idx..=end_idx) .ok_or_else(|| Spanning::zero_width(&self.position, LexerError::UnterminatedString))?; - if len != 4 { - return Err(Spanning::zero_width( - start_pos, - LexerError::UnknownEscapeSequence(format!("\\u{escape}")), - )); + let code_point = if is_variable_width { + if curr_ch != '}' { + return Err(Spanning::zero_width( + start_pos, + LexerError::UnknownEscapeSequence(format!( + r"\u{}", + &escape[..escape.len() - 1], + )), + )); + } + u32::from_str_radix(&escape[1..escape.len() - 1], 16) + } else { + if len != 4 { + return Err(Spanning::zero_width( + start_pos, + LexerError::UnknownEscapeSequence(format!(r"\u{escape}")), + )); + } + u32::from_str_radix(escape, 16) } - - let code_point = u32::from_str_radix(escape, 16).map_err(|_| { + .map_err(|_| { Spanning::zero_width( start_pos, - LexerError::UnknownEscapeSequence(format!("\\u{escape}")), + LexerError::UnknownEscapeSequence(format!(r"\u{escape}")), ) })?; @@ -419,10 +495,10 @@ impl<'a> Lexer<'a> { .ok_or_else(|| { Spanning::zero_width( start_pos, - LexerError::UnknownEscapeSequence("\\u".to_owned() + escape), + LexerError::UnknownEscapeSequence(format!(r"\u{escape}")), ) }) - .map(|_| ()) + .map(drop) } fn scan_number(&mut self) -> LexerResult<'a> { @@ -589,10 +665,6 @@ impl<'a> Iterator for Lexer<'a> { } } -fn is_source_char(c: char) -> bool { - c == '\t' || c == '\n' || c == '\r' || c >= ' ' -} - fn is_name_start(c: char) -> bool { c == '_' || c.is_ascii_alphabetic() } @@ -604,3 +676,915 @@ fn is_name_cont(c: char) -> bool { fn is_number_start(c: char) -> bool { c == '-' || c.is_ascii_digit() } + +#[cfg(test)] +mod test { + use crate::parser::{ + Lexer, LexerError, ScalarToken, SourcePosition, Spanning, + StringLiteral::{Block, Quoted}, + Token, + }; + + #[track_caller] + fn tokenize_to_vec(s: &str) -> Vec>> { + let mut tokens = Vec::new(); + let mut lexer = Lexer::new(s); + + loop { + match lexer.next() { + 
Some(Ok(t)) => { + let at_eof = t.item == Token::EndOfFile; + tokens.push(t); + if at_eof { + break; + } + } + Some(Err(e)) => panic!("error in input stream: {e} for {s:#?}"), + None => panic!("EOF before `Token::EndOfFile` in {s:#?}"), + } + } + + tokens + } + + #[track_caller] + fn tokenize_single(s: &str) -> Spanning> { + let mut tokens = tokenize_to_vec(s); + + assert_eq!(tokens.len(), 2); + assert_eq!(tokens[1].item, Token::EndOfFile); + + tokens.remove(0) + } + + #[track_caller] + fn tokenize_error(s: &str) -> Spanning { + let mut lexer = Lexer::new(s); + + loop { + match lexer.next() { + Some(Ok(t)) => { + if t.item == Token::EndOfFile { + panic!("lexer did not return error for {s:#?}"); + } + } + Some(Err(e)) => { + return e; + } + None => panic!("lexer did not return error for {s:#?}"), + } + } + } + + #[test] + fn empty_source() { + assert_eq!( + tokenize_to_vec(""), + vec![Spanning::zero_width( + &SourcePosition::new_origin(), + Token::EndOfFile, + )] + ); + } + + #[test] + fn disallow_control_codes() { + assert_eq!( + Lexer::new("\u{0007}").next(), + Some(Err(Spanning::zero_width( + &SourcePosition::new_origin(), + LexerError::UnknownCharacter('\u{0007}'), + ))) + ); + } + + #[test] + fn skip_whitespace() { + assert_eq!( + tokenize_to_vec( + r#" + + foo + + "# + ), + vec![ + Spanning::start_end( + &SourcePosition::new(14, 2, 12), + &SourcePosition::new(17, 2, 15), + Token::Name("foo"), + ), + Spanning::zero_width(&SourcePosition::new(31, 4, 12), Token::EndOfFile), + ] + ); + } + + #[test] + fn skip_comments() { + assert_eq!( + tokenize_to_vec( + r#" + #comment + foo#comment + "# + ), + vec![ + Spanning::start_end( + &SourcePosition::new(34, 2, 12), + &SourcePosition::new(37, 2, 15), + Token::Name("foo"), + ), + Spanning::zero_width(&SourcePosition::new(58, 3, 12), Token::EndOfFile), + ] + ); + } + + #[test] + fn skip_commas() { + assert_eq!( + tokenize_to_vec(r#",,,foo,,,"#), + vec![ + Spanning::start_end( + &SourcePosition::new(3, 0, 3), + 
&SourcePosition::new(6, 0, 6), + Token::Name("foo"), + ), + Spanning::zero_width(&SourcePosition::new(9, 0, 9), Token::EndOfFile), + ] + ); + } + + #[test] + fn error_positions() { + assert_eq!( + Lexer::new( + r#" + + ? + + "#, + ) + .next(), + Some(Err(Spanning::zero_width( + &SourcePosition::new(14, 2, 12), + LexerError::UnknownCharacter('?'), + ))), + ); + } + + #[test] + fn strings() { + assert_eq!( + tokenize_single(r#""simple""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(8, 0, 8), + Token::Scalar(ScalarToken::String(Quoted(r#""simple""#))), + ), + ); + + assert_eq!( + tokenize_single(r#"" white space ""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(15, 0, 15), + Token::Scalar(ScalarToken::String(Quoted(r#"" white space ""#))), + ), + ); + + assert_eq!( + tokenize_single(r#""quote \"""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(10, 0, 10), + Token::Scalar(ScalarToken::String(Quoted(r#""quote \"""#))), + ), + ); + + assert_eq!( + tokenize_single(r#""escaped \n\r\b\t\f""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(20, 0, 20), + Token::Scalar(ScalarToken::String(Quoted(r#""escaped \n\r\b\t\f""#))), + ), + ); + + assert_eq!( + tokenize_single(r#""slashes \\ \/""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(15, 0, 15), + Token::Scalar(ScalarToken::String(Quoted(r#""slashes \\ \/""#))), + ), + ); + + assert_eq!( + tokenize_single(r#""unicode \u1234\u5678\u90AB\uCDEF""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(34, 0, 34), + Token::Scalar(ScalarToken::String(Quoted( + r#""unicode \u1234\u5678\u90AB\uCDEF""#, + ))), + ), + ); + + assert_eq!( + tokenize_single(r#""variable-width unicode \u{1234}\u{5678}\u{90AB}\u{1F4A9}""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(58, 0, 58), + 
Token::Scalar(ScalarToken::String(Quoted( + r#""variable-width unicode \u{1234}\u{5678}\u{90AB}\u{1F4A9}""#, + ))), + ), + ); + + assert_eq!( + tokenize_single("\"contains unescaped \u{0007} control char\""), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(35, 0, 35), + Token::Scalar(ScalarToken::String(Quoted( + "\"contains unescaped \u{0007} control char\"", + ))), + ), + ); + + assert_eq!( + tokenize_single("\"null-byte is not \u{0000} end of file\""), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(32, 0, 32), + Token::Scalar(ScalarToken::String(Quoted( + "\"null-byte is not \u{0000} end of file\"", + ))), + ), + ); + } + + #[test] + fn string_errors() { + assert_eq!( + tokenize_error(r#"""#), + Spanning::zero_width( + &SourcePosition::new(1, 0, 1), + LexerError::UnterminatedString, + ), + ); + + assert_eq!( + tokenize_error(r#""no end quote"#), + Spanning::zero_width( + &SourcePosition::new(13, 0, 13), + LexerError::UnterminatedString, + ), + ); + + assert_eq!( + tokenize_error("\"multi\nline\""), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnterminatedString, + ), + ); + + assert_eq!( + tokenize_error("\"multi\rline\""), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnterminatedString, + ), + ); + + assert_eq!( + tokenize_error(r#""bad \z esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\z".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \x esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\x".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u1 esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u1".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u0XX1 esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + 
LexerError::UnknownEscapeSequence(r"\u0XX1".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \uXXXX esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\uXXXX".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \uFXXX esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\uFXXX".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \uXXXF esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\uXXXF".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u{110000} esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u{110000}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u{DEAD} esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u{DEAD}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u{DEA esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u{DEA".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""unterminated in string \""#), + Spanning::zero_width( + &SourcePosition::new(26, 0, 26), + LexerError::UnterminatedString, + ), + ); + + assert_eq!( + tokenize_error(r#""unterminated \"#), + Spanning::zero_width( + &SourcePosition::new(15, 0, 15), + LexerError::UnterminatedString, + ), + ); + + // Found by fuzzing. 
+ assert_eq!( + tokenize_error(r#""\uɠ^A"#), + Spanning::zero_width( + &SourcePosition::new(5, 0, 5), + LexerError::UnterminatedString, + ), + ); + } + + #[test] + fn block_strings() { + assert_eq!( + tokenize_single(r#""""""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(6, 0, 6), + Token::Scalar(ScalarToken::String(Block(r#""""""""#))), + ), + ); + assert_eq!( + tokenize_single(r#""""simple""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(12, 0, 12), + Token::Scalar(ScalarToken::String(Block(r#""""simple""""#))), + ), + ); + assert_eq!( + tokenize_single(r#"""" white space """"#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(19, 0, 19), + Token::Scalar(ScalarToken::String(Block(r#"""" white space """"#))), + ), + ); + assert_eq!( + tokenize_single(r#""""contains " quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(22, 0, 22), + Token::Scalar(ScalarToken::String(Block(r#""""contains " quote""""#))), + ), + ); + assert_eq!( + tokenize_single(r#""""contains \""" triple quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(32, 0, 32), + Token::Scalar(ScalarToken::String(Block( + r#""""contains \""" triple quote""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""contains \"" double quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(31, 0, 31), + Token::Scalar(ScalarToken::String(Block( + r#""""contains \"" double quote""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""contains \\""" triple quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(33, 0, 33), + Token::Scalar(ScalarToken::String(Block( + r#""""contains \\""" triple quote""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""\"""quote" """"#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + 
&SourcePosition::new(17, 0, 17), + Token::Scalar(ScalarToken::String(Block(r#""""\"""quote" """"#))), + ), + ); + assert_eq!( + tokenize_single(r#""""multi\nline""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(17, 0, 17), + Token::Scalar(ScalarToken::String(Block(r#""""multi\nline""""#))), + ), + ); + assert_eq!( + tokenize_single(r#""""multi\rline\r\nnormalized""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(31, 0, 31), + Token::Scalar(ScalarToken::String(Block( + r#""""multi\rline\r\nnormalized""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""unescaped \\n\\r\\b\\t\\f\\u1234""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(38, 0, 38), + Token::Scalar(ScalarToken::String(Block( + r#""""unescaped \\n\\r\\b\\t\\f\\u1234""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""unescaped unicode outside BMP \u{1f600}""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(45, 0, 45), + Token::Scalar(ScalarToken::String(Block( + r#""""unescaped unicode outside BMP \u{1f600}""""#, + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""slashes \\\\ \\/""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(22, 0, 22), + Token::Scalar(ScalarToken::String(Block(r#""""slashes \\\\ \\/""""#))), + ), + ); + assert_eq!( + tokenize_single( + r#"""" + + spans + multiple + lines + + """"#, + ), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(76, 6, 11), + Token::Scalar(ScalarToken::String(Block( + r#"""" + + spans + multiple + lines + + """"#, + ))), + ), + ); + } + + #[test] + fn block_string_errors() { + assert_eq!( + tokenize_error(r#""""""#), + Spanning::zero_width( + &SourcePosition::new(4, 0, 4), + LexerError::UnterminatedBlockString, + ), + ); + assert_eq!( + tokenize_error(r#"""""""#), + Spanning::zero_width( + &SourcePosition::new(5, 0, 5), + 
LexerError::UnterminatedBlockString, + ), + ); + assert_eq!( + tokenize_error(r#""""no end quote"#), + Spanning::zero_width( + &SourcePosition::new(15, 0, 15), + LexerError::UnterminatedBlockString, + ), + ); + } + + #[test] + fn numbers() { + fn assert_float_token_eq( + source: &str, + start: SourcePosition, + end: SourcePosition, + expected: &str, + ) { + let parsed = tokenize_single(source); + assert_eq!(parsed.span.start, start); + assert_eq!(parsed.span.end, end); + + match parsed.item { + Token::Scalar(ScalarToken::Float(actual)) => assert_eq!(actual, expected), + _ => assert!(false), + } + } + + assert_eq!( + tokenize_single("4"), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(1, 0, 1), + Token::Scalar(ScalarToken::Int("4")) + ) + ); + + assert_float_token_eq( + "4.123", + SourcePosition::new(0, 0, 0), + SourcePosition::new(5, 0, 5), + "4.123", + ); + + assert_float_token_eq( + "4.0", + SourcePosition::new(0, 0, 0), + SourcePosition::new(3, 0, 3), + "4.0", + ); + + assert_eq!( + tokenize_single("-4"), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(2, 0, 2), + Token::Scalar(ScalarToken::Int("-4")), + ) + ); + + assert_eq!( + tokenize_single("9"), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(1, 0, 1), + Token::Scalar(ScalarToken::Int("9")), + ) + ); + + assert_eq!( + tokenize_single("0"), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(1, 0, 1), + Token::Scalar(ScalarToken::Int("0")), + ) + ); + + assert_float_token_eq( + "-4.123", + SourcePosition::new(0, 0, 0), + SourcePosition::new(6, 0, 6), + "-4.123", + ); + + assert_float_token_eq( + "0.123", + SourcePosition::new(0, 0, 0), + SourcePosition::new(5, 0, 5), + "0.123", + ); + + assert_float_token_eq( + "123e4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(5, 0, 5), + "123e4", + ); + + assert_float_token_eq( + "123E4", + SourcePosition::new(0, 0, 0), + 
SourcePosition::new(5, 0, 5), + "123E4", + ); + + assert_float_token_eq( + "123e-4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(6, 0, 6), + "123e-4", + ); + + assert_float_token_eq( + "123e+4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(6, 0, 6), + "123e+4", + ); + + assert_float_token_eq( + "-1.123e4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(8, 0, 8), + "-1.123e4", + ); + + assert_float_token_eq( + "-1.123E4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(8, 0, 8), + "-1.123E4", + ); + + assert_float_token_eq( + "-1.123e-4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(9, 0, 9), + "-1.123e-4", + ); + + assert_float_token_eq( + "-1.123e+4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(9, 0, 9), + "-1.123e+4", + ); + + assert_float_token_eq( + "-1.123e45", + SourcePosition::new(0, 0, 0), + SourcePosition::new(9, 0, 9), + "-1.123e45", + ); + } + + #[test] + fn numbers_errors() { + assert_eq!( + tokenize_error("00"), + Spanning::zero_width( + &SourcePosition::new(1, 0, 1), + LexerError::UnexpectedCharacter('0'), + ) + ); + + assert_eq!( + tokenize_error("+1"), + Spanning::zero_width( + &SourcePosition::new(0, 0, 0), + LexerError::UnknownCharacter('+'), + ) + ); + + assert_eq!( + tokenize_error("1."), + Spanning::zero_width( + &SourcePosition::new(2, 0, 2), + LexerError::UnexpectedEndOfFile, + ) + ); + + assert_eq!( + tokenize_error(".123"), + Spanning::zero_width( + &SourcePosition::new(0, 0, 0), + LexerError::UnexpectedCharacter('.'), + ) + ); + + assert_eq!( + tokenize_error("1.A"), + Spanning::zero_width( + &SourcePosition::new(2, 0, 2), + LexerError::UnexpectedCharacter('A'), + ) + ); + + assert_eq!( + tokenize_error("-A"), + Spanning::zero_width( + &SourcePosition::new(1, 0, 1), + LexerError::UnexpectedCharacter('A'), + ) + ); + + assert_eq!( + tokenize_error("1.0e"), + Spanning::zero_width( + &SourcePosition::new(4, 0, 4), + LexerError::UnexpectedEndOfFile, + ) + ); + + assert_eq!( + 
tokenize_error("1.0eA"), + Spanning::zero_width( + &SourcePosition::new(4, 0, 4), + LexerError::UnexpectedCharacter('A'), + ) + ); + } + + #[test] + fn punctuation() { + assert_eq!( + tokenize_single("!"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ExclamationMark), + ); + + assert_eq!( + tokenize_single("$"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Dollar), + ); + + assert_eq!( + tokenize_single("("), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ParenOpen), + ); + + assert_eq!( + tokenize_single(")"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ParenClose), + ); + + assert_eq!( + tokenize_single("..."), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(3, 0, 3), + Token::Ellipsis, + ) + ); + + assert_eq!( + tokenize_single(":"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Colon), + ); + + assert_eq!( + tokenize_single("="), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Equals), + ); + + assert_eq!( + tokenize_single("@"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::At), + ); + + assert_eq!( + tokenize_single("["), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::BracketOpen), + ); + + assert_eq!( + tokenize_single("]"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::BracketClose), + ); + + assert_eq!( + tokenize_single("{"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::CurlyOpen), + ); + + assert_eq!( + tokenize_single("}"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::CurlyClose), + ); + + assert_eq!( + tokenize_single("|"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Pipe), + ); + } + + #[test] + fn punctuation_error() { + assert_eq!( + tokenize_error(".."), + Spanning::zero_width( + &SourcePosition::new(2, 0, 2), + LexerError::UnexpectedEndOfFile, + ) + ); + + assert_eq!( + tokenize_error("?"), + 
Spanning::zero_width( + &SourcePosition::new(0, 0, 0), + LexerError::UnknownCharacter('?'), + ) + ); + + assert_eq!( + tokenize_error("\u{203b}"), + Spanning::zero_width( + &SourcePosition::new(0, 0, 0), + LexerError::UnknownCharacter('\u{203b}'), + ) + ); + + assert_eq!( + tokenize_error("\u{200b}"), + Spanning::zero_width( + &SourcePosition::new(0, 0, 0), + LexerError::UnknownCharacter('\u{200b}'), + ) + ); + } + + #[test] + fn display() { + for (input, expected) in [ + (Token::Name("identifier"), "identifier"), + (Token::Scalar(ScalarToken::Int("123")), "123"), + (Token::Scalar(ScalarToken::Float("4.5")), "4.5"), + ( + Token::Scalar(ScalarToken::String(Quoted(r#""some string""#))), + r#""some string""#, + ), + ( + Token::Scalar(ScalarToken::String(Quoted( + r#""string with \\ escape and \" quote""#, + ))), + r#""string with \\ escape and \" quote""#, + ), + // TODO: Tests for `Block` string. + (Token::ExclamationMark, "!"), + (Token::Dollar, "$"), + (Token::ParenOpen, "("), + (Token::ParenClose, ")"), + (Token::BracketOpen, "["), + (Token::BracketClose, "]"), + (Token::CurlyOpen, "{"), + (Token::CurlyClose, "}"), + (Token::Ellipsis, "..."), + (Token::Colon, ":"), + (Token::Equals, "="), + (Token::At, "@"), + (Token::Pipe, "|"), + ] { + assert_eq!(input.to_string(), expected); + } + } +} diff --git a/juniper/src/parser/tests/lexer.rs b/juniper/src/parser/tests/lexer.rs deleted file mode 100644 index 9bd5a4d01..000000000 --- a/juniper/src/parser/tests/lexer.rs +++ /dev/null @@ -1,872 +0,0 @@ -use crate::parser::{ - Lexer, LexerError, ScalarToken, SourcePosition, Spanning, - StringLiteral::{Block, Quoted}, - Token, -}; - -#[track_caller] -fn tokenize_to_vec(s: &str) -> Vec>> { - let mut tokens = Vec::new(); - let mut lexer = Lexer::new(s); - - loop { - match lexer.next() { - Some(Ok(t)) => { - let at_eof = t.item == Token::EndOfFile; - tokens.push(t); - if at_eof { - break; - } - } - Some(Err(e)) => panic!("error in input stream: {e} for {s:#?}"), - None => 
panic!("EOF before `Token::EndOfFile` in {s:#?}"), - } - } - - tokens -} - -#[track_caller] -fn tokenize_single(s: &str) -> Spanning> { - let mut tokens = tokenize_to_vec(s); - - assert_eq!(tokens.len(), 2); - assert_eq!(tokens[1].item, Token::EndOfFile); - - tokens.remove(0) -} - -#[track_caller] -fn tokenize_error(s: &str) -> Spanning { - let mut lexer = Lexer::new(s); - - loop { - match lexer.next() { - Some(Ok(t)) => { - if t.item == Token::EndOfFile { - panic!("lexer did not return error for {s:#?}"); - } - } - Some(Err(e)) => { - return e; - } - None => panic!("lexer did not return error for {s:#?}"), - } - } -} - -#[test] -fn empty_source() { - assert_eq!( - tokenize_to_vec(""), - vec![Spanning::zero_width( - &SourcePosition::new_origin(), - Token::EndOfFile, - )] - ); -} - -#[test] -fn disallow_control_codes() { - assert_eq!( - Lexer::new("\u{0007}").next(), - Some(Err(Spanning::zero_width( - &SourcePosition::new_origin(), - LexerError::UnknownCharacter('\u{0007}') - ))) - ); -} - -#[test] -fn skip_whitespace() { - assert_eq!( - tokenize_to_vec( - r#" - - foo - - "# - ), - vec![ - Spanning::start_end( - &SourcePosition::new(14, 2, 12), - &SourcePosition::new(17, 2, 15), - Token::Name("foo"), - ), - Spanning::zero_width(&SourcePosition::new(31, 4, 12), Token::EndOfFile), - ] - ); -} - -#[test] -fn skip_comments() { - assert_eq!( - tokenize_to_vec( - r#" - #comment - foo#comment - "# - ), - vec![ - Spanning::start_end( - &SourcePosition::new(34, 2, 12), - &SourcePosition::new(37, 2, 15), - Token::Name("foo"), - ), - Spanning::zero_width(&SourcePosition::new(58, 3, 12), Token::EndOfFile), - ] - ); -} - -#[test] -fn skip_commas() { - assert_eq!( - tokenize_to_vec(r#",,,foo,,,"#), - vec![ - Spanning::start_end( - &SourcePosition::new(3, 0, 3), - &SourcePosition::new(6, 0, 6), - Token::Name("foo"), - ), - Spanning::zero_width(&SourcePosition::new(9, 0, 9), Token::EndOfFile), - ] - ); -} - -#[test] -fn error_positions() { - assert_eq!( - Lexer::new( - r#" - - ? 
- - "#, - ) - .next(), - Some(Err(Spanning::zero_width( - &SourcePosition::new(14, 2, 12), - LexerError::UnknownCharacter('?'), - ))), - ); -} - -#[test] -fn strings() { - assert_eq!( - tokenize_single(r#""simple""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(8, 0, 8), - Token::Scalar(ScalarToken::String(Quoted(r#""simple""#))), - ), - ); - - assert_eq!( - tokenize_single(r#"" white space ""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(15, 0, 15), - Token::Scalar(ScalarToken::String(Quoted(r#"" white space ""#))), - ), - ); - - assert_eq!( - tokenize_single(r#""quote \"""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(10, 0, 10), - Token::Scalar(ScalarToken::String(Quoted(r#""quote \"""#))), - ), - ); - - assert_eq!( - tokenize_single(r#""escaped \n\r\b\t\f""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(20, 0, 20), - Token::Scalar(ScalarToken::String(Quoted(r#""escaped \n\r\b\t\f""#))), - ), - ); - - assert_eq!( - tokenize_single(r#""slashes \\ \/""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(15, 0, 15), - Token::Scalar(ScalarToken::String(Quoted(r#""slashes \\ \/""#))), - ), - ); - - assert_eq!( - tokenize_single(r#""unicode \u1234\u5678\u90AB\uCDEF""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(34, 0, 34), - Token::Scalar(ScalarToken::String(Quoted( - r#""unicode \u1234\u5678\u90AB\uCDEF""# - ))), - ), - ); -} - -#[test] -fn string_errors() { - assert_eq!( - tokenize_error(r#"""#), - Spanning::zero_width( - &SourcePosition::new(1, 0, 1), - LexerError::UnterminatedString, - ), - ); - - assert_eq!( - tokenize_error(r#""no end quote"#), - Spanning::zero_width( - &SourcePosition::new(13, 0, 13), - LexerError::UnterminatedString, - ), - ); - - assert_eq!( - tokenize_error("\"contains unescaped \u{0007} control char\""), - Spanning::zero_width( - 
&SourcePosition::new(20, 0, 20), - LexerError::UnknownCharacterInString('\u{0007}'), - ), - ); - - assert_eq!( - tokenize_error("\"null-byte is not \u{0000} end of file\""), - Spanning::zero_width( - &SourcePosition::new(18, 0, 18), - LexerError::UnknownCharacterInString('\u{0000}'), - ), - ); - - assert_eq!( - tokenize_error("\"multi\nline\""), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnterminatedString, - ), - ); - - assert_eq!( - tokenize_error("\"multi\rline\""), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnterminatedString, - ), - ); - - assert_eq!( - tokenize_error(r#""bad \z esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\z".into()), - ), - ); - - assert_eq!( - tokenize_error(r#""bad \x esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\x".into()), - ), - ); - - assert_eq!( - tokenize_error(r#""bad \u1 esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\u1".into()), - ), - ); - - assert_eq!( - tokenize_error(r#""bad \u0XX1 esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\u0XX1".into()), - ), - ); - - assert_eq!( - tokenize_error(r#""bad \uXXXX esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\uXXXX".into()), - ), - ); - - assert_eq!( - tokenize_error(r#""bad \uFXXX esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\uFXXX".into()), - ), - ); - - assert_eq!( - tokenize_error(r#""bad \uXXXF esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\uXXXF".into()), - ), - ); - - assert_eq!( - tokenize_error(r#""unterminated in string \""#), - Spanning::zero_width( - &SourcePosition::new(26, 0, 26), - LexerError::UnterminatedString, - ), 
- ); - - assert_eq!( - tokenize_error(r#""unterminated \"#), - Spanning::zero_width( - &SourcePosition::new(15, 0, 15), - LexerError::UnterminatedString, - ), - ); - - // Found by fuzzing. - assert_eq!( - tokenize_error(r#""\uɠ^A"#), - Spanning::zero_width( - &SourcePosition::new(5, 0, 5), - LexerError::UnterminatedString, - ), - ); -} - -#[test] -fn block_strings() { - assert_eq!( - tokenize_single(r#""""""""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(6, 0, 6), - Token::Scalar(ScalarToken::String(Block(r#""""""""#))), - ), - ); - assert_eq!( - tokenize_single(r#""""simple""""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(12, 0, 12), - Token::Scalar(ScalarToken::String(Block(r#""""simple""""#))), - ), - ); - assert_eq!( - tokenize_single(r#"""" white space """"#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(19, 0, 19), - Token::Scalar(ScalarToken::String(Block(r#"""" white space """"#))), - ), - ); - assert_eq!( - tokenize_single(r#""""contains " quote""""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(22, 0, 22), - Token::Scalar(ScalarToken::String(Block(r#""""contains " quote""""#))), - ), - ); - assert_eq!( - tokenize_single(r#""""contains \""" triple quote""""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(32, 0, 32), - Token::Scalar(ScalarToken::String(Block( - r#""""contains \""" triple quote""""# - ))), - ), - ); - assert_eq!( - tokenize_single(r#""""contains \"" double quote""""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(31, 0, 31), - Token::Scalar(ScalarToken::String(Block( - r#""""contains \"" double quote""""# - ))), - ), - ); - assert_eq!( - tokenize_single(r#""""contains \\""" triple quote""""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(33, 0, 33), - Token::Scalar(ScalarToken::String(Block( - 
r#""""contains \\""" triple quote""""# - ))), - ), - ); - assert_eq!( - tokenize_single(r#""""\"""quote" """"#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(17, 0, 17), - Token::Scalar(ScalarToken::String(Block(r#""""\"""quote" """"#))), - ), - ); - assert_eq!( - tokenize_single(r#""""multi\nline""""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(17, 0, 17), - Token::Scalar(ScalarToken::String(Block(r#""""multi\nline""""#))), - ), - ); - assert_eq!( - tokenize_single(r#""""multi\rline\r\nnormalized""""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(31, 0, 31), - Token::Scalar(ScalarToken::String(Block( - r#""""multi\rline\r\nnormalized""""# - ))), - ), - ); - assert_eq!( - tokenize_single(r#""""unescaped \\n\\r\\b\\t\\f\\u1234""""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(38, 0, 38), - Token::Scalar(ScalarToken::String(Block( - r#""""unescaped \\n\\r\\b\\t\\f\\u1234""""# - ))), - ), - ); - assert_eq!( - tokenize_single(r#""""unescaped unicode outside BMP \u{1f600}""""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(45, 0, 45), - Token::Scalar(ScalarToken::String(Block( - r#""""unescaped unicode outside BMP \u{1f600}""""#, - ))), - ), - ); - assert_eq!( - tokenize_single(r#""""slashes \\\\ \\/""""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(22, 0, 22), - Token::Scalar(ScalarToken::String(Block(r#""""slashes \\\\ \\/""""#))), - ), - ); - assert_eq!( - tokenize_single( - r#"""" - - spans - multiple - lines - - """"#, - ), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(76, 6, 11), - Token::Scalar(ScalarToken::String(Block( - r#"""" - - spans - multiple - lines - - """"#, - ))), - ), - ); -} - -#[test] -fn block_string_errors() { - assert_eq!( - tokenize_error(r#""""""#), - Spanning::zero_width( - &SourcePosition::new(4, 0, 
4), - LexerError::UnterminatedBlockString, - ), - ); - assert_eq!( - tokenize_error(r#"""""""#), - Spanning::zero_width( - &SourcePosition::new(5, 0, 5), - LexerError::UnterminatedBlockString, - ), - ); - assert_eq!( - tokenize_error(r#""""no end quote"#), - Spanning::zero_width( - &SourcePosition::new(15, 0, 15), - LexerError::UnterminatedBlockString, - ), - ); -} - -#[test] -fn numbers() { - fn assert_float_token_eq( - source: &str, - start: SourcePosition, - end: SourcePosition, - expected: &str, - ) { - let parsed = tokenize_single(source); - assert_eq!(parsed.span.start, start); - assert_eq!(parsed.span.end, end); - - match parsed.item { - Token::Scalar(ScalarToken::Float(actual)) => { - assert!( - expected == actual, - "[expected] {expected} != {actual} [actual]", - ); - } - _ => assert!(false), - } - } - - assert_eq!( - tokenize_single("4"), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(1, 0, 1), - Token::Scalar(ScalarToken::Int("4")) - ) - ); - - assert_float_token_eq( - "4.123", - SourcePosition::new(0, 0, 0), - SourcePosition::new(5, 0, 5), - "4.123", - ); - - assert_float_token_eq( - "4.0", - SourcePosition::new(0, 0, 0), - SourcePosition::new(3, 0, 3), - "4.0", - ); - - assert_eq!( - tokenize_single("-4"), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(2, 0, 2), - Token::Scalar(ScalarToken::Int("-4")) - ) - ); - - assert_eq!( - tokenize_single("9"), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(1, 0, 1), - Token::Scalar(ScalarToken::Int("9")) - ) - ); - - assert_eq!( - tokenize_single("0"), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(1, 0, 1), - Token::Scalar(ScalarToken::Int("0")) - ) - ); - - assert_float_token_eq( - "-4.123", - SourcePosition::new(0, 0, 0), - SourcePosition::new(6, 0, 6), - "-4.123", - ); - - assert_float_token_eq( - "0.123", - SourcePosition::new(0, 0, 0), - SourcePosition::new(5, 0, 5), - "0.123", - 
); - - assert_float_token_eq( - "123e4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(5, 0, 5), - "123e4", - ); - - assert_float_token_eq( - "123E4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(5, 0, 5), - "123E4", - ); - - assert_float_token_eq( - "123e-4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(6, 0, 6), - "123e-4", - ); - - assert_float_token_eq( - "123e+4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(6, 0, 6), - "123e+4", - ); - - assert_float_token_eq( - "-1.123e4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(8, 0, 8), - "-1.123e4", - ); - - assert_float_token_eq( - "-1.123E4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(8, 0, 8), - "-1.123E4", - ); - - assert_float_token_eq( - "-1.123e-4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(9, 0, 9), - "-1.123e-4", - ); - - assert_float_token_eq( - "-1.123e+4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(9, 0, 9), - "-1.123e+4", - ); - - assert_float_token_eq( - "-1.123e45", - SourcePosition::new(0, 0, 0), - SourcePosition::new(9, 0, 9), - "-1.123e45", - ); -} - -#[test] -fn numbers_errors() { - assert_eq!( - tokenize_error("00"), - Spanning::zero_width( - &SourcePosition::new(1, 0, 1), - LexerError::UnexpectedCharacter('0') - ) - ); - - assert_eq!( - tokenize_error("+1"), - Spanning::zero_width( - &SourcePosition::new(0, 0, 0), - LexerError::UnknownCharacter('+') - ) - ); - - assert_eq!( - tokenize_error("1."), - Spanning::zero_width( - &SourcePosition::new(2, 0, 2), - LexerError::UnexpectedEndOfFile - ) - ); - - assert_eq!( - tokenize_error(".123"), - Spanning::zero_width( - &SourcePosition::new(0, 0, 0), - LexerError::UnexpectedCharacter('.') - ) - ); - - assert_eq!( - tokenize_error("1.A"), - Spanning::zero_width( - &SourcePosition::new(2, 0, 2), - LexerError::UnexpectedCharacter('A') - ) - ); - - assert_eq!( - tokenize_error("-A"), - Spanning::zero_width( - &SourcePosition::new(1, 0, 1), - LexerError::UnexpectedCharacter('A') - ) 
- ); - - assert_eq!( - tokenize_error("1.0e"), - Spanning::zero_width( - &SourcePosition::new(4, 0, 4), - LexerError::UnexpectedEndOfFile - ) - ); - - assert_eq!( - tokenize_error("1.0eA"), - Spanning::zero_width( - &SourcePosition::new(4, 0, 4), - LexerError::UnexpectedCharacter('A') - ) - ); -} - -#[test] -fn punctuation() { - assert_eq!( - tokenize_single("!"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ExclamationMark) - ); - - assert_eq!( - tokenize_single("$"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Dollar) - ); - - assert_eq!( - tokenize_single("("), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ParenOpen) - ); - - assert_eq!( - tokenize_single(")"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ParenClose) - ); - - assert_eq!( - tokenize_single("..."), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(3, 0, 3), - Token::Ellipsis - ) - ); - - assert_eq!( - tokenize_single(":"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Colon) - ); - - assert_eq!( - tokenize_single("="), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Equals) - ); - - assert_eq!( - tokenize_single("@"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::At) - ); - - assert_eq!( - tokenize_single("["), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::BracketOpen) - ); - - assert_eq!( - tokenize_single("]"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::BracketClose) - ); - - assert_eq!( - tokenize_single("{"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::CurlyOpen) - ); - - assert_eq!( - tokenize_single("}"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::CurlyClose) - ); - - assert_eq!( - tokenize_single("|"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Pipe) - ); -} - -#[test] -fn punctuation_error() { - assert_eq!( - tokenize_error(".."), - 
Spanning::zero_width( - &SourcePosition::new(2, 0, 2), - LexerError::UnexpectedEndOfFile - ) - ); - - assert_eq!( - tokenize_error("?"), - Spanning::zero_width( - &SourcePosition::new(0, 0, 0), - LexerError::UnknownCharacter('?') - ) - ); - - assert_eq!( - tokenize_error("\u{203b}"), - Spanning::zero_width( - &SourcePosition::new(0, 0, 0), - LexerError::UnknownCharacter('\u{203b}') - ) - ); - - assert_eq!( - tokenize_error("\u{200b}"), - Spanning::zero_width( - &SourcePosition::new(0, 0, 0), - LexerError::UnknownCharacter('\u{200b}') - ) - ); -} - -#[test] -fn display() { - for (input, expected) in [ - (Token::Name("identifier"), "identifier"), - (Token::Scalar(ScalarToken::Int("123")), "123"), - (Token::Scalar(ScalarToken::Float("4.5")), "4.5"), - ( - Token::Scalar(ScalarToken::String(Quoted(r#""some string""#))), - r#""some string""#, - ), - ( - Token::Scalar(ScalarToken::String(Quoted( - r#""string with \\ escape and \" quote""#, - ))), - r#""string with \\ escape and \" quote""#, - ), - // TODO: Tests for `Block` string. 
- (Token::ExclamationMark, "!"), - (Token::Dollar, "$"), - (Token::ParenOpen, "("), - (Token::ParenClose, ")"), - (Token::BracketOpen, "["), - (Token::BracketClose, "]"), - (Token::CurlyOpen, "{"), - (Token::CurlyClose, "}"), - (Token::Ellipsis, "..."), - (Token::Colon, ":"), - (Token::Equals, "="), - (Token::At, "@"), - (Token::Pipe, "|"), - ] { - assert_eq!(input.to_string(), expected); - } -} diff --git a/juniper/src/parser/tests/mod.rs b/juniper/src/parser/tests/mod.rs index 18df2c92d..ab77d55c7 100644 --- a/juniper/src/parser/tests/mod.rs +++ b/juniper/src/parser/tests/mod.rs @@ -1,3 +1,2 @@ mod document; -mod lexer; mod value; From b97a6467e06301a16cb6019772e4390be60c0969 Mon Sep 17 00:00:00 2001 From: tyranron Date: Tue, 16 Sep 2025 14:09:11 +0300 Subject: [PATCH 06/13] Fill up display tests for block strings --- juniper/src/parser/lexer.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/juniper/src/parser/lexer.rs b/juniper/src/parser/lexer.rs index f3ef1e206..e4b57f108 100644 --- a/juniper/src/parser/lexer.rs +++ b/juniper/src/parser/lexer.rs @@ -1569,7 +1569,28 @@ mod test { ))), r#""string with \\ escape and \" quote""#, ), - // TODO: Tests for `Block` string. 
+ ( + Token::Scalar(ScalarToken::String(Block( + r#""""string with \\ escape and \" quote""""#, + ))), + r#""""string with \\ escape and \" quote""""#, + ), + ( + Token::Scalar(ScalarToken::String(Block( + r#""""block string with \\ escape and \" quote""""#, + ))), + r#""""block string with \\ escape and \" quote""""#, + ), + ( + Token::Scalar(ScalarToken::String(Block( + r#""""block + multiline + string"""#, + ))), + r#""""block + multiline + string"""#, + ), (Token::ExclamationMark, "!"), (Token::Dollar, "$"), (Token::ParenOpen, "("), From 0ec1a99d67addd8ffc96085d733d0cb9a2acd9bc Mon Sep 17 00:00:00 2001 From: tyranron Date: Tue, 16 Sep 2025 18:21:47 +0300 Subject: [PATCH 07/13] Support parsing variable-length Unicode code points, vol.1 --- juniper/src/parser/parser.rs | 149 ++++++++++++++++++----------------- 1 file changed, 78 insertions(+), 71 deletions(-) diff --git a/juniper/src/parser/parser.rs b/juniper/src/parser/parser.rs index ca7aa8eb9..c88bc6733 100644 --- a/juniper/src/parser/parser.rs +++ b/juniper/src/parser/parser.rs @@ -1,4 +1,4 @@ -use std::{borrow::Cow, fmt}; +use std::{borrow::Cow, fmt, iter}; use compact_str::{CompactString, format_compact}; use derive_more::with_trait::{Display, Error}; @@ -227,31 +227,24 @@ impl<'a> StringLiteral<'a> { let mut char_iter = unquoted.chars(); while let Some(ch) = char_iter.next() { match ch { + // StringCharacter :: + // SourceCharacter but not " or \ or LineTerminator + // \uEscapedUnicode + // \EscapedCharacter '\\' => match char_iter.next() { - Some('"') => { - unescaped.push('"'); - } - Some('/') => { - unescaped.push('/'); - } - Some('n') => { - unescaped.push('\n'); - } - Some('r') => { - unescaped.push('\r'); - } - Some('t') => { - unescaped.push('\t'); - } - Some('\\') => { - unescaped.push('\\'); - } - Some('f') => { - unescaped.push('\u{000c}'); - } - Some('b') => { - unescaped.push('\u{0008}'); - } + // EscapedCharacter :: one of + // " \ / b f n r t + Some('"') => unescaped.push('"'), + Some('\\') 
=> unescaped.push('\\'), + Some('/') => unescaped.push('/'), + Some('b') => unescaped.push('\u{0008}'), + Some('f') => unescaped.push('\u{000C}'), + Some('n') => unescaped.push('\n'), + Some('r') => unescaped.push('\r'), + Some('t') => unescaped.push('\t'), + // EscapedUnicode :: + // {HexDigit[list]} + // HexDigit HexDigit HexDigit HexDigit Some('u') => { unescaped.push(parse_unicode_codepoint(&mut char_iter)?); } @@ -335,51 +328,65 @@ impl<'a> StringLiteral<'a> { } } -fn parse_unicode_codepoint(char_iter: &mut I) -> Result -where - I: Iterator, -{ - let escaped_code_point = char_iter - .next() - .ok_or_else(|| ParseError::LexerError(LexerError::UnknownEscapeSequence(r"\u".into()))) - .and_then(|c1| { - char_iter - .next() - .map(|c2| format!("{c1}{c2}")) - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!(r"\u{c1}"))) - }) - }) - .and_then(|mut s| { - char_iter - .next() - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!(r"\u{s}"))) - }) - .map(|c2| { - s.push(c2); - s - }) +/// Parses an [escaped unicode] character. +/// +/// [escaped unicode]: https://spec.graphql.org/September2025#EscapedUnicode +// TODO: Add tests +// TODO: Check surrogate pairs? 
+fn parse_unicode_codepoint(char_iter: &mut impl Iterator) -> Result { + // EscapedUnicode :: + // {HexDigit[list]} + // HexDigit HexDigit HexDigit HexDigit + + let Some(mut curr_ch) = char_iter.next() else { + return Err(ParseError::LexerError(LexerError::UnknownEscapeSequence( + r"\u".into(), + ))); + }; + let mut escaped_code_point = String::with_capacity(6); // `\u{10FFFF}` is max code point + + let is_variable_width = curr_ch == '{'; + if is_variable_width { + loop { + curr_ch = char_iter.next().ok_or_else(|| { + ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( + r"\u{{{escaped_code_point}" + ))) + })?; + if curr_ch == '}' { + break; + } else if !curr_ch.is_alphanumeric() { + return Err(ParseError::LexerError(LexerError::UnknownEscapeSequence( + format!(r"\u{{{escaped_code_point}"), + ))); + } + escaped_code_point.push(curr_ch); + } + } else { + let mut char_iter = iter::once(curr_ch).chain(char_iter); + for _ in 0..4 { + curr_ch = char_iter.next().ok_or_else(|| { + ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( + r"\u{escaped_code_point}" + ))) + })?; + if !curr_ch.is_alphanumeric() { + return Err(ParseError::LexerError(LexerError::UnknownEscapeSequence( + format!(r"\u{escaped_code_point}"), + ))); + } + escaped_code_point.push(curr_ch); + } + } + + u32::from_str_radix(&escaped_code_point, 16) + .ok() + .and_then(char::from_u32) + .ok_or_else(|| { + ParseError::LexerError(LexerError::UnknownEscapeSequence(if is_variable_width { + format!(r"\u{{{escaped_code_point}}}") + } else { + format!(r"\u{escaped_code_point}") + })) }) - .and_then(|mut s| { - char_iter - .next() - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!(r"\u{s}"))) - }) - .map(|c2| { - s.push(c2); - s - }) - })?; - let code_point = u32::from_str_radix(&escaped_code_point, 16).map_err(|_| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( - r"\u{escaped_code_point}", - ))) - })?; - 
char::from_u32(code_point).ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( - r"\u{escaped_code_point}", - ))) - }) } From 4299e546703319a108216a9e6dc665dcbf39b481 Mon Sep 17 00:00:00 2001 From: tyranron Date: Wed, 24 Sep 2025 19:02:11 +0300 Subject: [PATCH 08/13] Support surrogates in parsing --- juniper/src/parser/document.rs | 2 +- juniper/src/parser/lexer.rs | 68 ++++++- juniper/src/parser/mod.rs | 1 + juniper/src/parser/parser.rs | 334 ++++++++++++++++++++++++++------- juniper/src/types/scalars.rs | 28 +++ 5 files changed, 368 insertions(+), 65 deletions(-) diff --git a/juniper/src/parser/document.rs b/juniper/src/parser/document.rs index fdb4922f9..ff2f144fb 100644 --- a/juniper/src/parser/document.rs +++ b/juniper/src/parser/document.rs @@ -25,7 +25,7 @@ where S: ScalarValue, { let mut lexer = Lexer::new(s); - let mut parser = Parser::new(&mut lexer).map_err(|s| s.map(ParseError::LexerError))?; + let mut parser = Parser::new(&mut lexer).map_err(|s| s.map(Into::into))?; parse_document(&mut parser, schema) } diff --git a/juniper/src/parser/lexer.rs b/juniper/src/parser/lexer.rs index e4b57f108..4c877029b 100644 --- a/juniper/src/parser/lexer.rs +++ b/juniper/src/parser/lexer.rs @@ -1,4 +1,4 @@ -use std::{char, ops::Deref, str::CharIndices}; +use std::{char, fmt, ops::Deref, str::CharIndices}; use derive_more::with_trait::{Display, Error}; @@ -491,6 +491,8 @@ impl<'a> Lexer<'a> { ) })?; + // TODO: Support surrogate. + char::from_u32(code_point) .ok_or_else(|| { Spanning::zero_width( @@ -677,6 +679,70 @@ fn is_number_start(c: char) -> bool { c == '-' || c.is_ascii_digit() } +/// Representation of a [Unicode code point]. +/// +/// This is different from a [Unicode scalar value] (aka "character") represented by a [`char`], +/// because can denote a [surrogate code point]. 
+/// +/// [surrogate code point]: https://unicode.org/glossary#surrogate_code_point +/// [Unicode code point]: https://unicode.org/glossary#code_point +/// [Unicode scalar value]: https://unicode.org/glossary#unicode_scalar_value +#[derive(Clone, Copy, Debug)] +pub(crate) struct UnicodeCodePoint { + /// Code representing this [`UnicodeCodePoint`]. + pub(crate) code: u32, + + /// Indicator whether this [`UnicodeCodePoint`] should be [`Display`]ed in variable-width form. + pub(crate) is_variable_width: bool, +} + +impl Display for UnicodeCodePoint { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.is_variable_width { + write!(f, r"\u{{{:X}}}", self.code) + } else { + write!(f, r"\u{:04X}", self.code) + } + } +} + +impl UnicodeCodePoint { + /// Indicates whether this [`UnicodeCodePoint`] is a high (leading) [surrogate]. + /// + /// [surrogate]: https://unicode.org/glossary#surrogate_code_point + pub(crate) fn is_high_surrogate(self) -> bool { + (0xD800..=0xDBFF).contains(&self.code) + } + + /// Indicates whether this [`UnicodeCodePoint`] is a low (trailing) [surrogate]. + /// + /// [surrogate]: https://unicode.org/glossary#surrogate_code_point + pub(crate) fn is_low_surrogate(self) -> bool { + (0xDC00..=0xDFFF).contains(&self.code) + } + + /// Joins a [`UnicodeCodePoint`] from the provided [surrogate pair][0]. + /// + /// [0]: https://unicodebook.readthedocs.io/unicode_encodings.html#utf-16-surrogate-pairs + pub(crate) fn from_surrogate_pair(high: Self, low: Self) -> Self { + Self { + code: 0x10000 + ((high.code & 0x03FF) << 10) + (low.code & 0x03FF), + is_variable_width: true, + } + } + + /// Tries to convert this [`UnicodeCodePoint`] into a [`char`]. + /// + /// # Errors + /// + /// If this [`UnicodeCodePoint`] doesn't represent a [Unicode scalar value]. 
+ /// + /// [Unicode scalar value]: https://unicode.org/glossary#unicode_scalar_value + pub(crate) fn try_into_char(self) -> Result { + char::from_u32(self.code).ok_or_else(|| LexerError::UnknownEscapeSequence(self.to_string())) + } +} + #[cfg(test)] mod test { use crate::parser::{ diff --git a/juniper/src/parser/mod.rs b/juniper/src/parser/mod.rs index f7d98b114..eb0189b23 100644 --- a/juniper/src/parser/mod.rs +++ b/juniper/src/parser/mod.rs @@ -17,3 +17,4 @@ pub use self::{ parser::{OptionParseResult, ParseError, ParseResult, Parser, UnlocatedParseResult}, utils::{SourcePosition, Span, Spanning}, }; +pub(crate) use self::lexer::UnicodeCodePoint; \ No newline at end of file diff --git a/juniper/src/parser/parser.rs b/juniper/src/parser/parser.rs index c88bc6733..d2a2dadc9 100644 --- a/juniper/src/parser/parser.rs +++ b/juniper/src/parser/parser.rs @@ -1,12 +1,14 @@ use std::{borrow::Cow, fmt, iter}; use compact_str::{CompactString, format_compact}; -use derive_more::with_trait::{Display, Error}; +use derive_more::with_trait::{Display, Error, From}; -use crate::parser::{Lexer, LexerError, ScalarToken, Spanning, StringLiteral, Token}; +use crate::parser::{ + Lexer, LexerError, ScalarToken, Spanning, StringLiteral, Token, UnicodeCodePoint, +}; /// Error while parsing a GraphQL query -#[derive(Clone, Debug, Display, Eq, Error, PartialEq)] +#[derive(Clone, Debug, Display, Eq, Error, From, PartialEq)] pub enum ParseError { /// An unexpected token occurred in the source // TODO: Previously was `Token<'a>`. 
@@ -19,6 +21,7 @@ pub enum ParseError { UnexpectedEndOfFile, /// An error during tokenization occurred + #[from] LexerError(LexerError), /// A scalar of unexpected type occurred in the source @@ -215,7 +218,7 @@ impl<'a> StringLiteral<'a> { ))); } if !lit.ends_with('"') { - return Err(ParseError::LexerError(LexerError::UnterminatedString)); + return Err(LexerError::UnterminatedString.into()); } let unquoted = &lit[1..lit.len() - 1]; @@ -246,15 +249,40 @@ impl<'a> StringLiteral<'a> { // {HexDigit[list]} // HexDigit HexDigit HexDigit HexDigit Some('u') => { - unescaped.push(parse_unicode_codepoint(&mut char_iter)?); + let mut code_point = + UnicodeCodePoint::parse_escaped(&mut char_iter)?; + if code_point.is_high_surrogate() { + let (Some('\\'), Some('u')) = + (char_iter.next(), char_iter.next()) + else { + return Err(LexerError::UnknownEscapeSequence( + code_point.to_string(), + ) + .into()); + }; + + let trailing_code_point = + UnicodeCodePoint::parse_escaped(&mut char_iter)?; + if !trailing_code_point.is_low_surrogate() { + return Err(LexerError::UnknownEscapeSequence( + code_point.to_string(), + ) + .into()); + } + code_point = UnicodeCodePoint::from_surrogate_pair( + code_point, + trailing_code_point, + ); + } + unescaped.push(code_point.try_into_char()?); } Some(s) => { - return Err(ParseError::LexerError( - LexerError::UnknownEscapeSequence(format!(r"\{s}")), - )); + return Err( + LexerError::UnknownEscapeSequence(format!(r"\{s}")).into() + ); } None => { - return Err(ParseError::LexerError(LexerError::UnterminatedString)); + return Err(LexerError::UnterminatedString.into()); } }, ch => { @@ -271,7 +299,7 @@ impl<'a> StringLiteral<'a> { ))); } if !lit.ends_with(r#"""""#) { - return Err(ParseError::LexerError(LexerError::UnterminatedBlockString)); + return Err(LexerError::UnterminatedBlockString.into()); } let unquoted = &lit[3..lit.len() - 3]; @@ -328,65 +356,245 @@ impl<'a> StringLiteral<'a> { } } -/// Parses an [escaped unicode] character. 
-/// -/// [escaped unicode]: https://spec.graphql.org/September2025#EscapedUnicode -// TODO: Add tests -// TODO: Check surrogate pairs? -fn parse_unicode_codepoint(char_iter: &mut impl Iterator) -> Result { - // EscapedUnicode :: - // {HexDigit[list]} - // HexDigit HexDigit HexDigit HexDigit - - let Some(mut curr_ch) = char_iter.next() else { - return Err(ParseError::LexerError(LexerError::UnknownEscapeSequence( - r"\u".into(), - ))); - }; - let mut escaped_code_point = String::with_capacity(6); // `\u{10FFFF}` is max code point - - let is_variable_width = curr_ch == '{'; - if is_variable_width { - loop { - curr_ch = char_iter.next().ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( - r"\u{{{escaped_code_point}" - ))) - })?; - if curr_ch == '}' { - break; - } else if !curr_ch.is_alphanumeric() { - return Err(ParseError::LexerError(LexerError::UnknownEscapeSequence( - format!(r"\u{{{escaped_code_point}"), - ))); +impl UnicodeCodePoint { + /// Parses a [`UnicodeCodePoint`] from an [escaped] value in the provided [`Iterator`]. 
+ /// + /// [escaped]: https://spec.graphql.org/September2025#EscapedUnicode + pub(crate) fn parse_escaped( + char_iter: &mut impl Iterator, + ) -> Result { + // EscapedUnicode :: + // {HexDigit[list]} + // HexDigit HexDigit HexDigit HexDigit + + let Some(mut curr_ch) = char_iter.next() else { + return Err(LexerError::UnknownEscapeSequence(r"\u".into()).into()); + }; + let mut escaped_code_point = String::with_capacity(6); // `\u{10FFFF}` is max code point + + let is_variable_width = curr_ch == '{'; + if is_variable_width { + loop { + curr_ch = char_iter.next().ok_or_else(|| { + LexerError::UnknownEscapeSequence(format!(r"\u{{{escaped_code_point}")) + })?; + if curr_ch == '}' { + break; + } else if !curr_ch.is_alphanumeric() { + return Err(LexerError::UnknownEscapeSequence(format!( + r"\u{{{escaped_code_point}" + )) + .into()); + } + escaped_code_point.push(curr_ch); } - escaped_code_point.push(curr_ch); - } - } else { - let mut char_iter = iter::once(curr_ch).chain(char_iter); - for _ in 0..4 { - curr_ch = char_iter.next().ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( - r"\u{escaped_code_point}" - ))) - })?; - if !curr_ch.is_alphanumeric() { - return Err(ParseError::LexerError(LexerError::UnknownEscapeSequence( - format!(r"\u{escaped_code_point}"), - ))); + } else { + let mut char_iter = iter::once(curr_ch).chain(char_iter); + for _ in 0..4 { + curr_ch = char_iter.next().ok_or_else(|| { + LexerError::UnknownEscapeSequence(format!(r"\u{escaped_code_point}")) + })?; + if !curr_ch.is_alphanumeric() { + return Err(LexerError::UnknownEscapeSequence(format!( + r"\u{escaped_code_point}" + )) + .into()); + } + escaped_code_point.push(curr_ch); } - escaped_code_point.push(curr_ch); } - } - u32::from_str_radix(&escaped_code_point, 16) - .ok() - .and_then(char::from_u32) - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(if is_variable_width { + let Ok(code) = u32::from_str_radix(&escaped_code_point, 16) else { 
+ return Err(LexerError::UnknownEscapeSequence(if is_variable_width { format!(r"\u{{{escaped_code_point}}}") } else { format!(r"\u{escaped_code_point}") - })) + }) + .into()); + }; + + Ok(Self { + code, + is_variable_width, }) + } +} + +#[cfg(test)] +mod string_literal_tests { + use super::StringLiteral; + + #[test] + fn quoted() { + for (input, expected) in [ + (r#""""#, ""), + (r#""simple""#, "simple"), + (r#"" white space ""#, " white space "), + (r#""quote \"""#, r#"quote ""#), + (r#""escaped \n\r\b\t\f""#, "escaped \n\r\u{0008}\t\u{000c}"), + (r#""slashes \\ \/""#, r"slashes \ /"), + ( + r#""unicode \u1234\u5678\u90AB\uCDEF""#, + "unicode \u{1234}\u{5678}\u{90ab}\u{cdef}", + ), + ( + r#""string with unicode escape outside BMP \u{1F600}""#, + "string with unicode escape outside BMP \u{1F600}", + ), + ( + r#""string with minimal unicode escape \u{0}""#, + "string with minimal unicode escape \u{0}", + ), + ( + r#""string with maximal unicode escape \u{10FFFF}""#, + "string with maximal unicode escape \u{10FFFF}", + ), + ( + r#""string with maximal minimal unicode escape \u{000000}""#, + "string with maximal minimal unicode escape \u{000000}", + ), + ( + r#""string with unicode surrogate pair escape \uD83D\uDE00""#, + "string with unicode surrogate pair escape \u{1f600}", + ), + ( + r#""string with minimal surrogate pair escape \uD800\uDC00""#, + "string with minimal surrogate pair escape \u{10000}", + ), + ( + r#""string with maximal surrogate pair escape \uDBFF\uDFFF""#, + "string with maximal surrogate pair escape \u{10FFFF}", + ), + ] { + let res = StringLiteral::Quoted(input).parse(); + assert!( + res.is_ok(), + "parsing error occurred on {input}: {}", + res.unwrap_err(), + ); + + assert_eq!(res.unwrap(), expected); + } + } + + #[test] + fn quoted_errors() { + for (input, expected) in [ + ( + r#""bad surrogate \uDEAD""#, + r#"Unknown escape sequence "\uDEAD" in string"#, + ), + ( + r#""bad low surrogate pair \uD800\uD800""#, + r#"Unknown escape sequence 
"\uD800" in string"#, + ), + ] { + let res = StringLiteral::Quoted(input).parse(); + assert!(res.is_err(), "parsing error doesn't occur on {input}"); + + let err = res.unwrap_err(); + assert!( + err.to_string().contains(expected), + "returned error `{err}` doesn't contain `{expected}`", + ); + } + } + + #[test] + fn block() { + for (input, expected) in [ + (r#""""""""#, ""), + (r#""""simple""""#, "simple"), + (r#"""" white space """"#, " white space "), + (r#""""contains " quote""""#, r#"contains " quote"#), + ( + r#""""contains \""" triple quote""""#, + r#"contains """ triple quote"#, + ), + ( + r#""""contains \"" double quote""""#, + r#"contains \"" double quote"#, + ), + ( + r#""""contains \\""" triple quote""""#, + r#"contains \""" triple quote"#, + ), + (r#""""\"""quote" """"#, r#""""quote" "#), + (r#""""multi\nline""""#, r"multi\nline"), + ( + r#""""multi\rline\r\nnormalized""""#, + r"multi\rline\r\nnormalized", + ), + ( + r#""""unescaped \\n\\r\\b\\t\\f\\u1234""""#, + r"unescaped \\n\\r\\b\\t\\f\\u1234", + ), + ( + r#""""unescaped unicode outside BMP \u{1f600}""""#, + r"unescaped unicode outside BMP \u{1f600}", + ), + (r#""""slashes \\\\ \\/""""#, r"slashes \\\\ \\/"), + ( + r#"""" + + spans + multiple + lines + + """"#, + "spans\n multiple\n lines", + ), + // removes uniform indentation + ( + r#"""" + Hello, + World! + + Yours, + GraphQL.""""#, + "Hello,\n World!\n\nYours,\n GraphQL.", + ), + // removes empty leading and trailing lines + ( + r#"""" + + Hello, + World! + + Yours, + GraphQL. + + """"#, + "Hello,\n World!\n\nYours,\n GraphQL.", + ), + // retains indentation from first line + ( + r#"""" Hello, + World! + + Yours, + GraphQL.""""#, + " Hello,\n World!\n\nYours,\n GraphQL.", + ), + // does not alter trailing spaces + ( + r#"""" + Hello, + World! + + Yours, + GraphQL. """"#, + "Hello,\n World!\n\nYours,\n GraphQL. 
", + ), + ] { + let res = StringLiteral::Block(input).parse(); + assert!( + res.is_ok(), + "parsing error occurred on {input}: {}", + res.unwrap_err(), + ); + + assert_eq!(res.unwrap(), expected); + } + } } diff --git a/juniper/src/types/scalars.rs b/juniper/src/types/scalars.rs index 1934d41ca..7fa70b425 100644 --- a/juniper/src/types/scalars.rs +++ b/juniper/src/types/scalars.rs @@ -488,6 +488,34 @@ mod tests { r#""unicode \u1234\u5678\u90AB\uCDEF""#, "unicode \u{1234}\u{5678}\u{90ab}\u{cdef}", ), + ( + r#""string with unicode escape outside BMP \u{1F600}""#, + "string with unicode escape outside BMP \u{1F600}", + ), + ( + r#""string with minimal unicode escape \u{0}""#, + "string with minimal unicode escape \u{0}", + ), + ( + r#""string with maximal unicode escape \u{10FFFF}""#, + "string with maximal unicode escape \u{10FFFF}", + ), + ( + r#""string with maximal minimal unicode escape \u{000000}""#, + "string with maximal minimal unicode escape \u{000000}", + ), + ( + r#""string with unicode surrogate pair escape \uD83D\uDE00""#, + "string with unicode surrogate pair escape \u{1f600}", + ), + ( + r#""string with minimal surrogate pair escape \uD800\uDC00""#, + "string with minimal surrogate pair escape \u{10000}", + ), + ( + r#""string with maximal surrogate pair escape \uDBFF\uDFFF""#, + "string with maximal surrogate pair escape \u{10FFFF}", + ), ] { let res = >::from_str( ScalarToken::String(StringLiteral::Quoted(input)), From 095f40753e19aa28127bd0089c21b8d39c2c37f6 Mon Sep 17 00:00:00 2001 From: tyranron Date: Wed, 24 Sep 2025 23:12:56 +0300 Subject: [PATCH 09/13] Support surrogates in lexer --- juniper/src/parser/lexer.rs | 215 +++++++++++++++++++++++++++++++++--- juniper/src/parser/mod.rs | 2 +- 2 files changed, 200 insertions(+), 17 deletions(-) diff --git a/juniper/src/parser/lexer.rs b/juniper/src/parser/lexer.rs index 4c877029b..c7b77088f 100644 --- a/juniper/src/parser/lexer.rs +++ b/juniper/src/parser/lexer.rs @@ -319,7 +319,30 @@ impl<'a> 
Lexer<'a> { // {HexDigit[list]} // HexDigit HexDigit HexDigit HexDigit 'u' if escaped => { - self.scan_escaped_unicode(&old_pos)?; + let mut code_point = self.scan_escaped_unicode(&old_pos)?; + if code_point.is_high_surrogate() { + let new_pos = self.position; + let (Some((_, '\\')), Some((_, 'u'))) = + (self.next_char(), self.next_char()) + else { + return Err(Spanning::zero_width( + &old_pos, + LexerError::UnknownEscapeSequence(code_point.to_string()), + )); + }; + let trailing_code_point = self.scan_escaped_unicode(&new_pos)?; + if !trailing_code_point.is_low_surrogate() { + return Err(Spanning::zero_width( + &old_pos, + LexerError::UnknownEscapeSequence(code_point.to_string()), + )); + } + code_point = + UnicodeCodePoint::from_surrogate_pair(code_point, trailing_code_point); + } + _ = code_point + .try_into_char() + .map_err(|e| Spanning::zero_width(&old_pos, e))?; escaped = false; } c if escaped => { @@ -419,7 +442,7 @@ impl<'a> Lexer<'a> { fn scan_escaped_unicode( &mut self, start_pos: &SourcePosition, - ) -> Result<(), Spanning> { + ) -> Result> { // EscapedUnicode :: // {HexDigit[list]} // HexDigit HexDigit HexDigit HexDigit @@ -474,6 +497,13 @@ impl<'a> Lexer<'a> { )), )); } + // `\u{10FFFF}` is max code point + if escape.len() - 2 > 6 { + return Err(Spanning::zero_width( + start_pos, + LexerError::UnknownEscapeSequence(format!(r"\u{}", &escape[..escape.len()])), + )); + } u32::from_str_radix(&escape[1..escape.len() - 1], 16) } else { if len != 4 { @@ -491,16 +521,10 @@ impl<'a> Lexer<'a> { ) })?; - // TODO: Support surrogate. 
-
-        char::from_u32(code_point)
-            .ok_or_else(|| {
-                Spanning::zero_width(
-                    start_pos,
-                    LexerError::UnknownEscapeSequence(format!(r"\u{escape}")),
-                )
-            })
-            .map(drop)
+        Ok(UnicodeCodePoint {
+            code: code_point,
+            is_variable_width,
+        })
     }
 
     fn scan_number(&mut self) -> LexerResult<'a> {
@@ -725,6 +749,8 @@ impl UnicodeCodePoint {
     ///
     /// [0]: https://unicodebook.readthedocs.io/unicode_encodings.html#utf-16-surrogate-pairs
     pub(crate) fn from_surrogate_pair(high: Self, low: Self) -> Self {
+        debug_assert!(high.is_high_surrogate(), "`{high}` is not a high surrogate");
+        debug_assert!(low.is_low_surrogate(), "`{low}` is not a low surrogate");
         Self {
             code: 0x10000 + ((high.code & 0x03FF) << 10) + (low.code & 0x03FF),
             is_variable_width: true,
@@ -967,6 +993,83 @@ mod test {
             ),
         );
 
+        assert_eq!(
+            tokenize_single(r#""string with unicode escape outside BMP \u{1F600}""#),
+            Spanning::start_end(
+                &SourcePosition::new(0, 0, 0),
+                &SourcePosition::new(50, 0, 50),
+                Token::Scalar(ScalarToken::String(Quoted(
+                    r#""string with unicode escape outside BMP \u{1F600}""#,
+                ))),
+            ),
+        );
+
+        assert_eq!(
+            tokenize_single(r#""string with minimal unicode escape \u{0}""#),
+            Spanning::start_end(
+                &SourcePosition::new(0, 0, 0),
+                &SourcePosition::new(42, 0, 42),
+                Token::Scalar(ScalarToken::String(Quoted(
+                    r#""string with minimal unicode escape \u{0}""#,
+                ))),
+            ),
+        );
+
+        assert_eq!(
+            tokenize_single(r#""string with maximal unicode escape \u{10FFFF}""#),
+            Spanning::start_end(
+                &SourcePosition::new(0, 0, 0),
+                &SourcePosition::new(47, 0, 47),
+                Token::Scalar(ScalarToken::String(Quoted(
+                    r#""string with maximal unicode escape \u{10FFFF}""#,
+                ))),
+            ),
+        );
+
+        assert_eq!(
+            tokenize_single(r#""string with maximal minimal unicode escape \u{000000}""#),
+            Spanning::start_end(
+                &SourcePosition::new(0, 0, 0),
+                &SourcePosition::new(55, 0, 55),
+                Token::Scalar(ScalarToken::String(Quoted(
+                    r#""string with maximal minimal unicode escape \u{000000}""#,
+                ))),
+            ),
+        );
+
+        
assert_eq!( + tokenize_single(r#""string with unicode surrogate pair escape \uD83D\uDE00""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(56, 0, 56), + Token::Scalar(ScalarToken::String(Quoted( + r#""string with unicode surrogate pair escape \uD83D\uDE00""#, + ))), + ), + ); + + assert_eq!( + tokenize_single(r#""string with minimal surrogate pair escape \uD800\uDC00""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(56, 0, 56), + Token::Scalar(ScalarToken::String(Quoted( + r#""string with minimal surrogate pair escape \uD800\uDC00""#, + ))), + ), + ); + + assert_eq!( + tokenize_single(r#""string with maximal surrogate pair escape \uDBFF\uDFFF""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(56, 0, 56), + Token::Scalar(ScalarToken::String(Quoted( + r#""string with maximal surrogate pair escape \uDBFF\uDFFF""#, + ))), + ), + ); + assert_eq!( tokenize_single("\"contains unescaped \u{0007} control char\""), Spanning::start_end( @@ -1089,18 +1192,98 @@ mod test { ); assert_eq!( - tokenize_error(r#""bad \u{DEAD} esc""#), + tokenize_error(r#""bad \u{FXXX} esc""#), Spanning::zero_width( &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence(r"\u{DEAD}".into()), + LexerError::UnknownEscapeSequence(r"\u{FXXX}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u{FFFF esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u{FFFF".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u{FFF esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u{FFF".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u{FFFF""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u{FFFF".into()), ), ); assert_eq!( - tokenize_error(r#""bad \u{DEA esc""#), + tokenize_error(r#""bad \u{} esc""#), 
Spanning::zero_width( &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence(r"\u{DEA".into()), + LexerError::UnknownEscapeSequence(r"\u{}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""too high \u{110000} esc""#), + Spanning::zero_width( + &SourcePosition::new(11, 0, 11), + LexerError::UnknownEscapeSequence(r"\u{110000}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""way too high \u{12345678} esc""#), + Spanning::zero_width( + &SourcePosition::new(15, 0, 15), + LexerError::UnknownEscapeSequence(r"\u{12345678}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""too long \u{000000000} esc""#), + Spanning::zero_width( + &SourcePosition::new(11, 0, 11), + LexerError::UnknownEscapeSequence(r"\u{000000000}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad surrogate \uDEAD esc""#), + Spanning::zero_width( + &SourcePosition::new(16, 0, 16), + LexerError::UnknownEscapeSequence(r"\uDEAD".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad surrogate \u{DEAD} esc""#), + Spanning::zero_width( + &SourcePosition::new(16, 0, 16), + LexerError::UnknownEscapeSequence(r"\u{DEAD}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad high surrogate pair \uDEAD\uDEAD esc""#), + Spanning::zero_width( + &SourcePosition::new(26, 0, 26), + LexerError::UnknownEscapeSequence(r"\uDEAD".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad low surrogate pair \uD800\uD800 esc""#), + Spanning::zero_width( + &SourcePosition::new(25, 0, 25), + LexerError::UnknownEscapeSequence(r"\uD800".into()), ), ); diff --git a/juniper/src/parser/mod.rs b/juniper/src/parser/mod.rs index eb0189b23..038864d15 100644 --- a/juniper/src/parser/mod.rs +++ b/juniper/src/parser/mod.rs @@ -12,9 +12,9 @@ mod tests; pub use self::document::parse_document_source; +pub(crate) use self::lexer::UnicodeCodePoint; pub use self::{ lexer::{Lexer, LexerError, ScalarToken, StringLiteral, Token}, parser::{OptionParseResult, ParseError, ParseResult, Parser, 
UnlocatedParseResult}, utils::{SourcePosition, Span, Spanning}, }; -pub(crate) use self::lexer::UnicodeCodePoint; \ No newline at end of file From e154d368419b4a39614f2cce488b5129ed11882d Mon Sep 17 00:00:00 2001 From: tyranron Date: Wed, 24 Sep 2025 23:41:54 +0300 Subject: [PATCH 10/13] Fixes --- juniper/CHANGELOG.md | 2 +- juniper/src/lib.rs | 9 +++++++++ juniper/src/tests/fixtures/starwars/schema.rs | 10 ++++++++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/juniper/CHANGELOG.md b/juniper/CHANGELOG.md index bf3a67070..301976b21 100644 --- a/juniper/CHANGELOG.md +++ b/juniper/CHANGELOG.md @@ -31,7 +31,7 @@ All user visible changes to `juniper` crate will be documented in this file. Thi - `__InputValue.isDeprecated` and `__InputValue.deprecationReason` fields. - `schema::meta::Argument::deprecation_status` field. - Support for variable-length escaped Unicode characters (e.g. `\u{110000}`) in strings. ([#1349], [graphql/graphql-spec#849], [graphql/graphql-spec#687]) - - Support full Unicode range. ([#1349], [graphql/graphql-spec#849], [graphql/graphql-spec#687]) + - Full Unicode range support. ([#1349], [graphql/graphql-spec#849], [graphql/graphql-spec#687]) - Support for [block strings][0180-1]. ([#1349]) ### Changed diff --git a/juniper/src/lib.rs b/juniper/src/lib.rs index f0f563313..25e71314a 100644 --- a/juniper/src/lib.rs +++ b/juniper/src/lib.rs @@ -13,6 +13,15 @@ extern crate self as juniper; mod for_benches_only { use bencher as _; } +#[cfg(test)] +mod for_feature_gated_tests_only { + #[cfg(not(feature = "chrono"))] + use chrono as _; + #[cfg(not(feature = "jiff"))] + use jiff as _; + #[cfg(not(feature = "anyhow"))] + use serial_test as _; +} // These are required by the code generated via the `juniper_codegen` macros. 
#[doc(hidden)]
diff --git a/juniper/src/tests/fixtures/starwars/schema.rs b/juniper/src/tests/fixtures/starwars/schema.rs
index 43ff46d9e..f16940863 100644
--- a/juniper/src/tests/fixtures/starwars/schema.rs
+++ b/juniper/src/tests/fixtures/starwars/schema.rs
@@ -1,5 +1,11 @@
-#![cfg_attr(test, expect(dead_code, reason = "GraphQL schema testing"))]
-#![cfg_attr(not(test), expect(missing_docs, reason = "GraphQL schema testing"))]
+#![cfg_attr(
+    not(feature = "expose-test-schema"),
+    expect(dead_code, reason = "GraphQL schema testing")
+)]
+#![cfg_attr(
+    feature = "expose-test-schema",
+    expect(missing_docs, reason = "GraphQL schema testing")
+)]
 
 use std::{collections::HashMap, pin::Pin};
 
From ae6ec5d100df7574dcda5fff26f9c5a191476add Mon Sep 17 00:00:00 2001
From: tyranron
Date: Wed, 1 Oct 2025 16:07:58 +0300
Subject: [PATCH 11/13] Support descriptions on operations and fragments

---
 juniper/CHANGELOG.md                 |   3 +
 juniper/src/ast.rs                   |  13 +-
 juniper/src/parser/document.rs       |  56 ++++--
 juniper/src/parser/tests/document.rs | 260 ++++++++++++++++++++++++++-
 4 files changed, 314 insertions(+), 18 deletions(-)

diff --git a/juniper/CHANGELOG.md b/juniper/CHANGELOG.md
index 301976b21..08ee73937 100644
--- a/juniper/CHANGELOG.md
+++ b/juniper/CHANGELOG.md
@@ -16,6 +16,7 @@ All user visible changes to `juniper` crate will be documented in this file. Thi
 - Made `includeDeprecated` argument of `__Type.fields`, `__Type.enumValues`, `__Type.inputFields`, `__Field.args` and `__Directive.args` fields non-`Null`. ([#1348], [graphql/graphql-spec#1142])
 - Made `@deprecated(reason:)` argument non-`Null`. ([#1348], [graphql/graphql-spec#1040])
 - Changed `ScalarToken::String` to contain raw quoted and escaped `StringLiteral` (was unquoted but escaped string before). ([#1349])
+- Added `description` field to `ast::Operation` and `ast::Fragment`. ([#1349], [graphql/graphql-spec#1170])
 - Added `LexerError::UnterminatedBlockString` variant. 
([#1349])
 
 ### Added
@@ -32,6 +33,7 @@ All user visible changes to `juniper` crate will be documented in this file. Thi
 - `schema::meta::Argument::deprecation_status` field.
 - Support for variable-length escaped Unicode characters (e.g. `\u{110000}`) in strings. ([#1349], [graphql/graphql-spec#849], [graphql/graphql-spec#687])
 - Full Unicode range support. ([#1349], [graphql/graphql-spec#849], [graphql/graphql-spec#687])
+- Support parsing descriptions on operations and fragments. ([#1349], [graphql/graphql-spec#1170])
 - Support for [block strings][0180-1]. ([#1349])
 
 ### Changed
@@ -56,6 +58,7 @@ All user visible changes to `juniper` crate will be documented in this file. Thi
 [graphql/graphql-spec#849]: https://github.com/graphql/graphql-spec/pull/849
 [graphql/graphql-spec#1040]: https://github.com/graphql/graphql-spec/pull/1040
 [graphql/graphql-spec#1142]: https://github.com/graphql/graphql-spec/pull/1142
+[graphql/graphql-spec#1170]: https://github.com/graphql/graphql-spec/pull/1170
 
 [0180-1]: https://spec.graphql.org/September2025/#sec-String-Value.Block-Strings
 
diff --git a/juniper/src/ast.rs b/juniper/src/ast.rs
index 4f22bcc65..4de2b9904 100644
--- a/juniper/src/ast.rs
+++ b/juniper/src/ast.rs
@@ -384,7 +384,7 @@ pub enum OperationType {
 #[expect(missing_docs, reason = "self-explanatory")]
 #[derive(Clone, Debug, PartialEq)]
 pub struct Operation<'a, S> {
-    //pub description: Option>,
+    pub description: Option>>,
     pub operation_type: OperationType,
     pub name: Option>,
     pub variable_definitions: Option>>,
@@ -395,6 +395,7 @@ pub struct Operation<'a, S> {
 #[derive(Clone, Debug, PartialEq)]
 pub struct Fragment<'a, S> {
     pub name: Spanning<&'a str>,
+    pub description: Option>>,
     pub type_condition: Spanning<&'a str>,
     pub directives: Option>>>,
     pub selection_set: Vec>,
@@ -407,6 +408,16 @@ pub enum Definition<'a, S> {
     Fragment(Spanning>),
 }
 
+impl<'a, S> Definition<'a, S> {
+    /// Sets or resets the provided `description` for this [`Definition`]. 
+ pub(crate) fn set_description(&mut self, description: Option>>) { + match self { + Self::Operation(op) => op.item.description = description, + Self::Fragment(frag) => frag.item.description = description, + } + } +} + #[doc(hidden)] pub type Document<'a, S> = [Definition<'a, S>]; #[doc(hidden)] diff --git a/juniper/src/parser/document.rs b/juniper/src/parser/document.rs index ff2f144fb..870417998 100644 --- a/juniper/src/parser/document.rs +++ b/juniper/src/parser/document.rs @@ -1,12 +1,13 @@ -use crate::ast::{ - Arguments, Definition, Directive, Field, Fragment, FragmentSpread, InlineFragment, InputValue, - Operation, OperationType, OwnedDocument, Selection, Type, VariableDefinition, - VariableDefinitions, -}; +use std::borrow::Cow; use crate::{ + ast::{ + Arguments, Definition, Directive, Field, Fragment, FragmentSpread, InlineFragment, + InputValue, Operation, OperationType, OwnedDocument, Selection, Type, VariableDefinition, + VariableDefinitions, + }, parser::{ - Lexer, OptionParseResult, ParseError, ParseResult, Parser, Spanning, Token, + Lexer, OptionParseResult, ParseError, ParseResult, Parser, ScalarToken, Spanning, Token, UnlocatedParseResult, value::parse_value_literal, }, schema::{ @@ -54,18 +55,25 @@ fn parse_definition<'a, S>( where S: ScalarValue, { - match parser.peek().item { + let description = parse_description(parser)?; + + let mut def = match parser.peek().item { + // Descriptions are not permitted on query shorthand. 
+ // See: https://spec.graphql.org/September2025#sel-GAFTRJABAByBz7P + Token::CurlyOpen if description.is_some() => { + return Err(parser.next_token()?.map(ParseError::unexpected_token)); + } Token::CurlyOpen | Token::Name("query") | Token::Name("mutation") - | Token::Name("subscription") => Ok(Definition::Operation(parse_operation_definition( - parser, schema, - )?)), - Token::Name("fragment") => Ok(Definition::Fragment(parse_fragment_definition( - parser, schema, - )?)), - _ => Err(parser.next_token()?.map(ParseError::unexpected_token)), - } + | Token::Name("subscription") => { + Definition::Operation(parse_operation_definition(parser, schema)?) + } + Token::Name("fragment") => Definition::Fragment(parse_fragment_definition(parser, schema)?), + _ => return Err(parser.next_token()?.map(ParseError::unexpected_token)), + }; + def.set_description(description); + Ok(def) } fn parse_operation_definition<'a, S>( @@ -85,6 +93,7 @@ where Operation { operation_type: OperationType::Query, name: None, + description: None, variable_definitions: None, directives: None, selection_set: selection_set.item, @@ -115,6 +124,7 @@ where Operation { operation_type: operation_type.item, name, + description: None, variable_definitions, directives: directives.map(|s| s.item), selection_set: selection_set.item, @@ -158,6 +168,7 @@ where &selection_set.span.end, Fragment { name, + description: None, type_condition: type_cond, directives: directives.map(|s| s.item), selection_set: selection_set.item, @@ -460,6 +471,21 @@ where )) } +fn parse_description<'a>(parser: &mut Parser<'a>) -> OptionParseResult> { + if !matches!(parser.peek().item, Token::Scalar(ScalarToken::String(_))) { + Ok(None) + } else { + let token = parser.next_token()?; + let Token::Scalar(ScalarToken::String(lit)) = token.item else { + unreachable!("already checked to be `ScalarToken::String`") + }; + Ok(Some(Spanning::new( + token.span, + lit.parse().map_err(|e| Spanning::new(token.span, e))?, + ))) + } +} + fn 
parse_directives<'a, S>( parser: &mut Parser<'a>, schema: &SchemaType, diff --git a/juniper/src/parser/tests/document.rs b/juniper/src/parser/tests/document.rs index 31c7ac433..2938c9935 100644 --- a/juniper/src/parser/tests/document.rs +++ b/juniper/src/parser/tests/document.rs @@ -1,5 +1,10 @@ +use std::borrow::Cow; + use crate::{ - ast::{Arguments, Definition, Field, Operation, OperationType, OwnedDocument, Selection}, + ast::{ + Arguments, Definition, Directive, Field, Fragment, FragmentSpread, Operation, + OperationType, OwnedDocument, Selection, Type, VariableDefinition, VariableDefinitions, + }, graphql_input_value, parser::{ParseError, SourcePosition, Spanning, Token, document::parse_document_source}, schema::model::SchemaType, @@ -16,7 +21,7 @@ where s, &SchemaType::new::(&(), &(), &()), ) - .unwrap_or_else(|_| panic!("Parse error on input {s:#?}")) + .unwrap_or_else(|e| panic!("parse error on input {s:#?}:\n{e}")) } fn parse_document_error(s: &str) -> Spanning { @@ -33,6 +38,7 @@ fn parse_document_error(s: &str) -> Spanning { fn simple_ast() { assert_eq!( parse_document::( + // language=GraphQL r#"{ node(id: 4) { id @@ -46,6 +52,7 @@ fn simple_ast() { Operation { operation_type: OperationType::Query, name: None, + description: None, variable_definitions: None, directives: None, selection_set: vec![Selection::Field(Spanning::start_end( @@ -116,6 +123,244 @@ fn simple_ast() { ) } +#[test] +fn description() { + assert_eq!( + parse_document::( + // language=GraphQL + r#" + "Some description with \u90AB symbol" + query SomeOperation( + #"ID you should provide" + $id: String + #"Switch for experiment ...." 
+ $enableBaz: Boolean = false + ) { + foo(id: $id) { + bar + baz @include(if: $enableBaz) { + ...BazInfo + } + } + } + + """ + Some block description here + Multiline + """ + fragment BazInfo on Baz { + whatever + } + "#, + ), + vec![ + Definition::Operation(Spanning::start_end( + &SourcePosition::new(71, 2, 16), + &SourcePosition::new(479, 14, 17), + Operation { + operation_type: OperationType::Query, + name: Some(Spanning::start_end( + &SourcePosition::new(77, 2, 22), + &SourcePosition::new(90, 2, 35), + "SomeOperation", + )), + description: Some(Spanning::start_end( + &SourcePosition::new(17, 1, 16), + &SourcePosition::new(54, 1, 53), + Cow::Owned("Some description with \u{90AB} symbol".into()), + )), + variable_definitions: Some(Spanning::start_end( + &SourcePosition::new(90, 2, 35), + &SourcePosition::new(276, 7, 17), + VariableDefinitions { + items: vec![ + ( + Spanning::start_end( + &SourcePosition::new(153, 4, 18), + &SourcePosition::new(156, 4, 21), + "id", + ), + VariableDefinition { + var_type: Spanning::start_end( + &SourcePosition::new(158, 4, 23), + &SourcePosition::new(164, 4, 29), + Type::nullable("String"), + ), + default_value: None, + directives: None, + }, + ), + ( + Spanning::start_end( + &SourcePosition::new(231, 6, 18), + &SourcePosition::new(241, 6, 28), + "enableBaz", + ), + VariableDefinition { + var_type: Spanning::start_end( + &SourcePosition::new(243, 6, 30), + &SourcePosition::new(250, 6, 37), + Type::nullable("Boolean"), + ), + default_value: Some(Spanning::start_end( + &SourcePosition::new(253, 6, 40), + &SourcePosition::new(258, 6, 45), + graphql_input_value!(false), + )), + directives: None, + }, + ) + ], + } + )), + directives: None, + selection_set: vec![Selection::Field(Spanning::start_end( + &SourcePosition::new(297, 8, 18), + &SourcePosition::new(461, 13, 19), + Field { + alias: None, + name: Spanning::start_end( + &SourcePosition::new(297, 8, 18), + &SourcePosition::new(300, 8, 21), + "foo", + ), + arguments: 
Some(Spanning::start_end( + &SourcePosition::new(300, 8, 21), + &SourcePosition::new(309, 8, 30), + Arguments { + items: vec![( + Spanning::start_end( + &SourcePosition::new(301, 8, 22), + &SourcePosition::new(303, 8, 24), + "id", + ), + Spanning::start_end( + &SourcePosition::new(305, 8, 26), + &SourcePosition::new(308, 8, 29), + graphql_input_value!(@id), + ), + )], + }, + )), + directives: None, + selection_set: Some(vec![ + Selection::Field(Spanning::start_end( + &SourcePosition::new(332, 9, 20), + &SourcePosition::new(335, 9, 23), + Field { + alias: None, + name: Spanning::start_end( + &SourcePosition::new(332, 9, 20), + &SourcePosition::new(335, 9, 23), + "bar", + ), + arguments: None, + directives: None, + selection_set: None, + }, + )), + Selection::Field(Spanning::start_end( + &SourcePosition::new(356, 10, 20), + &SourcePosition::new(441, 12, 21), + Field { + alias: None, + name: Spanning::start_end( + &SourcePosition::new(356, 10, 20), + &SourcePosition::new(359, 10, 23), + "baz", + ), + arguments: None, + directives: Some(vec![Spanning::start_end( + &SourcePosition::new(360, 10, 24), + &SourcePosition::new(384, 10, 48), + Directive { + name: Spanning::start_end( + &SourcePosition::new(361, 10, 25), + &SourcePosition::new(368, 10, 32), + "include", + ), + arguments: Some(Spanning::start_end( + &SourcePosition::new(368, 10, 32), + &SourcePosition::new(384, 10, 48), + Arguments { + items: vec![( + Spanning::start_end( + &SourcePosition::new(369, 10, 33), + &SourcePosition::new(371, 10, 35), + "if", + ), + Spanning::start_end( + &SourcePosition::new(373, 10, 37), + &SourcePosition::new(383, 10, 47), + graphql_input_value!(@enableBaz), + ), + )], + }, + )), + }, + )]), + selection_set: Some(vec![Selection::FragmentSpread( + Spanning::start_end( + &SourcePosition::new(409, 11, 22), + &SourcePosition::new(419, 11, 32), + FragmentSpread { + name: Spanning::start_end( + &SourcePosition::new(412, 11, 25), + &SourcePosition::new(419, 11, 32), + "BazInfo", + ), + 
directives: None, + }, + ) + )]), + }, + )), + ]), + }, + ))], + }, + )), + Definition::Fragment(Spanning::start_end( + &SourcePosition::new(607, 20, 16), + &SourcePosition::new(679, 22, 17), + Fragment { + name: Spanning::start_end( + &SourcePosition::new(616, 20, 25), + &SourcePosition::new(623, 20, 32), + "BazInfo", + ), + description: Some(Spanning::start_end( + &SourcePosition::new(497, 16, 16), + &SourcePosition::new(590, 19, 19), + Cow::Borrowed("Some block description here\nMultiline"), + )), + type_condition: Spanning::start_end( + &SourcePosition::new(627, 20, 36), + &SourcePosition::new(630, 20, 39), + "Baz", + ), + directives: None, + selection_set: vec![Selection::Field(Spanning::start_end( + &SourcePosition::new(653, 21, 20), + &SourcePosition::new(661, 21, 28), + Field { + alias: None, + name: Spanning::start_end( + &SourcePosition::new(653, 21, 20), + &SourcePosition::new(661, 21, 28), + "whatever", + ), + arguments: None, + directives: None, + selection_set: None, + }, + ))] + } + )) + ] + ) +} + #[test] fn errors() { assert_eq!( @@ -143,6 +388,17 @@ fn errors() { ParseError::unexpected_token(Token::CurlyClose) ) ); + + // Descriptions are not permitted on query shorthand. 
+ // See: https://spec.graphql.org/September2025#sel-GAFTRJABAByBz7P + assert_eq!( + parse_document_error::(r#""description" { foo }"#), + Spanning::start_end( + &SourcePosition::new(14, 0, 14), + &SourcePosition::new(15, 0, 15), + ParseError::unexpected_token(Token::CurlyOpen) + ), + ); } #[test] From eb180ec35fb28ffa6451a57c422435dcc5d70e60 Mon Sep 17 00:00:00 2001 From: tyranron Date: Wed, 1 Oct 2025 17:52:30 +0300 Subject: [PATCH 12/13] Support descriptions on variable definitions --- juniper/CHANGELOG.md | 4 +- juniper/src/ast.rs | 1 + juniper/src/parser/document.rs | 3 + juniper/src/parser/tests/document.rs | 131 +++++++++++++++------------ 4 files changed, 79 insertions(+), 60 deletions(-) diff --git a/juniper/CHANGELOG.md b/juniper/CHANGELOG.md index 08ee73937..f3de64115 100644 --- a/juniper/CHANGELOG.md +++ b/juniper/CHANGELOG.md @@ -16,7 +16,7 @@ All user visible changes to `juniper` crate will be documented in this file. Thi - Made `includeDeprecated` argument of `__Type.fields`, `__Type.enumValues`, `__Type.inputFields`, `__Field.args` and `__Directive.args` fields non-`Null`. ([#1348], [graphql/graphql-spec#1142]) - Made `@deprecated(reason:)` argument non-`Null`. ([#1348], [graphql/graphql-spec#1040]) - Changed `ScalarToken::String` to contain raw quoted and escaped `StringLiteral` (was unquoted but escaped string before). ([#1349]) - - Added `description` field to `ast::Operation` and `ast::Fragment` field. ([#1349], [graphql/graphql-spec#1170]) + - Added `description` field to `ast::Operation`, `ast::Fragment` and `ast::VariableDefinition`. ([#1349], [graphql/graphql-spec#1170]) - Added `LexerError::UnterminatedBlockString` variant. ([#1349]) ### Added @@ -33,7 +33,7 @@ All user visible changes to `juniper` crate will be documented in this file. Thi - `schema::meta::Argument::deprecation_status` field. - Support for variable-length escaped Unicode characters (e.g. `\u{110000}`) in strings. 
([#1349], [graphql/graphql-spec#849], [graphql/graphql-spec#687]) - Full Unicode range support. ([#1349], [graphql/graphql-spec#849], [graphql/graphql-spec#687]) - - Support parsing descriptions on fragments. ([#1349], [graphql/graphql-spec#1170]) + - Support parsing descriptions on operations, fragments and variable definitions. ([#1349], [graphql/graphql-spec#1170]) - Support for [block strings][0180-1]. ([#1349]) ### Changed diff --git a/juniper/src/ast.rs b/juniper/src/ast.rs index 657626363..ef7bddef3 100644 --- a/juniper/src/ast.rs +++ b/juniper/src/ast.rs @@ -307,6 +307,7 @@ pub enum InputValue { #[derive(Clone, Debug, PartialEq)] pub struct VariableDefinition<'a, S> { + pub description: Option>>, pub var_type: Spanning>, pub default_value: Option>>, pub directives: Option>>>, diff --git a/juniper/src/parser/document.rs b/juniper/src/parser/document.rs index 870417998..26981e398 100644 --- a/juniper/src/parser/document.rs +++ b/juniper/src/parser/document.rs @@ -440,6 +440,8 @@ fn parse_variable_definition<'a, S>( where S: ScalarValue, { + let description = parse_description(parser)?; + let start_pos = parser.expect(&Token::Dollar)?.span.start; let var_name = parser.expect_name()?; parser.expect(&Token::Colon)?; @@ -463,6 +465,7 @@ where ( Spanning::start_end(&start_pos, &var_name.span.end, var_name.item), VariableDefinition { + description, var_type, default_value, directives: directives.map(|s| s.item), diff --git a/juniper/src/parser/tests/document.rs b/juniper/src/parser/tests/document.rs index 56853d994..345e1255a 100644 --- a/juniper/src/parser/tests/document.rs +++ b/juniper/src/parser/tests/document.rs @@ -127,9 +127,12 @@ fn description() { r#" "Some description with \u90AB symbol" query SomeOperation( - #"ID you should provide" + "ID you should provide and \u{90AB} symbol" $id: String - #"Switch for experiment ...." + """ + Switch for experiment .... 
+ Multiline + """ $enableBaz: Boolean = false ) { foo(id: $id) { @@ -152,7 +155,7 @@ fn description() { vec![ ast::Definition::Operation(Spanning::start_end( &SourcePosition::new(71, 2, 16), - &SourcePosition::new(479, 14, 17), + &SourcePosition::new(567, 17, 17), ast::Operation { operation_type: ast::OperationType::Query, name: Some(Spanning::start_end( @@ -167,19 +170,26 @@ fn description() { )), variable_definitions: Some(Spanning::start_end( &SourcePosition::new(90, 2, 35), - &SourcePosition::new(276, 7, 17), + &SourcePosition::new(364, 10, 17), ast::VariableDefinitions { items: vec![ ( Spanning::start_end( - &SourcePosition::new(153, 4, 18), - &SourcePosition::new(156, 4, 21), + &SourcePosition::new(172, 4, 18), + &SourcePosition::new(175, 4, 21), "id", ), ast::VariableDefinition { + description: Some(Spanning::start_end( + &SourcePosition::new(110, 3, 18), + &SourcePosition::new(153, 3, 61), + Cow::Owned( + "ID you should provide and \u{90AB} symbol".into(), + ), + )), var_type: Spanning::start_end( - &SourcePosition::new(158, 4, 23), - &SourcePosition::new(164, 4, 29), + &SourcePosition::new(177, 4, 23), + &SourcePosition::new(183, 4, 29), ast::Type::nullable("String"), ), default_value: None, @@ -188,19 +198,24 @@ fn description() { ), ( Spanning::start_end( - &SourcePosition::new(231, 6, 18), - &SourcePosition::new(241, 6, 28), + &SourcePosition::new(319, 9, 18), + &SourcePosition::new(329, 9, 28), "enableBaz", ), ast::VariableDefinition { + description: Some(Spanning::start_end( + &SourcePosition::new(202, 5, 18), + &SourcePosition::new(300, 8, 21), + Cow::Borrowed("Switch for experiment ....\nMultiline"), + )), var_type: Spanning::start_end( - &SourcePosition::new(243, 6, 30), - &SourcePosition::new(250, 6, 37), + &SourcePosition::new(331, 9, 30), + &SourcePosition::new(338, 9, 37), ast::Type::nullable("Boolean"), ), default_value: Some(Spanning::start_end( - &SourcePosition::new(253, 6, 40), - &SourcePosition::new(258, 6, 45), + 
&SourcePosition::new(341, 9, 40), + &SourcePosition::new(346, 9, 45), graphql::input_value!(false), )), directives: None, @@ -211,28 +226,28 @@ fn description() { )), directives: None, selection_set: vec![ast::Selection::Field(Spanning::start_end( - &SourcePosition::new(297, 8, 18), - &SourcePosition::new(461, 13, 19), + &SourcePosition::new(385, 11, 18), + &SourcePosition::new(549, 16, 19), ast::Field { alias: None, name: Spanning::start_end( - &SourcePosition::new(297, 8, 18), - &SourcePosition::new(300, 8, 21), + &SourcePosition::new(385, 11, 18), + &SourcePosition::new(388, 11, 21), "foo", ), arguments: Some(Spanning::start_end( - &SourcePosition::new(300, 8, 21), - &SourcePosition::new(309, 8, 30), + &SourcePosition::new(388, 11, 21), + &SourcePosition::new(397, 11, 30), ast::Arguments { items: vec![( Spanning::start_end( - &SourcePosition::new(301, 8, 22), - &SourcePosition::new(303, 8, 24), + &SourcePosition::new(389, 11, 22), + &SourcePosition::new(391, 11, 24), "id", ), Spanning::start_end( - &SourcePosition::new(305, 8, 26), - &SourcePosition::new(308, 8, 29), + &SourcePosition::new(393, 11, 26), + &SourcePosition::new(396, 11, 29), graphql::input_value!(@id), ), )], @@ -241,13 +256,13 @@ fn description() { directives: None, selection_set: Some(vec![ ast::Selection::Field(Spanning::start_end( - &SourcePosition::new(332, 9, 20), - &SourcePosition::new(335, 9, 23), + &SourcePosition::new(420, 12, 20), + &SourcePosition::new(423, 12, 23), ast::Field { alias: None, name: Spanning::start_end( - &SourcePosition::new(332, 9, 20), - &SourcePosition::new(335, 9, 23), + &SourcePosition::new(420, 12, 20), + &SourcePosition::new(423, 12, 23), "bar", ), arguments: None, @@ -256,38 +271,38 @@ fn description() { }, )), ast::Selection::Field(Spanning::start_end( - &SourcePosition::new(356, 10, 20), - &SourcePosition::new(441, 12, 21), + &SourcePosition::new(444, 13, 20), + &SourcePosition::new(529, 15, 21), ast::Field { alias: None, name: Spanning::start_end( - 
&SourcePosition::new(356, 10, 20), - &SourcePosition::new(359, 10, 23), + &SourcePosition::new(444, 13, 20), + &SourcePosition::new(447, 13, 23), "baz", ), arguments: None, directives: Some(vec![Spanning::start_end( - &SourcePosition::new(360, 10, 24), - &SourcePosition::new(384, 10, 48), + &SourcePosition::new(448, 13, 24), + &SourcePosition::new(472, 13, 48), ast::Directive { name: Spanning::start_end( - &SourcePosition::new(361, 10, 25), - &SourcePosition::new(368, 10, 32), + &SourcePosition::new(449, 13, 25), + &SourcePosition::new(456, 13, 32), "include", ), arguments: Some(Spanning::start_end( - &SourcePosition::new(368, 10, 32), - &SourcePosition::new(384, 10, 48), + &SourcePosition::new(456, 13, 32), + &SourcePosition::new(472, 13, 48), ast::Arguments { items: vec![( Spanning::start_end( - &SourcePosition::new(369, 10, 33), - &SourcePosition::new(371, 10, 35), + &SourcePosition::new(457, 13, 33), + &SourcePosition::new(459, 13, 35), "if", ), Spanning::start_end( - &SourcePosition::new(373, 10, 37), - &SourcePosition::new(383, 10, 47), + &SourcePosition::new(461, 13, 37), + &SourcePosition::new(471, 13, 47), graphql::input_value!(@enableBaz), ), )], @@ -297,12 +312,12 @@ fn description() { )]), selection_set: Some(vec![ast::Selection::FragmentSpread( Spanning::start_end( - &SourcePosition::new(409, 11, 22), - &SourcePosition::new(419, 11, 32), + &SourcePosition::new(497, 14, 22), + &SourcePosition::new(507, 14, 32), ast::FragmentSpread { name: Spanning::start_end( - &SourcePosition::new(412, 11, 25), - &SourcePosition::new(419, 11, 32), + &SourcePosition::new(500, 14, 25), + &SourcePosition::new(507, 14, 32), "BazInfo", ), directives: None, @@ -317,33 +332,33 @@ fn description() { }, )), ast::Definition::Fragment(Spanning::start_end( - &SourcePosition::new(607, 20, 16), - &SourcePosition::new(679, 22, 17), + &SourcePosition::new(695, 23, 16), + &SourcePosition::new(767, 25, 17), ast::Fragment { name: Spanning::start_end( - &SourcePosition::new(616, 20, 25), 
- &SourcePosition::new(623, 20, 32), + &SourcePosition::new(704, 23, 25), + &SourcePosition::new(711, 23, 32), "BazInfo", ), description: Some(Spanning::start_end( - &SourcePosition::new(497, 16, 16), - &SourcePosition::new(590, 19, 19), + &SourcePosition::new(585, 19, 16), + &SourcePosition::new(678, 22, 19), Cow::Borrowed("Some block description here\nMultiline"), )), type_condition: Spanning::start_end( - &SourcePosition::new(627, 20, 36), - &SourcePosition::new(630, 20, 39), + &SourcePosition::new(715, 23, 36), + &SourcePosition::new(718, 23, 39), "Baz", ), directives: None, selection_set: vec![ast::Selection::Field(Spanning::start_end( - &SourcePosition::new(653, 21, 20), - &SourcePosition::new(661, 21, 28), + &SourcePosition::new(741, 24, 20), + &SourcePosition::new(749, 24, 28), ast::Field { alias: None, name: Spanning::start_end( - &SourcePosition::new(653, 21, 20), - &SourcePosition::new(661, 21, 28), + &SourcePosition::new(741, 24, 20), + &SourcePosition::new(749, 24, 28), "whatever", ), arguments: None, From e4adeab326558953b9d80dff54ae0d06e9c507a0 Mon Sep 17 00:00:00 2001 From: tyranron Date: Wed, 1 Oct 2025 18:31:20 +0300 Subject: [PATCH 13/13] Polish --- juniper/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/juniper/CHANGELOG.md b/juniper/CHANGELOG.md index f3de64115..da6c19d4e 100644 --- a/juniper/CHANGELOG.md +++ b/juniper/CHANGELOG.md @@ -15,8 +15,8 @@ All user visible changes to `juniper` crate will be documented in this file. Thi - [September 2025] GraphQL spec: ([#1347]) - Made `includeDeprecated` argument of `__Type.fields`, `__Type.enumValues`, `__Type.inputFields`, `__Field.args` and `__Directive.args` fields non-`Null`. ([#1348], [graphql/graphql-spec#1142]) - Made `@deprecated(reason:)` argument non-`Null`. ([#1348], [graphql/graphql-spec#1040]) -- Changed `ScalarToken::String` to contain raw quoted and escaped `StringLiteral` (was unquoted but escaped string before). 
([#1349]) - Added `description` field to `ast::Operation`, `ast::Fragment` and `ast::VariableDefinition`. ([#1349], [graphql/graphql-spec#1170]) +- Changed `ScalarToken::String` to contain raw quoted and escaped `StringLiteral` (was unquoted but escaped string before). ([#1349]) - Added `LexerError::UnterminatedBlockString` variant. ([#1349]) ### Added