diff --git a/juniper/CHANGELOG.md b/juniper/CHANGELOG.md index 4863ea4bf..da6c19d4e 100644 --- a/juniper/CHANGELOG.md +++ b/juniper/CHANGELOG.md @@ -15,19 +15,26 @@ All user visible changes to `juniper` crate will be documented in this file. Thi - [September 2025] GraphQL spec: ([#1347]) - Made `includeDeprecated` argument of `__Type.fields`, `__Type.enumValues`, `__Type.inputFields`, `__Field.args` and `__Directive.args` fields non-`Null`. ([#1348], [graphql/graphql-spec#1142]) - Made `@deprecated(reason:)` argument non-`Null`. ([#1348], [graphql/graphql-spec#1040]) + - Added `description` field to `ast::Operation`, `ast::Fragment` and `ast::VariableDefinition`. ([#1349], [graphql/graphql-spec#1170]) +- Changed `ScalarToken::String` to contain raw quoted and escaped `StringLiteral` (was unquoted but escaped string before). ([#1349]) +- Added `LexerError::UnterminatedBlockString` variant. ([#1349]) ### Added - [September 2025] GraphQL spec: ([#1347]) - `__Type.isOneOf` field. ([#1348], [graphql/graphql-spec#825]) - `SCHEMA`, `OBJECT`, `ARGUMENT_DEFINITION`, `INTERFACE`, `UNION`, `ENUM`, `INPUT_OBJECT` and `INPUT_FIELD_DEFINITION` values to `__DirectiveLocation` enum. ([#1348]) - - Arguments and input object fields deprecation: ([#1348], [#864], [graphql/graphql-spec#525], [graphql/graphql-spec#805]) + - Arguments and input object fields deprecation: ([#1348], [#864], [graphql/graphql-spec#525], [graphql/graphql-spec#805]) - Placing `#[graphql(deprecated)]` and `#[deprecated]` attributes on struct fields in `#[derive(GraphQLInputObject)]` macro. - Placing `#[graphql(deprecated)]` attribute on method arguments in `#[graphql_object]` and `#[graphql_interface]` macros. - Placing `@deprecated` directive on arguments and input object fields. - `includeDeprecated` argument to `__Type.inputFields`, `__Field.args` and `__Directive.args` fields. - `__InputValue.isDeprecated` and `__InputValue.deprecationReason` fields. - `schema::meta::Argument::deprecation_status` field. 
+ - Support for variable-length escaped Unicode characters (e.g. `\u{110000}`) in strings. ([#1349], [graphql/graphql-spec#849], [graphql/graphql-spec#687]) + - Full Unicode range support. ([#1349], [graphql/graphql-spec#849], [graphql/graphql-spec#687]) + - Support parsing descriptions on operations, fragments and variable definitions. ([#1349], [graphql/graphql-spec#1170]) +- Support for [block strings][0180-1]. ([#1349]) ### Changed @@ -38,15 +45,21 @@ All user visible changes to `juniper` crate will be documented in this file. Thi - Incorrect `__Type.specifiedByUrl` field to `__Type.specifiedByURL`. ([#1348]) - Missing `@specifiedBy(url:)` directive in [SDL] generated by `RootNode::as_sdl()` and `RootNode::as_document()` methods. ([#1348]) +- Incorrect double escaping in `ScalarToken::String` `Display`ing. ([#1349]) [#864]: /../../issues/864 [#1347]: /../../issues/1347 [#1348]: /../../pull/1348 +[#1349]: /../../pull/1349 [graphql/graphql-spec#525]: https://github.com/graphql/graphql-spec/pull/525 +[graphql/graphql-spec#687]: https://github.com/graphql/graphql-spec/issues/687 [graphql/graphql-spec#805]: https://github.com/graphql/graphql-spec/pull/805 [graphql/graphql-spec#825]: https://github.com/graphql/graphql-spec/pull/825 +[graphql/graphql-spec#849]: https://github.com/graphql/graphql-spec/pull/849 [graphql/graphql-spec#1040]: https://github.com/graphql/graphql-spec/pull/1040 [graphql/graphql-spec#1142]: https://github.com/graphql/graphql-spec/pull/1142 +[graphql/graphql-spec#1170]: https://github.com/graphql/graphql-spec/pull/1170 +[0180-1]: https://spec.graphql.org/September2025/#sec-String-Value.Block-Strings diff --git a/juniper/src/ast.rs b/juniper/src/ast.rs index 398b1c8b5..ef7bddef3 100644 --- a/juniper/src/ast.rs +++ b/juniper/src/ast.rs @@ -307,6 +307,7 @@ pub enum InputValue { #[derive(Clone, Debug, PartialEq)] pub struct VariableDefinition<'a, S> { + pub description: Option>>, pub var_type: Spanning>, pub default_value: Option>>, pub directives: 
Option>>>, @@ -384,6 +385,7 @@ pub enum OperationType { #[expect(missing_docs, reason = "self-explanatory")] #[derive(Clone, Debug, PartialEq)] pub struct Operation<'a, S> { + pub description: Option>>, pub operation_type: OperationType, pub name: Option>, pub variable_definitions: Option>>, @@ -394,6 +396,7 @@ pub struct Operation<'a, S> { #[derive(Clone, Debug, PartialEq)] pub struct Fragment<'a, S> { pub name: Spanning<&'a str>, + pub description: Option>>, pub type_condition: Spanning<&'a str>, pub directives: Option>>>, pub selection_set: Vec>, @@ -406,6 +409,16 @@ pub enum Definition<'a, S> { Fragment(Spanning>), } +impl<'a, S> Definition<'a, S> { + /// Sets or resets the provided `description` for this [`Definition`]. + pub(crate) fn set_description(&mut self, description: Option>>) { + match self { + Self::Operation(op) => op.item.description = description, + Self::Fragment(frag) => frag.item.description = description, + } + } +} + #[doc(hidden)] pub type Document<'a, S> = [Definition<'a, S>]; #[doc(hidden)] diff --git a/juniper/src/lib.rs b/juniper/src/lib.rs index 70c04a108..ff83576d2 100644 --- a/juniper/src/lib.rs +++ b/juniper/src/lib.rs @@ -13,6 +13,15 @@ extern crate self as juniper; mod for_benches_only { use bencher as _; } +#[cfg(test)] +mod for_feature_gated_tests_only { + #[cfg(not(feature = "chrono"))] + use chrono as _; + #[cfg(not(feature = "jiff"))] + use jiff as _; + #[cfg(not(feature = "anyhow"))] + use serial_test as _; +} // These are required by the code generated via the `juniper_codegen` macros. 
#[doc(hidden)] @@ -87,7 +96,7 @@ pub use crate::{ }, introspection::IntrospectionFormat, macros::helper::subscription::{ExtractTypeFromStream, IntoFieldResult}, - parser::{ParseError, ScalarToken, Span, Spanning}, + parser::{ParseError, ScalarToken, Span, Spanning, StringLiteral}, schema::{ meta, model::{RootNode, SchemaType}, diff --git a/juniper/src/parser/document.rs b/juniper/src/parser/document.rs index fdb4922f9..26981e398 100644 --- a/juniper/src/parser/document.rs +++ b/juniper/src/parser/document.rs @@ -1,12 +1,13 @@ -use crate::ast::{ - Arguments, Definition, Directive, Field, Fragment, FragmentSpread, InlineFragment, InputValue, - Operation, OperationType, OwnedDocument, Selection, Type, VariableDefinition, - VariableDefinitions, -}; +use std::borrow::Cow; use crate::{ + ast::{ + Arguments, Definition, Directive, Field, Fragment, FragmentSpread, InlineFragment, + InputValue, Operation, OperationType, OwnedDocument, Selection, Type, VariableDefinition, + VariableDefinitions, + }, parser::{ - Lexer, OptionParseResult, ParseError, ParseResult, Parser, Spanning, Token, + Lexer, OptionParseResult, ParseError, ParseResult, Parser, ScalarToken, Spanning, Token, UnlocatedParseResult, value::parse_value_literal, }, schema::{ @@ -25,7 +26,7 @@ where S: ScalarValue, { let mut lexer = Lexer::new(s); - let mut parser = Parser::new(&mut lexer).map_err(|s| s.map(ParseError::LexerError))?; + let mut parser = Parser::new(&mut lexer).map_err(|s| s.map(Into::into))?; parse_document(&mut parser, schema) } @@ -54,18 +55,25 @@ fn parse_definition<'a, S>( where S: ScalarValue, { - match parser.peek().item { + let description = parse_description(parser)?; + + let mut def = match parser.peek().item { + // Descriptions are not permitted on query shorthand. 
+ // See: https://spec.graphql.org/September2025#sel-GAFTRJABAByBz7P + Token::CurlyOpen if description.is_some() => { + return Err(parser.next_token()?.map(ParseError::unexpected_token)); + } Token::CurlyOpen | Token::Name("query") | Token::Name("mutation") - | Token::Name("subscription") => Ok(Definition::Operation(parse_operation_definition( - parser, schema, - )?)), - Token::Name("fragment") => Ok(Definition::Fragment(parse_fragment_definition( - parser, schema, - )?)), - _ => Err(parser.next_token()?.map(ParseError::unexpected_token)), - } + | Token::Name("subscription") => { + Definition::Operation(parse_operation_definition(parser, schema)?) + } + Token::Name("fragment") => Definition::Fragment(parse_fragment_definition(parser, schema)?), + _ => return Err(parser.next_token()?.map(ParseError::unexpected_token)), + }; + def.set_description(description); + Ok(def) } fn parse_operation_definition<'a, S>( @@ -85,6 +93,7 @@ where Operation { operation_type: OperationType::Query, name: None, + description: None, variable_definitions: None, directives: None, selection_set: selection_set.item, @@ -115,6 +124,7 @@ where Operation { operation_type: operation_type.item, name, + description: None, variable_definitions, directives: directives.map(|s| s.item), selection_set: selection_set.item, @@ -158,6 +168,7 @@ where &selection_set.span.end, Fragment { name, + description: None, type_condition: type_cond, directives: directives.map(|s| s.item), selection_set: selection_set.item, @@ -429,6 +440,8 @@ fn parse_variable_definition<'a, S>( where S: ScalarValue, { + let description = parse_description(parser)?; + let start_pos = parser.expect(&Token::Dollar)?.span.start; let var_name = parser.expect_name()?; parser.expect(&Token::Colon)?; @@ -452,6 +465,7 @@ where ( Spanning::start_end(&start_pos, &var_name.span.end, var_name.item), VariableDefinition { + description, var_type, default_value, directives: directives.map(|s| s.item), @@ -460,6 +474,21 @@ where )) } +fn 
parse_description<'a>(parser: &mut Parser<'a>) -> OptionParseResult> { + if !matches!(parser.peek().item, Token::Scalar(ScalarToken::String(_))) { + Ok(None) + } else { + let token = parser.next_token()?; + let Token::Scalar(ScalarToken::String(lit)) = token.item else { + unreachable!("already checked to be `ScalarToken::String`") + }; + Ok(Some(Spanning::new( + token.span, + lit.parse().map_err(|e| Spanning::new(token.span, e))?, + ))) + } +} + fn parse_directives<'a, S>( parser: &mut Parser<'a>, schema: &SchemaType, diff --git a/juniper/src/parser/lexer.rs b/juniper/src/parser/lexer.rs index 56b1a61de..c7b77088f 100644 --- a/juniper/src/parser/lexer.rs +++ b/juniper/src/parser/lexer.rs @@ -1,4 +1,4 @@ -use std::{char, iter::Peekable, str::CharIndices}; +use std::{char, fmt, ops::Deref, str::CharIndices}; use derive_more::with_trait::{Display, Error}; @@ -7,25 +7,51 @@ use crate::parser::{SourcePosition, Spanning}; #[doc(hidden)] #[derive(Debug)] pub struct Lexer<'a> { - iterator: Peekable>, + iterator: itertools::PeekNth>, source: &'a str, length: usize, position: SourcePosition, has_reached_eof: bool, } -/// A single scalar value literal +/// Representation of a raw unparsed scalar value literal. /// /// This is only used for tagging how the lexer has interpreted a value literal #[expect(missing_docs, reason = "self-explanatory")] #[derive(Clone, Copy, Debug, Display, Eq, PartialEq)] pub enum ScalarToken<'a> { - #[display("\"{}\"", _0.replace('\\', "\\\\").replace('"', "\\\""))] - String(&'a str), + String(StringLiteral<'a>), Float(&'a str), Int(&'a str), } +/// Representation of a raw unparsed [String Value] literal (with quotes included). +/// +/// [String Value]: https://spec.graphql.org/October2021#sec-String-Value +#[derive(Clone, Copy, Debug, Display, Eq, PartialEq)] +pub enum StringLiteral<'a> { + /// [Quoted][0] literal (denoted by single quotes `"`). 
+ /// + /// [0]: https://spec.graphql.org/October2021#StringCharacter + Quoted(&'a str), + + /// [Block][0] literal (denoted by triple quotes `"""`). + /// + /// [0]: https://spec.graphql.org/October2021#BlockStringCharacter + Block(&'a str), +} + +impl Deref for StringLiteral<'_> { + type Target = str; + + fn deref(&self) -> &Self::Target { + match self { + Self::Quoted(s) => s, + Self::Block(s) => s, + } + } +} + /// A single token in the input source #[expect(missing_docs, reason = "self-explanatory")] #[derive(Clone, Copy, Debug, Display, Eq, PartialEq)] @@ -87,12 +113,9 @@ pub enum LexerError { #[display("Unterminated string literal")] UnterminatedString, - /// An unknown character in a string literal was found - /// - /// This occurs when an invalid source character is found in a string - /// literal, such as ASCII control characters. - #[display("Unknown character \"{_0}\" in string literal")] - UnknownCharacterInString(#[error(not(source))] char), + /// An unterminated block string literal was found. + #[display("Unterminated block string literal")] + UnterminatedBlockString, /// An unknown escape sequence in a string literal was found /// @@ -119,7 +142,7 @@ impl<'a> Lexer<'a> { #[doc(hidden)] pub fn new(source: &'a str) -> Lexer<'a> { Lexer { - iterator: source.char_indices().peekable(), + iterator: itertools::peek_nth(source.char_indices()), source, length: source.len(), position: SourcePosition::new_origin(), @@ -162,25 +185,51 @@ impl<'a> Lexer<'a> { Spanning::single_width(&start_pos, t) } + /// Advances this [`Lexer`] over any [ignored] character until a non-[ignored] is met. 
+ /// + /// [ignored]: https://spec.graphql.org/September2025#Ignored fn scan_over_whitespace(&mut self) { while let Some((_, ch)) = self.peek_char() { - if ch == '\t' || ch == ' ' || ch == '\n' || ch == '\r' || ch == ',' { - self.next_char(); - } else if ch == '#' { - self.next_char(); - - while let Some((_, ch)) = self.peek_char() { - if is_source_char(ch) && (ch == '\n' || ch == '\r') { - self.next_char(); - break; - } else if is_source_char(ch) { - self.next_char(); - } else { - break; + // Ignored :: + // UnicodeBOM + // WhiteSpace + // LineTerminator + // Comment + // Comma + match ch { + // UnicodeBOM :: + // Byte Order Mark (U+FEFF) + // Whitespace :: + // Horizontal Tab (U+0009) + // Space (U+0020) + // LineTerminator :: + // New Line (U+000A) + // Carriage Return (U+000D) [lookahead != New Line (U+000A)] + // Carriage Return (U+000D) New Line (U+000A) + // Comma :: + // , + '\u{FEFF}' | '\t' | ' ' | '\n' | '\r' | ',' => _ = self.next_char(), + // Comment :: + // #CommentChar[list][opt] [lookahead != CommentChar] + // CommentChar :: + // SourceCharacter but not LineTerminator + '#' => { + _ = self.next_char(); + while let Some((_, ch)) = self.peek_char() { + _ = self.next_char(); + match ch { + '\r' if matches!(self.peek_char(), Some((_, '\n'))) => { + _ = self.next_char(); + break; + } + '\n' | '\r' => break, + // Continue scanning `Comment`. + _ => {} + } } } - } else { - break; + // Any other character is not `Ignored`. + _ => break, } } } @@ -232,7 +281,16 @@ impl<'a> Lexer<'a> { )) } + /// Scans a [string] by this [`Lexer`], but not a [block string]. 
+ /// + /// [string]: https://spec.graphql.org/September2025#StringValue + /// [block string]: https://spec.graphql.org/September2025#BlockString fn scan_string(&mut self) -> LexerResult<'a> { + // StringValue :: + // "" [lookahead != "] + // "StringCharacter[list]" + // BlockString + let start_pos = self.position; let (start_idx, start_ch) = self .next_char() @@ -247,12 +305,44 @@ impl<'a> Lexer<'a> { let mut escaped = false; let mut old_pos = self.position; while let Some((idx, ch)) = self.next_char() { + // StringCharacter :: + // SourceCharacter but not " or \ or LineTerminator + // \uEscapedUnicode + // \EscapedCharacter match ch { - 'b' | 'f' | 'n' | 'r' | 't' | '\\' | '/' | '"' if escaped => { + // EscapedCharacter :: one of + // " \ / b f n r t + '"' | '\\' | '/' | 'b' | 'f' | 'n' | 'r' | 't' if escaped => { escaped = false; } + // EscapedUnicode :: + // {HexDigit[list]} + // HexDigit HexDigit HexDigit HexDigit 'u' if escaped => { - self.scan_escaped_unicode(&old_pos)?; + let mut code_point = self.scan_escaped_unicode(&old_pos)?; + if code_point.is_high_surrogate() { + let new_pos = self.position; + let (Some((_, '\\')), Some((_, 'u'))) = + (self.next_char(), self.next_char()) + else { + return Err(Spanning::zero_width( + &old_pos, + LexerError::UnknownEscapeSequence(code_point.to_string()), + )); + }; + let trailing_code_point = self.scan_escaped_unicode(&new_pos)?; + if !trailing_code_point.is_low_surrogate() { + return Err(Spanning::zero_width( + &old_pos, + LexerError::UnknownEscapeSequence(code_point.to_string()), + )); + } + code_point = + UnicodeCodePoint::from_surrogate_pair(code_point, trailing_code_point); + } + _ = code_point + .try_into_char() + .map_err(|e| Spanning::zero_width(&old_pos, e))?; escaped = false; } c if escaped => { @@ -266,7 +356,9 @@ impl<'a> Lexer<'a> { return Ok(Spanning::start_end( &start_pos, &self.position, - Token::Scalar(ScalarToken::String(&self.source[start_idx + 1..idx])), + 
Token::Scalar(ScalarToken::String(StringLiteral::Quoted( + &self.source[start_idx..=idx], + ))), + )); + } '\n' | '\r' => { @@ -275,12 +367,8 @@ return Err(Spanning::zero_width( &old_pos, LexerError::UnterminatedString, )); } - c if !is_source_char(c) => { - return Err(Spanning::zero_width( - &old_pos, - LexerError::UnknownCharacterInString(ch), - )); - } + // Any other valid Unicode scalar value is a `SourceCharacter`: + // https://spec.graphql.org/September2025#SourceCharacter _ => {} old_pos = self.position; @@ -292,27 +380,105 @@ + /// Scans a [block string] by this [`Lexer`]. + /// + /// [block string]: https://spec.graphql.org/September2025#BlockString + fn scan_block_string(&mut self) -> LexerResult<'a> { + // BlockString :: + // """BlockStringCharacter[list][opt]""" + + let start_pos = self.position; + let (start_idx, mut start_ch) = self + .next_char() + .ok_or_else(|| Spanning::zero_width(&self.position, LexerError::UnexpectedEndOfFile))?; + if start_ch != '"' { + return Err(Spanning::zero_width( + &self.position, + LexerError::UnterminatedString, + )); + } + for _ in 0..2 { + (_, start_ch) = self.next_char().ok_or_else(|| { + Spanning::zero_width(&self.position, LexerError::UnexpectedEndOfFile) + })?; + if start_ch != '"' { + return Err(Spanning::zero_width( + &self.position, + LexerError::UnexpectedCharacter(start_ch), + )); + } + } + let (mut quotes, mut escaped) = (0, false); + while let Some((idx, ch)) = self.next_char() { + // BlockStringCharacter :: + // SourceCharacter but not """ or \""" + // \""" + match ch { + '\\' => (quotes, escaped) = (0, true), + '"' if escaped => (quotes, escaped) = if quotes < 2 { (quotes + 1, true) } else { (0, false) }, // `\"""` escapes a whole triple-quote sequence, not a single quote + '"' if quotes < 2 => quotes += 1, + '"' if quotes == 2 => { + return Ok(Spanning::start_end( + &start_pos, + &self.position, + Token::Scalar(ScalarToken::String(StringLiteral::Block( + &self.source[start_idx..=idx], + ))), + )); + } + _ => (quotes, escaped) = (0, false), + } + } + + Err(Spanning::zero_width( + &self.position, + 
LexerError::UnterminatedBlockString, + )) + } + + /// Scans an [escaped unicode] character by this [`Lexer`]. + /// + /// [escaped unicode]: https://spec.graphql.org/September2025#EscapedUnicode fn scan_escaped_unicode( &mut self, start_pos: &SourcePosition, - ) -> Result<(), Spanning> { - let (start_idx, _) = self + ) -> Result> { + // EscapedUnicode :: + // {HexDigit[list]} + // HexDigit HexDigit HexDigit HexDigit + + let (start_idx, mut curr_ch) = self .peek_char() .ok_or_else(|| Spanning::zero_width(&self.position, LexerError::UnterminatedString))?; let mut end_idx = start_idx; let mut len = 0; - for _ in 0..4 { - let (idx, ch) = self.next_char().ok_or_else(|| { - Spanning::zero_width(&self.position, LexerError::UnterminatedString) - })?; - - if !ch.is_alphanumeric() { - break; + let is_variable_width = curr_ch == '{'; + if is_variable_width { + _ = self.next_char(); + loop { + let (idx, ch) = self.next_char().ok_or_else(|| { + Spanning::zero_width(&self.position, LexerError::UnterminatedString) + })?; + curr_ch = ch; + end_idx = idx; + len += 1; + if !curr_ch.is_alphanumeric() { + break; + } + } + } else { + for _ in 0..4 { + let (idx, ch) = self.next_char().ok_or_else(|| { + Spanning::zero_width(&self.position, LexerError::UnterminatedString) + })?; + curr_ch = ch; + if !curr_ch.is_alphanumeric() { + break; + } + end_idx = idx; + len += 1; } - - end_idx = idx; - len += 1; } // Make sure we are on a valid char boundary. 
@@ -321,28 +487,44 @@ impl<'a> Lexer<'a> { .get(start_idx..=end_idx) .ok_or_else(|| Spanning::zero_width(&self.position, LexerError::UnterminatedString))?; - if len != 4 { - return Err(Spanning::zero_width( - start_pos, - LexerError::UnknownEscapeSequence(format!("\\u{escape}")), - )); + let code_point = if is_variable_width { + if curr_ch != '}' { + return Err(Spanning::zero_width( + start_pos, + LexerError::UnknownEscapeSequence(format!( + r"\u{}", + &escape[..escape.len() - 1], + )), + )); + } + // `\u{10FFFF}` is max code point + if escape.len() - 2 > 6 { + return Err(Spanning::zero_width( + start_pos, + LexerError::UnknownEscapeSequence(format!(r"\u{}", &escape[..escape.len()])), + )); + } + u32::from_str_radix(&escape[1..escape.len() - 1], 16) + } else { + if len != 4 { + return Err(Spanning::zero_width( + start_pos, + LexerError::UnknownEscapeSequence(format!(r"\u{escape}")), + )); + } + u32::from_str_radix(escape, 16) } - - let code_point = u32::from_str_radix(escape, 16).map_err(|_| { + .map_err(|_| { Spanning::zero_width( start_pos, - LexerError::UnknownEscapeSequence(format!("\\u{escape}")), + LexerError::UnknownEscapeSequence(format!(r"\u{escape}")), ) })?; - char::from_u32(code_point) - .ok_or_else(|| { - Spanning::zero_width( - start_pos, - LexerError::UnknownEscapeSequence("\\u".to_owned() + escape), - ) - }) - .map(|_| ()) + Ok(UnicodeCodePoint { + code: code_point, + is_variable_width, + }) } fn scan_number(&mut self) -> LexerResult<'a> { @@ -480,7 +662,15 @@ impl<'a> Iterator for Lexer<'a> { Some('@') => Ok(self.emit_single_char(Token::At)), Some('|') => Ok(self.emit_single_char(Token::Pipe)), Some('.') => self.scan_ellipsis(), - Some('"') => self.scan_string(), + Some('"') => { + if self.iterator.peek_nth(1).map(|&(_, ch)| ch) == Some('"') + && self.iterator.peek_nth(2).map(|&(_, ch)| ch) == Some('"') + { + self.scan_block_string() + } else { + self.scan_string() + } + } Some(ch) => { if is_number_start(ch) { self.scan_number() @@ -501,10 +691,6 
@@ impl<'a> Iterator for Lexer<'a> { } } -fn is_source_char(c: char) -> bool { - c == '\t' || c == '\n' || c == '\r' || c >= ' ' -} - fn is_name_start(c: char) -> bool { c == '_' || c.is_ascii_alphabetic() } @@ -516,3 +702,1159 @@ fn is_name_cont(c: char) -> bool { fn is_number_start(c: char) -> bool { c == '-' || c.is_ascii_digit() } + +/// Representation of a [Unicode code point]. +/// +/// This is different from a [Unicode scalar value] (aka "character") represented by a [`char`], +/// because can denote a [surrogate code point]. +/// +/// [surrogate code point]: https://unicode.org/glossary#surrogate_code_point +/// [Unicode code point]: https://unicode.org/glossary#code_point +/// [Unicode scalar value]: https://unicode.org/glossary#unicode_scalar_value +#[derive(Clone, Copy, Debug)] +pub(crate) struct UnicodeCodePoint { + /// Code representing this [`UnicodeCodePoint`]. + pub(crate) code: u32, + + /// Indicator whether this [`UnicodeCodePoint`] should be [`Display`]ed in variable-width form. + pub(crate) is_variable_width: bool, +} + +impl Display for UnicodeCodePoint { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.is_variable_width { + write!(f, r"\u{{{:X}}}", self.code) + } else { + write!(f, r"\u{:04X}", self.code) + } + } +} + +impl UnicodeCodePoint { + /// Indicates whether this [`UnicodeCodePoint`] is a high (leading) [surrogate]. + /// + /// [surrogate]: https://unicode.org/glossary#surrogate_code_point + pub(crate) fn is_high_surrogate(self) -> bool { + (0xD800..=0xDBFF).contains(&self.code) + } + + /// Indicates whether this [`UnicodeCodePoint`] is a low (trailing) [surrogate]. + /// + /// [surrogate]: https://unicode.org/glossary#surrogate_code_point + pub(crate) fn is_low_surrogate(self) -> bool { + (0xDC00..=0xDFFF).contains(&self.code) + } + + /// Joins a [`UnicodeCodePoint`] from the provided [surrogate pair][0]. 
+ /// + /// [0]: https://unicodebook.readthedocs.io/unicode_encodings.html#utf-16-surrogate-pairs + pub(crate) fn from_surrogate_pair(high: Self, low: Self) -> Self { + debug_assert!(high.is_high_surrogate(), "`{high}` is not a high surrogate"); + debug_assert!(low.is_low_surrogate(), "`{low}` is not a low surrogate"); + Self { + code: 0x10000 + ((high.code & 0x03FF) << 10) + (low.code & 0x03FF), + is_variable_width: true, + } + } + + /// Tries to convert this [`UnicodeCodePoint`] into a [`char`]. + /// + /// # Errors + /// + /// If this [`UnicodeCodePoint`] doesn't represent a [Unicode scalar value]. + /// + /// [Unicode scalar value]: https://unicode.org/glossary#unicode_scalar_value + pub(crate) fn try_into_char(self) -> Result { + char::from_u32(self.code).ok_or_else(|| LexerError::UnknownEscapeSequence(self.to_string())) + } +} + +#[cfg(test)] +mod test { + use crate::parser::{ + Lexer, LexerError, ScalarToken, SourcePosition, Spanning, + StringLiteral::{Block, Quoted}, + Token, + }; + + #[track_caller] + fn tokenize_to_vec(s: &str) -> Vec>> { + let mut tokens = Vec::new(); + let mut lexer = Lexer::new(s); + + loop { + match lexer.next() { + Some(Ok(t)) => { + let at_eof = t.item == Token::EndOfFile; + tokens.push(t); + if at_eof { + break; + } + } + Some(Err(e)) => panic!("error in input stream: {e} for {s:#?}"), + None => panic!("EOF before `Token::EndOfFile` in {s:#?}"), + } + } + + tokens + } + + #[track_caller] + fn tokenize_single(s: &str) -> Spanning> { + let mut tokens = tokenize_to_vec(s); + + assert_eq!(tokens.len(), 2); + assert_eq!(tokens[1].item, Token::EndOfFile); + + tokens.remove(0) + } + + #[track_caller] + fn tokenize_error(s: &str) -> Spanning { + let mut lexer = Lexer::new(s); + + loop { + match lexer.next() { + Some(Ok(t)) => { + if t.item == Token::EndOfFile { + panic!("lexer did not return error for {s:#?}"); + } + } + Some(Err(e)) => { + return e; + } + None => panic!("lexer did not return error for {s:#?}"), + } + } + } + + #[test] + 
fn empty_source() { + assert_eq!( + tokenize_to_vec(""), + vec![Spanning::zero_width( + &SourcePosition::new_origin(), + Token::EndOfFile, + )] + ); + } + + #[test] + fn disallow_control_codes() { + assert_eq!( + Lexer::new("\u{0007}").next(), + Some(Err(Spanning::zero_width( + &SourcePosition::new_origin(), + LexerError::UnknownCharacter('\u{0007}'), + ))) + ); + } + + #[test] + fn skip_whitespace() { + assert_eq!( + tokenize_to_vec( + r#" + + foo + + "# + ), + vec![ + Spanning::start_end( + &SourcePosition::new(14, 2, 12), + &SourcePosition::new(17, 2, 15), + Token::Name("foo"), + ), + Spanning::zero_width(&SourcePosition::new(31, 4, 12), Token::EndOfFile), + ] + ); + } + + #[test] + fn skip_comments() { + assert_eq!( + tokenize_to_vec( + r#" + #comment + foo#comment + "# + ), + vec![ + Spanning::start_end( + &SourcePosition::new(34, 2, 12), + &SourcePosition::new(37, 2, 15), + Token::Name("foo"), + ), + Spanning::zero_width(&SourcePosition::new(58, 3, 12), Token::EndOfFile), + ] + ); + } + + #[test] + fn skip_commas() { + assert_eq!( + tokenize_to_vec(r#",,,foo,,,"#), + vec![ + Spanning::start_end( + &SourcePosition::new(3, 0, 3), + &SourcePosition::new(6, 0, 6), + Token::Name("foo"), + ), + Spanning::zero_width(&SourcePosition::new(9, 0, 9), Token::EndOfFile), + ] + ); + } + + #[test] + fn error_positions() { + assert_eq!( + Lexer::new( + r#" + + ? 
+ + "#, + ) + .next(), + Some(Err(Spanning::zero_width( + &SourcePosition::new(14, 2, 12), + LexerError::UnknownCharacter('?'), + ))), + ); + } + + #[test] + fn strings() { + assert_eq!( + tokenize_single(r#""simple""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(8, 0, 8), + Token::Scalar(ScalarToken::String(Quoted(r#""simple""#))), + ), + ); + + assert_eq!( + tokenize_single(r#"" white space ""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(15, 0, 15), + Token::Scalar(ScalarToken::String(Quoted(r#"" white space ""#))), + ), + ); + + assert_eq!( + tokenize_single(r#""quote \"""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(10, 0, 10), + Token::Scalar(ScalarToken::String(Quoted(r#""quote \"""#))), + ), + ); + + assert_eq!( + tokenize_single(r#""escaped \n\r\b\t\f""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(20, 0, 20), + Token::Scalar(ScalarToken::String(Quoted(r#""escaped \n\r\b\t\f""#))), + ), + ); + + assert_eq!( + tokenize_single(r#""slashes \\ \/""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(15, 0, 15), + Token::Scalar(ScalarToken::String(Quoted(r#""slashes \\ \/""#))), + ), + ); + + assert_eq!( + tokenize_single(r#""unicode \u1234\u5678\u90AB\uCDEF""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(34, 0, 34), + Token::Scalar(ScalarToken::String(Quoted( + r#""unicode \u1234\u5678\u90AB\uCDEF""#, + ))), + ), + ); + + assert_eq!( + tokenize_single(r#""variable-width unicode \u{1234}\u{5678}\u{90AB}\u{1F4A9}""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(58, 0, 58), + Token::Scalar(ScalarToken::String(Quoted( + r#""variable-width unicode \u{1234}\u{5678}\u{90AB}\u{1F4A9}""#, + ))), + ), + ); + + assert_eq!( + tokenize_single(r#""string with unicode escape outside BMP \u{1F600}""#), + Spanning::start_end( + 
&SourcePosition::new(0, 0, 0), + &SourcePosition::new(50, 0, 50), + Token::Scalar(ScalarToken::String(Quoted( + r#""string with unicode escape outside BMP \u{1F600}""#, + ))), + ), + ); + + assert_eq!( + tokenize_single(r#""string with minimal unicode escape \u{0}""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(42, 0, 42), + Token::Scalar(ScalarToken::String(Quoted( + r#""string with minimal unicode escape \u{0}""#, + ))), + ), + ); + + assert_eq!( + tokenize_single(r#""string with maximal unicode escape \u{10FFFF}""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(47, 0, 47), + Token::Scalar(ScalarToken::String(Quoted( + r#""string with maximal unicode escape \u{10FFFF}""#, + ))), + ), + ); + + assert_eq!( + tokenize_single(r#""string with maximal minimal unicode escape \u{000000}""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(55, 0, 55), + Token::Scalar(ScalarToken::String(Quoted( + r#""string with maximal minimal unicode escape \u{000000}""#, + ))), + ), + ); + + assert_eq!( + tokenize_single(r#""string with unicode surrogate pair escape \uD83D\uDE00""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(56, 0, 56), + Token::Scalar(ScalarToken::String(Quoted( + r#""string with unicode surrogate pair escape \uD83D\uDE00""#, + ))), + ), + ); + + assert_eq!( + tokenize_single(r#""string with minimal surrogate pair escape \uD800\uDC00""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(56, 0, 56), + Token::Scalar(ScalarToken::String(Quoted( + r#""string with minimal surrogate pair escape \uD800\uDC00""#, + ))), + ), + ); + + assert_eq!( + tokenize_single(r#""string with maximal surrogate pair escape \uDBFF\uDFFF""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(56, 0, 56), + Token::Scalar(ScalarToken::String(Quoted( + r#""string with maximal surrogate pair escape 
\uDBFF\uDFFF""#, + ))), + ), + ); + + assert_eq!( + tokenize_single("\"contains unescaped \u{0007} control char\""), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(35, 0, 35), + Token::Scalar(ScalarToken::String(Quoted( + "\"contains unescaped \u{0007} control char\"", + ))), + ), + ); + + assert_eq!( + tokenize_single("\"null-byte is not \u{0000} end of file\""), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(32, 0, 32), + Token::Scalar(ScalarToken::String(Quoted( + "\"null-byte is not \u{0000} end of file\"", + ))), + ), + ); + } + + #[test] + fn string_errors() { + assert_eq!( + tokenize_error(r#"""#), + Spanning::zero_width( + &SourcePosition::new(1, 0, 1), + LexerError::UnterminatedString, + ), + ); + + assert_eq!( + tokenize_error(r#""no end quote"#), + Spanning::zero_width( + &SourcePosition::new(13, 0, 13), + LexerError::UnterminatedString, + ), + ); + + assert_eq!( + tokenize_error("\"multi\nline\""), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnterminatedString, + ), + ); + + assert_eq!( + tokenize_error("\"multi\rline\""), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnterminatedString, + ), + ); + + assert_eq!( + tokenize_error(r#""bad \z esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\z".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \x esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\x".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u1 esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u1".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u0XX1 esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u0XX1".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \uXXXX esc""#), + 
Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\uXXXX".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \uFXXX esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\uFXXX".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \uXXXF esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\uXXXF".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u{110000} esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u{110000}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u{FXXX} esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u{FXXX}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u{FFFF esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u{FFFF".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u{FFF esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u{FFF".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u{FFFF""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u{FFFF".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u{} esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u{}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""too high \u{110000} esc""#), + Spanning::zero_width( + &SourcePosition::new(11, 0, 11), + LexerError::UnknownEscapeSequence(r"\u{110000}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""way too high \u{12345678} esc""#), + Spanning::zero_width( + &SourcePosition::new(15, 0, 15), + LexerError::UnknownEscapeSequence(r"\u{12345678}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""too 
long \u{000000000} esc""#), + Spanning::zero_width( + &SourcePosition::new(11, 0, 11), + LexerError::UnknownEscapeSequence(r"\u{000000000}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad surrogate \uDEAD esc""#), + Spanning::zero_width( + &SourcePosition::new(16, 0, 16), + LexerError::UnknownEscapeSequence(r"\uDEAD".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad surrogate \u{DEAD} esc""#), + Spanning::zero_width( + &SourcePosition::new(16, 0, 16), + LexerError::UnknownEscapeSequence(r"\u{DEAD}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad high surrogate pair \uDEAD\uDEAD esc""#), + Spanning::zero_width( + &SourcePosition::new(26, 0, 26), + LexerError::UnknownEscapeSequence(r"\uDEAD".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad low surrogate pair \uD800\uD800 esc""#), + Spanning::zero_width( + &SourcePosition::new(25, 0, 25), + LexerError::UnknownEscapeSequence(r"\uD800".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""unterminated in string \""#), + Spanning::zero_width( + &SourcePosition::new(26, 0, 26), + LexerError::UnterminatedString, + ), + ); + + assert_eq!( + tokenize_error(r#""unterminated \"#), + Spanning::zero_width( + &SourcePosition::new(15, 0, 15), + LexerError::UnterminatedString, + ), + ); + + // Found by fuzzing. 
+ assert_eq!( + tokenize_error(r#""\uɠ^A"#), + Spanning::zero_width( + &SourcePosition::new(5, 0, 5), + LexerError::UnterminatedString, + ), + ); + } + + #[test] + fn block_strings() { + assert_eq!( + tokenize_single(r#""""""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(6, 0, 6), + Token::Scalar(ScalarToken::String(Block(r#""""""""#))), + ), + ); + assert_eq!( + tokenize_single(r#""""simple""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(12, 0, 12), + Token::Scalar(ScalarToken::String(Block(r#""""simple""""#))), + ), + ); + assert_eq!( + tokenize_single(r#"""" white space """"#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(19, 0, 19), + Token::Scalar(ScalarToken::String(Block(r#"""" white space """"#))), + ), + ); + assert_eq!( + tokenize_single(r#""""contains " quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(22, 0, 22), + Token::Scalar(ScalarToken::String(Block(r#""""contains " quote""""#))), + ), + ); + assert_eq!( + tokenize_single(r#""""contains \""" triple quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(32, 0, 32), + Token::Scalar(ScalarToken::String(Block( + r#""""contains \""" triple quote""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""contains \"" double quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(31, 0, 31), + Token::Scalar(ScalarToken::String(Block( + r#""""contains \"" double quote""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""contains \\""" triple quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(33, 0, 33), + Token::Scalar(ScalarToken::String(Block( + r#""""contains \\""" triple quote""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""\"""quote" """"#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + 
&SourcePosition::new(17, 0, 17), + Token::Scalar(ScalarToken::String(Block(r#""""\"""quote" """"#))), + ), + ); + assert_eq!( + tokenize_single(r#""""multi\nline""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(17, 0, 17), + Token::Scalar(ScalarToken::String(Block(r#""""multi\nline""""#))), + ), + ); + assert_eq!( + tokenize_single(r#""""multi\rline\r\nnormalized""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(31, 0, 31), + Token::Scalar(ScalarToken::String(Block( + r#""""multi\rline\r\nnormalized""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""unescaped \\n\\r\\b\\t\\f\\u1234""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(38, 0, 38), + Token::Scalar(ScalarToken::String(Block( + r#""""unescaped \\n\\r\\b\\t\\f\\u1234""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""unescaped unicode outside BMP \u{1f600}""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(45, 0, 45), + Token::Scalar(ScalarToken::String(Block( + r#""""unescaped unicode outside BMP \u{1f600}""""#, + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""slashes \\\\ \\/""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(22, 0, 22), + Token::Scalar(ScalarToken::String(Block(r#""""slashes \\\\ \\/""""#))), + ), + ); + assert_eq!( + tokenize_single( + r#"""" + + spans + multiple + lines + + """"#, + ), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(76, 6, 11), + Token::Scalar(ScalarToken::String(Block( + r#"""" + + spans + multiple + lines + + """"#, + ))), + ), + ); + } + + #[test] + fn block_string_errors() { + assert_eq!( + tokenize_error(r#""""""#), + Spanning::zero_width( + &SourcePosition::new(4, 0, 4), + LexerError::UnterminatedBlockString, + ), + ); + assert_eq!( + tokenize_error(r#"""""""#), + Spanning::zero_width( + &SourcePosition::new(5, 0, 5), + 
LexerError::UnterminatedBlockString, + ), + ); + assert_eq!( + tokenize_error(r#""""no end quote"#), + Spanning::zero_width( + &SourcePosition::new(15, 0, 15), + LexerError::UnterminatedBlockString, + ), + ); + } + + #[test] + fn numbers() { + fn assert_float_token_eq( + source: &str, + start: SourcePosition, + end: SourcePosition, + expected: &str, + ) { + let parsed = tokenize_single(source); + assert_eq!(parsed.span.start, start); + assert_eq!(parsed.span.end, end); + + match parsed.item { + Token::Scalar(ScalarToken::Float(actual)) => assert_eq!(actual, expected), + _ => assert!(false), + } + } + + assert_eq!( + tokenize_single("4"), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(1, 0, 1), + Token::Scalar(ScalarToken::Int("4")) + ) + ); + + assert_float_token_eq( + "4.123", + SourcePosition::new(0, 0, 0), + SourcePosition::new(5, 0, 5), + "4.123", + ); + + assert_float_token_eq( + "4.0", + SourcePosition::new(0, 0, 0), + SourcePosition::new(3, 0, 3), + "4.0", + ); + + assert_eq!( + tokenize_single("-4"), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(2, 0, 2), + Token::Scalar(ScalarToken::Int("-4")), + ) + ); + + assert_eq!( + tokenize_single("9"), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(1, 0, 1), + Token::Scalar(ScalarToken::Int("9")), + ) + ); + + assert_eq!( + tokenize_single("0"), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(1, 0, 1), + Token::Scalar(ScalarToken::Int("0")), + ) + ); + + assert_float_token_eq( + "-4.123", + SourcePosition::new(0, 0, 0), + SourcePosition::new(6, 0, 6), + "-4.123", + ); + + assert_float_token_eq( + "0.123", + SourcePosition::new(0, 0, 0), + SourcePosition::new(5, 0, 5), + "0.123", + ); + + assert_float_token_eq( + "123e4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(5, 0, 5), + "123e4", + ); + + assert_float_token_eq( + "123E4", + SourcePosition::new(0, 0, 0), + 
SourcePosition::new(5, 0, 5), + "123E4", + ); + + assert_float_token_eq( + "123e-4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(6, 0, 6), + "123e-4", + ); + + assert_float_token_eq( + "123e+4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(6, 0, 6), + "123e+4", + ); + + assert_float_token_eq( + "-1.123e4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(8, 0, 8), + "-1.123e4", + ); + + assert_float_token_eq( + "-1.123E4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(8, 0, 8), + "-1.123E4", + ); + + assert_float_token_eq( + "-1.123e-4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(9, 0, 9), + "-1.123e-4", + ); + + assert_float_token_eq( + "-1.123e+4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(9, 0, 9), + "-1.123e+4", + ); + + assert_float_token_eq( + "-1.123e45", + SourcePosition::new(0, 0, 0), + SourcePosition::new(9, 0, 9), + "-1.123e45", + ); + } + + #[test] + fn numbers_errors() { + assert_eq!( + tokenize_error("00"), + Spanning::zero_width( + &SourcePosition::new(1, 0, 1), + LexerError::UnexpectedCharacter('0'), + ) + ); + + assert_eq!( + tokenize_error("+1"), + Spanning::zero_width( + &SourcePosition::new(0, 0, 0), + LexerError::UnknownCharacter('+'), + ) + ); + + assert_eq!( + tokenize_error("1."), + Spanning::zero_width( + &SourcePosition::new(2, 0, 2), + LexerError::UnexpectedEndOfFile, + ) + ); + + assert_eq!( + tokenize_error(".123"), + Spanning::zero_width( + &SourcePosition::new(0, 0, 0), + LexerError::UnexpectedCharacter('.'), + ) + ); + + assert_eq!( + tokenize_error("1.A"), + Spanning::zero_width( + &SourcePosition::new(2, 0, 2), + LexerError::UnexpectedCharacter('A'), + ) + ); + + assert_eq!( + tokenize_error("-A"), + Spanning::zero_width( + &SourcePosition::new(1, 0, 1), + LexerError::UnexpectedCharacter('A'), + ) + ); + + assert_eq!( + tokenize_error("1.0e"), + Spanning::zero_width( + &SourcePosition::new(4, 0, 4), + LexerError::UnexpectedEndOfFile, + ) + ); + + assert_eq!( + 
tokenize_error("1.0eA"), + Spanning::zero_width( + &SourcePosition::new(4, 0, 4), + LexerError::UnexpectedCharacter('A'), + ) + ); + } + + #[test] + fn punctuation() { + assert_eq!( + tokenize_single("!"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ExclamationMark), + ); + + assert_eq!( + tokenize_single("$"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Dollar), + ); + + assert_eq!( + tokenize_single("("), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ParenOpen), + ); + + assert_eq!( + tokenize_single(")"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ParenClose), + ); + + assert_eq!( + tokenize_single("..."), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(3, 0, 3), + Token::Ellipsis, + ) + ); + + assert_eq!( + tokenize_single(":"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Colon), + ); + + assert_eq!( + tokenize_single("="), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Equals), + ); + + assert_eq!( + tokenize_single("@"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::At), + ); + + assert_eq!( + tokenize_single("["), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::BracketOpen), + ); + + assert_eq!( + tokenize_single("]"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::BracketClose), + ); + + assert_eq!( + tokenize_single("{"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::CurlyOpen), + ); + + assert_eq!( + tokenize_single("}"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::CurlyClose), + ); + + assert_eq!( + tokenize_single("|"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Pipe), + ); + } + + #[test] + fn punctuation_error() { + assert_eq!( + tokenize_error(".."), + Spanning::zero_width( + &SourcePosition::new(2, 0, 2), + LexerError::UnexpectedEndOfFile, + ) + ); + + assert_eq!( + tokenize_error("?"), + 
Spanning::zero_width( + &SourcePosition::new(0, 0, 0), + LexerError::UnknownCharacter('?'), + ) + ); + + assert_eq!( + tokenize_error("\u{203b}"), + Spanning::zero_width( + &SourcePosition::new(0, 0, 0), + LexerError::UnknownCharacter('\u{203b}'), + ) + ); + + assert_eq!( + tokenize_error("\u{200b}"), + Spanning::zero_width( + &SourcePosition::new(0, 0, 0), + LexerError::UnknownCharacter('\u{200b}'), + ) + ); + } + + #[test] + fn display() { + for (input, expected) in [ + (Token::Name("identifier"), "identifier"), + (Token::Scalar(ScalarToken::Int("123")), "123"), + (Token::Scalar(ScalarToken::Float("4.5")), "4.5"), + ( + Token::Scalar(ScalarToken::String(Quoted(r#""some string""#))), + r#""some string""#, + ), + ( + Token::Scalar(ScalarToken::String(Quoted( + r#""string with \\ escape and \" quote""#, + ))), + r#""string with \\ escape and \" quote""#, + ), + ( + Token::Scalar(ScalarToken::String(Block( + r#""""string with \\ escape and \" quote""""#, + ))), + r#""""string with \\ escape and \" quote""""#, + ), + ( + Token::Scalar(ScalarToken::String(Block( + r#""""block string with \\ escape and \" quote""""#, + ))), + r#""""block string with \\ escape and \" quote""""#, + ), + ( + Token::Scalar(ScalarToken::String(Block( + r#""""block + multiline + string"""#, + ))), + r#""""block + multiline + string"""#, + ), + (Token::ExclamationMark, "!"), + (Token::Dollar, "$"), + (Token::ParenOpen, "("), + (Token::ParenClose, ")"), + (Token::BracketOpen, "["), + (Token::BracketClose, "]"), + (Token::CurlyOpen, "{"), + (Token::CurlyClose, "}"), + (Token::Ellipsis, "..."), + (Token::Colon, ":"), + (Token::Equals, "="), + (Token::At, "@"), + (Token::Pipe, "|"), + ] { + assert_eq!(input.to_string(), expected); + } + } +} diff --git a/juniper/src/parser/mod.rs b/juniper/src/parser/mod.rs index eb2d0d3c1..038864d15 100644 --- a/juniper/src/parser/mod.rs +++ b/juniper/src/parser/mod.rs @@ -12,8 +12,9 @@ mod tests; pub use self::document::parse_document_source; +pub(crate) use 
self::lexer::UnicodeCodePoint; pub use self::{ - lexer::{Lexer, LexerError, ScalarToken, Token}, + lexer::{Lexer, LexerError, ScalarToken, StringLiteral, Token}, parser::{OptionParseResult, ParseError, ParseResult, Parser, UnlocatedParseResult}, utils::{SourcePosition, Span, Spanning}, }; diff --git a/juniper/src/parser/parser.rs b/juniper/src/parser/parser.rs index 2459df205..d2a2dadc9 100644 --- a/juniper/src/parser/parser.rs +++ b/juniper/src/parser/parser.rs @@ -1,12 +1,14 @@ -use std::fmt; +use std::{borrow::Cow, fmt, iter}; use compact_str::{CompactString, format_compact}; -use derive_more::with_trait::{Display, Error}; +use derive_more::with_trait::{Display, Error, From}; -use crate::parser::{Lexer, LexerError, Spanning, Token}; +use crate::parser::{ + Lexer, LexerError, ScalarToken, Spanning, StringLiteral, Token, UnicodeCodePoint, +}; /// Error while parsing a GraphQL query -#[derive(Clone, Debug, Display, Eq, Error, PartialEq)] +#[derive(Clone, Debug, Display, Eq, Error, From, PartialEq)] pub enum ParseError { /// An unexpected token occurred in the source // TODO: Previously was `Token<'a>`. @@ -19,6 +21,7 @@ pub enum ParseError { UnexpectedEndOfFile, /// An error during tokenization occurred + #[from] LexerError(LexerError), /// A scalar of unexpected type occurred in the source @@ -199,3 +202,399 @@ impl<'a> Parser<'a> { } } } + +impl<'a> StringLiteral<'a> { + /// Parses this [`StringLiteral`] returning an unescaped and unquoted string value. + /// + /// # Errors + /// + /// If this [`StringLiteral`] is invalid. 
+ pub fn parse(self) -> Result, ParseError> { + match self { + Self::Quoted(lit) => { + if !lit.starts_with('"') { + return Err(ParseError::unexpected_token(Token::Scalar( + ScalarToken::String(self), + ))); + } + if !lit.ends_with('"') { + return Err(LexerError::UnterminatedString.into()); + } + + let unquoted = &lit[1..lit.len() - 1]; + if !unquoted.contains('\\') { + return Ok(unquoted.into()); + } + + let mut unescaped = String::with_capacity(unquoted.len()); + let mut char_iter = unquoted.chars(); + while let Some(ch) = char_iter.next() { + match ch { + // StringCharacter :: + // SourceCharacter but not " or \ or LineTerminator + // \uEscapedUnicode + // \EscapedCharacter + '\\' => match char_iter.next() { + // EscapedCharacter :: one of + // " \ / b f n r t + Some('"') => unescaped.push('"'), + Some('\\') => unescaped.push('\\'), + Some('/') => unescaped.push('/'), + Some('b') => unescaped.push('\u{0008}'), + Some('f') => unescaped.push('\u{000C}'), + Some('n') => unescaped.push('\n'), + Some('r') => unescaped.push('\r'), + Some('t') => unescaped.push('\t'), + // EscapedUnicode :: + // {HexDigit[list]} + // HexDigit HexDigit HexDigit HexDigit + Some('u') => { + let mut code_point = + UnicodeCodePoint::parse_escaped(&mut char_iter)?; + if code_point.is_high_surrogate() { + let (Some('\\'), Some('u')) = + (char_iter.next(), char_iter.next()) + else { + return Err(LexerError::UnknownEscapeSequence( + code_point.to_string(), + ) + .into()); + }; + + let trailing_code_point = + UnicodeCodePoint::parse_escaped(&mut char_iter)?; + if !trailing_code_point.is_low_surrogate() { + return Err(LexerError::UnknownEscapeSequence( + code_point.to_string(), + ) + .into()); + } + code_point = UnicodeCodePoint::from_surrogate_pair( + code_point, + trailing_code_point, + ); + } + unescaped.push(code_point.try_into_char()?); + } + Some(s) => { + return Err( + LexerError::UnknownEscapeSequence(format!(r"\{s}")).into() + ); + } + None => { + return 
Err(LexerError::UnterminatedString.into()); + } + }, + ch => { + unescaped.push(ch); + } + } + } + Ok(unescaped.into()) + } + Self::Block(lit) => { + if !lit.starts_with(r#"""""#) { + return Err(ParseError::unexpected_token(Token::Scalar( + ScalarToken::String(self), + ))); + } + if !lit.ends_with(r#"""""#) { + return Err(LexerError::UnterminatedBlockString.into()); + } + + let unquoted = &lit[3..lit.len() - 3]; + + let (mut indent, mut total_lines) = (usize::MAX, 0); + let (mut first_text_line, mut last_text_line) = (None, 0); + for (n, line) in unquoted.lines().enumerate() { + total_lines += 1; + + let trimmed = line.trim_start(); + if trimmed.is_empty() { + continue; + } + + _ = first_text_line.get_or_insert(n); + last_text_line = n; + + if n != 0 { + indent = indent.min(line.len() - trimmed.len()); + } + } + + let Some(first_text_line) = first_text_line else { + return Ok("".into()); // no text, only whitespaces + }; + if (indent == 0 || total_lines == 1) && !unquoted.contains(r#"\""""#) { + return Ok(unquoted.into()); // nothing to dedent or unescape + } + + let mut unescaped = String::with_capacity(unquoted.len()); + let mut lines = unquoted + .lines() + .enumerate() + .skip(first_text_line) + .take(last_text_line - first_text_line + 1) + .map(|(n, line)| { + if n != 0 && line.len() >= indent { + &line[indent..] + } else { + line + } + }) + .map(|x| x.replace(r#"\""""#, r#"""""#)); + if let Some(line) = lines.next() { + unescaped.push_str(&line); + for line in lines { + unescaped.push('\n'); + unescaped.push_str(&line); + } + } + Ok(unescaped.into()) + } + } + } +} + +impl UnicodeCodePoint { + /// Parses a [`UnicodeCodePoint`] from an [escaped] value in the provided [`Iterator`]. 
+ /// + /// [escaped]: https://spec.graphql.org/September2025#EscapedUnicode + pub(crate) fn parse_escaped( + char_iter: &mut impl Iterator, + ) -> Result { + // EscapedUnicode :: + // {HexDigit[list]} + // HexDigit HexDigit HexDigit HexDigit + + let Some(mut curr_ch) = char_iter.next() else { + return Err(LexerError::UnknownEscapeSequence(r"\u".into()).into()); + }; + let mut escaped_code_point = String::with_capacity(6); // `\u{10FFFF}` is max code point + + let is_variable_width = curr_ch == '{'; + if is_variable_width { + loop { + curr_ch = char_iter.next().ok_or_else(|| { + LexerError::UnknownEscapeSequence(format!(r"\u{{{escaped_code_point}")) + })?; + if curr_ch == '}' { + break; + } else if !curr_ch.is_alphanumeric() { + return Err(LexerError::UnknownEscapeSequence(format!( + r"\u{{{escaped_code_point}" + )) + .into()); + } + escaped_code_point.push(curr_ch); + } + } else { + let mut char_iter = iter::once(curr_ch).chain(char_iter); + for _ in 0..4 { + curr_ch = char_iter.next().ok_or_else(|| { + LexerError::UnknownEscapeSequence(format!(r"\u{escaped_code_point}")) + })?; + if !curr_ch.is_alphanumeric() { + return Err(LexerError::UnknownEscapeSequence(format!( + r"\u{escaped_code_point}" + )) + .into()); + } + escaped_code_point.push(curr_ch); + } + } + + let Ok(code) = u32::from_str_radix(&escaped_code_point, 16) else { + return Err(LexerError::UnknownEscapeSequence(if is_variable_width { + format!(r"\u{{{escaped_code_point}}}") + } else { + format!(r"\u{escaped_code_point}") + }) + .into()); + }; + + Ok(Self { + code, + is_variable_width, + }) + } +} + +#[cfg(test)] +mod string_literal_tests { + use super::StringLiteral; + + #[test] + fn quoted() { + for (input, expected) in [ + (r#""""#, ""), + (r#""simple""#, "simple"), + (r#"" white space ""#, " white space "), + (r#""quote \"""#, r#"quote ""#), + (r#""escaped \n\r\b\t\f""#, "escaped \n\r\u{0008}\t\u{000c}"), + (r#""slashes \\ \/""#, r"slashes \ /"), + ( + r#""unicode \u1234\u5678\u90AB\uCDEF""#, + 
"unicode \u{1234}\u{5678}\u{90ab}\u{cdef}", + ), + ( + r#""string with unicode escape outside BMP \u{1F600}""#, + "string with unicode escape outside BMP \u{1F600}", + ), + ( + r#""string with minimal unicode escape \u{0}""#, + "string with minimal unicode escape \u{0}", + ), + ( + r#""string with maximal unicode escape \u{10FFFF}""#, + "string with maximal unicode escape \u{10FFFF}", + ), + ( + r#""string with maximal minimal unicode escape \u{000000}""#, + "string with maximal minimal unicode escape \u{000000}", + ), + ( + r#""string with unicode surrogate pair escape \uD83D\uDE00""#, + "string with unicode surrogate pair escape \u{1f600}", + ), + ( + r#""string with minimal surrogate pair escape \uD800\uDC00""#, + "string with minimal surrogate pair escape \u{10000}", + ), + ( + r#""string with maximal surrogate pair escape \uDBFF\uDFFF""#, + "string with maximal surrogate pair escape \u{10FFFF}", + ), + ] { + let res = StringLiteral::Quoted(input).parse(); + assert!( + res.is_ok(), + "parsing error occurred on {input}: {}", + res.unwrap_err(), + ); + + assert_eq!(res.unwrap(), expected); + } + } + + #[test] + fn quoted_errors() { + for (input, expected) in [ + ( + r#""bad surrogate \uDEAD""#, + r#"Unknown escape sequence "\uDEAD" in string"#, + ), + ( + r#""bad low surrogate pair \uD800\uD800""#, + r#"Unknown escape sequence "\uD800" in string"#, + ), + ] { + let res = StringLiteral::Quoted(input).parse(); + assert!(res.is_err(), "parsing error doesn't occur on {input}"); + + let err = res.unwrap_err(); + assert!( + err.to_string().contains(expected), + "returned error `{err}` doesn't contain `{expected}`", + ); + } + } + + #[test] + fn block() { + for (input, expected) in [ + (r#""""""""#, ""), + (r#""""simple""""#, "simple"), + (r#"""" white space """"#, " white space "), + (r#""""contains " quote""""#, r#"contains " quote"#), + ( + r#""""contains \""" triple quote""""#, + r#"contains """ triple quote"#, + ), + ( + r#""""contains \"" double quote""""#, + 
r#"contains \"" double quote"#, + ), + ( + r#""""contains \\""" triple quote""""#, + r#"contains \""" triple quote"#, + ), + (r#""""\"""quote" """"#, r#""""quote" "#), + (r#""""multi\nline""""#, r"multi\nline"), + ( + r#""""multi\rline\r\nnormalized""""#, + r"multi\rline\r\nnormalized", + ), + ( + r#""""unescaped \\n\\r\\b\\t\\f\\u1234""""#, + r"unescaped \\n\\r\\b\\t\\f\\u1234", + ), + ( + r#""""unescaped unicode outside BMP \u{1f600}""""#, + r"unescaped unicode outside BMP \u{1f600}", + ), + (r#""""slashes \\\\ \\/""""#, r"slashes \\\\ \\/"), + ( + r#"""" + + spans + multiple + lines + + """"#, + "spans\n multiple\n lines", + ), + // removes uniform indentation + ( + r#"""" + Hello, + World! + + Yours, + GraphQL.""""#, + "Hello,\n World!\n\nYours,\n GraphQL.", + ), + // removes empty leading and trailing lines + ( + r#"""" + + Hello, + World! + + Yours, + GraphQL. + + """"#, + "Hello,\n World!\n\nYours,\n GraphQL.", + ), + // retains indentation from first line + ( + r#"""" Hello, + World! + + Yours, + GraphQL.""""#, + " Hello,\n World!\n\nYours,\n GraphQL.", + ), + // does not alter trailing spaces + ( + r#"""" + Hello, + World! + + Yours, + GraphQL. """"#, + "Hello,\n World!\n\nYours,\n GraphQL. 
", + ), + ] { + let res = StringLiteral::Block(input).parse(); + assert!( + res.is_ok(), + "parsing error occurred on {input}: {}", + res.unwrap_err(), + ); + + assert_eq!(res.unwrap(), expected); + } + } +} diff --git a/juniper/src/parser/tests/document.rs b/juniper/src/parser/tests/document.rs index 42ecfd202..345e1255a 100644 --- a/juniper/src/parser/tests/document.rs +++ b/juniper/src/parser/tests/document.rs @@ -1,6 +1,7 @@ +use std::borrow::Cow; + use crate::{ - ast::{Arguments, Definition, Field, Operation, OperationType, OwnedDocument, Selection}, - graphql, + ast, graphql, parser::{ParseError, SourcePosition, Spanning, Token, document::parse_document_source}, schema::model::SchemaType, types::scalars::{EmptyMutation, EmptySubscription}, @@ -8,7 +9,7 @@ use crate::{ value::{DefaultScalarValue, ScalarValue}, }; -fn parse_document(s: &str) -> OwnedDocument<'_, S> +fn parse_document(s: &str) -> ast::OwnedDocument<'_, S> where S: ScalarValue, { @@ -16,7 +17,7 @@ where s, &SchemaType::new::(&(), &(), &()), ) - .unwrap_or_else(|_| panic!("Parse error on input {s:#?}")) + .unwrap_or_else(|e| panic!("parse error on input {s:#?}:\n{e}")) } fn parse_document_error(s: &str) -> Spanning { @@ -33,6 +34,7 @@ fn parse_document_error(s: &str) -> Spanning { fn simple_ast() { assert_eq!( parse_document::( + // language=GraphQL r#"{ node(id: 4) { id @@ -40,18 +42,19 @@ fn simple_ast() { } }"#, ), - vec![Definition::Operation(Spanning::start_end( + vec![ast::Definition::Operation(Spanning::start_end( &SourcePosition::new(0, 0, 0), &SourcePosition::new(111, 5, 13), - Operation { - operation_type: OperationType::Query, + ast::Operation { + operation_type: ast::OperationType::Query, name: None, + description: None, variable_definitions: None, directives: None, - selection_set: vec![Selection::Field(Spanning::start_end( + selection_set: vec![ast::Selection::Field(Spanning::start_end( &SourcePosition::new(18, 1, 16), &SourcePosition::new(97, 4, 17), - Field { + ast::Field { alias: 
None, name: Spanning::start_end( &SourcePosition::new(18, 1, 16), @@ -61,7 +64,7 @@ fn simple_ast() { arguments: Some(Spanning::start_end( &SourcePosition::new(22, 1, 20), &SourcePosition::new(29, 1, 27), - Arguments { + ast::Arguments { items: vec![( Spanning::start_end( &SourcePosition::new(23, 1, 21), @@ -78,10 +81,10 @@ fn simple_ast() { )), directives: None, selection_set: Some(vec![ - Selection::Field(Spanning::start_end( + ast::Selection::Field(Spanning::start_end( &SourcePosition::new(52, 2, 20), &SourcePosition::new(54, 2, 22), - Field { + ast::Field { alias: None, name: Spanning::start_end( &SourcePosition::new(52, 2, 20), @@ -93,10 +96,10 @@ fn simple_ast() { selection_set: None, }, )), - Selection::Field(Spanning::start_end( + ast::Selection::Field(Spanning::start_end( &SourcePosition::new(75, 3, 20), &SourcePosition::new(79, 3, 24), - Field { + ast::Field { alias: None, name: Spanning::start_end( &SourcePosition::new(75, 3, 20), @@ -116,6 +119,259 @@ fn simple_ast() { ) } +#[test] +fn description() { + assert_eq!( + parse_document::( + // language=GraphQL + r#" + "Some description with \u90AB symbol" + query SomeOperation( + "ID you should provide and \u{90AB} symbol" + $id: String + """ + Switch for experiment .... 
+ Multiline + """ + $enableBaz: Boolean = false + ) { + foo(id: $id) { + bar + baz @include(if: $enableBaz) { + ...BazInfo + } + } + } + + """ + Some block description here + Multiline + """ + fragment BazInfo on Baz { + whatever + } + "#, + ), + vec![ + ast::Definition::Operation(Spanning::start_end( + &SourcePosition::new(71, 2, 16), + &SourcePosition::new(567, 17, 17), + ast::Operation { + operation_type: ast::OperationType::Query, + name: Some(Spanning::start_end( + &SourcePosition::new(77, 2, 22), + &SourcePosition::new(90, 2, 35), + "SomeOperation", + )), + description: Some(Spanning::start_end( + &SourcePosition::new(17, 1, 16), + &SourcePosition::new(54, 1, 53), + Cow::Owned("Some description with \u{90AB} symbol".into()), + )), + variable_definitions: Some(Spanning::start_end( + &SourcePosition::new(90, 2, 35), + &SourcePosition::new(364, 10, 17), + ast::VariableDefinitions { + items: vec![ + ( + Spanning::start_end( + &SourcePosition::new(172, 4, 18), + &SourcePosition::new(175, 4, 21), + "id", + ), + ast::VariableDefinition { + description: Some(Spanning::start_end( + &SourcePosition::new(110, 3, 18), + &SourcePosition::new(153, 3, 61), + Cow::Owned( + "ID you should provide and \u{90AB} symbol".into(), + ), + )), + var_type: Spanning::start_end( + &SourcePosition::new(177, 4, 23), + &SourcePosition::new(183, 4, 29), + ast::Type::nullable("String"), + ), + default_value: None, + directives: None, + }, + ), + ( + Spanning::start_end( + &SourcePosition::new(319, 9, 18), + &SourcePosition::new(329, 9, 28), + "enableBaz", + ), + ast::VariableDefinition { + description: Some(Spanning::start_end( + &SourcePosition::new(202, 5, 18), + &SourcePosition::new(300, 8, 21), + Cow::Borrowed("Switch for experiment ....\nMultiline"), + )), + var_type: Spanning::start_end( + &SourcePosition::new(331, 9, 30), + &SourcePosition::new(338, 9, 37), + ast::Type::nullable("Boolean"), + ), + default_value: Some(Spanning::start_end( + &SourcePosition::new(341, 9, 40), + 
&SourcePosition::new(346, 9, 45), + graphql::input_value!(false), + )), + directives: None, + }, + ) + ], + } + )), + directives: None, + selection_set: vec![ast::Selection::Field(Spanning::start_end( + &SourcePosition::new(385, 11, 18), + &SourcePosition::new(549, 16, 19), + ast::Field { + alias: None, + name: Spanning::start_end( + &SourcePosition::new(385, 11, 18), + &SourcePosition::new(388, 11, 21), + "foo", + ), + arguments: Some(Spanning::start_end( + &SourcePosition::new(388, 11, 21), + &SourcePosition::new(397, 11, 30), + ast::Arguments { + items: vec![( + Spanning::start_end( + &SourcePosition::new(389, 11, 22), + &SourcePosition::new(391, 11, 24), + "id", + ), + Spanning::start_end( + &SourcePosition::new(393, 11, 26), + &SourcePosition::new(396, 11, 29), + graphql::input_value!(@id), + ), + )], + }, + )), + directives: None, + selection_set: Some(vec![ + ast::Selection::Field(Spanning::start_end( + &SourcePosition::new(420, 12, 20), + &SourcePosition::new(423, 12, 23), + ast::Field { + alias: None, + name: Spanning::start_end( + &SourcePosition::new(420, 12, 20), + &SourcePosition::new(423, 12, 23), + "bar", + ), + arguments: None, + directives: None, + selection_set: None, + }, + )), + ast::Selection::Field(Spanning::start_end( + &SourcePosition::new(444, 13, 20), + &SourcePosition::new(529, 15, 21), + ast::Field { + alias: None, + name: Spanning::start_end( + &SourcePosition::new(444, 13, 20), + &SourcePosition::new(447, 13, 23), + "baz", + ), + arguments: None, + directives: Some(vec![Spanning::start_end( + &SourcePosition::new(448, 13, 24), + &SourcePosition::new(472, 13, 48), + ast::Directive { + name: Spanning::start_end( + &SourcePosition::new(449, 13, 25), + &SourcePosition::new(456, 13, 32), + "include", + ), + arguments: Some(Spanning::start_end( + &SourcePosition::new(456, 13, 32), + &SourcePosition::new(472, 13, 48), + ast::Arguments { + items: vec![( + Spanning::start_end( + &SourcePosition::new(457, 13, 33), + &SourcePosition::new(459, 13, 
35), + "if", + ), + Spanning::start_end( + &SourcePosition::new(461, 13, 37), + &SourcePosition::new(471, 13, 47), + graphql::input_value!(@enableBaz), + ), + )], + }, + )), + }, + )]), + selection_set: Some(vec![ast::Selection::FragmentSpread( + Spanning::start_end( + &SourcePosition::new(497, 14, 22), + &SourcePosition::new(507, 14, 32), + ast::FragmentSpread { + name: Spanning::start_end( + &SourcePosition::new(500, 14, 25), + &SourcePosition::new(507, 14, 32), + "BazInfo", + ), + directives: None, + }, + ) + )]), + }, + )), + ]), + }, + ))], + }, + )), + ast::Definition::Fragment(Spanning::start_end( + &SourcePosition::new(695, 23, 16), + &SourcePosition::new(767, 25, 17), + ast::Fragment { + name: Spanning::start_end( + &SourcePosition::new(704, 23, 25), + &SourcePosition::new(711, 23, 32), + "BazInfo", + ), + description: Some(Spanning::start_end( + &SourcePosition::new(585, 19, 16), + &SourcePosition::new(678, 22, 19), + Cow::Borrowed("Some block description here\nMultiline"), + )), + type_condition: Spanning::start_end( + &SourcePosition::new(715, 23, 36), + &SourcePosition::new(718, 23, 39), + "Baz", + ), + directives: None, + selection_set: vec![ast::Selection::Field(Spanning::start_end( + &SourcePosition::new(741, 24, 20), + &SourcePosition::new(749, 24, 28), + ast::Field { + alias: None, + name: Spanning::start_end( + &SourcePosition::new(741, 24, 20), + &SourcePosition::new(749, 24, 28), + "whatever", + ), + arguments: None, + directives: None, + selection_set: None, + }, + ))] + } + )) + ] + ) +} + #[test] fn errors() { assert_eq!( @@ -143,6 +399,17 @@ fn errors() { ParseError::unexpected_token(Token::CurlyClose) ) ); + + // Descriptions are not permitted on query shorthand. 
+ // See: https://spec.graphql.org/September2025#sel-GAFTRJABAByBz7P + assert_eq!( + parse_document_error::(r#""description" { foo }"#), + Spanning::start_end( + &SourcePosition::new(14, 0, 14), + &SourcePosition::new(15, 0, 15), + ParseError::unexpected_token(Token::CurlyOpen) + ), + ); } #[test] diff --git a/juniper/src/parser/tests/lexer.rs b/juniper/src/parser/tests/lexer.rs deleted file mode 100644 index 0eb62ae73..000000000 --- a/juniper/src/parser/tests/lexer.rs +++ /dev/null @@ -1,691 +0,0 @@ -use crate::parser::{Lexer, LexerError, ScalarToken, SourcePosition, Spanning, Token}; - -fn tokenize_to_vec(s: &str) -> Vec>> { - let mut tokens = Vec::new(); - let mut lexer = Lexer::new(s); - - loop { - match lexer.next() { - Some(Ok(t)) => { - let at_eof = t.item == Token::EndOfFile; - tokens.push(t); - if at_eof { - break; - } - } - Some(Err(e)) => panic!("Error in input stream: {e:#?} for {s:#?}"), - None => panic!("EOF before EndOfFile token in {s:#?}"), - } - } - - tokens -} - -fn tokenize_single(s: &str) -> Spanning> { - let mut tokens = tokenize_to_vec(s); - - assert_eq!(tokens.len(), 2); - assert_eq!(tokens[1].item, Token::EndOfFile); - - tokens.remove(0) -} - -fn tokenize_error(s: &str) -> Spanning { - let mut lexer = Lexer::new(s); - - loop { - match lexer.next() { - Some(Ok(t)) => { - if t.item == Token::EndOfFile { - panic!("Tokenizer did not return error for {s:#?}"); - } - } - Some(Err(e)) => { - return e; - } - None => panic!("Tokenizer did not return error for {s:#?}"), - } - } -} - -#[test] -fn empty_source() { - assert_eq!( - tokenize_to_vec(""), - vec![Spanning::zero_width( - &SourcePosition::new_origin(), - Token::EndOfFile, - )] - ); -} - -#[test] -fn disallow_control_codes() { - assert_eq!( - Lexer::new("\u{0007}").next(), - Some(Err(Spanning::zero_width( - &SourcePosition::new_origin(), - LexerError::UnknownCharacter('\u{0007}') - ))) - ); -} - -#[test] -fn skip_whitespace() { - assert_eq!( - tokenize_to_vec( - r#" - - foo - - "# - ), - vec![ 
- Spanning::start_end( - &SourcePosition::new(14, 2, 12), - &SourcePosition::new(17, 2, 15), - Token::Name("foo"), - ), - Spanning::zero_width(&SourcePosition::new(31, 4, 12), Token::EndOfFile), - ] - ); -} - -#[test] -fn skip_comments() { - assert_eq!( - tokenize_to_vec( - r#" - #comment - foo#comment - "# - ), - vec![ - Spanning::start_end( - &SourcePosition::new(34, 2, 12), - &SourcePosition::new(37, 2, 15), - Token::Name("foo"), - ), - Spanning::zero_width(&SourcePosition::new(58, 3, 12), Token::EndOfFile), - ] - ); -} - -#[test] -fn skip_commas() { - assert_eq!( - tokenize_to_vec(r#",,,foo,,,"#), - vec![ - Spanning::start_end( - &SourcePosition::new(3, 0, 3), - &SourcePosition::new(6, 0, 6), - Token::Name("foo"), - ), - Spanning::zero_width(&SourcePosition::new(9, 0, 9), Token::EndOfFile), - ] - ); -} - -#[test] -fn error_positions() { - assert_eq!( - Lexer::new( - r#" - - ? - - "# - ) - .next(), - Some(Err(Spanning::zero_width( - &SourcePosition::new(14, 2, 12), - LexerError::UnknownCharacter('?') - ))) - ); -} - -#[test] -fn strings() { - assert_eq!( - tokenize_single(r#""simple""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(8, 0, 8), - Token::Scalar(ScalarToken::String("simple")) - ) - ); - - assert_eq!( - tokenize_single(r#"" white space ""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(15, 0, 15), - Token::Scalar(ScalarToken::String(" white space ")) - ) - ); - - assert_eq!( - tokenize_single(r#""quote \"""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(10, 0, 10), - Token::Scalar(ScalarToken::String(r#"quote \""#)) - ) - ); - - assert_eq!( - tokenize_single(r#""escaped \n\r\b\t\f""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(20, 0, 20), - Token::Scalar(ScalarToken::String(r"escaped \n\r\b\t\f")) - ) - ); - - assert_eq!( - tokenize_single(r#""slashes \\ \/""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 
0), - &SourcePosition::new(15, 0, 15), - Token::Scalar(ScalarToken::String(r"slashes \\ \/")) - ) - ); - - assert_eq!( - tokenize_single(r#""unicode \u1234\u5678\u90AB\uCDEF""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(34, 0, 34), - Token::Scalar(ScalarToken::String(r"unicode \u1234\u5678\u90AB\uCDEF")), - ) - ); -} - -#[test] -fn string_errors() { - assert_eq!( - tokenize_error("\""), - Spanning::zero_width( - &SourcePosition::new(1, 0, 1), - LexerError::UnterminatedString, - ) - ); - - assert_eq!( - tokenize_error("\"no end quote"), - Spanning::zero_width( - &SourcePosition::new(13, 0, 13), - LexerError::UnterminatedString, - ) - ); - - assert_eq!( - tokenize_error("\"contains unescaped \u{0007} control char\""), - Spanning::zero_width( - &SourcePosition::new(20, 0, 20), - LexerError::UnknownCharacterInString('\u{0007}'), - ) - ); - - assert_eq!( - tokenize_error("\"null-byte is not \u{0000} end of file\""), - Spanning::zero_width( - &SourcePosition::new(18, 0, 18), - LexerError::UnknownCharacterInString('\u{0000}'), - ) - ); - - assert_eq!( - tokenize_error("\"multi\nline\""), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnterminatedString, - ) - ); - - assert_eq!( - tokenize_error("\"multi\rline\""), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnterminatedString, - ) - ); - - assert_eq!( - tokenize_error(r#""bad \z esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\z".into()), - ) - ); - - assert_eq!( - tokenize_error(r#""bad \x esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\x".into()), - ) - ); - - assert_eq!( - tokenize_error(r#""bad \u1 esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\u1".into()), - ) - ); - - assert_eq!( - tokenize_error(r#""bad \u0XX1 esc""#), - Spanning::zero_width( - 
&SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\u0XX1".into()), - ) - ); - - assert_eq!( - tokenize_error(r#""bad \uXXXX esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\uXXXX".into()), - ) - ); - - assert_eq!( - tokenize_error(r#""bad \uFXXX esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\uFXXX".into()), - ) - ); - - assert_eq!( - tokenize_error(r#""bad \uXXXF esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\uXXXF".into()), - ) - ); - - assert_eq!( - tokenize_error(r#""unterminated in string \""#), - Spanning::zero_width( - &SourcePosition::new(26, 0, 26), - LexerError::UnterminatedString - ) - ); - - assert_eq!( - tokenize_error(r#""unterminated \"#), - Spanning::zero_width( - &SourcePosition::new(15, 0, 15), - LexerError::UnterminatedString - ) - ); - - // Found by fuzzing. - assert_eq!( - tokenize_error(r#""\uɠ^A"#), - Spanning::zero_width( - &SourcePosition::new(5, 0, 5), - LexerError::UnterminatedString - ) - ); -} - -#[test] -fn numbers() { - fn assert_float_token_eq( - source: &str, - start: SourcePosition, - end: SourcePosition, - expected: &str, - ) { - let parsed = tokenize_single(source); - assert_eq!(parsed.span.start, start); - assert_eq!(parsed.span.end, end); - - match parsed.item { - Token::Scalar(ScalarToken::Float(actual)) => { - assert!( - expected == actual, - "[expected] {expected} != {actual} [actual]", - ); - } - _ => assert!(false), - } - } - - assert_eq!( - tokenize_single("4"), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(1, 0, 1), - Token::Scalar(ScalarToken::Int("4")) - ) - ); - - assert_float_token_eq( - "4.123", - SourcePosition::new(0, 0, 0), - SourcePosition::new(5, 0, 5), - "4.123", - ); - - assert_float_token_eq( - "4.0", - SourcePosition::new(0, 0, 0), - SourcePosition::new(3, 0, 3), - "4.0", - ); - - 
assert_eq!( - tokenize_single("-4"), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(2, 0, 2), - Token::Scalar(ScalarToken::Int("-4")) - ) - ); - - assert_eq!( - tokenize_single("9"), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(1, 0, 1), - Token::Scalar(ScalarToken::Int("9")) - ) - ); - - assert_eq!( - tokenize_single("0"), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(1, 0, 1), - Token::Scalar(ScalarToken::Int("0")) - ) - ); - - assert_float_token_eq( - "-4.123", - SourcePosition::new(0, 0, 0), - SourcePosition::new(6, 0, 6), - "-4.123", - ); - - assert_float_token_eq( - "0.123", - SourcePosition::new(0, 0, 0), - SourcePosition::new(5, 0, 5), - "0.123", - ); - - assert_float_token_eq( - "123e4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(5, 0, 5), - "123e4", - ); - - assert_float_token_eq( - "123E4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(5, 0, 5), - "123E4", - ); - - assert_float_token_eq( - "123e-4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(6, 0, 6), - "123e-4", - ); - - assert_float_token_eq( - "123e+4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(6, 0, 6), - "123e+4", - ); - - assert_float_token_eq( - "-1.123e4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(8, 0, 8), - "-1.123e4", - ); - - assert_float_token_eq( - "-1.123E4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(8, 0, 8), - "-1.123E4", - ); - - assert_float_token_eq( - "-1.123e-4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(9, 0, 9), - "-1.123e-4", - ); - - assert_float_token_eq( - "-1.123e+4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(9, 0, 9), - "-1.123e+4", - ); - - assert_float_token_eq( - "-1.123e45", - SourcePosition::new(0, 0, 0), - SourcePosition::new(9, 0, 9), - "-1.123e45", - ); -} - -#[test] -fn numbers_errors() { - assert_eq!( - tokenize_error("00"), - Spanning::zero_width( - &SourcePosition::new(1, 0, 
1), - LexerError::UnexpectedCharacter('0') - ) - ); - - assert_eq!( - tokenize_error("+1"), - Spanning::zero_width( - &SourcePosition::new(0, 0, 0), - LexerError::UnknownCharacter('+') - ) - ); - - assert_eq!( - tokenize_error("1."), - Spanning::zero_width( - &SourcePosition::new(2, 0, 2), - LexerError::UnexpectedEndOfFile - ) - ); - - assert_eq!( - tokenize_error(".123"), - Spanning::zero_width( - &SourcePosition::new(0, 0, 0), - LexerError::UnexpectedCharacter('.') - ) - ); - - assert_eq!( - tokenize_error("1.A"), - Spanning::zero_width( - &SourcePosition::new(2, 0, 2), - LexerError::UnexpectedCharacter('A') - ) - ); - - assert_eq!( - tokenize_error("-A"), - Spanning::zero_width( - &SourcePosition::new(1, 0, 1), - LexerError::UnexpectedCharacter('A') - ) - ); - - assert_eq!( - tokenize_error("1.0e"), - Spanning::zero_width( - &SourcePosition::new(4, 0, 4), - LexerError::UnexpectedEndOfFile - ) - ); - - assert_eq!( - tokenize_error("1.0eA"), - Spanning::zero_width( - &SourcePosition::new(4, 0, 4), - LexerError::UnexpectedCharacter('A') - ) - ); -} - -#[test] -fn punctuation() { - assert_eq!( - tokenize_single("!"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ExclamationMark) - ); - - assert_eq!( - tokenize_single("$"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Dollar) - ); - - assert_eq!( - tokenize_single("("), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ParenOpen) - ); - - assert_eq!( - tokenize_single(")"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ParenClose) - ); - - assert_eq!( - tokenize_single("..."), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(3, 0, 3), - Token::Ellipsis - ) - ); - - assert_eq!( - tokenize_single(":"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Colon) - ); - - assert_eq!( - tokenize_single("="), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Equals) - ); - - assert_eq!( - 
tokenize_single("@"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::At) - ); - - assert_eq!( - tokenize_single("["), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::BracketOpen) - ); - - assert_eq!( - tokenize_single("]"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::BracketClose) - ); - - assert_eq!( - tokenize_single("{"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::CurlyOpen) - ); - - assert_eq!( - tokenize_single("}"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::CurlyClose) - ); - - assert_eq!( - tokenize_single("|"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Pipe) - ); -} - -#[test] -fn punctuation_error() { - assert_eq!( - tokenize_error(".."), - Spanning::zero_width( - &SourcePosition::new(2, 0, 2), - LexerError::UnexpectedEndOfFile - ) - ); - - assert_eq!( - tokenize_error("?"), - Spanning::zero_width( - &SourcePosition::new(0, 0, 0), - LexerError::UnknownCharacter('?') - ) - ); - - assert_eq!( - tokenize_error("\u{203b}"), - Spanning::zero_width( - &SourcePosition::new(0, 0, 0), - LexerError::UnknownCharacter('\u{203b}') - ) - ); - - assert_eq!( - tokenize_error("\u{200b}"), - Spanning::zero_width( - &SourcePosition::new(0, 0, 0), - LexerError::UnknownCharacter('\u{200b}') - ) - ); -} - -#[test] -fn display() { - for (input, expected) in [ - (Token::Name("identifier"), "identifier"), - (Token::Scalar(ScalarToken::Int("123")), "123"), - (Token::Scalar(ScalarToken::Float("4.5")), "4.5"), - ( - Token::Scalar(ScalarToken::String("some string")), - "\"some string\"", - ), - ( - Token::Scalar(ScalarToken::String("string with \\ escape and \" quote")), - "\"string with \\\\ escape and \\\" quote\"", - ), - (Token::ExclamationMark, "!"), - (Token::Dollar, "$"), - (Token::ParenOpen, "("), - (Token::ParenClose, ")"), - (Token::BracketOpen, "["), - (Token::BracketClose, "]"), - (Token::CurlyOpen, "{"), - (Token::CurlyClose, "}"), - (Token::Ellipsis, "..."), - 
(Token::Colon, ":"), - (Token::Equals, "="), - (Token::At, "@"), - (Token::Pipe, "|"), - ] { - assert_eq!(input.to_string(), expected); - } -} diff --git a/juniper/src/parser/tests/mod.rs b/juniper/src/parser/tests/mod.rs index 18df2c92d..ab77d55c7 100644 --- a/juniper/src/parser/tests/mod.rs +++ b/juniper/src/parser/tests/mod.rs @@ -1,3 +1,2 @@ mod document; -mod lexer; mod value; diff --git a/juniper/src/tests/fixtures/starwars/schema.rs b/juniper/src/tests/fixtures/starwars/schema.rs index 10d1f87a6..f16940863 100644 --- a/juniper/src/tests/fixtures/starwars/schema.rs +++ b/juniper/src/tests/fixtures/starwars/schema.rs @@ -1,4 +1,11 @@ -#![expect(missing_docs, reason = "GraphQL schema testing")] +#![cfg_attr( + not(feature = "expose-test-schema"), + expect(dead_code, reason = "GraphQL schema testing") +)] +#![cfg_attr( + feature = "expose-test-schema", + expect(missing_docs, reason = "GraphQL schema testing") +)] use std::{collections::HashMap, pin::Pin}; diff --git a/juniper/src/types/scalars.rs b/juniper/src/types/scalars.rs index 8467e8c0b..7fa70b425 100644 --- a/juniper/src/types/scalars.rs +++ b/juniper/src/types/scalars.rs @@ -1,4 +1,4 @@ -use std::{char, marker::PhantomData, rc::Rc, thread::JoinHandle}; +use std::{marker::PhantomData, rc::Rc, thread::JoinHandle}; use derive_more::with_trait::{Deref, Display, From, Into}; use serde::{Deserialize, Serialize}; @@ -9,7 +9,7 @@ use crate::{ executor::{ExecutionResult, Executor, Registry}, graphql_scalar, macros::reflect, - parser::{LexerError, ParseError, ScalarToken, Token}, + parser::{ParseError, ScalarToken, Token}, schema::meta::MetaType, types::{ async_await::GraphQLValueAsync, @@ -81,109 +81,16 @@ mod impl_string_scalar { } pub(super) fn parse_token(value: ScalarToken<'_>) -> ParseScalarResult { - if let ScalarToken::String(value) = value { - let mut ret = String::with_capacity(value.len()); - let mut char_iter = value.chars(); - while let Some(ch) = char_iter.next() { - match ch { - '\\' => match 
char_iter.next() { - Some('"') => { - ret.push('"'); - } - Some('/') => { - ret.push('/'); - } - Some('n') => { - ret.push('\n'); - } - Some('r') => { - ret.push('\r'); - } - Some('t') => { - ret.push('\t'); - } - Some('\\') => { - ret.push('\\'); - } - Some('f') => { - ret.push('\u{000c}'); - } - Some('b') => { - ret.push('\u{0008}'); - } - Some('u') => { - ret.push(parse_unicode_codepoint(&mut char_iter)?); - } - Some(s) => { - return Err(ParseError::LexerError(LexerError::UnknownEscapeSequence( - format!("\\{s}"), - ))); - } - None => return Err(ParseError::LexerError(LexerError::UnterminatedString)), - }, - ch => { - ret.push(ch); - } - } - } - Ok(ret.into()) + if let ScalarToken::String(lit) = value { + let parsed = lit.parse()?; + // TODO: Allow cheaper from `Cow<'_, str>` conversions for `ScalarValue`. + Ok(parsed.into_owned().into()) } else { Err(ParseError::unexpected_token(Token::Scalar(value))) } } } -fn parse_unicode_codepoint(char_iter: &mut I) -> Result -where - I: Iterator, -{ - let escaped_code_point = char_iter - .next() - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(String::from("\\u"))) - }) - .and_then(|c1| { - char_iter - .next() - .map(|c2| format!("{c1}{c2}")) - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!("\\u{c1}"))) - }) - }) - .and_then(|mut s| { - char_iter - .next() - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!("\\u{s}"))) - }) - .map(|c2| { - s.push(c2); - s - }) - }) - .and_then(|mut s| { - char_iter - .next() - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!("\\u{s}"))) - }) - .map(|c2| { - s.push(c2); - s - }) - })?; - let code_point = u32::from_str_radix(&escaped_code_point, 16).map_err(|_| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( - "\\u{escaped_code_point}", - ))) - })?; - char::from_u32(code_point).ok_or_else(|| { - 
ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( - "\\u{escaped_code_point}", - ))) - }) -} - #[graphql_scalar] #[graphql( name = "String", @@ -537,7 +444,7 @@ impl Default for EmptySubscription { #[cfg(test)] mod tests { use crate::{ - parser::ScalarToken, + parser::{ScalarToken, StringLiteral}, value::{DefaultScalarValue, ParseScalarValue, ScalarValue as _}, }; @@ -571,24 +478,152 @@ mod tests { #[test] fn parse_strings() { - fn parse_string(s: &str, expected: &str) { - let s = - >::from_str(ScalarToken::String(s)); - assert!(s.is_ok(), "A parsing error occurred: {s:?}"); - let s: Option = s.unwrap().try_to().ok(); - assert!(s.is_some(), "No string returned"); + for (input, expected) in [ + (r#""simple""#, "simple"), + (r#"" white space ""#, " white space "), + (r#""quote \"""#, r#"quote ""#), + (r#""escaped \n\r\b\t\f""#, "escaped \n\r\u{0008}\t\u{000c}"), + (r#""slashes \\ \/""#, r"slashes \ /"), + ( + r#""unicode \u1234\u5678\u90AB\uCDEF""#, + "unicode \u{1234}\u{5678}\u{90ab}\u{cdef}", + ), + ( + r#""string with unicode escape outside BMP \u{1F600}""#, + "string with unicode escape outside BMP \u{1F600}", + ), + ( + r#""string with minimal unicode escape \u{0}""#, + "string with minimal unicode escape \u{0}", + ), + ( + r#""string with maximal unicode escape \u{10FFFF}""#, + "string with maximal unicode escape \u{10FFFF}", + ), + ( + r#""string with maximal minimal unicode escape \u{000000}""#, + "string with maximal minimal unicode escape \u{000000}", + ), + ( + r#""string with unicode surrogate pair escape \uD83D\uDE00""#, + "string with unicode surrogate pair escape \u{1f600}", + ), + ( + r#""string with minimal surrogate pair escape \uD800\uDC00""#, + "string with minimal surrogate pair escape \u{10000}", + ), + ( + r#""string with maximal surrogate pair escape \uDBFF\uDFFF""#, + "string with maximal surrogate pair escape \u{10FFFF}", + ), + ] { + let res = >::from_str( + ScalarToken::String(StringLiteral::Quoted(input)), + ); + 
assert!(res.is_ok(), "parsing error occurred: {}", res.unwrap_err()); + + let s: Option = res.unwrap().try_to().ok(); + assert!(s.is_some(), "no string returned"); assert_eq!(s.unwrap(), expected); } + } + + #[test] + fn parse_block_strings() { + for (input, expected) in [ + (r#""""""""#, ""), + (r#""""simple""""#, "simple"), + (r#"""" white space """"#, " white space "), + (r#""""contains " quote""""#, r#"contains " quote"#), + ( + r#""""contains \""" triple quote""""#, + r#"contains """ triple quote"#, + ), + ( + r#""""contains \"" double quote""""#, + r#"contains \"" double quote"#, + ), + ( + r#""""contains \\""" triple quote""""#, + r#"contains \""" triple quote"#, + ), + (r#""""\"""quote" """"#, r#""""quote" "#), + (r#""""multi\nline""""#, r"multi\nline"), + ( + r#""""multi\rline\r\nnormalized""""#, + r"multi\rline\r\nnormalized", + ), + ( + r#""""unescaped \\n\\r\\b\\t\\f\\u1234""""#, + r"unescaped \\n\\r\\b\\t\\f\\u1234", + ), + ( + r#""""unescaped unicode outside BMP \u{1f600}""""#, + r"unescaped unicode outside BMP \u{1f600}", + ), + (r#""""slashes \\\\ \\/""""#, r"slashes \\\\ \\/"), + ( + r#"""" + + spans + multiple + lines + + """"#, + "spans\n multiple\n lines", + ), + // removes uniform indentation + ( + r#"""" + Hello, + World! + + Yours, + GraphQL.""""#, + "Hello,\n World!\n\nYours,\n GraphQL.", + ), + // removes empty leading and trailing lines + ( + r#"""" + + Hello, + World! + + Yours, + GraphQL. + + """"#, + "Hello,\n World!\n\nYours,\n GraphQL.", + ), + // retains indentation from first line + ( + r#"""" Hello, + World! + + Yours, + GraphQL.""""#, + " Hello,\n World!\n\nYours,\n GraphQL.", + ), + // does not alter trailing spaces + ( + r#"""" + Hello, + World! + + Yours, + GraphQL. """"#, + "Hello,\n World!\n\nYours,\n GraphQL. 
", + ), + ] { + let res = >::from_str( + ScalarToken::String(StringLiteral::Block(input)), + ); + assert!(res.is_ok(), "parsing error occurred: {}", res.unwrap_err()); - parse_string("simple", "simple"); - parse_string(" white space ", " white space "); - parse_string(r#"quote \""#, "quote \""); - parse_string(r"escaped \n\r\b\t\f", "escaped \n\r\u{0008}\t\u{000c}"); - parse_string(r"slashes \\ \/", "slashes \\ /"); - parse_string( - r"unicode \u1234\u5678\u90AB\uCDEF", - "unicode \u{1234}\u{5678}\u{90ab}\u{cdef}", - ); + let s: Option = res.unwrap().try_to().ok(); + assert!(s.is_some(), "no string returned"); + assert_eq!(s.unwrap(), expected); + } } #[test]