diff --git a/Cargo.lock b/Cargo.lock index 0590a4b8..f02fa6e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1073,6 +1073,7 @@ version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95" dependencies = [ + "indexmap", "itoa", "ryu", "serde", diff --git a/proto/substrait/validator/simple_extensions.proto b/proto/substrait/validator/simple_extensions.proto index 5d87b555..a5f44ab2 100644 --- a/proto/substrait/validator/simple_extensions.proto +++ b/proto/substrait/validator/simple_extensions.proto @@ -211,6 +211,25 @@ message ExtensionDefinition { WindowProperties window_function = 9; } + // For table functions, when defined; to be added to above oneof. + reserved 10; + + // Nullability behavior that the function was declared with. Note that the + // patterns have already been desugared to represent this, so this can be + // ignored; it exists only for completeness. + oneof consistency { + // Nullability of toplevel binding and data type patterns of all argument + // and return types was overridden to `??nullable`. + google.protobuf.Empty mirror = 11; + + // Nullability of toplevel binding and data type patterns of all argument + // types was overridden to `??nullable`. + google.protobuf.Empty declared_output = 12; + + // No desugaring overrides were applied. + google.protobuf.Empty discrete = 13; + } + // Properties common to aggregate and window functions. message AggregateProperties { // When specified, the function is decomposable. @@ -250,9 +269,8 @@ message ExtensionDefinition { } } - // Represents a parameter pack for a user-defined compound type class or a - // function argument slot list. In the latter case, the patterns will only - // ever be passed typenames. + // Represents a positional parameter pack for a user-defined compound type + // class or a function argument slot list. 
// // The order of operations for the various patterns is: // @@ -303,7 +321,7 @@ message ExtensionDefinition { // for aggregate and window functions. google.protobuf.Empty literal = 6; - // An data value must be bound to the slot. This is done by means of + // A data value must be bound to the slot. This is done by means of // binding an expression, but the expression can always be evaluated or // reduced before the function is invoked. This is used for value // function arguments that are not marked as constant. The data type of @@ -350,6 +368,18 @@ message ExtensionDefinition { // The maximum number of arguments that can be bound to the slot. Zero // is treated as unspecified/no upper limit. uint64 maximum = 2; + + // Consistency that the variadic slot was declared with. Note that the + // patterns have already been desugared to represent this, so this can be + // ignored; it exists only for completeness. + oneof consistency { + // No desugaring overrides were applied. + google.protobuf.Empty consistent = 3; + + // All consistent bindings in the last argument slot were overridden to + // inconsistent bindings. + google.protobuf.Empty inconsistent = 4; + } } // Optional additional constraints to apply when determining whether a diff --git a/rs/Cargo.toml b/rs/Cargo.toml index 588e2621..bd5551eb 100644 --- a/rs/Cargo.toml +++ b/rs/Cargo.toml @@ -40,7 +40,7 @@ serde_yaml = "0.9" # both the schema and the input, so we need to depend on that as well, even # though we don't actually do any JSON serialization and deserialization. jsonschema = { version = "=0.15.0", default-features = false } -serde_json = "1" +serde_json = { version = "1", features = ["preserve_order"] } # Used for checking identifier syntax (could be removed if regexes don't end up # being useful elsewhere too). 
diff --git a/rs/src/export/proto.rs b/rs/src/export/proto.rs index 053e760b..4e9c3068 100644 --- a/rs/src/export/proto.rs +++ b/rs/src/export/proto.rs @@ -347,31 +347,12 @@ impl From<&extension::simple::type_class::Reference> for validator::data_type::U extension_id: node .definition .as_ref() - .map(|x| x.extension_id) + .map(|x| x.identifier.extension_id) .unwrap_or_default(), } } } -impl From<&extension::simple::type_class::Definition> - for validator::data_type::user_defined_type::Definition -{ - fn from(node: &extension::simple::type_class::Definition) -> Self { - Self { - structure: node - .structure - .iter() - .map( - |(name, simple)| validator::data_type::user_defined_type::Element { - name: name.to_string(), - kind: simple.into(), - }, - ) - .collect(), - } - } -} - impl From<&data::Variation> for validator::data_type::Variation { fn from(node: &data::Variation) -> Self { match node { @@ -388,7 +369,7 @@ impl From<&data::Variation> for validator::data_type::Variation { extension_id: variation .definition .as_ref() - .map(|x| x.extension_id) + .map(|x| x.identifier.extension_id) .unwrap_or_default(), }, ) diff --git a/rs/src/input/config.rs b/rs/src/input/config.rs index c559aba4..51697b93 100644 --- a/rs/src/input/config.rs +++ b/rs/src/input/config.rs @@ -39,6 +39,7 @@ fn resolve_with_curl(uri: &str) -> Result, curl::Error> { } /// Configuration structure. +#[derive(Default)] pub struct Config { /// When set, do not generate warnings for unknown protobuf fields that are /// set to their protobuf-defined default value. @@ -86,21 +87,6 @@ pub struct Config { pub max_uri_resolution_depth: Option, } -// TODO: enable URI resolution by default once all that works. Then this can -// be derived again. Also still need to expose the depth option in extensions. 
-impl Default for Config { - fn default() -> Self { - Self { - ignore_unknown_fields: Default::default(), - allowed_proto_any_urls: Default::default(), - diagnostic_level_overrides: Default::default(), - uri_overrides: Default::default(), - uri_resolver: Default::default(), - max_uri_resolution_depth: Some(0), - } - } -} - impl Config { /// Creates a default configuration. pub fn new() -> Self { diff --git a/rs/src/output/diagnostic.rs b/rs/src/output/diagnostic.rs index b1be397a..e132eec9 100644 --- a/rs/src/output/diagnostic.rs +++ b/rs/src/output/diagnostic.rs @@ -249,6 +249,9 @@ pub enum Classification { #[strum(props(HiddenDescription = "invalid compound vs. simple function name usage"))] LinkCompoundVsSimpleFunctionName = 3010, + #[strum(props(Description = "discouraged name"))] + LinkDiscouragedName = 3011, + // Type-related diagnostics (group 4). #[strum(props(HiddenDescription = "type-related diagnostics"))] Type = 4000, diff --git a/rs/src/output/extension/namespace.rs b/rs/src/output/extension/namespace.rs index 49adb8bd..d49186b3 100644 --- a/rs/src/output/extension/namespace.rs +++ b/rs/src/output/extension/namespace.rs @@ -531,6 +531,22 @@ impl ResolutionResult { self.expect(parse_context, if_not_applicable, |_, _| true, true, false) } + /// Emits an error if one or more definitions were found for this name + /// resolution, to be used just before defining a new item. + pub fn expect_not_yet_defined(&self, parse_context: &mut context::Context) { + if !self.visible.is_empty() { + traversal::push_diagnostic( + parse_context, + diagnostic::Level::Error, + cause!( + LinkDuplicateDefinition, + "{} is already defined", + self.unresolved_reference + ), + ); + } + } + /// Silently returns the first matching item, if any. If there are none, /// this just returns an unresolved reference. 
Use /// filter_items().expect_one() to formulate error messages if there are @@ -568,17 +584,11 @@ impl ResolutionResult { .flatten() } - /// Return an error if one or more definitions were found for this name - /// resolution, to be used just before defining a new item. - pub fn expect_not_yet_defined(&self) -> diagnostic::Result<()> { - if self.visible.is_empty() { - Ok(()) - } else { - Err(cause!( - LinkDuplicateDefinition, - "{} is already defined", - self.unresolved_reference - )) - } + /// Calls the given function for each visible item. + pub fn for_each_visible_item(&self, mut f: F) { + self.visible + .iter() + .filter_map(|x| x.1.as_item()) + .for_each(|x| f(&x)) } } diff --git a/rs/src/output/extension/reference.rs b/rs/src/output/extension/reference.rs index 0a5fb86a..5b5a2b3b 100644 --- a/rs/src/output/extension/reference.rs +++ b/rs/src/output/extension/reference.rs @@ -6,10 +6,11 @@ use crate::output::path; use crate::util; use std::sync::Arc; -/// Represents an identifier that was used to reference something. It is -/// stored along with a resolution result to retain information about the -/// reference even if the resolution failed, and is generally only used for -/// identity/equality checks and diagnostic information. +/// Represents an identifier that was used to reference an extension at the +/// protobuf level. It is stored along with a resolution result to retain +/// information about the reference even if the resolution failed, and is +/// generally only used for identity/equality checks and diagnostic +/// information. #[derive(Clone, Debug, Default)] pub struct Identifier { /// The name of the object being referred to, if known. Always stored using diff --git a/rs/src/output/extension/simple/common.rs b/rs/src/output/extension/simple/common.rs new file mode 100644 index 00000000..d47a01d9 --- /dev/null +++ b/rs/src/output/extension/simple/common.rs @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! 
Module for the common types involved with representing extension +//! definitions. + +use crate::output; + +/// Identifying information associated with an extension, that can be used to +/// refer to the extension from elsewhere. +#[derive(Clone, Debug)] +pub struct Identifier { + /// The URI that the extension was declared with. Matched case-sensitively. + pub uri: String, + + /// One or more aliases for this extension within the scope of the URI. + /// Matched case-insensitively. + pub names: Vec, + + /// Unique number for the extension, generated during traversal. The + /// number is only unique within the scope of a single run of the + /// validator, and may change between runs. + pub extension_id: u64, + + /// The path that the extension is defined in. + pub definition_path: output::path::PathBuf, +} + +/// Non-functional metadata common to all extension types. +#[derive(Clone, Debug, Default)] +pub struct Metadata { + /// Optional description of the extension. Only serves as documentation. + pub description: String, +} diff --git a/rs/src/output/extension/simple/function.rs b/rs/src/output/extension/simple/function.rs index 3521e51e..b604a359 100644 --- a/rs/src/output/extension/simple/function.rs +++ b/rs/src/output/extension/simple/function.rs @@ -11,9 +11,11 @@ use std::sync::Arc; /// The definition of a function implementation. #[derive(Clone, Debug)] pub struct Definition { - /// Unique number within the tree that can be used to refer to this - /// extension when exporting in protobuf form. - pub extension_id: u64, + /// Identifier for the extension. + pub identifier: extension::simple::common::Identifier, + + /// Common metadata for the extension. + pub metadata: extension::simple::common::Metadata, /// Link to information common to a set of function implementations going by /// the same name. 
diff --git a/rs/src/output/extension/simple/mod.rs b/rs/src/output/extension/simple/mod.rs index dc58ba54..1193cb04 100644 --- a/rs/src/output/extension/simple/mod.rs +++ b/rs/src/output/extension/simple/mod.rs @@ -2,6 +2,7 @@ //! Module for representing simple extensions. +pub mod common; pub mod function; pub mod module; pub mod type_class; diff --git a/rs/src/output/extension/simple/module.rs b/rs/src/output/extension/simple/module.rs index 6d92bc95..ffe358dc 100644 --- a/rs/src/output/extension/simple/module.rs +++ b/rs/src/output/extension/simple/module.rs @@ -72,14 +72,13 @@ impl DynScope for T { } /// A parsed simple extension module/file. -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug)] pub struct Definition { - /// Unique number within the tree that can be used to refer to this - /// extension when exporting in protobuf form. - pub extension_id: u64, + /// Identifier for the extension. + pub identifier: extension::simple::common::Identifier, - /// Description of the module. - pub description: String, + /// Common metadata for the extension. + pub metadata: extension::simple::common::Metadata, /// The URI that was actually used to resolve the module. pub actual_uri: String, diff --git a/rs/src/output/extension/simple/type_class.rs b/rs/src/output/extension/simple/type_class.rs index 0765f725..42c1c7db 100644 --- a/rs/src/output/extension/simple/type_class.rs +++ b/rs/src/output/extension/simple/type_class.rs @@ -10,32 +10,47 @@ use crate::output::type_system::meta; use crate::output::type_system::meta::pattern::Pattern; /// A definition of a user-defined type class. -#[derive(Clone, Debug, PartialEq, Eq, Default)] +#[derive(Clone, Debug)] pub struct Definition { - /// Unique number within the tree that can be used to refer to this - /// extension when exporting in protobuf form. - pub extension_id: u64, + /// Identifier for the extension. + pub identifier: extension::simple::common::Identifier, - /// Description of the type class. 
- pub description: String, - - /// The underlying structure of the type. - pub structure: Vec<(String, data::class::Simple)>, + /// Common metadata for the extension. + pub metadata: extension::simple::common::Metadata, /// The parameters expected by the data type. pub parameter_slots: Vec<ParameterSlot>, /// Whether or not the last parameter slot is variadic. pub parameters_variadic: bool, + + /// Constraint program for checking the parameters. FIXME: misspelled; should be `constraints`. + pub contraints: Vec, + + /// The underlying structure of the type. `None` for opaque types. + pub structure: Option, +} + +impl From<extension::simple::common::Identifier> for Definition { + fn from(identifier: extension::simple::common::Identifier) -> Self { + Definition { + identifier, + metadata: Default::default(), + parameter_slots: Default::default(), + parameters_variadic: Default::default(), + contraints: Default::default(), + structure: Default::default(), + } + } } /// A parameter slot for a user-defined data type. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, Default)] pub struct ParameterSlot { - /// YAML-provided name of the parameter. + /// Name of the parameter. pub name: String, - /// YAML-provided human-readable description of the parameter. + /// Human-readable description of the parameter. pub description: String, /// Pattern for type- and bounds-checking parameters bound to this slot. @@ -89,6 +104,7 @@ impl ParameterInfo for Definition { } // Match parameters to slots positionally. + let mut context = meta::Context::default(); for (index, param) in params.iter().enumerate() { // Determine the slot that corresponds to this parameter. let slot = self @@ -119,7 +135,10 @@ impl ParameterInfo for Definition { )); } if let Some(value) = &param.value { - if !slot.pattern.match_pattern(value)? { + if !slot + .pattern + .match_pattern_with_context(&mut context, value)? + { return Err(cause!( TypeMismatchedParameters, "parameter {} does not match pattern {}", @@ -135,6 +154,17 @@ impl ParameterInfo for Definition { )); } } + + // Check constraints. 
+ for constraint in self.contraints.iter() { + constraint.execute(&mut context)?; + } + + // If there is a structure pattern, check that it can be evaluated. + if let Some(structure) = &self.structure { + structure.evaluate_with_context(&mut context)?; + } + Ok(()) } diff --git a/rs/src/output/extension/simple/type_variation.rs b/rs/src/output/extension/simple/type_variation.rs index 28dea1a0..78e12fa6 100644 --- a/rs/src/output/extension/simple/type_variation.rs +++ b/rs/src/output/extension/simple/type_variation.rs @@ -6,11 +6,13 @@ use crate::output::extension; use crate::output::type_system::data; /// Type variation extension. -#[derive(Clone, Debug, PartialEq, Eq, Default)] +#[derive(Clone, Debug)] pub struct Definition { - /// Unique number within the tree that can be used to refer to this - /// extension when exporting in protobuf form. - pub extension_id: u64, + /// Identifier for the extension. + pub identifier: extension::simple::common::Identifier, + + /// Common metadata for the extension. + pub metadata: extension::simple::common::Metadata, /// Description of the type variation. pub description: String, diff --git a/rs/src/output/type_system/meta/pattern.rs b/rs/src/output/type_system/meta/pattern.rs index afa6658f..150386dc 100644 --- a/rs/src/output/type_system/meta/pattern.rs +++ b/rs/src/output/type_system/meta/pattern.rs @@ -117,6 +117,26 @@ pub enum Value { /// `a >= b`; /// - Ternary: `if a then b else c`. Function(meta::Function, Vec), + + /// A union acting on the set of values that the patterns match. That is, + /// a union pattern matches iff any of the contained patterns match. The + /// patterns are matched lazily from left to right. At least two patterns + /// should be specified. Cannot be evaluated. + /// FIXME: syntax TBD. + Union(Vec), + + /// An intersection acting on the set of values that the patterns match. + /// That is, an intersection pattern matches iff all of the contained + /// patterns match. 
The patterns are matched lazily from left to right. + /// At least two patterns should be specified. Cannot be evaluated. + /// FIXME: syntax TBD. + Intersection(Vec), + + /// Inverts the match result of a pattern. That is, if the contained + /// pattern matches, the complement does not match, and vice versa. Cannot + /// be evaluated. + /// FIXME: syntax TBD. + Complement(Arc), } impl Describe for Value { @@ -149,7 +169,36 @@ impl Describe for Value { write!(f, "{func}(")?; util::string::describe_sequence(f, args, limit, 10, |f, arg, _, limit| { arg.describe(f, limit) - }) + })?; + write!(f, ")") + } + Value::Union(args) => { + write!(f, "(")?; + util::string::describe_sequence_with_sep( + f, + args, + limit, + 10, + " | ", + |f, arg, _, limit| arg.describe(f, limit), + )?; + write!(f, ")") + } + Value::Intersection(args) => { + write!(f, "(")?; + util::string::describe_sequence_with_sep( + f, + args, + limit, + 10, + " & ", + |f, arg, _, limit| arg.describe(f, limit), + )?; + write!(f, ")") + } + Value::Complement(arg) => { + write!(f, "~")?; + arg.describe(f, limit) } } } @@ -234,6 +283,23 @@ impl Value { } Value::Function(func, args) => Value::exactly(func.evaluate(context, args)?) .match_pattern_with_context(context, value)?, + Value::Union(expected) => { + for expected in expected.iter() { + if expected.match_pattern_with_context(context, value)? { + return Ok(true); + } + } + false + } + Value::Intersection(expected) => { + for expected in expected.iter() { + if !expected.match_pattern_with_context(context, value)? { + return Ok(false); + } + } + true + } + Value::Complement(expected) => !expected.match_pattern_with_context(context, value)?, }) } @@ -258,7 +324,7 @@ impl Value { /// multiple types can be matched, yield unresolved. 
pub fn determine_type(&self) -> meta::Type { match self { - Value::Unresolved | Value::Any => meta::Type::Unresolved, + Value::Unresolved | Value::Any | Value::Complement(_) => meta::Type::Unresolved, Value::Binding(binding) => binding.determine_type(), Value::Boolean(_) => meta::Type::Boolean, Value::Integer(_, _) => meta::Type::Integer, @@ -266,8 +332,27 @@ impl Value { Value::String(_) => meta::Type::String, Value::DataType(_) => meta::Type::DataType, Value::Function(function, arguments) => function.determine_type(arguments), + Value::Union(patterns) | Value::Intersection(patterns) => { + let mut result = meta::Type::Unresolved; + for pattern in patterns.iter() { + let pattern_type = pattern.determine_type(); + if !matches!(pattern_type, meta::Type::Unresolved) { + if matches!(result, meta::Type::Unresolved) { + result = pattern_type; + } else if pattern_type != result { + return meta::Type::Unresolved; + } + } + } + result + } } } + + /// Returns whether this could be a data type pattern. 
+ pub fn is_data_type(&self) -> bool { + matches!(self, Value::Unresolved | Value::DataType(_)) + } } impl Pattern for Value { @@ -352,6 +437,18 @@ impl Pattern for Value { } Value::DataType(value) => value.evaluate_with_context(context).map(meta::Value::from), Value::Function(func, args) => func.evaluate(context, args), + Value::Union(_) => Err(cause!( + TypeDerivationInvalid, + "cannot evaluate union pattern" + )), + Value::Intersection(_) => Err(cause!( + TypeDerivationInvalid, + "cannot evaluate intersection pattern" + )), + Value::Complement(_) => Err(cause!( + TypeDerivationInvalid, + "cannot evaluate complement pattern" + )), } } @@ -366,6 +463,9 @@ impl Pattern for Value { Value::String(x) => x.is_some(), Value::DataType(x) => x.can_evaluate(), Value::Function(_, _) => true, + Value::Union(_) => false, + Value::Intersection(_) => false, + Value::Complement(_) => false, } } } diff --git a/rs/src/output/type_system/meta/program.rs b/rs/src/output/type_system/meta/program.rs index 49ed42a1..99e78773 100644 --- a/rs/src/output/type_system/meta/program.rs +++ b/rs/src/output/type_system/meta/program.rs @@ -41,7 +41,7 @@ impl Describe for Program { &self.statements, stmts_limit, 50, - ';', + "; ", |f, stmt, _, limit| stmt.describe(f, limit), )?; write!(f, "; ")?; diff --git a/rs/src/parse/context.rs b/rs/src/parse/context.rs index 15338a3e..eeeee2a7 100644 --- a/rs/src/parse/context.rs +++ b/rs/src/parse/context.rs @@ -431,6 +431,17 @@ impl<'a> Context<'a> { pub fn uri_stack(&mut self) -> &mut Vec { &mut self.state.uri_stack } + + /// Returns an identifier with a fresh extension ID. IDs count up from 1. 
+ pub fn make_extension_id(&mut self) -> extension::simple::common::Identifier { + self.state.extension_id_counter += 1; + extension::simple::common::Identifier { + uri: Default::default(), + names: Default::default(), + extension_id: self.state.extension_id_counter, + definition_path: self.path_buf(), + } + } } #[derive(Clone, Debug)] @@ -542,6 +553,10 @@ pub struct State { /// Stack for URIs being parsed. Used to detect recursion and limit depth. pub uri_stack: Vec, + + /// Used to generate unique extension IDs. Set to the latest ID handed out, + /// or 0 if no ID has been handed out yet. + pub extension_id_counter: u64, } /// Breadcrumbs structure. Each breadcrumb is associated with a node, and diff --git a/rs/src/parse/extensions/simple/builder.rs b/rs/src/parse/extensions/simple/builder.rs deleted file mode 100644 index 37845c9d..00000000 --- a/rs/src/parse/extensions/simple/builder.rs +++ /dev/null @@ -1,78 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -//! Module providing a builder structure to be used while parsing a simple -//! extension file. - -use crate::output::extension; -use std::collections::HashMap; -use std::sync::Arc; - -#[derive(Clone, Debug, Default)] -pub struct Builder { - /// Unique identifier for this extension. - pub extension_id: u64, - - /// Description of the extension. - pub description: String, - - /// The URI that was actually used to resolve this extension. - pub actual_uri: String, - - /// Map with references to dependencies. - pub dependencies: HashMap, - - /// Namespace used for type classes defined in this extension and its - /// dependencies. - pub type_classes: extension::simple::type_class::NamespaceDefinition, - - /// Namespace used for type variations defined in this extension and its - /// dependencies. - pub type_variations: extension::simple::type_variation::NamespaceDefinition, - - /// Namespace used for functions defined in this extension and its - /// dependencies. Both simple and compound names are registered. 
- pub function_impls: extension::simple::function::NamespaceDefinition, -} - -impl From for extension::simple::module::Definition { - fn from(builder: Builder) -> Self { - extension::simple::module::Definition { - extension_id: builder.extension_id, - description: builder.description, - actual_uri: builder.actual_uri, - dependencies: builder.dependencies, - type_classes: Arc::new(builder.type_classes), - type_variations: Arc::new(builder.type_variations), - function_impls: Arc::new(builder.function_impls), - } - } -} - -impl extension::simple::module::Scope for Builder { - /// Resolves a to-be-resolved reference to a type class. - fn resolve_type_class(&self, name: T) -> extension::simple::type_class::ResolutionResult - where - T: Into, - { - self.type_classes.resolve_local(name.into()) - } - - /// Resolves a to-be-resolved reference to a type variation. - fn resolve_type_variation( - &self, - name: T, - ) -> extension::simple::type_variation::ResolutionResult - where - T: Into, - { - self.type_variations.resolve_local(name.into()) - } - - /// Resolves a to-be-resolved reference to a function. - fn resolve_function(&self, name: T) -> extension::simple::function::ResolutionResult - where - T: Into, - { - self.function_impls.resolve_local(name.into()) - } -} diff --git a/rs/src/parse/extensions/simple/common.rs b/rs/src/parse/extensions/simple/common.rs new file mode 100644 index 00000000..74bcb9af --- /dev/null +++ b/rs/src/parse/extensions/simple/common.rs @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module providing private helper functions that are not specific to any +//! particular simple extension construct. + +use crate::output::diagnostic::Result; +use crate::parse::context; + +/// Parser for names given to things. 
+pub fn parse_name(x: &str, y: &mut context::Context, construct: &str) -> Result<String> { + static IDENTIFIER_RE: once_cell::sync::Lazy<regex::Regex> = + once_cell::sync::Lazy::new(|| regex::Regex::new("^[a-zA-Z_][a-zA-Z0-9_\\.]*$").unwrap()); + + if x.is_empty() { + diagnostic!( + y, + Info, + LinkDiscouragedName, + "using the empty string as a {construct} name is not explicitly \ + illegal, but probably not a good idea" + ); + } else if !IDENTIFIER_RE.is_match(x) { + diagnostic!( + y, + Info, + LinkDiscouragedName, + "it is recommended for {construct} names to case-insensitively \ + match [a-z_][a-z0-9_]* for maximum compatibility" + ); + } else if x.contains('.') { + diagnostic!( + y, + Info, + LinkDiscouragedName, + "using periods within a {construct} name is not explicitly \ + illegal, but probably not a good idea, as they are also used as \ + namespace separators for dependencies" + ); + } + Ok(x.to_owned()) +} diff --git a/rs/src/parse/extensions/simple/derivations/mod.rs b/rs/src/parse/extensions/simple/derivations/mod.rs index dc6e62bc..e0d847e1 100644 --- a/rs/src/parse/extensions/simple/derivations/mod.rs +++ b/rs/src/parse/extensions/simple/derivations/mod.rs @@ -122,12 +122,7 @@ impl<'a> AnalysisContext<'a> { if let Some(def) = &type_class.definition { for slot in def.parameter_slots.iter() { if let meta::pattern::Value::Enum(Some(variants)) = &slot.pattern { - for variant in variants { - self.pattern_names.insert( - variant.to_ascii_lowercase(), - PatternObject::EnumVariant(variant.clone()), - ); - } + self.register_enum_variants(variants); } } } @@ -142,6 +137,16 @@ impl<'a> AnalysisContext<'a> { object } + /// Registers the given list of enum variants as enum identifiers. + pub fn register_enum_variants(&mut self, variants: &[String]) { + for variant in variants { + self.pattern_names.insert( + variant.to_ascii_lowercase(), + PatternObject::EnumVariant(variant.clone()), + ); + } + } + /// Resolve a type variation identifier path. 
pub fn resolve_type_variation( &mut self, @@ -1052,6 +1057,7 @@ fn analyze_pattern_misc( TypeDerivationNotSupported, "the enum set pattern is not officially supported" ); + z.register_enum_variants(&names); Ok(meta::pattern::Value::Enum(Some(names))) } PatternMiscContextAll::StrAnyContext(_) => { diff --git a/rs/src/parse/extensions/simple/function_decls.rs b/rs/src/parse/extensions/simple/functions.rs similarity index 83% rename from rs/src/parse/extensions/simple/function_decls.rs rename to rs/src/parse/extensions/simple/functions.rs index 0e6ad2c4..eb983850 100644 --- a/rs/src/parse/extensions/simple/function_decls.rs +++ b/rs/src/parse/extensions/simple/functions.rs @@ -6,13 +6,13 @@ use crate::input::yaml; use crate::output::diagnostic::Result; use crate::parse::context; -use crate::parse::extensions::simple::builder; +use crate::parse::extensions::simple::modules; /// Parse a scalar function declaration. pub fn parse_scalar_function( _x: &yaml::Value, _y: &mut context::Context, - _z: &mut builder::Builder, + _z: &mut modules::Builder, ) -> Result<()> { // TODO Ok(()) @@ -22,7 +22,7 @@ pub fn parse_scalar_function( pub fn parse_aggregate_function( _x: &yaml::Value, _y: &mut context::Context, - _z: &mut builder::Builder, + _z: &mut modules::Builder, ) -> Result<()> { // TODO Ok(()) diff --git a/rs/src/parse/extensions/simple/mod.rs b/rs/src/parse/extensions/simple/mod.rs index a2d8fa37..719ce766 100644 --- a/rs/src/parse/extensions/simple/mod.rs +++ b/rs/src/parse/extensions/simple/mod.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 -//! Module providing parse/validation functions for advanced extensions, i.e. +//! Module providing parse/validation functions for simple extensions, i.e. //! those based around YAML files. 
use crate::input::proto::substrait; @@ -10,12 +10,12 @@ use crate::output::extension::simple::module::Scope; use crate::output::type_system::data; use crate::parse::context; -mod builder; +mod common; mod derivations; -mod function_decls; -mod type_decls; -mod type_variation_decls; -mod yaml; +mod functions; +mod modules; +mod type_classes; +mod type_variations; /// Parse a user-defined name. Note that names are matched case-insensitively /// because we return the name as lowercase. @@ -51,7 +51,7 @@ fn parse_simple_extension_yaml_uri_mapping( ) -> Result<()> { // Parse the fields. let anchor = proto_primitive_field!(x, y, extension_uri_anchor, parse_anchor).1; - let yaml_data = proto_primitive_field!(x, y, uri, yaml::parse_uri) + let yaml_data = proto_primitive_field!(x, y, uri, modules::parse_uri) .1 .unwrap(); @@ -253,6 +253,11 @@ fn parse_extension_mapping_data( extension::namespace::ResolutionResult::new(reference_data) }); + // Link to YAML definitions. + resolution_result.for_each_visible_item(|item| { + link!(y, item.identifier.definition_path.clone(), "Possible YAML definition was here."); + }); + // If the specified anchor is valid, insert a mapping for it. if let Some(anchor) = anchor { if let Err((prev_data, prev_path)) = y.define_type(anchor, resolution_result) { @@ -300,6 +305,11 @@ fn parse_extension_mapping_data( extension::namespace::ResolutionResult::new(reference_data) }); + // Link to YAML definitions. + resolution_result.for_each_visible_item(|item| { + link!(y, item.identifier.definition_path.clone(), "Possible YAML definition was here."); + }); + // If the specified anchor is valid, insert a mapping for it. if let Some(anchor) = anchor { if let Err((prev_data, prev_path)) = y.define_type_variation(anchor, resolution_result) { @@ -367,6 +377,11 @@ fn parse_extension_mapping_data( extension::namespace::ResolutionResult::new(reference_data) }); + // Link to YAML definitions. 
+ resolution_result.for_each_visible_item(|item| { + link!(y, item.identifier.definition_path.clone(), "Possible YAML definition was here."); + }); + // If the specified anchor is valid, insert a mapping for it. if let Some(anchor) = anchor { if let Err((prev_data, prev_path)) = y.define_function(anchor, resolution_result) { diff --git a/rs/src/parse/extensions/simple/modules.rs b/rs/src/parse/extensions/simple/modules.rs new file mode 100644 index 00000000..93f133c4 --- /dev/null +++ b/rs/src/parse/extensions/simple/modules.rs @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module providing a builder structure to be used while parsing a simple +//! extension file. + +use crate::input::yaml; +use crate::output::diagnostic::Result; +use crate::output::extension; +use crate::output::path; +use crate::parse::context; +use crate::parse::extensions::simple::functions; +use crate::parse::extensions::simple::modules; +use crate::parse::extensions::simple::type_classes; +use crate::parse::extensions::simple::type_variations; +use crate::parse::traversal; +use crate::util; +use std::collections::HashMap; +use std::sync::Arc; + +#[derive(Clone, Debug)] +pub struct Builder { + /// Identifier for the extension. + pub identifier: extension::simple::common::Identifier, + + /// Common metadata for the extension. + pub metadata: extension::simple::common::Metadata, + + /// The URI that was actually used to resolve the module. + pub actual_uri: String, + + /// Map with references to dependencies. + pub dependencies: HashMap, + + /// Namespace used for type classes defined in this extension and its + /// dependencies. + pub type_classes: extension::simple::type_class::NamespaceDefinition, + + /// Namespace used for type variations defined in this extension and its + /// dependencies. + pub type_variations: extension::simple::type_variation::NamespaceDefinition, + + /// Namespace used for functions defined in this extension and its + /// dependencies. 
Both simple and compound names are registered. + pub function_impls: extension::simple::function::NamespaceDefinition, +} + +impl From for Builder { + fn from(identifier: extension::simple::common::Identifier) -> Self { + modules::Builder { + identifier, + metadata: Default::default(), + actual_uri: Default::default(), + dependencies: Default::default(), + type_classes: Default::default(), + type_variations: Default::default(), + function_impls: Default::default(), + } + } +} + +impl From for extension::simple::module::Definition { + fn from(builder: Builder) -> Self { + extension::simple::module::Definition { + identifier: builder.identifier, + metadata: builder.metadata, + actual_uri: builder.actual_uri, + dependencies: builder.dependencies, + type_classes: Arc::new(builder.type_classes), + type_variations: Arc::new(builder.type_variations), + function_impls: Arc::new(builder.function_impls), + } + } +} + +impl extension::simple::module::Scope for Builder { + /// Resolves a to-be-resolved reference to a type class. + fn resolve_type_class(&self, name: T) -> extension::simple::type_class::ResolutionResult + where + T: Into, + { + self.type_classes.resolve_local(name.into()) + } + + /// Resolves a to-be-resolved reference to a type variation. + fn resolve_type_variation( + &self, + name: T, + ) -> extension::simple::type_variation::ResolutionResult + where + T: Into, + { + self.type_variations.resolve_local(name.into()) + } + + /// Resolves a to-be-resolved reference to a function. + fn resolve_function(&self, name: T) -> extension::simple::function::ResolutionResult + where + T: Into, + { + self.function_impls.resolve_local(name.into()) + } +} + +/// Toplevel parse function for a simple extension YAML file. +fn parse_root( + x: &yaml::Value, + y: &mut context::Context, + uri: &str, + remapped_uri: &str, +) -> Result { + // Make an the module builder and configure metadata. 
+ let mut identifier = y.make_extension_id(); + identifier.uri = uri.to_string(); + let mut builder = modules::Builder::from(identifier); + builder.metadata.description = yaml_field!(x, y, "description", yaml_prim!(str))? + .1 + .unwrap_or_default(); + builder.actual_uri = remapped_uri.to_string(); + + // FIXME: dependencies? + yaml_repeated_field!( + x, + y, + "types", + type_classes::parse_type_class, + 0, + &mut builder + )?; + yaml_repeated_field!( + x, + y, + "type_variations", + type_variations::parse_type_variation, + 0, + &mut builder + )?; + yaml_repeated_field!( + x, + y, + "scalar_functions", + functions::parse_scalar_function, + 0, + &mut builder + )?; + // FIXME: window functions? + yaml_repeated_field!( + x, + y, + "aggregate_functions", + functions::parse_aggregate_function, + 0, + &mut builder + )?; + Ok(builder.into()) +} + +fn make_module_reference( + uri: &str, + definition: Option>, + parse_context: &mut context::Context, +) -> extension::simple::module::Reference { + Arc::new(extension::reference::Data { + name: Default::default(), + uri: extension::reference::Identifier::new(Some(uri), Some(parse_context.path_buf())), + definition, + }) +} + +/// Parse a YAML extension URI string. +pub fn parse_uri>( + x: &S, + y: &mut context::Context, +) -> Result { + // Check URI syntax. + let uri = x.as_ref(); + if let Err(e) = util::string::check_uri(uri) { + diagnostic!(y, Error, e); + } + + // See if we've parsed this URI before. If we have, don't parse it again; + // just link to the previous node. + let module = if let Some(module) = y.extension_modules().get(uri).cloned() { + if let Some(path) = module.uri.anchor_path() { + link!( + y, + path.clone(), + "Module was previously used here; not parsing again" + ); + } + make_module_reference(uri, module.definition.clone(), y) + } else { + // Load the schema for YAML extension files when this function is first + // called. 
+ static SCHEMA: once_cell::sync::Lazy = + once_cell::sync::Lazy::new(|| { + jsonschema::JSONSchema::compile( + &yaml::yaml_to_json( + serde_yaml::from_str::(include_str!( + "../../../resources/text/simple_extensions_schema.yaml" + )) + .unwrap(), + &path::Path::default(), + ) + .unwrap(), + ) + .unwrap() + }); + + // Parse the file. + let definition = traversal::parse_uri( + uri, + y, + |x, y| traversal::read_yaml(x, y, Some(&SCHEMA)), + parse_root, + ) + .1 + .map(Arc::new); + + // Create reference and insert into extension module list. + let module = make_module_reference(uri, definition, y); + y.extension_modules() + .insert(uri.to_string(), module.clone()); + module + }; + + Ok(module) +} diff --git a/rs/src/parse/extensions/simple/type_classes.rs b/rs/src/parse/extensions/simple/type_classes.rs new file mode 100644 index 00000000..ed63cf55 --- /dev/null +++ b/rs/src/parse/extensions/simple/type_classes.rs @@ -0,0 +1,403 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module providing parse/validation functions for parsing YAML type class +//! declarations. + +use itertools::Itertools; + +use crate::input::yaml; +use crate::output::diagnostic::Result; +use crate::output::extension; +use crate::output::type_system::data; +use crate::output::type_system::meta; +use crate::output::type_system::meta::Pattern; +use crate::parse::context; +use crate::parse::extensions::simple::common; +use crate::parse::extensions::simple::derivations; +use crate::parse::extensions::simple::modules; +use crate::util; +use std::collections::HashSet; +use std::fmt::Write; +use std::sync::Arc; + +/// Builder for type classes. +pub struct Builder<'a> { + /// The definition we're constructing. + pub definition: extension::simple::type_class::Definition, + + /// Context for analyzing type patterns and derivations. + pub analysis_context: derivations::AnalysisContext<'a>, +} + +/// Tries to check that the given pattern is evaluable. 
+fn check_pattern_is_evaluable(x: &meta::pattern::Value, y: &mut context::Context) {
+    if !x.can_evaluate() {
+        diagnostic!(
+            y,
+            Error,
+            TypeDerivationInvalid,
+            "pattern cannot be evaluated"
+        );
+    }
+}
+
+/// Tries to check that the given pattern might match a data type.
+fn check_pattern_is_data_type(x: &meta::pattern::Value, y: &mut context::Context) {
+    if !x.is_data_type() {
+        diagnostic!(y, Error, TypeDerivationInvalid, "expected data type");
+    }
+}
+
+/// Parse a metatype. Maps the strings `dataType`, `boolean`, `integer`,
+/// `enumeration` and `string` to the corresponding metatype; any other
+/// string yields an `IllegalValue` error.
+fn parse_metatype(x: &str, _y: &mut context::Context) -> Result<meta::Type> {
+    match x {
+        "dataType" => Ok(meta::Type::DataType),
+        "boolean" => Ok(meta::Type::Boolean),
+        "integer" => Ok(meta::Type::Integer),
+        "enumeration" => Ok(meta::Type::Enum),
+        "string" => Ok(meta::Type::String),
+        _ => Err(cause!(IllegalValue, "unknown type parameter type {x}")),
+    }
+}
+
+/// Parse the minimum/maximum value constraint. The value is passed through
+/// unchanged; an `IllegalValue` error is returned when the metatype of the
+/// parameter is neither integer nor unresolved.
+fn parse_min_max(x: &i64, _y: &mut context::Context, metatype: meta::Type) -> Result<i64> {
+    if !matches!(metatype, meta::Type::Integer | meta::Type::Unresolved) {
+        Err(cause!(
+            IllegalValue,
+            "min/max is only applicable for integer metatypes"
+        ))
+    } else {
+        Ok(*x)
+    }
+}
+
+/// Parse a parameter slot definition. Reads the `name`, `description`,
+/// `type`, `min`, `max`, `options` and `optional` fields of the given YAML
+/// object, registers any enum options with the analysis context of the
+/// builder, and returns the resulting parameter slot.
+fn parse_parameter(
+    x: &yaml::Value,
+    y: &mut context::Context,
+    z: &mut Builder,
+) -> Result<extension::simple::type_class::ParameterSlot> {
+    // Parse name. Names are optional, but if specified, an inconsistent
+    // binding is inferred such that the parameter can be used in the
+    // structure declaration.
+    let name = yaml_field!(x, y, "name", yaml_prim!(str))?
+        .1
+        .unwrap_or_default();
+
+    // Description is also optional, and only used for docs.
+    let description = yaml_field!(x, y, "description", yaml_prim!(str))?
+        .1
+        .unwrap_or_default();
+
+    // Parse parameter type. This is the only thing that's actually required.
+    let metatype = yaml_required_field!(x, y, "type", yaml_prim!(str, parse_metatype))?
+        .1
+        .unwrap_or_default();
+
+    // Parse and check integer constraint fields.
+    let int_min = yaml_field!(x, y, "min", yaml_prim!(i64, parse_min_max, metatype))?
+        .1
+        .unwrap_or(i64::MIN);
+    let int_max = yaml_field!(x, y, "max", yaml_prim!(i64, parse_min_max, metatype))?
+        .1
+        .unwrap_or(i64::MAX);
+    let int_max = if int_min > int_max {
+        diagnostic!(
+            y,
+            Error,
+            IllegalValue,
+            "minimum value cannot be greater than maximum value"
+        );
+        // Recover by collapsing the range to the single value int_min, so a
+        // well-formed integer pattern can still be constructed below.
+        int_min
+    } else {
+        if int_min == int_max {
+            diagnostic!(
+                y,
+                Info,
+                Redundant,
+                "parameter slot only matches a single value ({int_min})"
+            );
+        }
+        int_max
+    };
+
+    // Parse and check enum constraint fields.
+    let enum_options = yaml_repeated_field!(x, y, "options", yaml_prim!(str))?
+        .1
+        .into_iter()
+        .flatten()
+        .collect::<Vec<_>>();
+    if !enum_options.is_empty() {
+        if !matches!(metatype, meta::Type::Enum | meta::Type::Unresolved) {
+            diagnostic!(
+                y,
+                Error,
+                IllegalValue,
+                "enum options are only applicable for enum metatypes"
+            );
+        }
+
+        // Variant names are compared case-insensitively by normalizing them
+        // to uppercase before inserting them into the sets.
+        let mut unique_names = HashSet::new();
+        let mut repeated_names = HashSet::new();
+        for name in enum_options.iter() {
+            if !unique_names.insert(name.to_ascii_uppercase()) {
+                repeated_names.insert(name.to_ascii_uppercase());
+            }
+        }
+        if !repeated_names.is_empty() {
+            diagnostic!(
+                y,
+                Error,
+                RedundantEnumVariant,
+                "enumeration variant names should be case-insensitively unique: {}",
+                repeated_names.iter().join(", ")
+            );
+        }
+        if unique_names.len() == 1 {
+            diagnostic!(
+                y,
+                Info,
+                Redundant,
+                "parameter slot only matches a single value ({})",
+                unique_names.into_iter().next().unwrap()
+            );
+        }
+        z.analysis_context.register_enum_variants(&enum_options);
+    }
+
+    // Construct constraint pattern from the above information.
+    let pattern = match metatype {
+        meta::Type::Integer => meta::pattern::Value::Integer(int_min, int_max),
+        meta::Type::Enum => {
+            if enum_options.is_empty() {
+                meta::pattern::Value::Enum(None)
+            } else {
+                meta::pattern::Value::Enum(Some(enum_options))
+            }
+        }
+        _ => meta::pattern::Value::exactly_type(metatype),
+    };
+
+    // If the parameter has a name, also match it as an inconsistent binding.
+    // Annoying special case here... because of the way nullability works for
+    // patterns, we need a different pattern depending on whether we're
+    // matching a data type or something else. Note that we use inconsistent
+    // bindings such that the binding never imposes an unintentional
+    // constraint, for example when someone names two parameters the same way.
+    let pattern = if name.is_empty() {
+        pattern
+    } else {
+        meta::pattern::Value::Intersection(vec![
+            meta::pattern::Value::Binding(meta::pattern::Binding {
+                name: name.clone(),
+                inconsistent: true,
+                nullability: if metatype == meta::Type::DataType {
+                    Some(Arc::new(meta::pattern::Value::Boolean(None)))
+                } else {
+                    None
+                },
+            }),
+            pattern,
+        ])
+    };
+
+    // Determine whether the parameter is optional.
+    let optional = yaml_field!(x, y, "optional", yaml_prim!(bool))?
+        .1
+        .unwrap_or_default();
+
+    Ok(extension::simple::type_class::ParameterSlot {
+        name,
+        description,
+        pattern,
+        optional,
+    })
+}
+
+/// Parse the structure field of a type class.
+fn parse_structure_element( + k: &str, + x: &yaml::Value, + y: &mut context::Context, + z: &mut Builder, +) -> Result { + let pattern = if let serde_json::Value::String(x) = x { + match derivations::parse_pattern(x, y, &mut z.analysis_context) { + Ok(pattern) => pattern, + Err(e) => { + diagnostic!(y, Error, e); + meta::pattern::Value::Unresolved + } + } + } else { + diagnostic!(y, Error, YamlInvalidType, "expected string"); + meta::pattern::Value::Unresolved + }; + check_pattern_is_evaluable(&pattern, y); + check_pattern_is_data_type(&pattern, y); + Ok(meta::pattern::Parameter { + name: Some(k.to_string()), + value: Some(pattern), + }) +} + +/// Parse the structure field of a type class. +fn parse_structure(x: &yaml::Value, y: &mut context::Context, z: &mut Builder) -> Result<()> { + match &x { + serde_json::Value::String(x) => { + let program = match derivations::parse_program(x, y, &mut z.analysis_context) { + Ok(program) => program, + Err(e) => { + diagnostic!(y, Error, e); + meta::Program::default() + } + }; + check_pattern_is_evaluable(&program.expression, y); + check_pattern_is_data_type(&program.expression, y); + z.definition.structure = Some(program.expression); + z.definition + .contraints + .extend(program.statements.into_iter()); + } + serde_json::Value::Object(_) => { + let fields = yaml_object!(x, y, parse_structure_element, 0, z)? + .1 + .into_iter() + .map(|x| x.unwrap_or_default()) + .collect(); + z.definition.structure = + Some(meta::pattern::Value::DataType(meta::pattern::DataType { + class: Some(data::class::Class::Compound( + data::class::Compound::NamedStruct, + )), + nullable: Arc::new(meta::pattern::Value::Boolean(Some(false))), + variation: meta::pattern::Variation::Compatible, + parameters: Some(fields), + })); + } + _ => diagnostic!(y, Error, YamlInvalidType, "expected string or object"), + } + Ok(()) +} + +/// Parse a type class declaration. 
+pub fn parse_type_class( + x: &yaml::Value, + y: &mut context::Context, + z: &mut modules::Builder, +) -> Result<()> { + // Parse name. + let name = yaml_required_field!( + x, + y, + "name", + yaml_prim!(str, common::parse_name, "type class") + )? + .1; + + // Check uniqueness of name. + if let Some(name) = &name { + z.type_classes + .resolve_local(&name[..]) + .expect_not_yet_defined(y); + } + + // Make identifier and builder. + let mut identifier = y.make_extension_id(); + if let Some(name) = &name { + identifier.names.push(name.to_string()); + } + identifier.uri = z.identifier.uri.clone(); + let mut builder = Builder { + definition: identifier.into(), + analysis_context: derivations::AnalysisContext::new(Some(z)), + }; + + // Parse parameters. + builder.definition.parameter_slots = + yaml_repeated_field!(x, y, "parameters", parse_parameter, 0, &mut builder)? + .1 + .into_iter() + .map(|x| x.unwrap_or_default()) + .collect(); + + // Parse variadicity of parameters. + if let Some(variadic) = yaml_field!(x, y, "variadic", yaml_prim!(bool))?.1 { + if builder.definition.parameter_slots.is_empty() { + diagnostic!( + y, + Error, + IllegalValue, + "variadic must be left unspecified for type classes \ + without parameters" + ); + } else { + builder.definition.parameters_variadic = variadic; + } + } + + // Parse structure. + yaml_field!(x, y, "structure", parse_structure, &mut builder)?; + + // Describe the type class. 
+ let mut description = if builder.definition.structure.is_none() { + if builder.definition.parameter_slots.is_empty() { + "Opaque simple" + } else { + "Opaque compound" + } + } else if builder.definition.parameter_slots.is_empty() { + "Simple" + } else { + "Compound" + } + .to_string(); + write!(description, " type class declaration: ").unwrap(); + if let Some(name) = &name { + write!(description, "{name}").unwrap(); + } else { + write!(description, "!").unwrap(); + } + if !builder.definition.parameter_slots.is_empty() { + let mut parameters = builder + .definition + .parameter_slots + .iter() + .map(|slot| { + let mut description = String::new(); + if !slot.name.is_empty() { + write!( + description, + "{}: ", + util::string::as_ident_or_string(&slot.name) + ) + .unwrap(); + }; + if slot.optional { + write!(description, "opt ").unwrap(); + }; + if let meta::pattern::Value::Intersection(isec) = &slot.pattern { + write!(description, "{}", isec.last().unwrap()).unwrap(); + } else { + write!(description, "{}", slot.pattern).unwrap(); + }; + description + }) + .join(", "); + if builder.definition.parameters_variadic { + write!(parameters, "...").unwrap(); + } + write!(description, "<{parameters}>").unwrap(); + } + describe!(y, Misc, "{description}"); + + // Register the type class. + if let Some(name) = name { + z.type_classes + .define_item(name, Arc::new(builder.definition), true); + } + + Ok(()) +} diff --git a/rs/src/parse/extensions/simple/type_decls.rs b/rs/src/parse/extensions/simple/type_decls.rs deleted file mode 100644 index 29ab18f2..00000000 --- a/rs/src/parse/extensions/simple/type_decls.rs +++ /dev/null @@ -1,19 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -//! Module providing parse/validation functions for parsing YAML type -//! declarations. - -use crate::input::yaml; -use crate::output::diagnostic::Result; -use crate::parse::context; -use crate::parse::extensions::simple::builder; - -/// Parse a type declaration. 
-pub fn parse_type( - _x: &yaml::Value, - _y: &mut context::Context, - _z: &mut builder::Builder, -) -> Result<()> { - // TODO - Ok(()) -} diff --git a/rs/src/parse/extensions/simple/type_variation_decls.rs b/rs/src/parse/extensions/simple/type_variations.rs similarity index 83% rename from rs/src/parse/extensions/simple/type_variation_decls.rs rename to rs/src/parse/extensions/simple/type_variations.rs index a21a50fa..8bb77636 100644 --- a/rs/src/parse/extensions/simple/type_variation_decls.rs +++ b/rs/src/parse/extensions/simple/type_variations.rs @@ -6,13 +6,13 @@ use crate::input::yaml; use crate::output::diagnostic::Result; use crate::parse::context; -use crate::parse::extensions::simple::builder; +use crate::parse::extensions::simple::modules; /// Parse a type variation declaration. pub fn parse_type_variation( _x: &yaml::Value, _y: &mut context::Context, - _z: &mut builder::Builder, + _z: &mut modules::Builder, ) -> Result<()> { // TODO Ok(()) diff --git a/rs/src/parse/extensions/simple/yaml.rs b/rs/src/parse/extensions/simple/yaml.rs deleted file mode 100644 index dca74a38..00000000 --- a/rs/src/parse/extensions/simple/yaml.rs +++ /dev/null @@ -1,123 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -//! Module providing parse/validation functions for parsing YAML extension -//! files. - -use crate::input::yaml; -use crate::output::diagnostic::Result; -use crate::output::extension; -use crate::output::path; -use crate::parse::context; -use crate::parse::extensions::simple::builder; -use crate::parse::extensions::simple::function_decls; -use crate::parse::extensions::simple::type_decls; -use crate::parse::extensions::simple::type_variation_decls; -use crate::parse::traversal; -use crate::util; -use std::sync::Arc; - -/// Toplevel parse function for a simple extension YAML file. 
-fn parse_root( - x: &yaml::Value, - y: &mut context::Context, -) -> Result { - let mut builder = builder::Builder::default(); - yaml_repeated_field!(x, y, "types", type_decls::parse_type, 0, &mut builder)?; - yaml_repeated_field!( - x, - y, - "type_variations", - type_variation_decls::parse_type_variation, - 0, - &mut builder - )?; - yaml_repeated_field!( - x, - y, - "scalar_functions", - function_decls::parse_scalar_function, - 0, - &mut builder - )?; - yaml_repeated_field!( - x, - y, - "aggregate_functions", - function_decls::parse_aggregate_function, - 0, - &mut builder - )?; - Ok(builder.into()) -} - -fn make_module_reference( - uri: &str, - definition: Option>, - parse_context: &mut context::Context, -) -> extension::simple::module::Reference { - Arc::new(extension::reference::Data { - name: Default::default(), - uri: extension::reference::Identifier::new(Some(uri), Some(parse_context.path_buf())), - definition, - }) -} - -/// Parse a YAML extension URI string. -pub fn parse_uri>( - x: &S, - y: &mut context::Context, -) -> Result { - // Check URI syntax. - let uri = x.as_ref(); - if let Err(e) = util::string::check_uri(uri) { - diagnostic!(y, Error, e); - } - - // See if we've parsed this URI before. If we have, don't parse it again; - // just link to the previous node. - let module = if let Some(module) = y.extension_modules().get(uri).cloned() { - if let Some(path) = module.uri.anchor_path() { - link!( - y, - path.clone(), - "Module was previously used here; not parsing again" - ); - } - make_module_reference(uri, module.definition.clone(), y) - } else { - // Load the schema for YAML extension files when this function is first - // called. 
- static SCHEMA: once_cell::sync::Lazy = - once_cell::sync::Lazy::new(|| { - jsonschema::JSONSchema::compile( - &yaml::yaml_to_json( - serde_yaml::from_str::(include_str!( - "../../../resources/text/simple_extensions_schema.yaml" - )) - .unwrap(), - &path::Path::default(), - ) - .unwrap(), - ) - .unwrap() - }); - - // Parse the file. - let definition = traversal::parse_uri( - uri, - y, - |x, y| traversal::read_yaml(x, y, Some(&SCHEMA)), - parse_root, - ) - .1 - .map(Arc::new); - - // Create reference and insert into extension module list. - let module = make_module_reference(uri, definition, y); - y.extension_modules() - .insert(uri.to_string(), module.clone()); - module - }; - - Ok(module) -} diff --git a/rs/src/parse/traversal.rs b/rs/src/parse/traversal.rs index 1d1879e6..1b77e014 100644 --- a/rs/src/parse/traversal.rs +++ b/rs/src/parse/traversal.rs @@ -618,7 +618,8 @@ where // YAML object handling //============================================================================= -/// Convenience/shorthand macro for parsing optional YAML fields. +/// Convenience/shorthand macro for parsing optional YAML fields. The input +/// must be an object, or this will fail. macro_rules! yaml_field { ($input:expr, $context:expr, $field:expr) => { yaml_field!($input, $context, $field, |_, _| Ok(())) @@ -666,7 +667,8 @@ where } } -/// Convenience/shorthand macro for parsing required YAML fields. +/// Convenience/shorthand macro for parsing required YAML fields. The input +/// must be an object, or this will fail. macro_rules! yaml_required_field { ($input:expr, $context:expr, $field:expr) => { yaml_required_field!($input, $context, $field, |_, _| Ok(())) @@ -707,11 +709,100 @@ where } } +/// Convenience/shorthand macro for parsing a YAML object as if it were an +/// array. The input must be an object or this will fail, but the object may +/// by default be empty. Use [yaml_required_object!] or set a minimum size to +/// override. 
The parser function will have the nonstandard argument list
+/// (k, x, y, ...), where k is the string key and x is the value. Fields will
+/// be parsed in the order in which they are specified in the YAML file.
+macro_rules! yaml_object {
+    // No parser given: accept every field without inspecting it.
+    ($input:expr, $context:expr) => {
+        yaml_object!($input, $context, |_, _, _| Ok(()))
+    };
+    // No minimum size given: the object may be empty.
+    ($input:expr, $context:expr, $parser:expr) => {
+        yaml_object!($input, $context, $parser, 0)
+    };
+    ($input:expr, $context:expr, $parser:expr, $min_size:expr) => {
+        crate::parse::traversal::push_yaml_object($input, $context, $min_size, false, $parser)
+    };
+    // Extra arguments are forwarded to the parser after (k, x, y).
+    ($input:expr, $context:expr, $parser:expr, $min_size:expr, $($args:expr),*) => {
+        yaml_object!($input, $context, |k, x, y| $parser(k, x, y, $($args),*), $min_size)
+    };
+}
+
+/// Like [yaml_object!], but requires the object to have at least one element.
+macro_rules! yaml_required_object {
+    ($input:expr, $context:expr) => {
+        yaml_required_object!($input, $context, |_, _, _| Ok(()))
+    };
+    ($input:expr, $context:expr, $parser:expr) => {
+        yaml_object!($input, $context, $parser, 1)
+    };
+    // Extra arguments are forwarded to the parser after (k, x, y).
+    ($input:expr, $context:expr, $parser:expr, $($args:expr),*) => {
+        yaml_object!($input, $context, $parser, 1, $($args),*)
+    };
+}
+
+/// Parse and push all fields in a YAML object.
+pub fn push_yaml_object( + input: &yaml::Value, + context: &mut context::Context, + min_size: usize, + unknown_subtree: bool, + mut parser: FP, +) -> diagnostic::Result> +where + FP: FnMut(&str, &yaml::Value, &mut context::Context) -> diagnostic::Result, +{ + if let serde_json::Value::Object(input) = input { + if input.len() < min_size { + if min_size == 1 { + diagnostic!( + context, + Error, + YamlMissingKey, + "at least one field must be specified" + ); + } else { + diagnostic!( + context, + Error, + YamlMissingKey, + "at least {min_size} fields must be specified" + ); + } + } + Ok(input + .iter() + .map(|(key, value)| { + if !context.set_field_parsed(key) { + panic!("field {key} was parsed multiple times"); + } + + push_child( + context, + value, + path::PathElement::Field(key.clone()), + unknown_subtree, + |x, y| parser(key, x, y), + ) + }) + .unzip()) + } else { + Err(cause!(YamlInvalidType, "object expected")) + } +} + //============================================================================= // YAML array handling //============================================================================= /// Convenience/shorthand macro for parsing a YAML array that may be empty. +/// This differs from [yaml_repeated_field!] in that no field name is inserted +/// in the node paths. This is useful when the input to a parser function is +/// an array with no further context, the array having been pushed as a single +/// child earlier. This, in turn, is necessary when the field may be something +/// other than an array, too. macro_rules! yaml_array { ($input:expr, $context:expr) => { yaml_array!($input, $context, $field, |_, _| Ok(())) @@ -727,8 +818,7 @@ macro_rules! yaml_array { }; } -/// Convenience/shorthand macro for parsing a YAML array that must have at -/// least one value. +/// Like [yaml_array!], but requiring that at least one entry exists. macro_rules! 
yaml_required_array { ($input:expr, $context:expr) => { yaml_required_array!($input, $context, |_, _| Ok(())) @@ -821,7 +911,15 @@ where } } -/// Shorthand for fields that must be arrays if specified. +/// Shorthand for fields that must be arrays if specified. The input must be +/// an object. This mimics the protobuf repeated field logic and tree +/// structure, but will fail if the field is set to something other than an +/// array. It will also fail if the input is not an object. This macro allows +/// the field to not be specified; use [yaml_required_repeated_field!] if it +/// must be. By default, it also allows the array to be empty if specified, +/// but a minimum size can be specified. If the field is supposed to be +/// variadic, use [yaml_field!] instead, use the parse function to distinguish +/// between YAML types, and (if array) use [yaml_array!]. macro_rules! yaml_repeated_field { ($input:expr, $context:expr, $field:expr) => { yaml_repeated_field!($input, $context, $field, |_, _| Ok(())) @@ -839,7 +937,8 @@ macro_rules! yaml_repeated_field { }; } -/// Shorthand for fields that must be arrays. +/// Like [yaml_repeated_field!], but fails if the array is empty or if the +/// field does not exist. macro_rules! yaml_required_repeated_field { ($input:expr, $context:expr, $field:expr) => { yaml_required_repeated_field!($input, $context, $field, |_, _| Ok(())) @@ -898,6 +997,9 @@ macro_rules! 
yaml_prim { ($typ:ident, $parser:expr) => { |x, y| crate::parse::traversal::yaml_primitive_parsers::$typ(x, y, $parser) }; + ($typ:ident, $parser:expr, $($args:expr),*) => { + yaml_prim!($typ, |x, y| $parser(x, y, $($args),*)) + }; } pub mod yaml_primitive_parsers { @@ -915,7 +1017,7 @@ pub mod yaml_primitive_parsers { if let serde_json::Value::Bool(x) = x { parser(x, y) } else { - Err(cause!(YamlInvalidType, "string expected")) + Err(cause!(YamlInvalidType, "boolean expected")) } } @@ -992,7 +1094,7 @@ pub mod yaml_primitive_parsers { //============================================================================= /// Worker for resolve_uri(). -fn resolve_uri(uri: &str, context: &mut context::Context) -> Option { +fn resolve_uri(uri: &str, context: &mut context::Context) -> Option<(String, config::BinaryData)> { // Check for cyclic dependencies. let uri_stack = context.uri_stack(); if let Some((index, _)) = uri_stack.iter().enumerate().find(|(_, x)| &x[..] == uri) { @@ -1054,7 +1156,7 @@ fn resolve_uri(uri: &str, context: &mut context::Context) -> Option Some(x), + Ok(x) => Some((remapped_uri, x)), Err(e) => { diagnostic!(context, Warning, YamlResolutionFailed, "{e}"); None @@ -1129,7 +1231,7 @@ fn resolve_uri(uri: &str, context: &mut context::Context) -> Option Some(Box::new(data)), + Ok(data) => Some((remapped_uri, Box::new(data))), Err(e) => { if is_remapped { diagnostic!( @@ -1151,7 +1253,9 @@ fn resolve_uri(uri: &str, context: &mut context::Context) -> Option( uri: &str, context: &mut context::Context, @@ -1161,10 +1265,10 @@ pub fn parse_uri( where TF: InputNode, FR: FnOnce(config::BinaryData, &mut context::Context) -> Option, - FP: FnOnce(&TF, &mut context::Context) -> diagnostic::Result, + FP: FnOnce(&TF, &mut context::Context, &str, &str) -> diagnostic::Result, { // Try resolving the URI. - if let Some(data) = resolve_uri(uri, context) { + if let Some((remapped_uri, data)) = resolve_uri(uri, context) { // Parse the flat file to a traversable tree. 
if let Some(root) = reader(data, context) { // Update recursion stack. @@ -1177,7 +1281,7 @@ where &root, path::PathElement::Field("data".to_string()), false, - parser, + |x, y| parser(x, y, uri, &remapped_uri), ); // Revert recursion stack update. diff --git a/rs/src/parse/types.rs b/rs/src/parse/types.rs index e75c89ab..c51c4a45 100644 --- a/rs/src/parse/types.rs +++ b/rs/src/parse/types.rs @@ -5,7 +5,6 @@ use std::sync::Arc; use crate::input::proto::substrait; -use crate::output::comment; use crate::output::diagnostic; use crate::output::type_system::data; use crate::output::type_system::data::class::ParameterInfo; @@ -875,21 +874,11 @@ fn describe_type(y: &mut context::Context, data_type: &data::Type) { data::Class::UserDefined(u) => { summary!(y, "Extension type {u}."); if let Some(x) = &u.definition { - y.push_summary( - comment::Comment::new() - .plain("Internal structure corresponds to:") - .lo(), - ); - let mut first = true; - for (name, class) in &x.structure { - if first { - first = false; - } else { - y.push_summary(comment::Comment::new().li()); - } - summary!(y, "{}: {}", util::string::as_ident_or_string(name), class); + if let Some(structure) = &x.structure { + summary!(y, "Internal structure corresponds to {structure:#}"); + } else { + summary!(y, "This type is opaque."); } - y.push_summary(comment::Comment::new().lc()); } format!("Extension type {}", u.name) } diff --git a/rs/src/util/string.rs b/rs/src/util/string.rs index dc69c905..e1bb2d1d 100644 --- a/rs/src/util/string.rs +++ b/rs/src/util/string.rs @@ -341,6 +341,7 @@ fn describe_sequence_all( values: &[T], offset: usize, el_limit: Limit, + separator: &str, repr: &F, ) -> std::fmt::Result where @@ -351,7 +352,7 @@ where if first { first = false; } else { - write!(f, ", ")?; + write!(f, "{separator}")?; } repr(f, value, index + offset, el_limit)?; } @@ -369,7 +370,7 @@ pub fn describe_sequence( where F: Fn(&mut std::fmt::Formatter<'_>, &T, usize, Limit) -> std::fmt::Result, { - 
describe_sequence_with_sep(f, values, limit, element_size, ',', repr) + describe_sequence_with_sep(f, values, limit, element_size, ", ", repr) } /// Represent the given sequence with heuristic length limits. @@ -378,24 +379,24 @@ pub fn describe_sequence_with_sep( values: &[T], limit: Limit, element_size: usize, - separator: char, + separator: &str, repr: F, ) -> std::fmt::Result where F: Fn(&mut std::fmt::Formatter<'_>, &T, usize, Limit) -> std::fmt::Result, { let (n_left, n_right, el_limit) = limit.split_n(values.len(), element_size); - describe_sequence_all(f, &values[..n_left], 0, el_limit, &repr)?; + describe_sequence_all(f, &values[..n_left], 0, el_limit, separator, &repr)?; if let Some(n_right) = n_right { if n_left > 0 { - write!(f, "{separator} ")?; + write!(f, "{separator}")?; } write!(f, "..")?; if n_right > 0 { - write!(f, "{separator} ")?; + write!(f, "{separator}")?; } let offset = values.len() - n_right; - describe_sequence_all(f, &values[offset..], offset, el_limit, &repr)?; + describe_sequence_all(f, &values[offset..], offset, el_limit, separator, &repr)?; } Ok(()) } diff --git a/tests/tests/extensions/simple/type_class/core_yamls.yaml b/tests/tests/extensions/simple/type_class/core_yamls.yaml new file mode 100644 index 00000000..7e91cb0e --- /dev/null +++ b/tests/tests/extensions/simple/type_class/core_yamls.yaml @@ -0,0 +1,35 @@ +name: core-type-extensions +plan: + __test: + - level: i + extensionUris: + - extensionUriAnchor: 1 + uri: /extension_types.yaml + extensions: + - extensionType: + extensionUriReference: 1 + typeAnchor: 1 + name: point + - extensionType: + extensionUriReference: 1 + typeAnchor: 2 + name: line + relations: + - rel: + read: + common: + direct: {} + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - user_defined: + typeReference: 1 + nullability: NULLABILITY_REQUIRED + - user_defined: + typeReference: 2 + nullability: NULLABILITY_REQUIRED + namedTable: + names: ["test"] + __test: [ type: 
"NSTRUCT" ]