diff --git a/Cargo.lock b/Cargo.lock index d66e29d..c3ae914 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -70,6 +70,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "autofront" version = "0.0.1" @@ -77,6 +83,8 @@ dependencies = [ "annotate-snippets", "anstream", "fluent-i18n", + "macros", + "num-bigint", "unicode-ident", "unicode-normalization", "unicode-width", @@ -297,12 +305,49 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "macros" +version = "0.0.1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "memchr" version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell_polyfill" version = "1.70.2" @@ -317,9 +362,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.103" +version 
= "1.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0" dependencies = [ "unicode-ident", ] @@ -424,9 +469,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.111" +version = "2.0.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +checksum = "678faa00651c9eb72dd2020cbdf275d92eccb2400d568e419efdd64838145cb4" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 143ce52..77641e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "autofront" -version = "0.0.1" -edition = "2024" -homepage = "https://autolang.dev" -repository = "https://github.com/AutoLang-Dev/autofront" -license = "Apache-2.0" +version.workspace = true +edition.workspace = true +homepage.workspace = true +repository.workspace = true +license.workspace = true [profile.release] opt-level = 3 @@ -19,7 +19,18 @@ unicode-normalization = "0.1.25" unicode-width = "0.2.2" annotate-snippets = "0.12.10" anstream = "0.6.21" +num-bigint = "0.4.6" +macros = { path = "macros" } [patch.crates-io] annotate-snippets = { git = "https://github.com/KeqingMoe/annotate-snippets-rs.git", branch = "fix-unicode-patch" } +[workspace] +members = ["macros/"] + +[workspace.package] +version = "0.0.1" +edition = "2024" +homepage = "https://autolang.dev" +repository = "https://github.com/AutoLang-Dev/autofront" +license = "Apache-2.0" diff --git a/locale/en-US/cli.ftl b/locale/en-US/cli.ftl index bf704b7..673a510 100644 --- a/locale/en-US/cli.ftl +++ b/locale/en-US/cli.ftl @@ -15,6 +15,7 @@ cli_help = Usage: autofront [OPTIONS] version Print version info lex Lexical analysis (unstable) tt Build TokenTree (unstable) + parse Parse file (unstable) Options: -o Write output to FILE @@ -24,6 +25,7 @@ 
cli_help_help = help []: Display help message cli_help_version = version: Print version info cli_help_lex = lex : Perform lexical analysis and print the results cli_help_tt = tt : Build a TokenTree +cli_help_parse = parse : Parse file and print AST cli_file = file cli_option = option diff --git a/locale/en-US/parser.ftl b/locale/en-US/parser.ftl new file mode 100644 index 0000000..8a1c33f --- /dev/null +++ b/locale/en-US/parser.ftl @@ -0,0 +1,9 @@ +mixed_and_or = cannot mix && and || operators +chained_range = cannot chain ~ or ~= operators +chained_assign = cannot chain assignment operators +unexpected_token = unexpected token {$token} +unexpected_group = unexpected group {$delim} +unexpected_end = expected more tokens, but reached end of file or closing bracket +bad_neq = != cannot be chained or mixed with other comparison operators +bad_3way = <=> cannot be chained or mixed with other comparison operators +mixed_greater_less = < and <= cannot be mixed with > and >= diff --git a/locale/zh-CN/cli.ftl b/locale/zh-CN/cli.ftl index 3075c70..bec81a3 100644 --- a/locale/zh-CN/cli.ftl +++ b/locale/zh-CN/cli.ftl @@ -15,6 +15,7 @@ cli_help = 用法:autofront <命令> [选项] version 显示版本 lex <文件> 词法分析(不稳定) tt <文件> 构建 TokenTree(不稳定) + parse <文件> 解析文件(不稳定) 选项: -o <文件> 写入输出到文件 @@ -23,7 +24,8 @@ cli_help = 用法:autofront <命令> [选项] cli_help_help = help [<命令>]:显示帮助信息 cli_help_version = version:显示版本信息 cli_help_lex = lex <文件>:对文件进行词法分析并打印结果 -cli_help_tt = tt <文件>:构建 ToenTree +cli_help_tt = tt <文件>:构建 TokenTree +cli_help_parse = parse <文件>:解析文件并打印 AST cli_file = 文件 cli_option = 选项 diff --git a/locale/zh-CN/parser.ftl b/locale/zh-CN/parser.ftl new file mode 100644 index 0000000..26b068c --- /dev/null +++ b/locale/zh-CN/parser.ftl @@ -0,0 +1,9 @@ +mixed_and_or = 不能混合使用 && 和 || 运算符 +chained_range = 不能连用 ~ 或 ~= 运算符 +chained_assign = 不能连用赋值运算符 +unexpected_token = 意料之外的 Token {$token} +unexpected_group = 意料之外的括号 {$delim} +unexpected_end = 期待更多 Token,但已抵达文件尾或闭括号 +bad_neq = != 不能连用或与其他比较运算符混用 
+bad_3way = <=> 不能连用或与其他比较运算符混用 +mixed_greater_less = < 和 <= 不能与 > 和 >= 混用 diff --git a/macros/Cargo.toml b/macros/Cargo.toml new file mode 100644 index 0000000..9b9fdde --- /dev/null +++ b/macros/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "macros" +version.workspace = true +edition.workspace = true +license.workspace = true +publish = false + +[lib] +proc-macro = true + +[dependencies] +proc-macro2 = "1.0.104" +quote = "1.0.42" +syn = { version = "2.0.113", features = ["full", "extra-traits"] } diff --git a/macros/src/lib.rs b/macros/src/lib.rs new file mode 100644 index 0000000..3d3c9b5 --- /dev/null +++ b/macros/src/lib.rs @@ -0,0 +1,27 @@ +mod names; +mod parse; +mod print; +mod recover; +mod span; + +use proc_macro::TokenStream; + +#[proc_macro_derive(AstPrint)] +pub fn derive_ast_print(input: TokenStream) -> TokenStream { + print::derive_ast_print(input) +} + +#[proc_macro_derive(Parse, attributes(option, boxed, optbox))] +pub fn derive_parse(input: TokenStream) -> TokenStream { + parse::derive_parse(input) +} + +#[proc_macro_derive(Recover, attributes(recover))] +pub fn derive_recover(input: TokenStream) -> TokenStream { + recover::derive_recover(input) +} + +#[proc_macro_derive(Span, attributes(span))] +pub fn derive_span(input: TokenStream) -> TokenStream { + span::derive_span(input) +} diff --git a/macros/src/names.rs b/macros/src/names.rs new file mode 100644 index 0000000..0f82d62 --- /dev/null +++ b/macros/src/names.rs @@ -0,0 +1,34 @@ +#![allow(non_snake_case)] + +use proc_macro2::TokenStream; +use quote::quote; + +pub struct Names { + pub Parse: TokenStream, + pub ParseBuffer: TokenStream, + pub Result: TokenStream, + pub ParseError: TokenStream, + pub DiagSink: TokenStream, + pub AstPrint: TokenStream, + pub Spanned: TokenStream, + pub Span: TokenStream, + pub Error: TokenStream, +} + +impl Names { + pub fn new() -> Self { + let ns = quote! { crate::pipelines::parser }; + + Self { + Parse: quote! 
{ #ns::syntax::parse::Parse }, + ParseBuffer: quote! { #ns::buffer::ParseBuffer }, + Result: quote! { #ns::syntax::parse::Result }, + ParseError: quote! { #ns::syntax::parse::ParseError }, + DiagSink: quote! { crate::utils::DiagSink }, + AstPrint: quote! { #ns::print::AstPrint }, + Spanned: quote! { #ns::span::Spanned }, + Span: quote! { crate::utils::Span }, + Error: quote! { #ns::syntax::token::Error }, + } + } +} diff --git a/macros/src/parse.rs b/macros/src/parse.rs new file mode 100644 index 0000000..4ea5e90 --- /dev/null +++ b/macros/src/parse.rs @@ -0,0 +1,89 @@ +use proc_macro::TokenStream; +use quote::{format_ident, quote}; +use syn::spanned::Spanned; +use syn::{Data, DeriveInput, Fields, parse_macro_input}; + +use crate::names::Names; + +pub fn derive_parse(input: TokenStream) -> TokenStream { + let input = parse_macro_input!(input as DeriveInput); + expand_derive_parse(&input) + .unwrap_or_else(|e| e.to_compile_error()) + .into() +} + +fn expand_derive_parse(input: &DeriveInput) -> syn::Result { + let Names { + Parse, + ParseBuffer, + Result, + DiagSink, + .. + } = Names::new(); + + let name = &input.ident; + let generics = &input.generics; + + let Data::Struct(data_struct) = &input.data else { + return Err(syn::Error::new( + input.span(), + "Parse can only be derived for structs", + )); + }; + + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + + let parse = quote! { parse }; + let parse_required = quote! { parse_required }; + let get_parser = |i| { + if i == 0 { &parse } else { &parse_required } + }; + + let (parse_stmts, construction) = match &data_struct.fields { + Fields::Named(fields) => { + let field_names = fields.named.iter().map(|f| &f.ident); + let parse_stmts = field_names.clone().enumerate().map(|(i, ident)| { + let parser = get_parser(i); + quote! { let #ident = input.#parser(sink)?; } + }); + let construction = quote! { #name { #( #field_names, )* } }; + (quote! 
{ #(#parse_stmts)* }, construction) + } + + Fields::Unnamed(fields) => { + let vars: Vec<_> = (0..fields.unnamed.len()) + .map(|i| format_ident!("_{i}")) + .collect(); + + let parse_stmts = vars.iter().enumerate().map(|(i, var)| { + let parser = get_parser(i); + quote! { let #var = input.#parser(sink)?; } + }); + + ( + quote! { #(#parse_stmts)* }, + quote! { #name ( #( #vars, )* ) }, + ) + } + + Fields::Unit => (quote! {}, quote! { #name }), + }; + + Ok(quote! { + impl #impl_generics #Parse for #name #ty_generics #where_clause { + fn parse( + input: &#ParseBuffer, + sink: &mut #DiagSink + ) -> #Result<Self> { + #parse_stmts + Ok(#construction) + } + } + + /* The Option impl must carry the same generics and where-clause as the impl above: a bare #name does not name a type when the struct is generic. */ + impl #impl_generics #Parse for std::option::Option<#name #ty_generics> #where_clause { + fn parse(input: &#ParseBuffer, sink: &mut #DiagSink) -> #Result<Self> { + input.try_parse(sink) + } + } + }) +} diff --git a/macros/src/print.rs b/macros/src/print.rs new file mode 100644 index 0000000..61da350 --- /dev/null +++ b/macros/src/print.rs @@ -0,0 +1,151 @@ +use proc_macro::TokenStream; +use quote::quote; +use syn::spanned::Spanned; +use syn::{Data, DeriveInput, Fields, Index, parse_macro_input}; + +use crate::names::Names; + +pub fn derive_ast_print(input: TokenStream) -> TokenStream { + let input = parse_macro_input!(input as DeriveInput); + expand_derive_ast_print(&input) + .unwrap_or_else(|e| e.to_compile_error()) + .into() +} + +fn expand_derive_ast_print(input: &DeriveInput) -> syn::Result<proc_macro2::TokenStream> { + let name = &input.ident; + let generics = &input.generics; + + match &input.data { + Data::Struct(data_struct) => expand_struct(name, generics, data_struct), + Data::Enum(data_enum) => expand_enum(name, generics, data_enum), + _ => Err(syn::Error::new( + input.span(), + "AstPrint only supports structs and enums", + )), + } +} + +fn expand_struct( + name: &syn::Ident, + generics: &syn::Generics, + data_struct: &syn::DataStruct, +) -> syn::Result<proc_macro2::TokenStream> { + let Names { AstPrint, .. 
} = Names::new(); + + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + + let (open_token, close_token, field_prints) = match &data_struct.fields { + Fields::Named(fields) => { + let prints = fields.named.iter().map(|f| { + let fname = &f.ident; + quote! { + { + let mut buffer = String::new(); + #AstPrint::print(&self.#fname, &mut buffer)?; + let mut first = true; + for line in buffer.lines() { + if !first { + writeln!(f)?; + } + write!(f, " ")?; + if first { + write!(f, "{}: ", stringify!(#fname))?; + first = false; + } + write!(f, "{}", line)?; + } + writeln!(f, ",")?; + } + } + }); + ("{", "}", quote! { #(#prints)* }) + } + Fields::Unnamed(fields) => { + let prints = fields.unnamed.iter().enumerate().map(|(i, _field)| { + let index = Index::from(i); + quote! { + { + let mut buffer = String::new(); + #AstPrint::print(&self.#index, &mut buffer)?; + + let mut first = true; + for line in buffer.lines() { + if !first { + writeln!(f)?; + } + write!(f, " ")?; + if first { + first = false; + } + write!(f, "{}", line)?; + } + writeln!(f, ",")?; + } + } + }); + ("(", ")", quote! { #(#prints)* }) + } + Fields::Unit => { + return Ok(quote! { + impl #impl_generics #AstPrint for #name #ty_generics #where_clause { + fn print(&self, f: &mut impl std::fmt::Write) -> std::fmt::Result { + write!(f, "{} {{}}", stringify!(#name)) + } + } + }); + } + }; + + /* Name the trait through #AstPrint (from Names) so this impl tracks the same path as the unit-struct impl above if the trait ever moves. */ + Ok(quote! { + impl #impl_generics #AstPrint for #name #ty_generics #where_clause { + fn print(&self, f: &mut impl std::fmt::Write) -> std::fmt::Result { + writeln!(f, "{} {} ", stringify!(#name), #open_token)?; + #field_prints + write!(f, "{}", #close_token) + } + } + }) +} + +fn expand_enum( + name: &syn::Ident, + generics: &syn::Generics, + data_enum: &syn::DataEnum, +) -> syn::Result<proc_macro2::TokenStream> { + let Names { AstPrint, .. 
} = Names::new(); + + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + + let match_arms = data_enum.variants.iter().map(|variant| { + let vname = &variant.ident; + match &variant.fields { + Fields::Unnamed(fields) if fields.unnamed.len() == 1 => { + Ok(quote! { + #name::#vname(inner) => #AstPrint::print(inner, f), + }) + } + + Fields::Unit => { + Ok(quote! { + #name::#vname => write!(f, "{}", stringify!(#vname)), + }) + } + + _ => Err(syn::Error::new( + variant.span(), + "Enum variants must be unit or single-element tuple for AstPrint (current limitation)", + )), + } + }).collect::>>()?; + + Ok(quote! { + impl #impl_generics #AstPrint for #name #ty_generics #where_clause { + fn print(&self, f: &mut impl std::fmt::Write) -> std::fmt::Result { + match self { + #(#match_arms)* + } + } + } + }) +} diff --git a/macros/src/recover.rs b/macros/src/recover.rs new file mode 100644 index 0000000..60e28fa --- /dev/null +++ b/macros/src/recover.rs @@ -0,0 +1,229 @@ +use proc_macro::TokenStream; +use quote::quote; +use syn::{Data, DeriveInput, Fields, parse_macro_input}; + +use crate::names::Names; + +pub fn derive_recover(input: TokenStream) -> TokenStream { + let Names { + Parse, + ParseBuffer, + Result, + ParseError, + DiagSink, + Span, + Error, + .. 
+ } = Names::new(); + + let input = parse_macro_input!(input as DeriveInput); + + let mut sync_point = None; + let mut try_parse_fn = None; + let mut err_var = None; + + for attr in &input.attrs { + if attr.path().is_ident("recover") { + let result = attr.parse_args_with(|parser: syn::parse::ParseStream| { + while !parser.is_empty() { + let lookahead = parser.lookahead1(); + if lookahead.peek(syn::Ident) { + let ident: syn::Ident = parser.parse()?; + let _ = parser.parse::<syn::Token![=]>()?; + + if ident == "sync_point" { + let lit: syn::LitStr = parser.parse()?; + let tokens: proc_macro2::TokenStream = lit.value().parse().map_err(|e| { + syn::Error::new(lit.span(), format!("invalid token stream: {}", e)) + })?; + sync_point = Some(tokens); + } else if ident == "try_parse" { + let lit: syn::LitStr = parser.parse()?; + let tokens: proc_macro2::TokenStream = lit.value().parse().map_err(|e| { + syn::Error::new(lit.span(), format!("invalid token stream: {}", e)) + })?; + try_parse_fn = Some(tokens); + } else if ident == "err_var" { + let lit: syn::LitStr = parser.parse()?; + err_var = Some(lit.value()); + } else { + return Err(syn::Error::new(ident.span(), "unknown attribute")); + } + } else { + return Err(lookahead.error()); + } + + if !parser.is_empty() { + let _ = parser.parse::<syn::Token![,]>()?; + } + } + + if sync_point.is_none() || try_parse_fn.is_none() || err_var.is_none() { + return Err(syn::Error::new_spanned( + attr, + "missing required attributes: sync_point, try_parse, err_var", + )); + } + + Ok(()) + }); + + if let Err(e) = result { + return e.to_compile_error().into(); + } + } + } + + /* The completeness check above only runs when a #[recover(...)] attribute is present. If the attribute is absent all three are still None, so emit a compile error on the type name instead of panicking inside the proc macro. */ + let (Some(sync_point), Some(try_parse_fn), Some(err_var)) = (sync_point, try_parse_fn, err_var) else { + return syn::Error::new_spanned( + &input.ident, + "missing #[recover(sync_point = \"...\", try_parse = \"...\", err_var = \"...\")] attribute", + ) + .to_compile_error() + .into(); + }; + + let data = match &input.data { + Data::Enum(data) => data, + _ => { + return syn::Error::new_spanned(&input, "Recover can only be derived for enums") + .to_compile_error() + .into(); + } + }; + + 
let err_var_ident = syn::Ident::new(&err_var, proc_macro2::Span::call_site()); + let err_var_found = data.variants.iter().find(|v| v.ident == err_var_ident); + + if let Some(variant) = err_var_found { + if let Fields::Unnamed(fields) = &variant.fields { + if fields.unnamed.len() != 1 { + return syn::Error::new_spanned( + variant, + "Error variant must be a single-element tuple", + ) + .to_compile_error() + .into(); + } + + let field_type = &fields.unnamed[0].ty; + if let syn::Type::Path(type_path) = field_type { + if type_path.path.segments.last().unwrap().ident != "Error" { + return syn::Error::new_spanned(field_type, "Error variant must contain Error type") + .to_compile_error() + .into(); + } + } else { + return syn::Error::new_spanned(field_type, "Error variant must contain Error type") + .to_compile_error() + .into(); + } + } else { + return syn::Error::new_spanned(variant, "Error variant must be a tuple variant") + .to_compile_error() + .into(); + } + } else { + return syn::Error::new_spanned( + &input.ident, + format!("Error variant '{}' not found", err_var), + ) + .to_compile_error() + .into(); + } + + let ident = &input.ident; + + let expanded = quote! 
{ + impl #Parse for #ident { + fn parse(input: &#ParseBuffer, sink: &mut #DiagSink) -> #Result { + let start = input.pos(); + let result = #try_parse_fn(input, sink); + if result.is_err() { + while !input.is_empty() && !crate::peek!(#sync_point where input) { + input.advance(); + } + + let error = #Error { + span: #Span { + start, + end: input.pos(), + }, + }; + + Ok(#ident::#err_var_ident(error)) + } else { + result + } + } + } + + impl #Parse for std::boxed::Box<#ident> { + fn parse(input: &#ParseBuffer, sink: &mut #DiagSink) -> #Result { + <#ident as #Parse>::parse(input, sink).map(std::boxed::Box::new) + } + } + + impl #Parse for std::option::Option<#ident> { + fn parse(input: &#ParseBuffer, sink: &mut #DiagSink) -> #Result { + let snapshot = input.snapshot(sink); + let start = input.pos(); + let result = #try_parse_fn(input, sink); + let result = match result { + Ok(node) => Some(node), + Err(#ParseError::Never) => { + input.restore(sink, snapshot); + None + } + _ => { + while !input.is_empty() && !crate::peek!(#sync_point where input) { + input.advance(); + } + + let error = #Error { + span: #Span { + start, + end: input.pos(), + }, + }; + + Some(#ident::#err_var_ident(error)) + } + }; + + Ok(result) + } + } + + + impl #Parse for std::option::Option> { + fn parse(input: &#ParseBuffer, sink: &mut #DiagSink) -> #Result { + let snapshot = input.snapshot(sink); + let start = input.pos(); + let result = #try_parse_fn(input, sink); + let result = match result { + Ok(node) => Some(std::boxed::Box::new(node)), + + Err(#ParseError::Never) => { + input.restore(sink, snapshot); + None + } + + _ => { + while !input.is_empty() && !crate::peek!(#sync_point where input) { + input.advance(); + } + + let error = #Error { + span: #Span { + start, + end: input.pos(), + }, + }; + + Some(std::boxed::Box::new(#ident::#err_var_ident(error))) + } + }; + + Ok(result) + } + } + }; + + TokenStream::from(expanded) +} diff --git a/macros/src/span.rs b/macros/src/span.rs new file mode 
100644 index 0000000..978be3b --- /dev/null +++ b/macros/src/span.rs @@ -0,0 +1,172 @@ +use proc_macro::TokenStream; +use quote::quote; +use syn::{Data, DeriveInput, Field, Fields, Index, parse_macro_input}; + +use crate::names::Names; + +pub fn derive_span(input: TokenStream) -> TokenStream { + let Names { Spanned, Span, .. } = Names::new(); + + let input = parse_macro_input!(input as DeriveInput); + let name = &input.ident; + let generics = &input.generics; + + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + + let impl_head = quote! { impl #impl_generics #Spanned for #name #ty_generics #where_clause }; + + match &input.data { + Data::Struct(data) => match &data.fields { + Fields::Named(fields) => { + if fields.named.is_empty() { + return syn::Error::new_spanned(&data.fields, "struct must have at least one field") + .to_compile_error() + .into(); + } + + let span_fields: Vec<&Field> = fields + .named + .iter() + .filter(|f| f.attrs.iter().any(|attr| attr.path().is_ident("span"))) + .collect(); + + if span_fields.is_empty() { + let first_field = &fields.named.first().unwrap().ident; + let last_field = &fields.named.last().unwrap().ident; + + quote! { + #impl_head { + fn span(&self) -> #Span { + let first = #Spanned::span(&self.#first_field); + let last = #Spanned::span(&self.#last_field); + #Span::merge(first, last) + } + } + } + .into() + } else if span_fields.len() == 1 { + let field_ident = &span_fields[0].ident; + quote! 
{ + #impl_head { + fn span(&self) -> #Span { + #Spanned::span(&self.#field_ident) + } + } + } + .into() + } else { + syn::Error::new_spanned(&fields.named, "only one field can be marked with #[span]") + .to_compile_error() + .into() + } + } + + Fields::Unnamed(fields) => { + if fields.unnamed.is_empty() { + return syn::Error::new_spanned( + &data.fields, + "tuple struct must have at least one element", + ) + .to_compile_error() + .into(); + } + + let span_indices: Vec<usize> = fields + .unnamed + .iter() + .enumerate() + .filter(|(_, f)| f.attrs.iter().any(|attr| attr.path().is_ident("span"))) + .map(|(i, _)| i) + .collect(); + + if span_indices.is_empty() { + let first_index = Index::from(0); + let last_index = Index::from(fields.unnamed.len() - 1); + + quote! { + #impl_head { + fn span(&self) -> #Span { + let first = #Spanned::span(&self.#first_index); + let last = #Spanned::span(&self.#last_index); + #Span::merge(first, last) + } + } + } + .into() + } else if span_indices.len() == 1 { + /* quote! renders a bare usize as a suffixed literal (`0usize`), which is not a valid tuple index; syn::Index emits the unsuffixed form, as in the branch above. */ + let index = Index::from(span_indices[0]); + quote! 
{ + #impl_head { + fn span(&self) -> #Span { + #Spanned::span(&self.#index) + } + } + } + .into() + } else { + syn::Error::new_spanned(&data.fields, "only one element can be marked with #[span]") + .to_compile_error() + .into() + } + } + + Fields::Unit => syn::Error::new_spanned(&data.fields, "unit structs are not supported") + .to_compile_error() + .into(), + }, + + Data::Enum(data) => { + if data.variants.is_empty() { + return syn::Error::new_spanned(&data.variants, "enum must have at least one variant") + .to_compile_error() + .into(); + } + + for variant in &data.variants { + match &variant.fields { + Fields::Unnamed(fields) => { + if fields.unnamed.len() != 1 { + return syn::Error::new_spanned( + variant, + "enum variant must be a single-element tuple", + ) + .to_compile_error() + .into(); + } + } + + _ => { + return syn::Error::new_spanned( + variant, + "enum variant must be a single-element tuple", + ) + .to_compile_error() + .into(); + } + } + } + + let match_arms = data.variants.iter().map(|variant| { + let variant_name = &variant.ident; + quote! { + #name::#variant_name(inner) => #Spanned::span(&inner) + } + }); + + quote! 
{ + #impl_head { + fn span(&self) -> #Span { + match self { + #(#match_arms),* + } + } + } + } + .into() + } + + Data::Union(_) => syn::Error::new_spanned(&input, "union types are not supported") + .to_compile_error() + .into(), + } +} diff --git a/src/cli/cmd.rs b/src/cli/cmd.rs index c319acf..1ee437b 100644 --- a/src/cli/cmd.rs +++ b/src/cli/cmd.rs @@ -16,6 +16,7 @@ pub enum Command { Version, Lex(DebugSubcommand), Tt(DebugSubcommand), + Parse(DebugSubcommand), } #[derive(Debug, Clone)] diff --git a/src/cli/help.rs b/src/cli/help.rs index 6083850..25bd607 100644 --- a/src/cli/help.rs +++ b/src/cli/help.rs @@ -7,6 +7,7 @@ pub fn print_help(topic: Option<&str>) -> Result<(), CliError> { Some("version") => print_version_help(), Some("lex") => print_lex_help(), Some("tt") => print_tt_help(), + Some("parse") => print_parse_help(), Some(cmd) => { return Err(CliError::UnknownCommand(cmd.to_string())); } @@ -33,3 +34,7 @@ fn print_lex_help() { fn print_tt_help() { anstream::println!("{}", tr!(cli_help_tt)); } + +fn print_parse_help() { + anstream::println!("{}", tr!(cli_help_parse)); +} diff --git a/src/cli/parser.rs b/src/cli/parser.rs index 9c9e4b1..00cf3ca 100644 --- a/src/cli/parser.rs +++ b/src/cli/parser.rs @@ -59,7 +59,7 @@ pub fn parse_args(args: &[String]) -> Result { Some(topic) => { let topic = topic.clone(); match topic.as_str() { - "help" | "version" | "lex" | "tt" => Some(topic), + "help" | "version" | "lex" | "tt" | "parse" => Some(topic), _ => Err(CliError::UnknownCommand(topic))?, } } @@ -72,6 +72,7 @@ pub fn parse_args(args: &[String]) -> Result { "lex" => Ok(Command::Lex(DebugSubcommand::parse(args)?)), "tt" => Ok(Command::Tt(DebugSubcommand::parse(args)?)), + "parse" => Ok(Command::Parse(DebugSubcommand::parse(args)?)), other => Err(CliError::UnknownCommand(other.into())), } diff --git a/src/driver.rs b/src/driver.rs index 0aeeb82..594b7e6 100644 --- a/src/driver.rs +++ b/src/driver.rs @@ -13,6 +13,7 @@ use crate::{ cli::{Command, DebugSubcommand, 
parse_args, print_help}, pipelines::{ lexer::{Source, lex}, + parser::parse, tokentree::parse_token_tree, }, tr, @@ -103,6 +104,30 @@ impl Driver { Ok(()) } + fn parse(&mut self, args: DebugSubcommand) -> Result<(), Box> { + let DebugSubcommand { + file, + output, + show_recovery, + } = args; + + let mut sink = DiagSink::default(); + let src = self.load(file)?; + + let tokens = lex(&src, &mut sink); + let ts = parse_token_tree(&tokens, &mut sink); + let ast = parse(&ts, &mut sink); + + let printer = DiagPrinter::new(&src); + printer.print(sink, show_recovery)?; + + let output = output.unwrap_or_else(|| format!("{}.out", src.file)); + let out = get_out!(Some(output)); + writeln!(out, "{ast:#?}")?; + + Ok(()) + } + fn drive(mut self) -> Result<(), Box> { use Command::*; @@ -115,6 +140,7 @@ impl Driver { Version => anstream::println!("{}", env!("CARGO_PKG_VERSION")), Lex(args) => self.lex(args)?, Tt(args) => self.tt(args)?, + Parse(args) => self.parse(args)?, } Ok(()) diff --git a/src/pipelines.rs b/src/pipelines.rs index f98ddc3..c3125f0 100644 --- a/src/pipelines.rs +++ b/src/pipelines.rs @@ -1,2 +1,3 @@ pub mod lexer; +pub mod parser; pub mod tokentree; diff --git a/src/pipelines/lexer.rs b/src/pipelines/lexer.rs index c74f9a7..ca03493 100644 --- a/src/pipelines/lexer.rs +++ b/src/pipelines/lexer.rs @@ -702,7 +702,14 @@ impl<'src, 'sink> Lexer<'src, 'sink> { _ if self.first_oper() => Oper(self.lex_oper()?), _ if self.first_bytes() => Bytes(self.lex_bytes()?), _ if self.first_byte() => Byte(self.lex_byte()?), - _ if self.first_ident() => Ident(self.lex_ident()), + _ if self.first_ident() => { + let ident = self.lex_ident(); + match ident.as_str() { + "true" => Bool(true), + "false" => Bool(false), + _ => Ident(ident), + } + } _ => 'outer: { let c = self.next().unwrap(); diff --git a/src/pipelines/lexer/print.rs b/src/pipelines/lexer/print.rs index f6cd510..5d0b88d 100644 --- a/src/pipelines/lexer/print.rs +++ b/src/pipelines/lexer/print.rs @@ -155,6 +155,7 @@ impl 
Display for TokenKind { write!(f, "\"") } Int(i) => write!(f, "{}", i), + Bool(flag) => write!(f, "{}", flag), Suffix(s) => write!(f, "{}", s), Delim(delim, kind) => { let ch = match kind { @@ -182,6 +183,7 @@ impl Display for Token { Byte(_) => (206, 145, 120), Bytes(_) => (206, 145, 120), Int(_) => (181, 206, 168), + Bool(_) => (78, 201, 176), Suffix(_) => (134, 198, 145), Delim(_, _) => (212, 212, 212), Error(_) => (244, 71, 71), diff --git a/src/pipelines/lexer/token.rs b/src/pipelines/lexer/token.rs index c519cf8..a9c0ed5 100644 --- a/src/pipelines/lexer/token.rs +++ b/src/pipelines/lexer/token.rs @@ -186,6 +186,7 @@ pub enum TokenKind { Byte(u8), Bytes(Vec), Int(IntLit), + Bool(bool), Suffix(String), Delim(Delimiter, DelimKind), Error(String), @@ -194,7 +195,10 @@ pub enum TokenKind { impl TokenKind { pub fn is_literal(&self) -> bool { use TokenKind::*; - matches!(self, Char(_) | Str(_) | Byte(_) | Bytes(_) | Int(_)) + matches!( + self, + Char(_) | Str(_) | Byte(_) | Bytes(_) | Int(_) | Bool(_) + ) } } diff --git a/src/pipelines/parser.rs b/src/pipelines/parser.rs new file mode 100644 index 0000000..6e7a3ec --- /dev/null +++ b/src/pipelines/parser.rs @@ -0,0 +1,22 @@ +mod buffer; +mod errors; +mod print; +mod span; +mod syntax; + +use crate::{ + pipelines::{ + parser::{ + buffer::ParseBuffer, + syntax::{Ast, Root, token::Separated}, + }, + tokentree::TokenStream, + }, + utils::DiagSink, +}; + +pub fn parse(ts: &TokenStream, sink: &mut DiagSink) -> Ast { + let input = ParseBuffer::new(ts); + let root = input.parse(sink).unwrap_or(Root(Separated::new())); + Ast { root } +} diff --git a/src/pipelines/parser/buffer.rs b/src/pipelines/parser/buffer.rs new file mode 100644 index 0000000..c516a38 --- /dev/null +++ b/src/pipelines/parser/buffer.rs @@ -0,0 +1,203 @@ +use std::{cell::Cell, fmt::Debug}; + +use crate::{ + pipelines::{ + lexer::{Delimiter, Token, TokenKind as TK}, + parser::{ + errors::*, + syntax::parse::{Parse, ParseError, Result}, + }, + 
tokentree::{Group, GroupDelim, TokenStream, TokenTree as TT}, + }, + utils::{DiagSink, DiagSnapshot}, +}; + +#[derive(Debug, Clone, Copy)] +pub struct ParseSnapshot(usize, DiagSnapshot); + +#[derive(Debug, Clone)] +pub struct ParseBuffer<'t> { + stream: &'t TokenStream, + cursor: Cell, +} + +impl<'t> ParseBuffer<'t> { + pub fn new(stream: &'t TokenStream) -> Self { + Self { + stream, + cursor: 0.into(), + } + } + + pub fn snapshot(&self, sink: &mut DiagSink) -> ParseSnapshot { + ParseSnapshot(self.cursor.get(), sink.snapshot()) + } + + pub fn restore(&self, sink: &mut DiagSink, snapshot: ParseSnapshot) { + self.cursor.set(snapshot.0); + sink.restore(snapshot.1); + } + + pub fn pos(&self) -> usize { + match self.peek() { + Some(tt) => tt.span().start, + None => self.stream.span.end, + } + } + + pub fn is_empty(&self) -> bool { + self.cursor.get() >= self.stream.len() + } + + pub fn peek(&self) -> Option<&TT> { + self.stream.get(self.cursor.get()) + } + + pub fn peek_token(&self) -> Option<&Token> { + match self.peek()? { + TT::Token(token) => Some(token), + _ => None, + } + } + + pub fn peek_kind(&self) -> Option<&TK> { + Some(&self.peek_token()?.kind) + } + + pub fn peek_token_or_delim(&self) -> Option { + match self.peek()? 
{ + TT::Token(token) => Some(token.clone()), + TT::Delimited(group) => Some(group.token_open()), + } + } + + pub fn advance(&self) { + if !self.is_empty() { + self.cursor.update(|x| x + 1); + } + } + + pub fn parse(&self, sink: &mut DiagSink) -> Result { + T::parse(self, sink) + } + + pub fn parse_required(&self, sink: &mut DiagSink) -> Result { + self.parse_required_with(T::parse, sink) + } + + pub fn parse_required_with(&self, parser: F, sink: &mut DiagSink) -> Result + where + F: FnOnce(&Self, &mut DiagSink) -> Result, + { + match parser(self, sink) { + Err(e) => Err(e.into_fail()), + ok => ok, + } + } + + pub fn try_parse(&self, sink: &mut DiagSink) -> Result> { + self.try_parse_with(T::parse, sink) + } + + pub fn try_parse_with(&self, parser: F, sink: &mut DiagSink) -> Result> + where + F: FnOnce(&Self, &mut DiagSink) -> Result, + { + let snapshot = sink.snapshot(); + let pos = self.cursor.get(); + + let parsed = parser(self, sink); + + match parsed { + Ok(ok) => Ok(Some(ok)), + Err(ParseError::Never) => { + sink.restore(snapshot); + self.cursor.set(pos); + Ok(None) + } + Err(e) => Err(e), + } + } + + pub fn require(&self, sink: &mut DiagSink) -> Result<&TT> { + match self.peek() { + Some(required) => Ok(required), + None => { + sink.diag(UnexpectedEnd::new(self.stream.span.end)); + Err(ParseError::Never) + } + } + } + + pub fn expect_token(&self, sink: &mut DiagSink) -> Result<&Token> { + match self.require(sink)? 
{ + TT::Token(token) => Ok(token), + TT::Delimited(group) => { + let token = group.token_open(); + sink.diag(UnexpectedToken::new(token)); + Err(ParseError::Never) + } + } + } + + pub fn expect_token_of(&self, expected: TK, sink: &mut DiagSink) -> Result<&Token> { + let tok = self.expect_token(sink)?; + if tok.kind != expected { + sink.diag(UnexpectedToken::new(tok.clone())); + return Err(ParseError::Never); + } + Ok(tok) + } + + pub fn parse_delimited_with( + &'t self, + delim: Delimiter, + parser: F, + sink: &mut DiagSink, + ) -> Result<(T, &'t Group)> + where + F: FnOnce(&Self, &mut DiagSink) -> Result, + { + match self.require(sink)? { + TT::Delimited(group) => { + self.advance(); + + if matches!(group.delim, GroupDelim::Mismatch(_, _)) { + return Err(ParseError::Never); + } + + if group.delim.open() != delim { + sink.diag(UnexpectedGroup::new(group.delim, group.span.span())); + return Err(ParseError::Never); + } + + let buffer = ParseBuffer::new(&group.stream); + let result = parser(&buffer, sink)?; + + if buffer.is_empty() { + return Ok((result, group)); + } + + let token = match buffer.peek().unwrap() { + TT::Delimited(group) => group.token_open(), + TT::Token(token) => token.clone(), + }; + sink.diag(UnexpectedToken::new(token)); + Err(ParseError::Fail) + } + + TT::Token(token) => { + sink.diag(UnexpectedToken::new(token.clone())); + Err(ParseError::Never) + } + } + } + + pub fn parse_delimited( + &self, + delim: Delimiter, + sink: &mut DiagSink, + ) -> Result<(T, &Group)> { + self.parse_delimited_with(delim, T::parse, sink) + } +} diff --git a/src/pipelines/parser/errors.rs b/src/pipelines/parser/errors.rs new file mode 100644 index 0000000..540cc1e --- /dev/null +++ b/src/pipelines/parser/errors.rs @@ -0,0 +1,195 @@ +use annotate_snippets::Group; + +use crate::{ + annotation_here, + pipelines::{ + lexer::{Source, Token}, + tokentree::GroupDelim, + }, + tre, + utils::{DiagPrinter, Diagnostics, Span, error}, +}; + +#[derive(Debug, Clone)] +pub struct 
UnexpectedEnd { + pos: usize, +} + +impl UnexpectedEnd { + pub fn new(pos: usize) -> Self { + Self { pos } + } +} + +impl Diagnostics for UnexpectedEnd { + fn report<'src>(&self, src: &'src Source, sink: &mut DiagPrinter<'src>) { + let span = (self.pos..self.pos).into(); + sink.error( + Group::with_title(error().primary_title(tre!(unexpected_end))) + .element(annotation_here!(src, span)), + ); + } +} + +#[derive(Debug, Clone)] +pub struct UnexpectedToken { + token: Token, +} + +impl UnexpectedToken { + pub fn new(token: Token) -> Self { + Self { token } + } +} + +impl Diagnostics for UnexpectedToken { + fn report<'src>(&self, src: &'src Source, sink: &mut DiagPrinter<'src>) { + let token = format!("{}", self.token); + sink.error( + Group::with_title(error().primary_title(tre!(unexpected_token, token))) + .element(annotation_here!(src, self.token.span)), + ); + } +} + +#[derive(Debug, Clone)] +pub struct UnexpectedGroup { + delim: GroupDelim, + span: Span, +} + +impl UnexpectedGroup { + pub fn new(delim: GroupDelim, span: Span) -> Self { + Self { delim, span } + } +} + +impl Diagnostics for UnexpectedGroup { + fn report<'src>(&self, src: &'src Source, sink: &mut DiagPrinter<'src>) { + let delim = self.delim.char_open().to_string(); + sink.error( + Group::with_title(error().primary_title(tre!(unexpected_group, delim))) + .element(annotation_here!(src, self.span)), + ); + } +} + +#[derive(Debug, Clone)] +pub struct BadNeq { + span: Span, +} + +impl BadNeq { + pub fn new(span: Span) -> Self { + Self { span } + } +} + +impl Diagnostics for BadNeq { + fn report<'src>(&self, src: &'src Source, sink: &mut DiagPrinter<'src>) { + sink.error( + Group::with_title(error().primary_title(tre!(bad_neq))) + .element(annotation_here!(src, self.span)), + ); + } +} + +#[derive(Debug, Clone)] +pub struct Bad3Way { + span: Span, +} + +impl Bad3Way { + pub fn new(span: Span) -> Self { + Self { span } + } +} + +impl Diagnostics for Bad3Way { + fn report<'src>(&self, src: &'src Source, 
sink: &mut DiagPrinter<'src>) { + sink.error( + Group::with_title(error().primary_title(tre!(bad_3way))) + .element(annotation_here!(src, self.span)), + ); + } +} + +#[derive(Debug, Clone)] +pub struct MixedGreaterLess { + span: Span, +} + +impl MixedGreaterLess { + pub fn new(span: Span) -> Self { + Self { span } + } +} + +impl Diagnostics for MixedGreaterLess { + fn report<'src>(&self, src: &'src Source, sink: &mut DiagPrinter<'src>) { + sink.error( + Group::with_title(error().primary_title(tre!(mixed_greater_less))) + .element(annotation_here!(src, self.span)), + ); + } +} + +#[derive(Debug, Clone)] +pub struct MixedAndOr { + span: Span, +} + +impl MixedAndOr { + pub fn new(span: Span) -> Self { + Self { span } + } +} + +impl Diagnostics for MixedAndOr { + fn report<'src>(&self, src: &'src Source, sink: &mut DiagPrinter<'src>) { + sink.error( + Group::with_title(error().primary_title(tre!(mixed_and_or))) + .element(annotation_here!(src, self.span)), + ); + } +} + +#[derive(Debug, Clone)] +pub struct ChainedRange { + span: Span, +} + +impl ChainedRange { + pub fn new(span: Span) -> Self { + Self { span } + } +} + +impl Diagnostics for ChainedRange { + fn report<'src>(&self, src: &'src Source, sink: &mut DiagPrinter<'src>) { + sink.error( + Group::with_title(error().primary_title(tre!(chained_range))) + .element(annotation_here!(src, self.span)), + ); + } +} + +#[derive(Debug, Clone)] +pub struct ChainedAssign { + span: Span, +} + +impl ChainedAssign { + pub fn new(span: Span) -> Self { + Self { span } + } +} + +impl Diagnostics for ChainedAssign { + fn report<'src>(&self, src: &'src Source, sink: &mut DiagPrinter<'src>) { + sink.error( + Group::with_title(error().primary_title(tre!(chained_assign))) + .element(annotation_here!(src, self.span)), + ); + } +} diff --git a/src/pipelines/parser/print.rs b/src/pipelines/parser/print.rs new file mode 100644 index 0000000..1f375b4 --- /dev/null +++ b/src/pipelines/parser/print.rs @@ -0,0 +1,89 @@ +use std::fmt::{self, 
Debug, Formatter, Write}; + +use num_bigint::BigInt; + +use crate::{pipelines::parser::syntax::*, utils::Span}; + +pub trait AstPrint: Debug { + fn print(&self, f: &mut impl Write) -> fmt::Result { + write!(f, "{self:#?}") + } +} + +impl AstPrint for Box { + fn print(&self, f: &mut impl Write) -> fmt::Result { + AstPrint::print(self.as_ref(), f) + } +} + +impl AstPrint for Option { + fn print(&self, f: &mut impl Write) -> fmt::Result { + match self { + Some(value) => AstPrint::print(value, f), + None => write!(f, "undefined"), + } + } +} + +macro_rules! impl_ast_print_by_default { + ($ty:ty) => { + impl AstPrint for $ty { + fn print(&self, f: &mut impl Write) -> fmt::Result { + write!(f, "{self:#?}") + } + } + }; +} + +macro_rules! impl_ast_print_by_default_for_all { + {$($name:ty),* $(,)?} => { + $( + impl_ast_print_by_default!($name); + )* + }; +} + +impl_ast_print_by_default_for_all! { + char, + Span, + String, + u8, + Vec, + BigInt, +} + +macro_rules! impl_ast_print_for_inner { + ($ty:ty) => { + impl AstPrint for $ty { + fn print(&self, f: &mut impl Write) -> fmt::Result { + write!(f, "elem: ")?; + self.elem.print(f)?; + + writeln!(f, ",")?; + + write!(f, "semi_tok: ")?; + self.semi_tok.print(f)?; + + writeln!(f, ",")?; + + write!(f, "lens: ")?; + self.lens.print(f)?; + + Ok(()) + } + } + }; +} + +impl_ast_print_for_inner!(ArrayInner); +impl_ast_print_for_inner!(RepeatInner); + +impl Debug for Ast { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + if f.alternate() { + AstPrint::print(&self.root, f) + } else { + Debug::fmt(&self.root, f) + } + } +} diff --git a/src/pipelines/parser/span.rs b/src/pipelines/parser/span.rs new file mode 100644 index 0000000..303ab50 --- /dev/null +++ b/src/pipelines/parser/span.rs @@ -0,0 +1,217 @@ +use crate::{ + pipelines::{ + parser::syntax::{ + CaseArm, ExprBlock, ExprBreak, ExprCase, ExprCont, ExprFor, ExprIf, ExprLit, ExprReturn, + ExprWhile, FieldValue, FnSign, LocalDef, Param, + }, + tokentree::GroupSpan, + }, + 
utils::Span, +}; + +pub trait Spanned { + fn span(&self) -> Span; +} + +impl Spanned for Span { + fn span(&self) -> Span { + *self + } +} + +impl Spanned for GroupSpan { + fn span(&self) -> Span { + self.span() + } +} + +impl Spanned for &T { + fn span(&self) -> Span { + (*self).span() + } +} + +impl Spanned for Box { + fn span(&self) -> Span { + self.as_ref().span() + } +} + +impl Spanned for FieldValue { + fn span(&self) -> Span { + let first = self.name.span(); + match &self.init { + Some(last) => { + let last = last.span(); + Span::merge(first, last) + } + _ => first, + } + } +} + +impl Spanned for FnSign { + fn span(&self) -> Span { + let first = self.fn_tok.span(); + let last = match (&self.mut_tok, &self.params, &self.ret) { + (_, _, Some(last)) => last.span(), + (_, Some(last), _) => last.span(), + (Some(last), _, _) => last.span(), + _ => return first, + }; + Span::merge(first, last) + } +} + +impl Spanned for Param { + fn span(&self) -> Span { + let first = match &self.mut_tok { + Some(first) => first.span(), + _ => self.name.span(), + }; + let last = self.ty.span(); + Span::merge(first, last) + } +} + +impl Spanned for ExprCase { + fn span(&self) -> Span { + let first = match &self.label { + Some(first) => first.span(), + _ => self.case_tok.span(), + }; + let last = self.arms.span(); + Span::merge(first, last) + } +} + +impl Spanned for CaseArm { + fn span(&self) -> Span { + let first = match &self.label { + Some(first) => first.span(), + _ => self.cond.span(), + }; + let last = self.value.span(); + Span::merge(first, last) + } +} + +impl Spanned for ExprIf { + fn span(&self) -> Span { + let first = match &self.label { + Some(first) => first.span(), + _ => self.if_tok.span(), + }; + let last = match &self.else_branch { + Some(last) => last.span(), + _ => self.then_branch.span(), + }; + Span::merge(first, last) + } +} + +impl Spanned for ExprWhile { + fn span(&self) -> Span { + let first = match &self.label { + Some(first) => first.span(), + _ => 
self.while_tok.span(), + }; + let last = match &self.exit { + Some(last) => last.span(), + _ => self.body.span(), + }; + Span::merge(first, last) + } +} + +impl Spanned for ExprFor { + fn span(&self) -> Span { + let first = match &self.label { + Some(first) => first.span(), + _ => self.for_tok.span(), + }; + let last = match &self.exit { + Some(last) => last.span(), + _ => self.body.span(), + }; + Span::merge(first, last) + } +} + +impl Spanned for ExprReturn { + fn span(&self) -> Span { + let first = self.return_tok.span(); + match &self.value { + Some(last) => { + let last = last.span(); + Span::merge(first, last) + } + _ => first, + } + } +} + +impl Spanned for ExprBreak { + fn span(&self) -> Span { + let first = self.break_tok.span(); + let last = match (&self.label, &self.value) { + (_, Some(last)) => last.span(), + (Some(last), _) => last.span(), + _ => return first, + }; + Span::merge(first, last) + } +} + +impl Spanned for ExprCont { + fn span(&self) -> Span { + let first = self.cont_tok.span(); + let last = match (&self.label, &self.value) { + (_, Some(last)) => last.span(), + (Some(last), _) => last.span(), + _ => return first, + }; + Span::merge(first, last) + } +} + +impl Spanned for ExprBlock { + fn span(&self) -> Span { + let last = self.block.span(); + match &self.label { + Some(first) => { + let first = first.span(); + Span::merge(first, last) + } + _ => last, + } + } +} + +impl Spanned for ExprLit { + fn span(&self) -> Span { + let first = self.lit.span(); + match &self.suffix { + Some(last) => { + let last = last.span(); + Span::merge(first, last) + } + _ => first, + } + } +} + +impl Spanned for LocalDef { + fn span(&self) -> Span { + let first = match &self.mut_tok { + Some(first) => first.span(), + _ => self.name.span(), + }; + let last = match (&self.ty, &self.init) { + (_, Some(last)) => last.span(), + (Some(last), _) => last.span(), + _ => self.colon_tok.span(), + }; + Span::merge(first, last) + } +} diff --git 
a/src/pipelines/parser/syntax.rs b/src/pipelines/parser/syntax.rs new file mode 100644 index 0000000..d0de7f0 --- /dev/null +++ b/src/pipelines/parser/syntax.rs @@ -0,0 +1,509 @@ +pub mod parse; +pub mod pratt; +pub mod sync; +pub mod token; + +use std::fmt::Debug; + +use macros::{AstPrint, Parse, Recover, Span}; + +use crate::{ + Tok, parse, + pipelines::parser::syntax::{ + pratt::{Bp, Pratt}, + token::*, + }, + utils::DiagSink, +}; + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct TypeInfer(pub Tok![_]); + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct TypeIdent(pub Ident); + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct TypeFn { + pub fn_tok: Tok![fn], + pub mut_tok: Option, + pub params: TypeTuple, + pub ret: Ret, +} + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct TypeRef { + pub ref_tok: Tok![&], + pub mut_tok: Option, + pub pointee: Box, +} + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct TypePtr { + pub ptr_tok: Tok![*], + pub mut_tok: Option, + pub pointee: Box, +} + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct TypeParen(pub Tok![(Box)]); + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct TypeTuple(pub Tok![(Type,)]); + +#[derive(Debug, Clone, Parse, Span)] +pub struct ArrayInner { + pub elem: Box, + pub semi_tok: Tok![;], + pub lens: Tok![Expr,], +} + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct TypeArray(pub Tok![[ArrayInner]]); + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct TypeSlice(pub Tok![[Box]]); + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct StructField { + pub name: Ident, + pub colon_tok: Tok![:], + pub lens: Box, +} + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct TypeStruct(pub Tok![{StructField,}]); + +#[derive(Debug, Clone, AstPrint, Span, Recover)] +#[recover( + sync_point = "sync::TypeSyncPoint", + try_parse = "Type::try_parse", + err_var = "Error" +)] +pub enum Type { + Infer(TypeInfer), + 
Ident(TypeIdent), + Paren(TypeParen), + Fn(TypeFn), + Ref(TypeRef), + Ptr(TypePtr), + Tuple(TypeTuple), + Array(TypeArray), + Slice(TypeSlice), + Struct(TypeStruct), + + Error(Error), +} + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprRange { + pub lhs: Box, + pub op: RangeOp, + pub rhs: Box, +} + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct ExprTuple(pub Tok![(Expr,)]); + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct ExprArray(pub Tok![[Expr,]]); + +#[derive(Debug, Clone, Parse, Span)] +pub struct RepeatInner { + pub elem: Box, + pub semi_tok: Tok![;], + pub lens: Tok![Expr,], +} + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct ExprRepeat(pub Tok![[RepeatInner]]); + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct FieldInit { + pub colon_tok: Tok![:], + pub value: Box, +} + +#[derive(Debug, Clone, AstPrint, Parse)] +pub struct FieldValue { + pub name: Ident, + pub init: Option, +} + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct ExprStruct(pub Tok![{FieldValue,}]); + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct ExprFn { + pub sign: FnSign, + pub eq_tok: Tok![=], + pub body: FnBody, +} + +#[derive(Debug, Clone, AstPrint, Parse)] +pub struct FnSign { + pub fn_tok: Tok![fn], + pub mut_tok: Option, + pub params: Option, + pub ret: Option, +} + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct Params(Tok![(Param,)]); + +#[derive(Debug, Clone, AstPrint, Parse)] +pub struct Param { + pub mut_tok: Option, + pub name: Ident, + pub colon_tok: Tok![:], + pub ty: Box, +} + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct Ret { + pub arrow_tok: Tok![->], + pub ty: Box, +} + +#[derive(Debug, Clone, AstPrint, Span)] +pub enum FnBody { + Expr(Box), + Ffi(Ffi), + Asm(Asm), +} + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct Ffi { + extern_tok: Tok![extern], + abi: Tok![(Abi)], +} + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct Abi { + abi: 
LitStr, + comma_tok: Tok![,], + symbol: LitStr, +} + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct Asm { + extern_tok: Tok![asm], + ir: Tok![{ LitStr }], +} + +#[derive(Debug, Clone, AstPrint, Parse)] +pub struct ExprLit { + pub lit: Lit, + pub suffix: Option, +} + +#[derive(Debug, Clone, AstPrint, Span)] +pub enum Member { + Ident(Ident), + Index(LitInt), +} + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct ExprIdent(pub Ident); + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprParen(pub Tok![(Box)]); + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprField { + pub base: Box, + pub dot_tok: Tok![.], + pub member: Member, +} + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprAssign { + pub lhs: Box, + pub op: AssignOp, + pub rhs: Box, +} + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprCmp(pub Separated); + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprShift { + pub lhs: Box, + pub op: ShiftOp, + pub rhs: Box, +} + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprAdd { + pub lhs: Box, + pub op: AddOp, + pub rhs: Box, +} + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprMul { + pub lhs: Box, + pub op: MulOp, + pub rhs: Box, +} + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprLogical(pub Separated); + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprRef { + pub ref_tok: Tok![&], + pub mut_tok: Option, + pub pointee: Box, +} + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprDeref { + pub deref_tok: Tok![*], + pub pointee: Box, +} + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprPrefix { + pub op: PrefixOp, + pub operand: Box, +} + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprSuffix { + pub operand: Box, + pub op: SuffixOp, +} + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprCast { + pub operand: Box, + pub as_tok: Tok![as], + pub ty: Box, +} + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprCall { + pub callee: 
Box, + pub args: ExprTuple, +} + +#[derive(Debug, Clone, AstPrint, Span)] +pub struct ExprIndex { + pub base: Box, + pub indices: Tok![[Expr,]], +} + +#[derive(Debug, Clone, AstPrint, Parse)] +pub struct ExprCase { + pub label: Option, + pub case_tok: Tok![case], + pub arms: Tok![{CaseArm,}], +} + +#[derive(Debug, Clone, AstPrint, Parse)] +pub struct CaseArm { + pub label: Option, + pub cond: Cond, + pub eq_tok: Tok![=], + pub value: Box, +} + +#[derive(Debug, Clone, AstPrint, Parse)] +pub struct ExprIf { + pub label: Option, + pub if_tok: Tok![if], + pub cond: Box, + pub then_branch: Block, + pub else_branch: Option, +} + +#[derive(Debug, Clone, AstPrint, Parse)] +pub struct ExprWhile { + pub label: Option, + pub while_tok: Tok![while], + pub cond: Box, + pub body: Block, + pub exit: Option, +} + +#[derive(Debug, Clone, AstPrint, Parse)] +pub struct ExprFor { + pub label: Option, + pub for_tok: Tok![for], + pub cond: Box, + pub in_tok: Tok![in], + pub range: Box, + pub body: Block, + pub exit: Option, +} + +#[derive(Debug, Clone, AstPrint, Parse, Span)] +pub struct ElseBranch { + pub else_tok: Tok![else], + pub body: Block, +} + +#[derive(Debug, Clone, AstPrint, Parse)] +pub struct ExprBlock { + pub label: Option, + pub block: Block, +} + +#[derive(Debug, Clone, AstPrint)] +pub struct ExprReturn { + pub return_tok: Tok![return], + pub value: Option>, +} + +#[derive(Debug, Clone, AstPrint)] +pub struct ExprBreak { + pub break_tok: Tok![break], + pub label: Option