diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs
index 4ec15ac..6b3d52c 100644
--- a/lexer/src/lib.rs
+++ b/lexer/src/lib.rs
@@ -101,6 +101,82 @@ pub enum Token<'a> {
     #[token("function", accept_expression)]
     #[token("func", |lex| parse_non_posix_keyword(lex, Token::Function))]
     Function,
+    #[token("length", accept_expression)]
+    Length,
+    #[token("substr", accept_expression)]
+    Substr,
+    #[token("split", accept_expression)]
+    Split,
+    #[token("sub", accept_expression)]
+    Sub,
+    #[token("gsub", accept_expression)]
+    Gsub,
+    #[token("match", accept_expression)]
+    MatchFn,
+    #[token("index", accept_expression)]
+    Index,
+    #[token("sprintf", accept_expression)]
+    Sprintf,
+    #[token("toupper", accept_expression)]
+    Toupper,
+    #[token("tolower", accept_expression)]
+    Tolower,
+    #[token("gensub", |lex| parse_non_posix_keyword(lex, Token::Gensub))]
+    Gensub,
+    #[token("patsplit", |lex| parse_non_posix_keyword(lex, Token::Patsplit))]
+    Patsplit,
+    #[token("strtonum", |lex| parse_non_posix_keyword(lex, Token::Strtonum))]
+    Strtonum,
+    #[token("close", accept_expression)]
+    Close,
+    #[token("fflush", accept_expression)]
+    Fflush,
+    #[token("system", accept_expression)]
+    System,
+    #[token("int", accept_expression)]
+    Int,
+    #[token("sqrt", accept_expression)]
+    Sqrt,
+    #[token("exp", accept_expression)]
+    Exp,
+    #[token("log", accept_expression)]
+    Log,
+    #[token("sin", accept_expression)]
+    Sin,
+    #[token("cos", accept_expression)]
+    Cos,
+    #[token("atan2", accept_expression)]
+    Atan2,
+    #[token("rand", accept_expression)]
+    Rand,
+    #[token("srand", accept_expression)]
+    Srand,
+    #[token("systime", |lex| parse_non_posix_keyword(lex, Token::Systime))]
+    Systime,
+    #[token("mktime", |lex| parse_non_posix_keyword(lex, Token::Mktime))]
+    Mktime,
+    #[token("strftime", |lex| parse_non_posix_keyword(lex, Token::Strftime))]
+    Strftime,
+    #[token("typeof", |lex| parse_non_posix_keyword(lex, Token::Typeof))]
+    Typeof,
+    #[token("isarray", |lex| parse_non_posix_keyword(lex, Token::Isarray))]
+    Isarray,
+    #[token("asort", |lex| parse_non_posix_keyword(lex, Token::Asort))]
+    Asort,
+    #[token("asorti", |lex| parse_non_posix_keyword(lex, Token::Asorti))]
+    Asorti,
+    #[token("and", |lex| parse_non_posix_keyword(lex, Token::And))]
+    And,
+    #[token("or", |lex| parse_non_posix_keyword(lex, Token::Or))]
+    Or,
+    #[token("xor", |lex| parse_non_posix_keyword(lex, Token::Xor))]
+    Xor,
+    #[token("compl", |lex| parse_non_posix_keyword(lex, Token::Compl))]
+    Compl,
+    #[token("lshift", |lex| parse_non_posix_keyword(lex, Token::Lshift))]
+    Lshift,
+    #[token("rshift", |lex| parse_non_posix_keyword(lex, Token::Rshift))]
+    Rshift,
     #[token("NR", accept_expression)]
     NrVariable,
     #[token("NF", accept_expression)]
diff --git a/parser/src/ast.rs b/parser/src/ast.rs
index 673a9f3..81a9ee3 100644
--- a/parser/src/ast.rs
+++ b/parser/src/ast.rs
@@ -96,6 +96,7 @@ pub type Pattern<'a> = Either, SpecialPattern>;
 pub enum ExprNode<'a> {
     FunctionCall(Identifier<'a>, Vec<'a, Expr<'a>>),
     IndirectCall(Variable<'a>, Vec<'a, Expr<'a>>),
+    BuiltinCall(BuiltinFunction, Vec<'a, Expr<'a>>),
     UnaryOperation(UnaryOperator, Expr<'a>),
     BinaryOperation(BinaryOperator, Expr<'a>, Expr<'a>),
     UnaryPlaceOperation(UnaryPlaceOperator, Place<'a>),
@@ -252,6 +253,51 @@ pub struct Function<'a> {
     pub body: Body<'a>,
 }
 
+/// A built-in function recognised by keyword; the callee of an
+/// [`ExprNode::BuiltinCall`].
+#[derive(Debug, Clone, Copy)]
+#[repr(u8)]
+pub enum BuiltinFunction {
+    Length,
+    Substr,
+    Split,
+    Sub,
+    Gsub,
+    Match,
+    Index,
+    Sprintf,
+    Toupper,
+    Tolower,
+    Gensub,
+    Patsplit,
+    Strtonum,
+    Close,
+    Fflush,
+    System,
+    Int,
+    Sqrt,
+    Exp,
+    Log,
+    Sin,
+    Cos,
+    Atan2,
+    Rand,
+    Srand,
+    Systime,
+    Mktime,
+    Strftime,
+    Typeof,
+    Isarray,
+    Asort,
+    Asorti,
+    And,
+    Or,
+    Xor,
+    Compl,
+    Lshift,
+    Rshift,
+}
+
 #[derive(Debug, Clone, Copy)]
 #[repr(u8)]
 pub enum Command {
diff --git a/parser/src/idempotency.rs b/parser/src/idempotency.rs
index 72be174..a947575 100644
--- a/parser/src/idempotency.rs
+++ b/parser/src/idempotency.rs
@@ -6,7 +6,7 @@
 use std::fmt::{Debug, Display, Formatter, Result, Write};
 
 use crate::{
-    Ast, Function,
+    Ast, BuiltinFunction, Function,
     ast::{
         ArrayOperator, Atom, BinaryOperator, BinaryPlaceOperator, BindingPower, Body, Command, Expr,
         ExprNode, Getline, Place, Redirection, Rule, RulePattern, SimpleStatement, Statement,
@@ -264,6 +264,11 @@ impl Display for ExprNode<'_> {
                 write_args(f, args, indent)?;
                 write!(f, ")")
             }
+            Self::BuiltinCall(fun, args) => {
+                write!(f, "{fun}(")?;
+                write_args(f, args, indent)?;
+                write!(f, ")")
+            }
             Self::UnaryOperation(op, x) => {
                 let bp = op.binding_power();
                 let child_w = encode(indent, bp.saturating_add(1));
@@ -424,6 +429,53 @@ impl Display for Command {
     }
 }
 
+/// Writes the builtin's name exactly as the lexer's `#[token]` keyword spells it.
+impl Display for BuiltinFunction {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        let name = match self {
+            Self::Length => "length",
+            Self::Substr => "substr",
+            Self::Split => "split",
+            Self::Sub => "sub",
+            Self::Gsub => "gsub",
+            Self::Match => "match",
+            Self::Index => "index",
+            Self::Sprintf => "sprintf",
+            Self::Toupper => "toupper",
+            Self::Tolower => "tolower",
+            Self::Gensub => "gensub",
+            Self::Patsplit => "patsplit",
+            Self::Strtonum => "strtonum",
+            Self::Close => "close",
+            Self::Fflush => "fflush",
+            Self::System => "system",
+            Self::Int => "int",
+            Self::Sqrt => "sqrt",
+            Self::Exp => "exp",
+            Self::Log => "log",
+            Self::Sin => "sin",
+            Self::Cos => "cos",
+            Self::Atan2 => "atan2",
+            Self::Rand => "rand",
+            Self::Srand => "srand",
+            Self::Systime => "systime",
+            Self::Mktime => "mktime",
+            Self::Strftime => "strftime",
+            Self::Typeof => "typeof",
+            Self::Isarray => "isarray",
+            Self::Asort => "asort",
+            Self::Asorti => "asorti",
+            Self::And => "and",
+            Self::Or => "or",
+            Self::Xor => "xor",
+            Self::Compl => "compl",
+            Self::Lshift => "lshift",
+            Self::Rshift => "rshift",
+        };
+        f.write_str(name)
+    }
+}
+
 impl Display for Redirection {
     fn fmt(&self, f: &mut Formatter<'_>) -> Result {
         match self {
diff --git a/parser/src/lex.rs b/parser/src/lex.rs
index c0c6733..7de6725 100644
--- a/parser/src/lex.rs
+++ b/parser/src/lex.rs
@@ -11,7 +11,7 @@ use lexer::{Identifier, LexingError, Slice, Span, SpannedIter, Token};
 use super::Result;
 use crate::{
     ParsingError,
-    ast::{Command, SpecialPattern},
+    ast::{BuiltinFunction, Command, SpecialPattern},
 };
 
 pub struct Lexer<'a> {
@@ -200,6 +200,8 @@ pub trait TokenExt {
     fn is_place(&self) -> bool;
     fn is_pattern_start(&self) -> bool;
     fn maps_to_command(&self) -> Option<Command>;
+    /// Returns the [`BuiltinFunction`] named by this token, or `None` for any other token.
+    fn maps_to_builtin(&self) -> Option<BuiltinFunction>;
     fn maps_to_special_pat(&self) -> Option<SpecialPattern>;
     fn is_stmnt_end(&self) -> bool;
     fn is_stmnt_or_block_end(&self) -> bool;
@@ -264,6 +266,50 @@ impl TokenExt for Token<'_> {
             _ => None,
         }
     }
+
+    fn maps_to_builtin(&self) -> Option<BuiltinFunction> {
+        match self {
+            Self::Length => Some(BuiltinFunction::Length),
+            Self::Substr => Some(BuiltinFunction::Substr),
+            Self::Split => Some(BuiltinFunction::Split),
+            Self::Sub => Some(BuiltinFunction::Sub),
+            Self::Gsub => Some(BuiltinFunction::Gsub),
+            Self::MatchFn => Some(BuiltinFunction::Match),
+            Self::Index => Some(BuiltinFunction::Index),
+            Self::Sprintf => Some(BuiltinFunction::Sprintf),
+            Self::Toupper => Some(BuiltinFunction::Toupper),
+            Self::Tolower => Some(BuiltinFunction::Tolower),
+            Self::Gensub => Some(BuiltinFunction::Gensub),
+            Self::Patsplit => Some(BuiltinFunction::Patsplit),
+            Self::Strtonum => Some(BuiltinFunction::Strtonum),
+            Self::Close => Some(BuiltinFunction::Close),
+            Self::Fflush => Some(BuiltinFunction::Fflush),
+            Self::System => Some(BuiltinFunction::System),
+            Self::Int => Some(BuiltinFunction::Int),
+            Self::Sqrt => Some(BuiltinFunction::Sqrt),
+            Self::Exp => Some(BuiltinFunction::Exp),
+            Self::Log => Some(BuiltinFunction::Log),
+            Self::Sin => Some(BuiltinFunction::Sin),
+            Self::Cos => Some(BuiltinFunction::Cos),
+            Self::Atan2 => Some(BuiltinFunction::Atan2),
+            Self::Rand => Some(BuiltinFunction::Rand),
+            Self::Srand => Some(BuiltinFunction::Srand),
+            Self::Systime => Some(BuiltinFunction::Systime),
+            Self::Mktime => Some(BuiltinFunction::Mktime),
+            Self::Strftime => Some(BuiltinFunction::Strftime),
+            Self::Typeof => Some(BuiltinFunction::Typeof),
+            Self::Isarray => Some(BuiltinFunction::Isarray),
+            Self::Asort => Some(BuiltinFunction::Asort),
+            Self::Asorti => Some(BuiltinFunction::Asorti),
+            Self::And => Some(BuiltinFunction::And),
+            Self::Or => Some(BuiltinFunction::Or),
+            Self::Xor => Some(BuiltinFunction::Xor),
+            Self::Compl => Some(BuiltinFunction::Compl),
+            Self::Lshift => Some(BuiltinFunction::Lshift),
+            Self::Rshift => Some(BuiltinFunction::Rshift),
+            _ => None,
+        }
+    }
     fn maps_to_special_pat(&self) -> Option<SpecialPattern> {
         match self {
             Self::BeginPattern => Some(SpecialPattern::Begin),
diff --git a/parser/src/lib.rs b/parser/src/lib.rs
index e2236bd..cc7f303 100644
--- a/parser/src/lib.rs
+++ b/parser/src/lib.rs
@@ -235,6 +235,10 @@ impl<'a> Parser<'a> {
                 lex.next();
                 Some(self.parse_command(lex, name))
             }
+            token if let Some(builtin) = token.maps_to_builtin() => {
+                lex.next();
+                Some(self.parse_builtin_call(lex, builtin))
+            }
             Token::Delete => {
                 lex.next();
                 Some(self.parse_delete(lex))
@@ -481,6 +485,21 @@ impl<'a> Parser<'a> {
         Ok(SimpleStatement::Command { name, args, redirection })
     }
 
+    /// Parses the call to `builtin` via `parse_function_call` and wraps the
+    /// resulting expression in a `SimpleStatement::Expression`.
+    #[tracing::instrument]
+    fn parse_builtin_call(
+        &mut self,
+        lex: &mut Lexer<'a>,
+        builtin: BuiltinFunction,
+    ) -> Result<SimpleStatement<'a>> {
+        // NOTE(review): only the call itself is parsed here; confirm that a statement which
+        // continues past the call (e.g. `length(s) > 0`) is still handled by the caller.
+        let expr =
+            self.parse_function_call(lex, |args| ExprNode::BuiltinCall(builtin, args), lex.span())?;
+        Ok(SimpleStatement::Expression(expr))
+    }
+
     /// Parses arguments to command or function calls; consumes to the end of
     /// the argument list or short-circuits with `delimiter` if empty.
     fn parse_function_args(&mut self, lex: &mut Lexer<'a>) -> Result<Vec<'a, Expr<'a>>> {
diff --git a/parser/src/pratt.rs b/parser/src/pratt.rs
index 2ca4d95..574d44c 100644
--- a/parser/src/pratt.rs
+++ b/parser/src/pratt.rs
@@ -274,6 +274,12 @@ impl<'a, 'b> Pratt<'a, 'b> {
                     |args| ExprNode::FunctionCall(name.qualify(self.parser.namespace), args),
                     lex.span(),
                 )
+            } else if let Some(builtin) = next.maps_to_builtin() {
+                self.parser.parse_function_call(
+                    lex,
+                    |args| ExprNode::BuiltinCall(builtin, args),
+                    lex.span(),
+                )
             } else if let Token::IndirectCall(name) = next {
                 // Possible gawk bug: it accepts special variables if qualified,
                 // even if it is with the `awk` namespace.
diff --git a/parser/src/sexpr.rs b/parser/src/sexpr.rs
index 4fc12aa..02e8b74 100644
--- a/parser/src/sexpr.rs
+++ b/parser/src/sexpr.rs
@@ -190,6 +190,13 @@ impl Debug for Expr<'_> {
                 }
                 write!(f, ")")
             }
+            ExprNode::BuiltinCall(builtin, args) => {
+                write!(f, "({builtin:?}")?;
+                for arg in args {
+                    write!(f, " {arg:?}")?;
+                }
+                write!(f, ")")
+            }
             ExprNode::UnaryOperation(op, a) => write!(f, "({op:?} {a:?})"),
             ExprNode::BinaryOperation(op, a, b) => write!(f, "({op:?} {a:?} {b:?})"),
             ExprNode::BinaryPlaceOperation(op, a, b) => write!(f, "({op:?} {a:?} {b:?})"),