Skip to content

Commit 6cf8a10

Browse files
authored
Merge pull request #9 from regeda/tokenizer
Tokenizer
2 parents e4f2d72 + b62b95f commit 6cf8a10

File tree

22 files changed

+1673
-2786
lines changed

22 files changed

+1673
-2786
lines changed

Makefile

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY: gen-proto gen-fbs gen-lexer test bench bench-report escape-analysis
1+
.PHONY: gen-proto gen-fbs gen-tokenz test bench bench-report escape-analysis
22

33
default: test
44

@@ -10,11 +10,11 @@ gen-fbs:
1010
@rm -f bytecode/*.go
1111
@flatc -g -o . bytecode/proto.fbs
1212

13-
gen-lexer: gen-proto
14-
@ragel -Z -G2 lexer/lexer.go.rl -o lexer/lexer.go
15-
@goimports -w lexer/lexer.go
13+
gen-tokenz: gen-proto
14+
@ragel -Z -G2 tokenz/tokenz.go.rl -o tokenz/tokenz.go
15+
@goimports -w tokenz/tokenz.go
1616

17-
test: gen-fbs gen-lexer
17+
test: gen-fbs gen-tokenz
1818
@go test -v -cover ./...
1919

2020
bench: test

README.md

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Expr – a tiny stack-based virtual machine written in Go
2+
23
[![Build Status](https://travis-ci.com/regeda/expr.svg?branch=main)](https://travis-ci.com/regeda/expr)
34
[![codecov](https://codecov.io/gh/regeda/expr/branch/main/graph/badge.svg?token=99QXNC2IAO)](https://codecov.io/gh/regeda/expr)
45
[![Go Reference](https://pkg.go.dev/badge/github.com/regeda/expr.svg)](https://pkg.go.dev/github.com/regeda/expr)
@@ -90,16 +91,34 @@ contains([1, 2, 3], 4) // false
9091
```
9192

9293
## Architecture
93-
The architecture consists of 3 components:
94-
1. Lexer
95-
2. Compiler
96-
3. Virtual Machine
9794

98-
**The lexer** generates a syntax tree parsing the input text:
95+
The architecture consists of 4 components:
96+
1. Tokenizer
97+
2. Syntax Tree Builder
98+
3. Compiler
99+
4. Virtual Machine
100+
101+
**The Tokenizer** parses the input text:
99102
```
100103
join(",", ["a", "b"])
101104
```
102-
The resulted syntax tree:
105+
and returns the following tokens:
106+
```
107+
IDENT join
108+
PUNCT (
109+
STR ","
110+
PUNCT ,
111+
PUNCT [
112+
STR "a"
113+
PUNCT ,
114+
STR "b"
115+
PUNCT ]
116+
PUNCT )
117+
```
118+
119+
> The tokenizer is implemented using [Ragel State Machine Compiler](https://www.colm.net/open-source/ragel/).
120+
121+
**The Syntax Tree Builder** generates a syntax tree from tokens:
103122
```
104123
EXIT
105124
|-- CALL(join)
@@ -109,9 +128,9 @@ EXIT
109128
|-- STR("b")
110129
```
111130

112-
> The lexer is implemented using [Ragel State Machine Compiler](https://www.colm.net/open-source/ragel/). The syntax tree is described by [Protocol Buffers 3](https://developers.google.com/protocol-buffers/) to make it easy traversable by any programming language.
131+
> A schema of the syntax tree is described by [Protocol Buffers 3](https://developers.google.com/protocol-buffers/) to make it easily traversable by any programming language.
113132
114-
**The compiler** makes a bytecode from the syntax tree to make it executable by **a stack-based virtual machine**:
133+
**The Compiler** makes a bytecode from the syntax tree to make it executable by **a stack-based virtual machine**:
115134
```
116135
PUSH_STR ","
117136
PUSH_STR "a"
@@ -159,7 +178,7 @@ if err != nil {
159178
}
160179
// `addr` contains the result, see github.com/regeda/expr/memory.Addr
161180
```
162-
> `Exec` is **not designed** to be run in concurrent environment. However, you can define a pool of executors to consume them in the safe mode.
181+
> `Exec` is **not designed** to be run in a concurrent environment. However, you can define a pool of executors to consume them in the safe mode.
163182
164183
## Benchmark
165184

asm/asm.go

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,23 @@
11
package asm
22

33
import (
4+
"github.com/regeda/expr/ast"
45
"github.com/regeda/expr/compiler"
5-
"github.com/regeda/expr/lexer"
6+
"github.com/regeda/expr/tokenz"
67
)
78

89
type ASM struct {
9-
lex *lexer.Lexer
10-
comp *compiler.Compiler
11-
}
12-
13-
func New() *ASM {
14-
return &ASM{
15-
lex: lexer.New(),
16-
comp: compiler.New(),
17-
}
10+
comp compiler.Compiler
11+
tkz tokenz.Tokenz
12+
astb ast.Builder
1813
}
1914

2015
func (a *ASM) Assemble(code []byte) ([]byte, error) {
21-
ast, err := a.lex.Parse(code)
16+
tokens, err := a.tkz.Parse(code)
17+
if err != nil {
18+
return nil, err
19+
}
20+
ast, err := a.astb.Build(tokens)
2221
if err != nil {
2322
return nil, err
2423
}

ast/ast.pb.go

Lines changed: 31 additions & 27 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ast/ast.proto

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,13 @@ package ast;
44

55
message Node {
66
enum Token {
7-
EXIT = 0;
8-
CALL = 1;
9-
STR = 2;
10-
INT = 3;
11-
BOOL = 4;
12-
ARR = 5;
7+
EXIT = 0;
8+
CALL = 1;
9+
STR = 2;
10+
INT = 3;
11+
BOOL = 4;
12+
ARR = 5;
13+
IDENT = 6;
1314
}
1415

1516
Token token = 1;

ast/builder.go

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
package ast
2+
3+
import (
4+
"bytes"
5+
"fmt"
6+
"strconv"
7+
8+
"github.com/pkg/errors"
9+
"github.com/regeda/expr/tokenz"
10+
)
11+
12+
// errTkPunctWrongLen is returned when a Punct token carries anything
// other than exactly one byte of data.
// Error string is lowercase per Go convention (no leading capital/"The").
var errTkPunctWrongLen = errors.New("token Punct should contain 1 byte of data")

var (
	// valueAfterPunct lists the punct bytes a value token may directly follow.
	valueAfterPunct = []byte{'[', '(', ','}
	// commaAfterPunct lists the punct bytes a comma may directly follow.
	commaAfterPunct = []byte{']', ')'}
)
20+
21+
type Builder struct {
22+
st stack
23+
}
24+
25+
func (b *Builder) Build(tokens []tokenz.Token) (*Node, error) {
26+
b.st.reset()
27+
28+
b.st.push(Exit())
29+
30+
btk := tokenz.Token{Tk: tokenz.TkNone}
31+
32+
for _, t := range tokens {
33+
switch t.Tk {
34+
case tokenz.TkInt:
35+
v, err := strconv.ParseInt(string(t.Dat), 10, 64)
36+
if err != nil {
37+
return nil, err
38+
}
39+
if !expectValueAfter(btk) {
40+
return nil, fmt.Errorf("unexpected integer after %v", btk)
41+
}
42+
b.st.nest(Int(v))
43+
case tokenz.TkStr:
44+
v, err := strconv.Unquote(string(t.Dat))
45+
if err != nil {
46+
return nil, errors.Wrapf(err, "strconv.Unquote %s", t.Dat)
47+
}
48+
if !expectValueAfter(btk) {
49+
return nil, fmt.Errorf("unexpected string after %v", btk)
50+
}
51+
b.st.nest(Str(v))
52+
case tokenz.TkIdent:
53+
if !expectValueAfter(btk) {
54+
return nil, fmt.Errorf("unexpected ident after %v", btk)
55+
}
56+
b.st.push(Ident(string(t.Dat)))
57+
case tokenz.TkTrue:
58+
if !expectValueAfter(btk) {
59+
return nil, fmt.Errorf("unexpected TRUE after %v", btk)
60+
}
61+
b.st.nest(True)
62+
case tokenz.TkFalse:
63+
if !expectValueAfter(btk) {
64+
return nil, fmt.Errorf("unexpected FALSE after %v", btk)
65+
}
66+
b.st.nest(False)
67+
case tokenz.TkPunct:
68+
if !t.DatLen(1) {
69+
return nil, errTkPunctWrongLen
70+
}
71+
switch t.Dat[0] {
72+
case '[':
73+
if !expectValueAfter(btk) {
74+
return nil, fmt.Errorf("unexpected array after %v", btk)
75+
}
76+
b.st.push(b.st.nest(Arr()))
77+
case ']':
78+
switch btk.Tk {
79+
case tokenz.TkInt, tokenz.TkStr, tokenz.TkTrue, tokenz.TkFalse, tokenz.TkPunct:
80+
default:
81+
return nil, fmt.Errorf("unexpected array closing after %v", btk)
82+
}
83+
n := b.st.pop()
84+
if n.Token != Node_ARR {
85+
return nil, fmt.Errorf("stack error: expected array, got %v", n.Token)
86+
}
87+
case '(':
88+
switch btk.Tk {
89+
case tokenz.TkIdent:
90+
n := b.st.pop()
91+
n.Token = Node_CALL
92+
b.st.push(b.st.nest(n))
93+
default:
94+
return nil, fmt.Errorf("unexpected invokation after %v", btk)
95+
}
96+
case ')':
97+
switch btk.Tk {
98+
case tokenz.TkInt, tokenz.TkStr, tokenz.TkTrue, tokenz.TkFalse, tokenz.TkPunct:
99+
default:
100+
return nil, fmt.Errorf("unexpected invokation closing after %v", btk)
101+
}
102+
n := b.st.pop()
103+
if n.Token != Node_CALL {
104+
return nil, fmt.Errorf("stack error: expected invokation, got %v", n.Token)
105+
}
106+
case ',':
107+
switch btk.Tk {
108+
case tokenz.TkInt, tokenz.TkStr, tokenz.TkTrue, tokenz.TkFalse:
109+
n := b.st.top()
110+
if n.Token != Node_CALL && n.Token != Node_ARR {
111+
return nil, fmt.Errorf("unexpected comma after %v", btk)
112+
}
113+
case tokenz.TkPunct:
114+
if bytes.IndexByte(commaAfterPunct, btk.Dat[0]) == -1 {
115+
return nil, fmt.Errorf("unexpected comma after %v", btk)
116+
}
117+
default:
118+
return nil, fmt.Errorf("unexpected comma after %v", btk)
119+
}
120+
default:
121+
return nil, fmt.Errorf("unexpected punct %s after %v", t.Dat, btk)
122+
}
123+
default:
124+
return nil, fmt.Errorf("unexpected token %v", t)
125+
}
126+
127+
btk = t
128+
}
129+
130+
if b.st.len() != 1 {
131+
return nil, fmt.Errorf("unexpected stack length %v", b.st.len())
132+
}
133+
134+
return b.st[0], nil
135+
}
136+
137+
func expectValueAfter(t tokenz.Token) bool {
138+
switch t.Tk {
139+
case tokenz.TkNone:
140+
return true
141+
case tokenz.TkPunct:
142+
return bytes.IndexByte(valueAfterPunct, t.Dat[0]) != -1
143+
default:
144+
return false
145+
}
146+
}

0 commit comments

Comments
 (0)