Skip to content

Commit c4c42a7

Browse files
committed
Implement fundamentals of AST
1 parent 97db37d commit c4c42a7

File tree

9 files changed

+485
-187
lines changed

9 files changed

+485
-187
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
target/

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[workspace]
2+
3+
members = [
4+
"script",
5+
]

design/scripting.md

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ hold any number of scripts.
1212

1313
Ballscript resembles a very simplified form of `Python`.
1414

15-
The convention for intendation is to use **tabs**.
15+
The indentation **must** be tabs. Using spaces will result in an error.
1616

1717
### Functions
1818

@@ -74,13 +74,14 @@ Integers can be defined as
7474
```ballscript
7575
232
7676
16_777_216
77-
80u8
78-
443_i16
77+
80
78+
443
7979
0xdeadbeef
80-
0b101010101_i32
80+
0b101010101
8181
```
8282

83-
By default, integers are `isize`s, but this can be overriden as shown above
83+
By default, integers use the native platform size (i.e. 64 bit on x86\_64,
84+
32 bit on x86).
8485

8586

8687
### Floating point numbers
@@ -90,12 +91,15 @@ Floating point numbers can be defined as
9091
```ballscript
9192
23.2
9293
16_777.216
93-
80f32
94-
443_f64
94+
80
95+
443
9596
0xdeadb.eef
96-
0b10101.0101_f32
97+
0b10101.0101
9798
```
9899

100+
By default, floating point numbers use the native platform size (i.e. 64 bit on x86\_64,
101+
32 bit on x86).
102+
99103

100104
### Arrays
101105

script/src/ast.rs

Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
use crate::tokenizer::*;
2+
use core::ops::BitOrAssign;
3+
4+
/// Integer type carried by `Atom::Integer`; `isize` is pointer-sized on the target platform.
type Integer = isize;
5+
/// Floating point type carried by `Atom::Real`; `f64` is 64 bits wide on every platform.
type Real = f64;
6+
7+
/// Result of parsing a whole script: its functions and its script-level variables.
///
/// All names are slices borrowed from the source text (lifetime `'src`).
#[derive(Debug)]
8+
pub struct Script<'src> {
9+
// One entry per `fn` definition that `Script::parse` encountered.
functions: Vec<Function<'src>>,
10+
// Names introduced by script-level `let` statements.
variables: Vec<&'src str>,
11+
}
12+
13+
/// A single function definition as produced by `Function::parse`.
#[derive(Debug)]
14+
struct Function<'src> {
15+
// Identifier following the `fn` keyword.
name: &'src str,
16+
// Parameter names. The visible parser never fills this in yet (parameters are `todo!()`).
parameters: Vec<&'src str>,
17+
// Statements making up the function body.
lines: Lines<'src>,
18+
}
19+
20+
/// Wrapper around the list of statements that forms a function body.
#[derive(Debug)]
21+
struct Lines<'src> {
22+
// Statements in the order they were parsed.
lines: Vec<Statement<'src>>,
23+
}
24+
25+
/// One statement inside a function body.
///
/// Only `Call` is constructed by the parser visible in this file.
#[derive(Debug)]
26+
enum Statement<'src> {
27+
// NOTE(review): presumably a local variable declaration — not constructed anywhere yet.
Declare { var: &'src str },
28+
// NOTE(review): presumably `var <assign_op> expr` — not constructed anywhere yet.
Assign { var: &'src str, assign_op: AssignOp, expr: Expression<'src> },
29+
// Call of the function named `func` with the given argument expressions.
Call { func: &'src str, args: Vec<Expression<'src>>, },
30+
}
31+
32+
/// Leaf value of an expression.
///
/// Only `String` is produced by the parser visible in this file.
#[derive(Debug)]
33+
enum Atom<'src> {
34+
// NOTE(review): presumably a variable reference; overlaps with `Expression::Name` — confirm which is intended.
Name(&'src str),
35+
// Floating point literal.
Real(Real),
36+
// Integer literal.
Integer(Integer),
37+
// String literal, borrowed from the source text.
String(&'src str),
38+
}
39+
40+
/// Expression tree node.
///
/// The parser visible in this file only ever builds `Atom(Atom::String(..))`; the other
/// variants are not constructed yet.
#[derive(Debug)]
41+
enum Expression<'src> {
42+
// A leaf value.
Atom(Atom<'src>),
43+
// NOTE(review): duplicates `Atom::Name`; presumably a variable reference — confirm.
Name(&'src str),
44+
// Binary operation `left <op> right`; operands are boxed because the type is recursive.
Operation {
45+
op: Op,
46+
left: Box<Expression<'src>>,
47+
right: Box<Expression<'src>>,
48+
},
49+
// NOTE(review): presumably a function call used as a value — confirm.
Function {
50+
name: &'src str,
51+
arguments: Vec<Expression<'src>>,
52+
},
53+
}
54+
55+
/// Parse error together with the position that was reported for it.
#[derive(Debug)]
56+
pub struct Error {
57+
// What went wrong.
error: ErrorType,
58+
// Line and column taken from the offending token's tuple. Several call sites pass `0, 0`
// when the token stream ended and no position is available.
line: usize,
59+
column: usize,
60+
}
61+
62+
/// Kinds of parse errors.
#[derive(Debug)]
63+
pub enum ErrorType {
64+
// A script-level `let` or `fn` was preceded by a space or tab on its line.
UnexpectedIndent,
65+
// A token appeared where a different one was required.
UnexpectedToken,
66+
// The token stream ended while more tokens were required.
UnexpectedEOL,
67+
// A space token was found where a function body statement should start.
UnexpectedSpace,
68+
// NOTE(review): not used anywhere in the visible code — purpose unknown.
Noop,
69+
}
70+
71+
/// Returns early from the enclosing function with an `Err` built by `Error::new`.
///
/// `$err` is the name of an `ErrorType` variant; `$line` and `$column` locate the offending
/// token. The expansion has no trailing semicolon on purpose: the macro is used both as a
/// statement (`err!(..);`) and in expression position (e.g. as a `match` arm), and a trailing
/// semicolon in expression position trips the `semicolon_in_expressions_from_macros` lint.
macro_rules! err {
    ($err:ident, $line:expr, $column:expr) => {
        return Error::new(ErrorType::$err, $line, $column)
    };
}
76+
77+
fn skip_whitespace<'s>(iter: &mut impl Iterator<Item = (Token<'s>, usize, usize)>) -> Option<(Token<'s>, usize, usize)> {
78+
while let Some((tk, line, column)) = iter.next() {
79+
if tk != Token::Space && tk != Token::Tab {
80+
return Some((tk, line, column));
81+
}
82+
}
83+
None
84+
}
85+
86+
impl<'src> Script<'src> {
87+
pub fn parse(tokens: TokenStream<'src>) -> Result<Self, Error> {
88+
let mut functions = Vec::new();
89+
let mut variables = Vec::new();
90+
let mut iter = tokens.iter();
91+
let mut indent = false;
92+
while let Some((tk, line, column)) = iter.next() {
93+
match tk {
94+
Token::EOL => indent = false,
95+
Token::Space | Token::Tab => indent = true,
96+
Token::Let => {
97+
if indent {
98+
err!(UnexpectedIndent, line, column);
99+
} else {
100+
let name = loop {
101+
match skip_whitespace(&mut iter) {
102+
Some((Token::Name(s), _, _)) => break s,
103+
Some((_, l, c)) => err!(UnexpectedToken, l, c),
104+
None => err!(UnexpectedEOL, line, column),
105+
}
106+
};
107+
variables.push(name);
108+
}
109+
}
110+
Token::Fn => {
111+
if indent {
112+
err!(UnexpectedIndent, line, column);
113+
} else {
114+
match Function::parse(&mut iter) {
115+
Ok(f) => functions.push(f),
116+
Err(f) => return Err(f),
117+
}
118+
}
119+
}
120+
_ => err!(UnexpectedToken, line, column),
121+
}
122+
}
123+
Ok(Self {
124+
functions,
125+
variables,
126+
})
127+
}
128+
}
129+
130+
impl<'src> Function<'src> {
131+
fn parse(tokens: &mut impl Iterator<Item = (Token<'src>, usize, usize)>) -> Result<Self, Error> {
132+
let name = match skip_whitespace(tokens) {
133+
Some((Token::Name(name), _, _)) => name,
134+
Some((_, l, c)) => err!(UnexpectedToken, l, c),
135+
None => err!(UnexpectedEOL, 0, 0),
136+
};
137+
match skip_whitespace(tokens) {
138+
Some((Token::BracketRoundOpen, _, _)) => (),
139+
Some((_, l, c)) => err!(UnexpectedToken, l, c),
140+
None => err!(UnexpectedEOL, 0, 0),
141+
}
142+
143+
let parameters = Vec::new();
144+
loop {
145+
match skip_whitespace(tokens) {
146+
Some((Token::BracketRoundClose, _, _)) => break,
147+
_ => todo!(),
148+
}
149+
}
150+
151+
match skip_whitespace(tokens) {
152+
Some((Token::EOL, _, _)) => (),
153+
Some((_, l, c)) => err!(UnexpectedToken, l, c),
154+
None => err!(UnexpectedEOL, 0, 0),
155+
}
156+
let mut tab_count = 0;
157+
let (mut tk, mut line, mut column) = loop {
158+
match tokens.next() {
159+
Some((Token::Tab, _, _)) => tab_count += 1,
160+
Some(e) => break e,
161+
None => err!(UnexpectedEOL, 0, 0),
162+
}
163+
};
164+
let tab_count = tab_count;
165+
166+
let mut lines = Vec::new();
167+
let mut curr_tabs = 0;
168+
loop {
169+
match tk {
170+
Token::Space => err!(UnexpectedSpace, line, column),
171+
Token::Tab => curr_tabs += 1,
172+
Token::EOL => curr_tabs = 0,
173+
Token::Name(name) => {
174+
let mut args = Vec::new();
175+
match skip_whitespace(tokens) {
176+
Some((Token::EOL, ..)) => break,
177+
Some((Token::BracketRoundOpen, l, c)) => {
178+
match skip_whitespace(tokens) {
179+
Some((Token::BracketRoundClose, ..)) => (),
180+
Some((pre, ..)) => {
181+
loop {
182+
let (expr, last_tk) = Expression::parse(pre, tokens)?;
183+
args.push(expr);
184+
match last_tk {
185+
Token::Comma => (),
186+
Token::BracketRoundClose => break,
187+
tk => panic!("Expression did not parse all tokens: {:?}", tk),
188+
}
189+
}
190+
lines.push(Statement::Call { func: name, args });
191+
}
192+
None => err!(UnexpectedEOL, l, c),
193+
}
194+
}
195+
Some(e) => { dbg!(e); todo!() },
196+
None => err!(UnexpectedEOL, 0, 0),
197+
}
198+
},
199+
_ => todo!(),
200+
}
201+
}
202+
203+
let lines = Lines { lines };
204+
Ok(Self { name, parameters, lines })
205+
}
206+
}
207+
208+
impl<'src> Expression<'src> {
209+
fn parse(pre: Token<'src>, tokens: &mut impl Iterator<Item = (Token<'src>, usize, usize)>) -> Result<(Self, Token<'src>), Error> {
210+
let (lhs, last_tk) = match pre {
211+
Token::BracketRoundOpen => {
212+
match skip_whitespace(tokens) {
213+
Some((pre, ..)) => Self::parse(pre, tokens).map(|(e, t)| (e, Some(t)))?,
214+
None => err!(UnexpectedEOL, 0, 0),
215+
}
216+
},
217+
Token::String(s) => (Expression::Atom(Atom::String(s)), None),
218+
e => { dbg!(e); todo!() },
219+
};
220+
match skip_whitespace(tokens) {
221+
Some((Token::BracketRoundClose, ..)) => return Ok((lhs, Token::BracketRoundClose)),
222+
Some((Token::Comma, ..)) => return Ok((lhs, Token::Comma)),
223+
_ => todo!(),
224+
}
225+
/*
226+
let lhs = match skip_whitespace(tokens) {
227+
Some((Token::BracketRoundOpen, ..)) => Self::parse(tokens)?,
228+
Some((Token::String(s), ..)) => Expression::Atom(Atom::String(s)),
229+
Some(e) => { dbg!(e); todo!() },
230+
None => todo!(),
231+
};
232+
*/
233+
}
234+
}
235+
236+
impl Error {
237+
fn new<T>(error: ErrorType, line: usize, column: usize) -> Result<T, Self> {
238+
Err(Self {
239+
error,
240+
line,
241+
column,
242+
})
243+
}
244+
}

script/src/bin.rs

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,24 @@
1-
use ballscript::*;
1+
use std::env;
2+
use std::fs;
3+
use std::io;
24

3-
pub fn main() {
4-
test();
5+
pub fn main() -> Result<(), io::Error> {
6+
let mut args = env::args();
7+
let exec = args.next().unwrap_or_else(|| String::from("ballscript"));
8+
if let Some(file) = args.next() {
9+
match fs::read_to_string(file) {
10+
Ok(source) => {
11+
let script = ballscript::parse(&source);
12+
dbg!(script);
13+
Ok(())
14+
}
15+
Err(e) => Err(e),
16+
}
17+
} else {
18+
eprintln!("Usage: {} <file>", exec);
19+
Err(io::Error::new(
20+
io::ErrorKind::InvalidInput,
21+
"No script file specified",
22+
))
23+
}
524
}

script/src/hello.bs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
let global_string # = "I am a script variable"
2+
3+
fn main()
4+
print("Hello, world!")
5+
#print(global_string)

script/src/lib.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1+
mod ast;
12
mod tokenizer;
23

3-
pub fn test() {
4-
println!("Test");
4+
use tokenizer::TokenStream;
5+
6+
pub fn parse(source: &str) {
7+
let tks = TokenStream::parse(source).unwrap();
8+
let ast = ast::Script::parse(tks).unwrap();
9+
dbg!(ast);
510
}

0 commit comments

Comments
 (0)