diff --git a/README.md b/README.md index 06d7ac8..3d09c96 100644 --- a/README.md +++ b/README.md @@ -166,10 +166,12 @@ end Some features I would like to implement in this project: -- [ ] Refactor the whole thing :) +- [x] Refactor the whole thing :) - [ ] Add more built-in functions and constants - [ ] Make constants actually constant - [ ] Add ability to create user-defined functions - [ ] Allow `stdin` - [ ] Port to C++ - [ ] Remove dependencies on Linux (it was made for Linux as that was the subject I wrote it for) +- [ ] Split the server and BASIC library into their own repos, use submodule to link to this +- [ ] Add tests compatible with CTest diff --git a/lib/basic/include/basic/basic.h b/lib/basic/include/basic/basic.h index 0326489..58e9fd2 100644 --- a/lib/basic/include/basic/basic.h +++ b/lib/basic/include/basic/basic.h @@ -12,10 +12,9 @@ * */ -#include - #include "ast.h" +#include "basic_program.h" #include "basic_token.h" +#include "basic_lexer.h" #include "basic_parser.h" -#include "basic_program.h" #include "basic_runner.h" diff --git a/lib/basic/include/basic/basic_lexer.h b/lib/basic/include/basic/basic_lexer.h new file mode 100644 index 0000000..ea49a02 --- /dev/null +++ b/lib/basic/include/basic/basic_lexer.h @@ -0,0 +1,6 @@ +#pragma once + +#include "basic_program.h" + +// Lexer +int basic_tokenize(BASICProgram *program); diff --git a/lib/basic/include/basic/basic_parser.h b/lib/basic/include/basic/basic_parser.h index e5cbed0..21c4642 100644 --- a/lib/basic/include/basic/basic_parser.h +++ b/lib/basic/include/basic/basic_parser.h @@ -21,10 +21,10 @@ extern char PARSE_WS_CHAR[]; #define KEYWORD_IDX_GOTO 5 // Parser -int basic_parse_form_expression(BASICParseTree *ptree, ASTNode *root, int parse_from, int parse_to, int *parse_new_pos); -int basic_parse_form_function(BASICParseTree *ptree, ASTNode *root, int parse_from, int parse_to, int *parse_new_pos); +int basic_parse_form_expression(BASICTokenParseList *parse_list, ASTNode *root, int parse_from, int parse_to, int *parse_new_pos); +int basic_parse_form_function(BASICTokenParseList *parse_list, ASTNode *root, int parse_from, int parse_to, int *parse_new_pos); -int basic_parse_to_ast_between_level(BASICParseTree *ptree, ASTNode *root, int from, int to, int level, int allow_keyword, int *next_ptr); -int basic_parse_to_ast_between_onlyexpr(BASICParseTree *ptree, ASTNode *root, int from, int to); -int basic_parse_to_ast_between(BASICParseTree *ptree, ASTNode *root, int from, int to); +int basic_parse_to_ast_between_level(BASICTokenParseList *parse_list, ASTNode *root, int from, int to, int level, int allow_keyword, int *next_ptr); +int basic_parse_to_ast_between_onlyexpr(BASICTokenParseList *parse_list, ASTNode *root, int from, int to); +int basic_parse_to_ast_between(BASICTokenParseList *parse_list, ASTNode *root, int from, int to); int basic_parse_to_ast(BASICProgram *program); diff --git a/lib/basic/include/basic/basic_program.h b/lib/basic/include/basic/basic_program.h index 7643ede..e59f052 100644 --- a/lib/basic/include/basic/basic_program.h +++ b/lib/basic/include/basic/basic_program.h @@ -8,7 +8,7 @@ typedef struct { char *program_source; - BASICParseTree program_tokens; + BASICTokenParseList program_tokens; ASTNode *program_sequence; // Map between line number and which instruction to execute on that line. For non-linear control flow // BASICLineNode program_line_instruction; @@ -17,6 +17,3 @@ typedef struct BASICProgram *basic_create_program(); void basic_clear_program(BASICProgram *program); void basic_destroy_program(BASICProgram *program); - -// Lexer -int basic_tokenize(BASICProgram *program); diff --git a/lib/basic/include/basic/basic_runner.h b/lib/basic/include/basic/basic_runner.h index c6fa56e..625aaa9 100644 --- a/lib/basic/include/basic/basic_runner.h +++ b/lib/basic/include/basic/basic_runner.h @@ -3,6 +3,8 @@ #include "ast.h" #include "basic_program.h" +#include + /* BASIC interpreter */ typedef enum diff --git a/lib/basic/include/basic/basic_token.h b/lib/basic/include/basic/basic_token.h index 9c47983..b58686d 100644 --- a/lib/basic/include/basic/basic_token.h +++ b/lib/basic/include/basic/basic_token.h @@ -29,6 +29,6 @@ typedef struct BASICToken *tokens; int tokens_length; // Stack for knowning current scope -} BASICParseTree; +} BASICTokenParseList; const char *_cvt_whitespace_to_escape_code(char character); diff --git a/lib/basic/src/basic_lexer.c b/lib/basic/src/basic_lexer.c index 72de69f..2969042 100644 --- a/lib/basic/src/basic_lexer.c +++ b/lib/basic/src/basic_lexer.c @@ -1,4 +1,4 @@ -#include "basic/basic.h" +#include "basic/basic_lexer.h" #include "basic/basic_token.h" #include "basic/basic_program.h" @@ -51,40 +51,40 @@ const char *_cvt_whitespace_to_escape_code(char character) } // Function to insert new token into the token array -void basic_insert_token(BASICParseTree *ptree, BASICToken tok) +void basic_insert_token(BASICTokenParseList *parse_list, BASICToken tok) { - int new_token_size = ptree->tokens_length + 1; - if (ptree->tokens == NULL) + int new_token_size = parse_list->tokens_length + 1; + if (parse_list->tokens == NULL) { // In case of garbage value - ptree->tokens_length = 0; + parse_list->tokens_length = 0; new_token_size = 1; - ptree->tokens = (BASICToken *)malloc(sizeof(BASICToken) * new_token_size); + parse_list->tokens = (BASICToken *)malloc(sizeof(BASICToken) * new_token_size); } else - ptree->tokens = (BASICToken *)realloc(ptree->tokens, sizeof(BASICToken) * new_token_size); + parse_list->tokens = (BASICToken *)realloc(parse_list->tokens, sizeof(BASICToken) * new_token_size); - if (ptree->tokens == NULL) + if (parse_list->tokens == NULL) { lprintf("LEXER", LOGTYPE_DEBUG, "Memory allocation failed"); return; } - memcpy(&(ptree->tokens[ptree->tokens_length]), &tok, sizeof(BASICToken)); - ptree->tokens_length = new_token_size; + memcpy(&(parse_list->tokens[parse_list->tokens_length]), &tok, sizeof(BASICToken)); + parse_list->tokens_length = new_token_size; } -void basic_clear_tokens(BASICParseTree *ptree) +void basic_clear_tokens(BASICTokenParseList *parse_list) { - if (ptree->tokens == NULL) + if (parse_list->tokens == NULL) { return; } - free(ptree->tokens); - ptree->tokens = NULL; - ptree->tokens_length = 0; + free(parse_list->tokens); + parse_list->tokens = NULL; + parse_list->tokens_length = 0; } // Returns 1 if given symbol is present in the given list of symbols @@ -124,12 +124,12 @@ void program_whereis(char *program, char *current_position, char *buffer) // Converts words/symbols to tokens. Also called "Lexer" int basic_tokenize(BASICProgram *program) { - BASICParseTree *ptree = &(program->program_tokens); + BASICTokenParseList *parse_list = &(program->program_tokens); char *tok_ptr, *tok_start; StringLiteral buffer = {0}; int tok_len = 0; - basic_clear_tokens(ptree); + basic_clear_tokens(parse_list); for (tok_ptr = program->program_source; *tok_ptr != '\0'; tok_ptr++) { @@ -153,7 +153,7 @@ int basic_tokenize(BASICProgram *program) tk_num.token_at = tok_start; // Go back one symbol as the above loop moved past the current token tok_ptr--; - basic_insert_token(ptree, tk_num); + basic_insert_token(parse_list, tk_num); lprintf("LEXER", LOGTYPE_DEBUG, "Found number %s\n", tk_num.token); continue; } @@ -169,7 +169,7 @@ int basic_tokenize(BASICProgram *program) tk_op.token[1] = '\0'; tk_op.token_type = TOKEN_OPERATOR; tk_op.token_at = tok_ptr; - basic_insert_token(ptree, tk_op); + basic_insert_token(parse_list, tk_op); lprintf("LEXER", LOGTYPE_DEBUG, "Found operator '%s'\n", tk_op.token); continue; } @@ -182,7 +182,7 @@ int basic_tokenize(BASICProgram *program) tk_ws.token[0] = *tok_ptr; tk_ws.token[1] = '\0'; tk_ws.token_at = tok_ptr; - basic_insert_token(ptree, tk_ws); + basic_insert_token(parse_list, tk_ws); lprintf("LEXER", LOGTYPE_DEBUG, "Found whitespace '%s'\n", _cvt_whitespace_to_escape_code(*tok_ptr)); continue; } @@ -207,21 +207,21 @@ int basic_tokenize(BASICProgram *program) { // It is a keyword tk_identifier.token_type = TOKEN_KEYWORD; - basic_insert_token(ptree, tk_identifier); + basic_insert_token(parse_list, tk_identifier); lprintf("LEXER", LOGTYPE_DEBUG, "Found keyword \"%s\"\n", tk_identifier.token); } else if (token_is_bool(tk_identifier.token)) { // It is a boolean tk_identifier.token_type = TOKEN_BOOL; - basic_insert_token(ptree, tk_identifier); + basic_insert_token(parse_list, tk_identifier); lprintf("LEXER", LOGTYPE_DEBUG, "Found boolean %s\n", tk_identifier.token); } else { - // It is an identifer + // It is an identifier tk_identifier.token_type = TOKEN_IDENTIFIER; - basic_insert_token(ptree, tk_identifier); + basic_insert_token(parse_list, tk_identifier); lprintf("LEXER", LOGTYPE_DEBUG, "Found identifier \"%s\"\n", tk_identifier.token); } @@ -249,7 +249,7 @@ int basic_tokenize(BASICProgram *program) tk_str.token[id_size] = '\0'; tk_str.token_type = TOKEN_STRING; tk_str.token_at = tok_start; - basic_insert_token(ptree, tk_str); + basic_insert_token(parse_list, tk_str); lprintf("LEXER", LOGTYPE_DEBUG, "Found string literal \"%s\"\n", tk_str.token); continue; } @@ -262,7 +262,7 @@ int basic_tokenize(BASICProgram *program) tk_sp.token[1] = '\0'; tk_sp.token_type = TOKEN_SEPARATOR; tk_sp.token_at = tok_ptr; - basic_insert_token(ptree, tk_sp); + basic_insert_token(parse_list, tk_sp); lprintf("LEXER", LOGTYPE_DEBUG, "Found separator %c\n", *tok_ptr); continue; } @@ -273,7 +273,7 @@ int basic_tokenize(BASICProgram *program) tk_end.token_type = TOKEN_END; tk_end.token[0] = '\0'; tk_end.token_at = tok_ptr; - basic_insert_token(ptree, tk_end); + basic_insert_token(parse_list, tk_end); lprintf("LEXER", LOGTYPE_DEBUG, "End of program\n"); lprintf("LEXER", LOGTYPE_DEBUG, "Finished tokenization of program\n"); diff --git a/lib/basic/src/basic_parser.c b/lib/basic/src/basic_parser.c index 0508725..46b37ae 100644 --- a/lib/basic/src/basic_parser.c +++ b/lib/basic/src/basic_parser.c @@ -22,39 +22,39 @@ int basic_parse_id_is_fn_call(BASICToken *tokens, int idx, int len) } // Skips whitespace and reaches the next token -void basic_token_seek_immediate(BASICParseTree *ptree, int *ptr, int end) +void basic_token_seek_immediate(BASICTokenParseList *parse_list, int *ptr, int end) { while (*ptr < end) { - if (ptree->tokens[*ptr].token_type != TOKEN_WHITESPACE) + if (parse_list->tokens[*ptr].token_type != TOKEN_WHITESPACE) break; (*ptr)++; } } -int basic_parse_to_ast_between(BASICParseTree *ptree, ASTNode *root, int from, int to) +int basic_parse_to_ast_between(BASICTokenParseList *parse_list, ASTNode *root, int from, int to) { - return basic_parse_to_ast_between_level(ptree, root, from, to, 0, 1, NULL); + return basic_parse_to_ast_between_level(parse_list, root, from, to, 0, 1, NULL); } -int basic_parse_to_ast_between_onlyexpr(BASICParseTree *ptree, ASTNode *root, int from, int to) +int basic_parse_to_ast_between_onlyexpr(BASICTokenParseList *parse_list, ASTNode *root, int from, int to) { - return basic_parse_to_ast_between_level(ptree, root, from, to, 0, 0, NULL); + return basic_parse_to_ast_between_level(parse_list, root, from, to, 0, 0, NULL); } -int basic_parse_to_ast_between_level(BASICParseTree *ptree, ASTNode *root, int from, int to, int level, int allow_keyword, int *next_ptr) +int basic_parse_to_ast_between_level(BASICTokenParseList *parse_list, ASTNode *root, int from, int to, int level, int allow_keyword, int *next_ptr) { int cb_ret; for (int i = from; i < to; i++) { - switch (ptree->tokens[i].token_type) + switch (parse_list->tokens[i].token_type) { case TOKEN_IDENTIFIER: { // The identifier may be a function call - if (basic_parse_id_is_fn_call(ptree->tokens, i, to)) + if (basic_parse_id_is_fn_call(parse_list->tokens, i, to)) { - cb_ret = basic_parse_form_function(ptree, root, i, to, &i); + cb_ret = basic_parse_form_function(parse_list, root, i, to, &i); if (cb_ret != 0) return cb_ret; break; @@ -67,7 +67,7 @@ int basic_parse_to_ast_between_level(BASICParseTree *ptree, ASTNode *root, int f case TOKEN_SEPARATOR: case TOKEN_BOOL: case TOKEN_OPERATOR: - cb_ret = basic_parse_form_expression(ptree, root, i, to, &i); + cb_ret = basic_parse_form_expression(parse_list, root, i, to, &i); if (cb_ret != 0) return cb_ret; break; @@ -75,9 +75,9 @@ int basic_parse_to_ast_between_level(BASICParseTree *ptree, ASTNode *root, int f // Keywords case TOKEN_KEYWORD: { - lprintf("AST", LOGTYPE_DEBUG, "Parse keyword \"%s\"\n", ptree->tokens[i].token); + lprintf("AST", LOGTYPE_DEBUG, "Parse keyword \"%s\"\n", parse_list->tokens[i].token); - if (strcasecmp(ptree->tokens[i].token, PARSE_KEYWORDS[KEYWORD_IDX_IF]) == 0) + if (strcasecmp(parse_list->tokens[i].token, PARSE_KEYWORDS[KEYWORD_IDX_IF]) == 0) { // "IF" clause. Has an expression and a program sequence to execute if the value of the expression is non-zero (true) ASTNode *if_node = ast_create_node(); @@ -103,20 +103,20 @@ int basic_parse_to_ast_between_level(BASICParseTree *ptree, ASTNode *root, int f i++; lprintf("AST", LOGTYPE_DEBUG, "Parse IF condition expression\n"); - cb_ret = basic_parse_form_expression(ptree, condition_node, i, to, &i); + cb_ret = basic_parse_form_expression(parse_list, condition_node, i, to, &i); if (cb_ret != 0) return cb_ret; - basic_token_seek_immediate(ptree, &i, to); + basic_token_seek_immediate(parse_list, &i, to); // Check if there is a "THEN" - if (ptree->tokens[i].token_type == TOKEN_KEYWORD && strcasecmp(ptree->tokens[i].token, PARSE_KEYWORDS[KEYWORD_IDX_THEN]) == 0) + if (parse_list->tokens[i].token_type == TOKEN_KEYWORD && strcasecmp(parse_list->tokens[i].token, PARSE_KEYWORDS[KEYWORD_IDX_THEN]) == 0) { // Next part after "THEN" is the body, till "END" is found i++; int next_level = level + 1; lprintf("AST", LOGTYPE_DEBUG, "Parse program statements at scope level %d\n", next_level); int next_pos = -1; - int ret = basic_parse_to_ast_between_level(ptree, if_true_node, i, to, next_level, 1, &next_pos); + int ret = basic_parse_to_ast_between_level(parse_list, if_true_node, i, to, next_level, 1, &next_pos); if (ret >= 0 && next_pos >= 0) { // Set token position to the next instruction returned @@ -126,7 +126,7 @@ int basic_parse_to_ast_between_level(BASICParseTree *ptree, ASTNode *root, int f // Else route exists. Go to next symbol to find the program sequence within the else clause body i++; lprintf("AST", LOGTYPE_DEBUG, "Parse program statements for %s at scope level %d\n", PARSE_KEYWORDS[KEYWORD_IDX_ELSE], next_level); - ret = basic_parse_to_ast_between_level(ptree, if_false_node, i, to, next_level, 1, &next_pos); + ret = basic_parse_to_ast_between_level(parse_list, if_false_node, i, to, next_level, 1, &next_pos); // We basically do the same check again, for one last time if (ret >= 0 && next_pos >= 0) { @@ -152,7 +152,7 @@ int basic_parse_to_ast_between_level(BASICParseTree *ptree, ASTNode *root, int f return -2; } } - else if (strcasecmp(ptree->tokens[i].token, PARSE_KEYWORDS[KEYWORD_IDX_WHILE]) == 0) + else if (strcasecmp(parse_list->tokens[i].token, PARSE_KEYWORDS[KEYWORD_IDX_WHILE]) == 0) { // "WHILE" clause. Has an expression and a program sequence to execute till the value of the expression becomes zero (false) ASTNode *while_node = ast_create_node(); @@ -174,20 +174,20 @@ int basic_parse_to_ast_between_level(BASICParseTree *ptree, ASTNode *root, int f i++; lprintf("AST", LOGTYPE_DEBUG, "Parse WHILE condition expression\n"); - cb_ret = basic_parse_form_expression(ptree, condition_node, i, to, &i); + cb_ret = basic_parse_form_expression(parse_list, condition_node, i, to, &i); if (cb_ret != 0) return cb_ret; - basic_token_seek_immediate(ptree, &i, to); + basic_token_seek_immediate(parse_list, &i, to); // Check if there is a "THEN" - if (ptree->tokens[i].token_type == TOKEN_KEYWORD && strcasecmp(ptree->tokens[i].token, PARSE_KEYWORDS[KEYWORD_IDX_THEN]) == 0) + if (parse_list->tokens[i].token_type == TOKEN_KEYWORD && strcasecmp(parse_list->tokens[i].token, PARSE_KEYWORDS[KEYWORD_IDX_THEN]) == 0) { // Next part after "THEN" is the body, till "END" is found i++; int next_level = level + 1; lprintf("AST", LOGTYPE_DEBUG, "Parse program statements at scope level %d\n", next_level); int next_pos = -1; - int ret = basic_parse_to_ast_between_level(ptree, while_true_node, i, to, next_level, 1, &next_pos); + int ret = basic_parse_to_ast_between_level(parse_list, while_true_node, i, to, next_level, 1, &next_pos); if (ret >= 0 && next_pos >= 0) { // Set token position to the next instruction returned @@ -211,7 +211,7 @@ int basic_parse_to_ast_between_level(BASICParseTree *ptree, ASTNode *root, int f return -2; } } - else if (strcasecmp(ptree->tokens[i].token, PARSE_KEYWORDS[KEYWORD_IDX_ELSE]) == 0) + else if (strcasecmp(parse_list->tokens[i].token, PARSE_KEYWORDS[KEYWORD_IDX_ELSE]) == 0) { // Else clause for a matching IF clause if (level > 0) @@ -228,7 +228,7 @@ int basic_parse_to_ast_between_level(BASICParseTree *ptree, ASTNode *root, int f return -1; } } - else if (strcasecmp(ptree->tokens[i].token, PARSE_KEYWORDS[KEYWORD_IDX_END]) == 0) + else if (strcasecmp(parse_list->tokens[i].token, PARSE_KEYWORDS[KEYWORD_IDX_END]) == 0) { // "END" keyword. We are in some kind of body segment of a clause // Check if we are actually inside a body by checking out current level @@ -263,10 +263,10 @@ int basic_parse_to_ast_between_level(BASICParseTree *ptree, ASTNode *root, int f int basic_parse_to_ast(BASICProgram *program) { - BASICParseTree *ptree = &(program->program_tokens); + BASICTokenParseList *parse_list = &(program->program_tokens); ASTNode *prog = program->program_sequence; - lprintf("AST", LOGTYPE_DEBUG, "Found %d tokens in the token list\n", ptree->tokens_length); - int ret_code = basic_parse_to_ast_between(ptree, prog, 0, ptree->tokens_length); + lprintf("AST", LOGTYPE_DEBUG, "Found %d tokens in the token list\n", parse_list->tokens_length); + int ret_code = basic_parse_to_ast_between(parse_list, prog, 0, parse_list->tokens_length); if (ret_code == 0) lprintf("AST", LOGTYPE_DEBUG, "Finished parsing the program\n"); else @@ -444,11 +444,11 @@ int basic_expr_make_tree(Queue *infix_queue, ASTNode *root) } // Convert an expression into prefix notation and create an AST -int basic_parse_form_expression(BASICParseTree *ptree, ASTNode *root, int parse_from, int parse_to, int *parser_idx) +int basic_parse_form_expression(BASICTokenParseList *parse_list, ASTNode *root, int parse_from, int parse_to, int *parser_idx) { lprintf("AST", LOGTYPE_DEBUG, "Trying to find expression between tokens %d and %d\n", parse_from, parse_to); - BASICToken *expr = ptree->tokens; + BASICToken *expr = parse_list->tokens; // Construct a stack of AST nodes to then rearrange to a tree structure Queue infix_queue; @@ -551,7 +551,7 @@ int basic_parse_form_expression(BASICParseTree *ptree, ASTNode *root, int parse_ operator_node->type = AST_EXPRESSION; operator_node->data.token_type = DTYPE_SYMB; operator_node->data = ASTVOID; - basic_parse_form_function(ptree, operator_node, *parser_idx, parse_to, parser_idx); + basic_parse_form_function(parse_list, operator_node, *parser_idx, parse_to, parser_idx); } else { @@ -600,11 +600,11 @@ int basic_parse_form_expression(BASICParseTree *ptree, ASTNode *root, int parse_ return 0; } -int basic_parse_form_function(BASICParseTree *ptree, ASTNode *root, int parse_from, int parse_to, int *parse_new_pos) +int basic_parse_form_function(BASICTokenParseList *parse_list, ASTNode *root, int parse_from, int parse_to, int *parse_new_pos) { // Function call with possibly multiple argument expressions int scope_level = 0, arg_start = parse_from + 2, arg_end = -1; - BASICToken *func = ptree->tokens; + BASICToken *func = parse_list->tokens; // Create a node to call a function ASTNode *fn_call_node = ast_create_node(); @@ -631,7 +631,7 @@ int basic_parse_form_function(BASICParseTree *ptree, ASTNode *root, int parse_fr arg_end = *parse_new_pos; lprintf("AST", LOGTYPE_DEBUG, "Function argument parse:\n"); lprintf("AST", LOGTYPE_DEBUG, "Trying to find expression between tokens %d and %d\n", arg_start, arg_end); - basic_parse_to_ast_between_onlyexpr(ptree, fn_call_node, arg_start, arg_end); + basic_parse_to_ast_between_onlyexpr(parse_list, fn_call_node, arg_start, arg_end); lprintf("AST", LOGTYPE_DEBUG, "End of function call \"%s\"\n", fn_call_node->data.token.kw); break; } @@ -643,7 +643,7 @@ int basic_parse_form_function(BASICParseTree *ptree, ASTNode *root, int parse_fr arg_end = *parse_new_pos; lprintf("AST", LOGTYPE_DEBUG, "Function argument parse:\n"); lprintf("AST", LOGTYPE_DEBUG, "Trying to find expression between tokens %d and %d\n", arg_start, arg_end); - basic_parse_to_ast_between_onlyexpr(ptree, fn_call_node, arg_start, arg_end); + basic_parse_to_ast_between_onlyexpr(parse_list, fn_call_node, arg_start, arg_end); arg_start = (*parse_new_pos) + 1; lprintf("AST", LOGTYPE_DEBUG, "Function argument parse done\n"); }