Skip to content

Commit acfa3c5

Browse files
Merge pull request #16 from BeyondMagic/feat/subexpression
feat(main): add CSE (common subexpression elimination)
2 parents d6fa1da + 34c18d4 commit acfa3c5

26 files changed

+811
-26
lines changed

Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@ TARGET = c2lua
77
SRC = src/main.c \
88
src/ast.c \
99
src/symbol_table.c \
10-
src/semantic.c \
11-
src/codegen_lua.c
10+
src/semantic.c \
11+
src/codegen_lua.c \
12+
src/optimizer.c
1213
LEX_SRC = src/lexer.l
1314
YACC_SRC = src/parser.y
1415

src/ast.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,15 @@ AstExpr *ast_expr_make_int(long long value)
402402
return expr;
403403
}
404404

405+
AstExpr *ast_expr_make_char(long long value)
406+
{
407+
AstExpr *expr = xcalloc(1, sizeof(AstExpr));
408+
expr->kind = EXPR_INT_LITERAL;
409+
expr->type = TYPE_CHAR;
410+
expr->data.int_value = (long long)(unsigned char)value;
411+
return expr;
412+
}
413+
405414
AstExpr *ast_expr_make_float(double value)
406415
{
407416
AstExpr *expr = xcalloc(1, sizeof(AstExpr));
@@ -510,6 +519,10 @@ TypeKind ast_type_from_keyword(const char *kw)
510519
{
511520
return TYPE_INT;
512521
}
522+
if (strcmp(kw, "char") == 0)
523+
{
524+
return TYPE_CHAR;
525+
}
513526
if (strcmp(kw, "float") == 0)
514527
{
515528
return TYPE_FLOAT;
@@ -535,6 +548,8 @@ const char *ast_type_name(TypeKind type)
535548
{
536549
case TYPE_INT:
537550
return "int";
551+
case TYPE_CHAR:
552+
return "char";
538553
case TYPE_FLOAT:
539554
return "float";
540555
case TYPE_BOOL:

src/ast.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ typedef enum
77
{
88
TYPE_UNKNOWN = 0,
99
TYPE_INT,
10+
TYPE_CHAR,
1011
TYPE_FLOAT,
1112
TYPE_BOOL,
1213
TYPE_STRING,
@@ -236,6 +237,7 @@ AstExpr *ast_expr_make_array_literal(AstExprList *elements);
236237
AstExpr *ast_expr_make_subscript(AstExpr *array, AstExpr *index);
237238

238239
AstExpr *ast_expr_make_int(long long value);
240+
AstExpr *ast_expr_make_char(long long value);
239241
AstExpr *ast_expr_make_float(double value);
240242
AstExpr *ast_expr_make_bool(int value);
241243
AstExpr *ast_expr_make_string(char *value);

src/codegen_lua.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,12 @@ static void emit_main_wrapper(FILE *out, const AstFunction *fn, const FunctionSi
9797
case TYPE_INT:
9898
fprintf(out, "local %s = args_table and tonumber(args_table[%zu]) or 0\n", param->name, i + 1);
9999
break;
100+
case TYPE_CHAR:
101+
fprintf(out, "local %s = args_table and args_table[%zu] and string.byte(args_table[%zu]) or 0\n",
102+
param->name,
103+
i + 1,
104+
i + 1);
105+
break;
100106
case TYPE_FLOAT:
101107
fprintf(out, "local %s = args_table and tonumber(args_table[%zu]) or 0.0\n", param->name, i + 1);
102108
break;
@@ -271,15 +277,15 @@ static void emit_expression_expected(FILE *out, const AstExpr *expr, const Funct
271277
return;
272278
}
273279

274-
if (expected_type == TYPE_INT && actual == TYPE_FLOAT)
280+
if ((expected_type == TYPE_INT || expected_type == TYPE_CHAR) && actual == TYPE_FLOAT)
275281
{
276282
fputs("math.floor(", out);
277283
emit_expression_raw(out, expr, functions);
278284
fputc(')', out);
279285
return;
280286
}
281287

282-
if ((expected_type == TYPE_INT || expected_type == TYPE_FLOAT) && actual == TYPE_BOOL)
288+
if ((expected_type == TYPE_INT || expected_type == TYPE_FLOAT || expected_type == TYPE_CHAR) && actual == TYPE_BOOL)
283289
{
284290
fputc('(', out);
285291
emit_expression_as_bool(out, expr, functions);
@@ -400,6 +406,7 @@ static void emit_expression_as_bool(FILE *out, const AstExpr *expr, const Functi
400406
break;
401407
case TYPE_INT:
402408
case TYPE_FLOAT:
409+
case TYPE_CHAR:
403410
fputc('(', out);
404411
emit_expression_raw(out, expr, functions);
405412
fputs(" ~= 0)", out);
@@ -624,6 +631,9 @@ static void emit_array_default_value(FILE *out, TypeKind type)
624631
case TYPE_INT:
625632
fputs("0", out);
626633
break;
634+
case TYPE_CHAR:
635+
fputs("0", out);
636+
break;
627637
case TYPE_FLOAT:
628638
fputs("0.0", out);
629639
break;

src/lexer.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/* Public interface to the lexer's source-name and position tracking. */
#ifndef LEXER_H
2+
#define LEXER_H
3+
4+
/* Returns the name recorded for the current input; "<stdin>" when none was set. */
const char *lexer_get_source_name(void);
5+
/* Records the input name used in diagnostics; NULL or "" selects "<stdin>".
 * The pointer is stored, not copied. */
void lexer_set_source_name(const char *name);
6+
/* Resets all four position counters below to 1. */
void lexer_reset_position(void);
7+
8+
/* Scan position after the most recently matched text (both start at 1). */
extern int yy_line;
9+
extern int yy_column;
10+
/* Position at which the most recently matched text began. */
extern int yy_token_line;
11+
extern int yy_token_column;
12+
13+
#endif

src/lexer.l

Lines changed: 85 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,49 @@
33
#include <stdlib.h>
44
#include <string.h>
55
#include "parser.tab.h"
6+
#include "lexer.h"
7+
8+
/* Line of the scan position; incremented by YY_USER_ACTION for each '\n' consumed. */
int yy_line = 1;
9+
/* Column of the scan position; set back to 1 after each '\n', otherwise incremented per character. */
int yy_column = 1;
10+
/* Line on which the most recently matched text started (snapshot of yy_line). */
int yy_token_line = 1;
11+
/* Column at which the most recently matched text started (snapshot of yy_column). */
int yy_token_column = 1;
12+
/* Name reported in diagnostics for the current input; defaults to "<stdin>". */
static const char *yy_source_name = "<stdin>";
13+
14+
void lexer_set_source_name(const char *name)
15+
{
16+
yy_source_name = (name && *name) ? name : "<stdin>";
17+
}
18+
19+
const char *lexer_get_source_name(void)
20+
{
21+
return yy_source_name ? yy_source_name : "<stdin>";
22+
}
23+
24+
void lexer_reset_position(void)
25+
{
26+
yy_line = 1;
27+
yy_column = 1;
28+
yy_token_line = 1;
29+
yy_token_column = 1;
30+
}
31+
32+
/*
 * flex hook executed before each matched rule's action.
 *
 * Snapshots the current position as the start of the matched text, then
 * advances the position over every character of yytext: a newline moves to
 * column 1 of the next line, any other character moves one column right.
 */
#define YY_USER_ACTION                                    \
    {                                                     \
        yy_token_line = yy_line;                          \
        yy_token_column = yy_column;                      \
        for (int yy_i = 0; yy_i < yyleng; ++yy_i)         \
        {                                                 \
            if (yytext[yy_i] != '\n')                     \
            {                                             \
                yy_column++;                              \
                continue;                                 \
            }                                             \
            yy_line++;                                    \
            yy_column = 1;                                \
        }                                                 \
    }
649

750
static char *yy_strdup(const char *src)
851
{
@@ -60,6 +103,43 @@ static char *yy_parse_string_literal(const char *src)
60103
buffer[out] = '\0';
61104
return buffer;
62105
}
106+
107+
/*
 * Decode the text of a character literal (including its surrounding single
 * quotes) into the value of the character it denotes.
 *
 * src: NUL-terminated literal text such as "'a'" or "'\n'".
 *
 * Returns the character value as a non-negative integer. Returns 0 when the
 * text is not wrapped in single quotes, is too short to hold a character, or
 * is a lone backslash with nothing after it.
 *
 * All C simple escape sequences are recognised (\\ \' \" \? \a \b \f \n \r
 * \t \v) plus \0. Any other escaped character decodes to that character
 * itself.
 */
static long long yy_parse_char_literal(const char *src)
{
    size_t len = strlen(src);
    if (len < 3 || src[0] != '\'' || src[len - 1] != '\'')
    {
        return 0;
    }
    if (src[1] != '\\')
    {
        /* Plain character: go through unsigned char so bytes >= 0x80 stay positive. */
        return (unsigned char)src[1];
    }
    if (len < 4)
    {
        /* "'\'" — a backslash with no escaped character. */
        return 0;
    }
    char esc = src[2];
    switch (esc)
    {
    case '\\':
        return '\\';
    case '\'':
        return '\'';
    case '"':
        return '"';
    case 'a':
        return '\a';
    case 'b':
        return '\b';
    case 'f':
        return '\f';
    case 'n':
        return '\n';
    case 'r':
        return '\r';
    case 't':
        return '\t';
    case 'v':
        return '\v';
    case '0':
        return '\0';
    default:
        /* Unknown escape (including \?): the escaped character stands for itself. */
        return (unsigned char)esc;
    }
}
63143
%}
64144

65145
%option noyywrap nodefault noinput nounput
@@ -70,6 +150,7 @@ static char *yy_parse_string_literal(const char *src)
70150

71151
[ \t\r\n]+ { /* skip whitespace */ }
72152
"int" { return KW_INT; }
153+
"char" { return KW_CHAR; }
73154
"float" { return KW_FLOAT; }
74155
"bool" { return KW_BOOL; }
75156
"void" { return KW_VOID; }
@@ -78,8 +159,8 @@ static char *yy_parse_string_literal(const char *src)
78159
"for" { return FOR; }
79160
"true" { return TRUE; }
80161
"false" { return FALSE; }
81-
"\\[" { return LBRACKET; }
82-
"\\]" { return RBRACKET; }
162+
"\[" { return LBRACKET; }
163+
"\]" { return RBRACKET; }
83164
"//"[^\n]* { /* skip single line comments */ }
84165
"/*" { BEGIN(COMMENT); }
85166
<COMMENT>{
@@ -117,6 +198,7 @@ static char *yy_parse_string_literal(const char *src)
117198
"{" { return LBRACE; }
118199
"}" { return RBRACE; }
119200
\"([^\"\n]|\\.)*\" { yylval.string = yy_parse_string_literal(yytext); return STRING_LITERAL; }
120-
. { fprintf(stderr, "invalid character '%s'\n", yytext); }
201+
'([^\\'\n]|\\.)' { yylval.intValue = yy_parse_char_literal(yytext); return CHAR_LITERAL; }
202+
. { fprintf(stderr, "%s:%d:%d: invalid character '%s'\n", lexer_get_source_name(), yy_token_line, yy_token_column, yytext); }
121203

122204
%%

src/main.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
#include "ast.h"
77
#include "codegen_lua.h"
8+
#include "lexer.h"
9+
#include "optimizer.h"
810
#include "semantic.h"
911
#include "parser.tab.h"
1012

@@ -18,6 +20,10 @@ int main(int argc, char **argv)
1820
return EXIT_FAILURE;
1921
}
2022

23+
const char *source_name = (argc == 2) ? argv[1] : "<stdin>";
24+
lexer_set_source_name(source_name);
25+
lexer_reset_position();
26+
2127
AstProgram *program = c2lua_parse(input);
2228
if (!program)
2329
{
@@ -39,6 +45,8 @@ int main(int argc, char **argv)
3945
return EXIT_FAILURE;
4046
}
4147

48+
optimize_program(program);
49+
4250
codegen_lua_emit(stdout, program, &sem_info.functions);
4351

4452
semantic_info_free(&sem_info);

0 commit comments

Comments
 (0)