Skip to content

Commit 9be0723

Browse files
committed
feat(semantic): add printf lexer, parser, and codegen
1 parent 503d52c commit 9be0723

File tree

8 files changed

+279
-0
lines changed

8 files changed

+279
-0
lines changed

src/ast.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,9 @@ static void ast_expr_destroy(AstExpr *expr)
149149
case EXPR_IDENTIFIER:
150150
free(expr->data.identifier);
151151
break;
152+
case EXPR_STRING_LITERAL:
153+
free(expr->data.string_literal);
154+
break;
152155
case EXPR_BINARY:
153156
ast_expr_destroy(expr->data.binary.left);
154157
ast_expr_destroy(expr->data.binary.right);
@@ -329,6 +332,15 @@ AstExpr *ast_expr_make_bool(int value)
329332
return expr;
330333
}
331334

335+
AstExpr *ast_expr_make_string(char *value)
336+
{
337+
AstExpr *expr = xcalloc(1, sizeof(AstExpr));
338+
expr->kind = EXPR_STRING_LITERAL;
339+
expr->type = TYPE_STRING;
340+
expr->data.string_literal = value;
341+
return expr;
342+
}
343+
332344
AstExpr *ast_expr_make_identifier(char *name)
333345
{
334346
AstExpr *expr = xcalloc(1, sizeof(AstExpr));
@@ -390,6 +402,10 @@ TypeKind ast_type_from_keyword(const char *kw)
390402
{
391403
return TYPE_BOOL;
392404
}
405+
if (strcmp(kw, "string") == 0)
406+
{
407+
return TYPE_STRING;
408+
}
393409
if (strcmp(kw, "void") == 0)
394410
{
395411
return TYPE_VOID;
@@ -407,6 +423,8 @@ const char *ast_type_name(TypeKind type)
407423
return "float";
408424
case TYPE_BOOL:
409425
return "bool";
426+
case TYPE_STRING:
427+
return "string";
410428
case TYPE_VOID:
411429
return "void";
412430
case TYPE_UNKNOWN:

src/ast.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ typedef enum
99
TYPE_INT,
1010
TYPE_FLOAT,
1111
TYPE_BOOL,
12+
TYPE_STRING,
1213
TYPE_VOID
1314
} TypeKind;
1415

@@ -81,6 +82,7 @@ typedef struct AstExpr
8182
EXPR_INT_LITERAL,
8283
EXPR_FLOAT_LITERAL,
8384
EXPR_BOOL_LITERAL,
85+
EXPR_STRING_LITERAL,
8486
EXPR_IDENTIFIER,
8587
EXPR_BINARY,
8688
EXPR_UNARY,
@@ -92,6 +94,7 @@ typedef struct AstExpr
9294
long long int_value;
9395
double float_value;
9496
int bool_value;
97+
char *string_literal;
9598
char *identifier;
9699
struct
97100
{
@@ -189,6 +192,7 @@ AstStmt *ast_stmt_make_return(AstExpr *expr);
189192
AstExpr *ast_expr_make_int(long long value);
190193
AstExpr *ast_expr_make_float(double value);
191194
AstExpr *ast_expr_make_bool(int value);
195+
AstExpr *ast_expr_make_string(char *value);
192196
AstExpr *ast_expr_make_identifier(char *name);
193197
AstExpr *ast_expr_make_binary(AstBinaryOp op, AstExpr *left, AstExpr *right);
194198
AstExpr *ast_expr_make_unary(AstUnaryOp op, AstExpr *operand);

src/codegen_lua.c

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,15 @@ static void emit_statement(FILE *out, const AstStmt *stmt, const FunctionTable *
1111
static void emit_expression_raw(FILE *out, const AstExpr *expr, const FunctionTable *functions);
1212
static void emit_expression_expected(FILE *out, const AstExpr *expr, const FunctionTable *functions, TypeKind expected_type);
1313
static void emit_call(FILE *out, const AstExpr *expr, const FunctionTable *functions);
14+
static void emit_printf_call(FILE *out, const AstExpr *expr, const FunctionTable *functions);
15+
static void emit_puts_call(FILE *out, const AstExpr *expr, const FunctionTable *functions);
16+
static void emit_printf_args(FILE *out, const AstExpr *expr, const FunctionTable *functions);
17+
static void emit_string_literal_n(FILE *out, const char *value, size_t length);
18+
static void emit_string_literal(FILE *out, const char *value);
1419
static const FunctionSignature *lookup_signature(const FunctionTable *functions, const char *name);
1520
static const char *binary_op_token(AstBinaryOp op);
1621
static void emit_indent(FILE *out, int indent);
22+
static int emit_builtin_expr_statement(FILE *out, const AstExpr *expr, const FunctionTable *functions, int indent);
1723

1824
void codegen_lua_emit(FILE *out, const AstProgram *program, const FunctionTable *functions)
1925
{
@@ -164,6 +170,10 @@ static void emit_statement(FILE *out, const AstStmt *stmt, const FunctionTable *
164170
case STMT_EXPR:
165171
if (stmt->data.expr)
166172
{
173+
if (emit_builtin_expr_statement(out, stmt->data.expr, functions, indent))
174+
{
175+
break;
176+
}
167177
emit_indent(out, indent);
168178
emit_expression_raw(out, stmt->data.expr, functions);
169179
fputc('\n', out);
@@ -228,6 +238,9 @@ static void emit_expression_raw(FILE *out, const AstExpr *expr, const FunctionTa
228238
case EXPR_BOOL_LITERAL:
229239
fputs(expr->data.bool_value ? "true" : "false", out);
230240
break;
241+
case EXPR_STRING_LITERAL:
242+
emit_string_literal(out, expr->data.string_literal);
243+
break;
231244
case EXPR_IDENTIFIER:
232245
fputs(expr->data.identifier, out);
233246
break;
@@ -264,6 +277,16 @@ static void emit_expression_raw(FILE *out, const AstExpr *expr, const FunctionTa
264277

265278
static void emit_call(FILE *out, const AstExpr *expr, const FunctionTable *functions)
266279
{
280+
if (strcmp(expr->data.call.callee, "printf") == 0)
281+
{
282+
emit_printf_call(out, expr, functions);
283+
return;
284+
}
285+
if (strcmp(expr->data.call.callee, "puts") == 0)
286+
{
287+
emit_puts_call(out, expr, functions);
288+
return;
289+
}
267290
const FunctionSignature *signature = lookup_signature(functions, expr->data.call.callee);
268291
fputs(expr->data.call.callee, out);
269292
fputc('(', out);
@@ -283,6 +306,126 @@ static void emit_call(FILE *out, const AstExpr *expr, const FunctionTable *funct
283306
fputc(')', out);
284307
}
285308

309+
/*
 * Emit a printf call used in expression position.
 *
 * The generated Lua wraps print(string.format(...)) in "(... or 0)" so the
 * whole call still yields a value when it appears inside a larger expression.
 */
static void emit_printf_call(FILE *out, const AstExpr *expr, const FunctionTable *functions)
{
    static const char opening[] = "((print(string.format(";
    static const char closing[] = "))) or 0)";

    fputs(opening, out);
    emit_printf_args(out, expr, functions);
    fputs(closing, out);
}
315+
316+
static void emit_puts_call(FILE *out, const AstExpr *expr, const FunctionTable *functions)
317+
{
318+
fputs("((print(", out);
319+
if (expr->data.call.args.count > 0)
320+
{
321+
emit_expression_raw(out, expr->data.call.args.items[0], functions);
322+
}
323+
fputs(")) or 0)", out);
324+
}
325+
326+
static void emit_printf_args(FILE *out, const AstExpr *expr, const FunctionTable *functions)
327+
{
328+
for (size_t i = 0; i < expr->data.call.args.count; ++i)
329+
{
330+
if (i > 0)
331+
{
332+
fputs(", ", out);
333+
}
334+
if (i == 0 && expr->data.call.args.items[i]->kind == EXPR_STRING_LITERAL)
335+
{
336+
const char *raw = expr->data.call.args.items[i]->data.string_literal;
337+
size_t len = raw ? strlen(raw) : 0;
338+
if (len > 0 && raw[len - 1] == '\n')
339+
{
340+
emit_string_literal_n(out, raw, len - 1);
341+
continue;
342+
}
343+
}
344+
emit_expression_raw(out, expr->data.call.args.items[i], functions);
345+
}
346+
}
347+
348+
/*
 * Write the first `length` bytes of `value` as a double-quoted Lua string.
 *
 * out:    destination stream.
 * value:  bytes to emit; NULL is emitted as the empty literal "".
 * length: number of bytes of `value` to emit (embedded NULs are allowed).
 *
 * Backslash, double quote, newline, carriage return and tab use their
 * symbolic escapes. Any other control byte (< 32 or 127) is written as a
 * three-digit decimal escape \ddd: unlike \xXX, that form is accepted by
 * every Lua version, and always using three digits keeps a following digit
 * character from being absorbed into the escape. All remaining bytes are
 * copied through unchanged.
 */
static void emit_string_literal_n(FILE *out, const char *value, size_t length)
{
    if (!value)
    {
        fputs("\"\"", out);
        return;
    }

    fputc('"', out);
    for (size_t i = 0; i < length; ++i)
    {
        unsigned char c = (unsigned char)value[i];
        switch (c)
        {
        case '\\':
            fputs("\\\\", out);
            break;
        case '"':
            fputs("\\\"", out);
            break;
        case '\n':
            fputs("\\n", out);
            break;
        case '\r':
            fputs("\\r", out);
            break;
        case '\t':
            fputs("\\t", out);
            break;
        default:
            if (c < 32 || c == 127)
            {
                /* Portable decimal escape instead of Lua 5.2+-only \xXX. */
                fprintf(out, "\\%03u", (unsigned)c);
            }
            else
            {
                fputc(c, out);
            }
            break;
        }
    }
    fputc('"', out);
}
390+
391+
/*
 * Write a NUL-terminated C string as a quoted Lua string literal.
 * A NULL pointer is written as the empty literal "".
 */
static void emit_string_literal(FILE *out, const char *value)
{
    if (value)
    {
        emit_string_literal_n(out, value, strlen(value));
        return;
    }

    fputs("\"\"", out);
}
400+
401+
static int emit_builtin_expr_statement(FILE *out, const AstExpr *expr, const FunctionTable *functions, int indent)
402+
{
403+
if (!expr || expr->kind != EXPR_CALL)
404+
{
405+
return 0;
406+
}
407+
if (strcmp(expr->data.call.callee, "printf") == 0)
408+
{
409+
emit_indent(out, indent);
410+
fputs("print(string.format(", out);
411+
emit_printf_args(out, expr, functions);
412+
fputs("))\n", out);
413+
return 1;
414+
}
415+
if (strcmp(expr->data.call.callee, "puts") == 0)
416+
{
417+
emit_indent(out, indent);
418+
fputs("print(", out);
419+
if (expr->data.call.args.count > 0)
420+
{
421+
emit_expression_raw(out, expr->data.call.args.items[0], functions);
422+
}
423+
fputs(")\n", out);
424+
return 1;
425+
}
426+
return 0;
427+
}
428+
286429
static const FunctionSignature *lookup_signature(const FunctionTable *functions, const char *name)
287430
{
288431
if (!functions || !name)

src/lexer.l

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,50 @@ static char *yy_strdup(const char *src)
1616
memcpy(copy, src, len);
1717
return copy;
1818
}
19+
20+
/*
 * Decode the text of a quoted string token into a newly allocated C string.
 *
 * src: the token text including its opening and closing double quote.
 *
 * The surrounding quotes are dropped and backslash escapes are decoded:
 * \n, \t and \r become the corresponding control character; any other
 * escaped character (including \\ and \") stands for itself. A backslash
 * that sits directly before the closing quote is kept literally rather than
 * being allowed to consume the closing quote as its escaped character.
 *
 * Returns a malloc'd, NUL-terminated buffer that the caller must free.
 * Prints "out of memory" and exits if the allocation fails.
 */
static char *yy_parse_string_literal(const char *src)
{
    size_t len = strlen(src);
    /* The decoded text is never longer than src; +1 guarantees room for the
     * terminator even when src is empty. */
    char *buffer = malloc(len + 1);
    if (!buffer)
    {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }

    size_t out = 0;
    /* Walk the characters strictly between the two quotes. */
    for (size_t i = 1; i + 1 < len; ++i)
    {
        char c = src[i];
        /* Only treat '\' as an escape when the escaped character is still
         * inside the quotes (i + 1 must not be the closing quote's index). */
        if (c == '\\' && i + 2 < len)
        {
            char next = src[++i];
            switch (next)
            {
            case 'n':
                buffer[out++] = '\n';
                break;
            case 't':
                buffer[out++] = '\t';
                break;
            case 'r':
                buffer[out++] = '\r';
                break;
            default:
                /* Covers \\ and \" as well as unknown escapes. */
                buffer[out++] = next;
                break;
            }
        }
        else
        {
            buffer[out++] = c;
        }
    }
    buffer[out] = '\0';
    return buffer;
}
1963
%}
2064

2165
%option noyywrap nodefault noinput nounput
@@ -65,6 +109,7 @@ static char *yy_strdup(const char *src)
65109
")" { return RPAREN; }
66110
"{" { return LBRACE; }
67111
"}" { return RBRACE; }
112+
\"([^\"\\\n]|\\.)*\" {
    /* A backslash may only appear as the start of a two-character escape;
     * excluding it from the plain-character class keeps a literal such as
     * "a\\" from running on past its closing quote. */
    yylval.string = yy_parse_string_literal(yytext);
    return STRING_LITERAL;
}
68113
. { fprintf(stderr, "invalid character '%s'\n", yytext); }
69114

70115
%%

src/parser.y

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ static void parser_error_cleanup(AstProgram **out_program);
2929
long long intValue;
3030
double floatValue;
3131
char *id;
32+
char *string;
3233
TypeKind type;
3334
AstExpr *expr;
3435
AstStmt *stmt;
@@ -44,6 +45,7 @@ static void parser_error_cleanup(AstProgram **out_program);
4445
%token <intValue> INT_LITERAL
4546
%token <floatValue> FLOAT_LITERAL
4647
%token <id> IDENT
48+
%token <string> STRING_LITERAL
4749
%token KW_INT KW_FLOAT KW_BOOL KW_VOID
4850
%token RETURN
4951
%token TRUE FALSE
@@ -375,6 +377,10 @@ primary_expression
375377
{
376378
$$ = ast_expr_make_identifier($1);
377379
}
380+
| STRING_LITERAL
381+
{
382+
$$ = ast_expr_make_string($1);
383+
}
378384
| LPAREN expression RPAREN
379385
{
380386
$$ = $2;

0 commit comments

Comments
 (0)