Skip to content

Commit d2e1d20

Browse files
authored
Merge pull request #240 from sysprog21/cfront
Improve C language features
2 parents bbd5b41 + 3e5c460 commit d2e1d20

File tree

15 files changed

+1628
-46
lines changed

15 files changed

+1628
-46
lines changed

lib/c.c

Lines changed: 16 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -146,16 +146,17 @@ char *memcpy(char *dest, char *src, int count)
146146
void *memset(void *s, int c, int n)
147147
{
148148
int i = 0;
149-
char *ptr = s;
149+
char *ptr = (char *) s;
150+
char byte_val = (char) c;
150151
for (; i + 4 <= n; i += 4) {
151-
ptr[i] = c;
152-
ptr[i + 1] = c;
153-
ptr[i + 2] = c;
154-
ptr[i + 3] = c;
152+
ptr[i] = byte_val;
153+
ptr[i + 1] = byte_val;
154+
ptr[i + 2] = byte_val;
155+
ptr[i + 3] = byte_val;
155156
}
156157

157158
for (; i < n; i++)
158-
ptr[i] = c;
159+
ptr[i] = byte_val;
159160

160161
return s;
161162
}
@@ -282,7 +283,7 @@ void __fmtbuf_write_char(fmtbuf_t *fmtbuf, int val)
282283
if (fmtbuf->n <= 1)
283284
return;
284285

285-
char ch = val & 0xFF;
286+
char ch = (char) (val & 0xFF);
286287
fmtbuf->buf[0] = ch;
287288
fmtbuf->buf += 1;
288289
fmtbuf->n -= 1;
@@ -418,12 +419,12 @@ void __format_to_buf(fmtbuf_t *fmtbuf, char *format, int *var_args)
418419
switch (format[si]) {
419420
case 's':
420421
/* append param pi as string */
421-
l = strlen(v);
422-
__fmtbuf_write_str(fmtbuf, v, l);
422+
l = strlen((char *) v);
423+
__fmtbuf_write_str(fmtbuf, (char *) v, l);
423424
break;
424425
case 'c':
425426
/* append param pi as char */
426-
__fmtbuf_write_char(fmtbuf, v);
427+
__fmtbuf_write_char(fmtbuf, (char) v);
427428
break;
428429
case 'o':
429430
/* append param as octal */
@@ -551,8 +552,8 @@ char *fgets(char *str, int n, FILE *stream)
551552
str[i] = 0;
552553
return str;
553554
}
554-
/* Not support casting yet. Simply assign it. */
555-
str[i] = c;
555+
/* Use explicit cast for clarity */
556+
str[i] = (char) c;
556557

557558
if (c == '\n') {
558559
str[i + 1] = 0;
@@ -683,7 +684,7 @@ void *malloc(int size)
683684
__alloc_tail->size = allocated->size;
684685
chunk_clear_freed(__alloc_tail);
685686

686-
void *ptr = __alloc_tail + 1;
687+
void *ptr = (void *) (__alloc_tail + 1);
687688
return ptr;
688689
}
689690

@@ -750,8 +751,8 @@ void free(void *ptr)
750751
if (!ptr)
751752
return;
752753

753-
char *__ptr = ptr;
754-
chunk_t *cur = __ptr - sizeof(chunk_t);
754+
char *__ptr = (char *) ptr;
755+
chunk_t *cur = (chunk_t *) (__ptr - sizeof(chunk_t));
755756
if (IS_CHUNK_GET_FREED(cur->size)) {
756757
printf("free(): double free detected\n");
757758
abort();

src/arm-codegen.c

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,9 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
125125
case OP_sign_ext:
126126
elf_offset += 4;
127127
return;
128+
case OP_cast:
129+
elf_offset += 4;
130+
return;
128131
default:
129132
fatal("Unknown opcode");
130133
}
@@ -439,6 +442,10 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
439442
/* TODO: Allow to sign extends to other types */
440443
emit(__sxtb(__AL, rd, rn, 0));
441444
return;
445+
case OP_cast:
446+
/* Generic cast operation - for now, just move the value */
447+
emit(__mov_r(__AL, rd, rn));
448+
return;
442449
default:
443450
fatal("Unknown opcode");
444451
}

src/defs.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -157,6 +157,7 @@ typedef enum {
157157
T_typedef,
158158
T_enum,
159159
T_struct,
160+
T_union,
160161
T_sizeof,
161162
T_elipsis, /* ... */
162163
T_switch,
@@ -184,6 +185,7 @@ typedef enum {
184185
TYPE_int,
185186
TYPE_char,
186187
TYPE_struct,
188+
TYPE_union,
187189
TYPE_typedef
188190
} base_type_t;
189191

@@ -254,6 +256,7 @@ typedef enum {
254256
/* data type conversion */
255257
OP_trunc,
256258
OP_sign_ext,
259+
OP_cast,
257260

258261
/* entry point of the state machine */
259262
OP_start
@@ -531,7 +534,6 @@ typedef struct {
531534
int polluted;
532535
} regfile_t;
533536

534-
/* FIXME: replace char[2] with a short data type in ELF header structures */
535537
/* ELF header */
536538
typedef struct {
537539
char e_ident[16];

src/elf.c

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ void elf_write_byte(strbuf_t *elf_array, int val)
3131

3232
char e_extract_byte(int v, int b)
3333
{
34-
return (v >> (b << 3)) & 0xFF;
34+
return (char) ((v >> (b << 3)) & 0xFF);
3535
}
3636

3737
void elf_write_int(strbuf_t *elf_array, int val)
@@ -97,7 +97,7 @@ void elf_generate_header(void)
9797
* 34 | | |
9898
*/
9999
/* ELF file header */
100-
hdr.e_ident[0] = 0x7F; /* ELF magic number */
100+
hdr.e_ident[0] = (char) 0x7F; /* ELF magic number */
101101
hdr.e_ident[1] = 'E';
102102
hdr.e_ident[2] = 'L';
103103
hdr.e_ident[3] = 'F';
@@ -124,13 +124,13 @@ void elf_generate_header(void)
124124
elf_symtab->size +
125125
elf_strtab->size; /* section header offset */
126126
hdr.e_flags = ELF_FLAGS; /* flags */
127-
hdr.e_ehsize[0] = 0x34; /* header size */
127+
hdr.e_ehsize[0] = (char) 0x34; /* header size */
128128
hdr.e_ehsize[1] = 0;
129-
hdr.e_phentsize[0] = 0x20; /* program header size */
129+
hdr.e_phentsize[0] = (char) 0x20; /* program header size */
130130
hdr.e_phentsize[1] = 0;
131131
hdr.e_phnum[0] = 1; /* number of program headers */
132132
hdr.e_phnum[1] = 0;
133-
hdr.e_shentsize[0] = 0x28; /* section header size */
133+
hdr.e_shentsize[0] = (char) 0x28; /* section header size */
134134
hdr.e_shentsize[1] = 0;
135135
hdr.e_shnum[0] = 6; /* number of section headers */
136136
hdr.e_shnum[1] = 0;

src/globals.c

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -580,7 +580,8 @@ int hard_mul_div = 0;
580580
type_t *find_type(char *type_name, int flag)
581581
{
582582
for (int i = 0; i < types_idx; i++) {
583-
if (TYPES[i].base_type == TYPE_struct) {
583+
if (TYPES[i].base_type == TYPE_struct ||
584+
TYPES[i].base_type == TYPE_union) {
584585
if (flag == 1)
585586
continue;
586587
if (!strcmp(TYPES[i].type_name, type_name))
@@ -1398,6 +1399,10 @@ void dump_bb_insn(func_t *func, basic_block_t *bb, bool *at_func_start)
13981399
printf("%%%s = sign_ext %%%s, %d", rd->var_name, rs1->var_name,
13991400
insn->sz);
14001401
break;
1402+
case OP_cast:
1403+
print_indent(1);
1404+
printf("%%%s = cast %%%s", rd->var_name, rs1->var_name);
1405+
break;
14011406
default:
14021407
printf("<Unsupported opcode: %d>", insn->opcode);
14031408
break;

src/lexer.c

Lines changed: 132 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212

1313
/* Hash table constants */
1414
#define NUM_DIRECTIVES 11
15-
#define NUM_KEYWORDS 15
15+
#define NUM_KEYWORDS 16
1616

1717
/* Preprocessor directive hash table using existing shecc hashmap */
1818
hashmap_t *DIRECTIVE_MAP = NULL;
@@ -112,6 +112,8 @@ void lex_init_keywords()
112112
token_values[13] = T_default;
113113
names[14] = "continue";
114114
token_values[14] = T_continue;
115+
names[15] = "union";
116+
token_values[15] = T_union;
115117

116118
/* hashmap insertion */
117119
for (int i = 0; i < NUM_KEYWORDS; i++) {
@@ -203,6 +205,17 @@ bool is_hex(char c)
203205
(c >= 'A' && c <= 'F');
204206
}
205207

208+
int hex_digit_value(char c)
209+
{
210+
if (c >= '0' && c <= '9')
211+
return c - '0';
212+
if (c >= 'a' && c <= 'f')
213+
return c - 'a' + 10;
214+
if (c >= 'A' && c <= 'F')
215+
return c - 'A' + 10;
216+
return -1;
217+
}
218+
206219
bool is_numeric(char buffer[])
207220
{
208221
bool hex = false;
@@ -330,8 +343,21 @@ token_t lex_token_internal(bool aliasing)
330343
token_str[i++] = next_char;
331344
} while (is_hex(read_char(false)));
332345

346+
} else if (token_str[0] == '0' && ((next_char | 32) == 'b')) {
347+
/* Binary: starts with 0b or 0B */
348+
token_str[i++] = next_char;
349+
350+
read_char(false);
351+
if (next_char != '0' && next_char != '1')
352+
error("Invalid binary literal: expected 0 or 1 after 0b");
353+
354+
do {
355+
token_str[i++] = next_char;
356+
read_char(false);
357+
} while (next_char == '0' || next_char == '1');
358+
333359
} else if (token_str[0] == '0') {
334-
/* Octal: starts with 0 but not followed by 'x' */
360+
/* Octal: starts with 0 but not followed by 'x' or 'b' */
335361
while (is_digit(next_char)) {
336362
if (next_char >= '8')
337363
error("Invalid octal digit: must be in range 0-7");
@@ -413,8 +439,58 @@ token_t lex_token_internal(bool aliasing)
413439
token_str[i - 1] = '\\';
414440
else if (next_char == '0')
415441
token_str[i - 1] = '\0';
416-
else
417-
abort();
442+
else if (next_char == 'a')
443+
token_str[i - 1] = '\a';
444+
else if (next_char == 'b')
445+
token_str[i - 1] = '\b';
446+
else if (next_char == 'v')
447+
token_str[i - 1] = '\v';
448+
else if (next_char == 'f')
449+
token_str[i - 1] = '\f';
450+
else if (next_char == 'e') /* GNU extension: ESC character */
451+
token_str[i - 1] = 27;
452+
else if (next_char == '?')
453+
token_str[i - 1] = '?';
454+
else if (next_char == 'x') {
455+
/* Hexadecimal escape sequence \xHH */
456+
read_char(false);
457+
if (!is_hex(next_char))
458+
error("Invalid hex escape sequence");
459+
int value = 0;
460+
int count = 0;
461+
while (is_hex(next_char) && count < 2) {
462+
value = (value << 4) + hex_digit_value(next_char);
463+
read_char(false);
464+
count++;
465+
}
466+
token_str[i - 1] = value;
467+
/* Back up one character as we read one too many */
468+
SOURCE->size--;
469+
next_char = SOURCE->elements[SOURCE->size];
470+
} else if (next_char >= '0' && next_char <= '7') {
471+
/* Octal escape sequence \nnn */
472+
int value = next_char - '0';
473+
read_char(false);
474+
if (next_char >= '0' && next_char <= '7') {
475+
value = (value << 3) + (next_char - '0');
476+
read_char(false);
477+
if (next_char >= '0' && next_char <= '7') {
478+
value = (value << 3) + (next_char - '0');
479+
} else {
480+
/* Back up one character */
481+
SOURCE->size--;
482+
next_char = SOURCE->elements[SOURCE->size];
483+
}
484+
} else {
485+
/* Back up one character */
486+
SOURCE->size--;
487+
next_char = SOURCE->elements[SOURCE->size];
488+
}
489+
token_str[i - 1] = value;
490+
} else {
491+
/* Handle unknown escapes gracefully */
492+
token_str[i - 1] = next_char;
493+
}
418494
} else {
419495
token_str[i++] = next_char;
420496
}
@@ -445,8 +521,58 @@ token_t lex_token_internal(bool aliasing)
445521
token_str[0] = '\\';
446522
else if (next_char == '0')
447523
token_str[0] = '\0';
448-
else
449-
abort();
524+
else if (next_char == 'a')
525+
token_str[0] = '\a';
526+
else if (next_char == 'b')
527+
token_str[0] = '\b';
528+
else if (next_char == 'v')
529+
token_str[0] = '\v';
530+
else if (next_char == 'f')
531+
token_str[0] = '\f';
532+
else if (next_char == 'e') /* GNU extension: ESC character */
533+
token_str[0] = 27;
534+
else if (next_char == '?')
535+
token_str[0] = '?';
536+
else if (next_char == 'x') {
537+
/* Hexadecimal escape sequence \xHH */
538+
read_char(false);
539+
if (!is_hex(next_char))
540+
error("Invalid hex escape sequence");
541+
int value = 0;
542+
int count = 0;
543+
while (is_hex(next_char) && count < 2) {
544+
value = (value << 4) + hex_digit_value(next_char);
545+
read_char(false);
546+
count++;
547+
}
548+
token_str[0] = value;
549+
/* Back up one character as we read one too many */
550+
SOURCE->size--;
551+
next_char = SOURCE->elements[SOURCE->size];
552+
} else if (next_char >= '0' && next_char <= '7') {
553+
/* Octal escape sequence \nnn */
554+
int value = next_char - '0';
555+
read_char(false);
556+
if (next_char >= '0' && next_char <= '7') {
557+
value = (value << 3) + (next_char - '0');
558+
read_char(false);
559+
if (next_char >= '0' && next_char <= '7') {
560+
value = (value << 3) + (next_char - '0');
561+
} else {
562+
/* Back up one character */
563+
SOURCE->size--;
564+
next_char = SOURCE->elements[SOURCE->size];
565+
}
566+
} else {
567+
/* Back up one character */
568+
SOURCE->size--;
569+
next_char = SOURCE->elements[SOURCE->size];
570+
}
571+
token_str[0] = value;
572+
} else {
573+
/* Handle unknown escapes gracefully */
574+
token_str[0] = next_char;
575+
}
450576
} else {
451577
token_str[0] = next_char;
452578
}

0 commit comments

Comments
 (0)