Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,16 @@ typedef struct {
int count;
} token_buffer_t;

/* String pool for identifier deduplication.
 *
 * Wraps the hashmap that intern_string() consults and fills: each entry is
 * keyed by the pooled copy of a string and stores that same pointer as its
 * value. The pool object is allocated and its map created in global_init();
 * the map is released in global_release().
 */
typedef struct {
hashmap_t *strings; /* Map string -> interned string */
} string_pool_t;

/* String literal pool for deduplicating string constants.
 *
 * The pool object is allocated and its map created in global_init(); the map
 * is released in global_release().
 * NOTE(review): no reader or writer of 'literals' is visible alongside this
 * declaration, so the exact key/value encoding should be confirmed against
 * the code that uses it.
 */
typedef struct {
hashmap_t *literals; /* Map string literal -> ELF data offset */
} string_literal_pool_t;

/* builtin types */
typedef enum {
TYPE_void = 0,
Expand Down
72 changes: 66 additions & 6 deletions src/globals.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@

#include "defs.h"

/* Forward declaration for string interning */
char *intern_string(char *str);

/* Lexer */
char token_str[MAX_TOKEN_LEN];
token_t next_token;
Expand Down Expand Up @@ -673,7 +676,8 @@ void add_alias(char *alias, char *value)
printf("Failed to allocate alias_t\n");
return;
}
strcpy(al->alias, alias);
/* Use interned string for alias name */
strcpy(al->alias, intern_string(alias));
hashmap_put(ALIASES_MAP, alias, al);
}
strcpy(al->value, value);
Expand Down Expand Up @@ -707,7 +711,8 @@ macro_t *add_macro(char *name)
printf("Failed to allocate macro_t\n");
return NULL;
}
strcpy(ma->name, name);
/* Use interned string for macro name */
strcpy(ma->name, intern_string(name));
hashmap_put(MACROS_MAP, name, ma);
}
ma->disabled = false;
Expand All @@ -733,6 +738,41 @@ bool remove_macro(char *name)
}

void error(char *msg);

/* String pool global */
string_pool_t *string_pool;
string_literal_pool_t *string_literal_pool;

/* Return the pooled copy of 'str', creating it on first use.
 *
 * The pool is string_pool->strings. When hashmap_get() already has an entry
 * for 'str', that entry is returned. Otherwise the text is copied into
 * GENERAL_ARENA, registered in the pool under its own pointer, and the new
 * copy is returned.
 *
 * Special cases:
 *   - a NULL argument yields NULL;
 *   - if GENERAL_ARENA or string_pool has not been set up yet, 'str' itself
 *     is handed back unchanged, so calls made before initialization still
 *     receive a usable pointer.
 */
char *intern_string(char *str)
{
    char *pooled;
    int size;

    if (!str)
        return NULL;

    /* Interning is unavailable until both the arena and the pool exist. */
    if (!(GENERAL_ARENA && string_pool))
        return str;

    /* Reuse the pooled copy when this text has been seen before. */
    pooled = hashmap_get(string_pool->strings, str);
    if (pooled)
        return pooled;

    /* First occurrence: duplicate the text (terminating NUL included) into
     * the arena and record the copy as both key and value.
     */
    size = strlen(str) + 1;
    pooled = arena_alloc(GENERAL_ARENA, size);
    strcpy(pooled, str);
    hashmap_put(string_pool->strings, pooled, pooled);

    return pooled;
}

int find_macro_param_src_idx(char *name, block_t *parent)
{
macro_t *macro = parent->macro;
Expand Down Expand Up @@ -761,7 +801,8 @@ type_t *add_type(void)
type_t *add_named_type(char *name)
{
type_t *type = add_type();
strcpy(type->type_name, name);
/* Use interned string for type name */
strcpy(type->type_name, intern_string(name));
return type;
}

Expand All @@ -773,7 +814,8 @@ void add_constant(char alias[], int value)
return;
}

strcpy(constant->alias, alias);
/* Use interned string for constant name */
strcpy(constant->alias, intern_string(alias));
constant->value = value;
hashmap_put(CONSTANTS_MAP, alias, constant);
}
Expand Down Expand Up @@ -877,7 +919,8 @@ func_t *add_func(char *func_name, bool synthesize)

func = arena_alloc_func();
hashmap_put(FUNC_MAP, func_name, func);
strcpy(func->return_def.var_name, func_name);
/* Use interned string for function name */
strcpy(func->return_def.var_name, intern_string(func_name));
func->stack_size = 4;

if (synthesize)
Expand Down Expand Up @@ -1042,7 +1085,7 @@ void add_insn(block_t *block,
n->idx = 0;

if (str)
strcpy(n->str, str);
strcpy(n->str, intern_string(str));
else
n->str[0] = '\0';

Expand Down Expand Up @@ -1151,6 +1194,16 @@ void global_init(void)
TYPES = arena_alloc(GENERAL_ARENA, MAX_TYPES * sizeof(type_t));
PH2_IR_FLATTEN =
arena_alloc(GENERAL_ARENA, MAX_IR_INSTR * sizeof(ph2_ir_t *));

/* Initialize string pool for identifier deduplication */
string_pool = arena_alloc(GENERAL_ARENA, sizeof(string_pool_t));
string_pool->strings = hashmap_create(512);

/* Initialize string literal pool for deduplicating string constants */
string_literal_pool =
arena_alloc(GENERAL_ARENA, sizeof(string_literal_pool_t));
string_literal_pool->literals = hashmap_create(256);
Comment on lines +1198 to +1205
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems the hashmaps won't be freed in global_release?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems the hashmaps won't be freed in global_release?

Yes, the hashmaps for string pools were not being freed in global_release().
Fixed now by adding proper cleanup for both string pool hashmaps.


SOURCE = strbuf_create(MAX_SOURCE);
FUNC_MAP = hashmap_create(DEFAULT_FUNCS_SIZE);
INCLUSION_MAP = hashmap_create(DEFAULT_INCLUSIONS_SIZE);
Expand Down Expand Up @@ -1273,6 +1326,13 @@ void global_release(void)
lexer_cleanup();

hashmap_free(MACROS_MAP);

/* Free string interning hashmaps */
if (string_pool && string_pool->strings)
hashmap_free(string_pool->strings);
if (string_literal_pool && string_literal_pool->literals)
hashmap_free(string_literal_pool->literals);

arena_free(BLOCK_ARENA);
arena_free(INSN_ARENA);
arena_free(BB_ARENA);
Expand Down
26 changes: 15 additions & 11 deletions src/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,7 @@ bool read_preproc_directive(void)
while (lex_peek(T_identifier, alias)) {
lex_expect(T_identifier);
strcpy(macro->param_defs[macro->num_param_defs++].var_name,
alias);
intern_string(alias));
lex_accept(T_comma);
}
if (lex_accept(T_elipsis))
Expand Down Expand Up @@ -1192,14 +1192,18 @@ void read_inner_var_decl(var_t *vd, int anon, int is_param)
/* is it function pointer declaration? */
if (lex_accept(T_open_bracket)) {
func_t func;
char temp_name[MAX_VAR_LEN];
lex_expect(T_asterisk);
lex_ident(T_identifier, vd->var_name);
lex_ident(T_identifier, temp_name);
strcpy(vd->var_name, intern_string(temp_name));
lex_expect(T_close_bracket);
read_parameter_list_decl(&func, 1);
vd->is_func = true;
} else {
if (anon == 0) {
lex_ident(T_identifier, vd->var_name);
char temp_name[MAX_VAR_LEN];
lex_ident(T_identifier, temp_name);
strcpy(vd->var_name, intern_string(temp_name));
if (!lex_peek(T_open_bracket, NULL) && !is_param) {
if (vd->is_global) {
opstack_push(vd);
Expand Down Expand Up @@ -2078,7 +2082,7 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
/* indirective function pointer assignment */
vd = require_var(parent);
vd->is_func = true;
strcpy(vd->var_name, token);
strcpy(vd->var_name, intern_string(token));
opstack_push(vd);
}
} else if (lex_accept(T_open_curly)) {
Expand Down Expand Up @@ -4431,7 +4435,7 @@ void read_global_statement(void)
if (!type)
type = add_type();

strcpy(type->type_name, token);
strcpy(type->type_name, intern_string(token));
type->base_type = TYPE_struct;

lex_expect(T_open_curly);
Expand Down Expand Up @@ -4469,7 +4473,7 @@ void read_global_statement(void)
if (!type)
type = add_type();

strcpy(type->type_name, token);
strcpy(type->type_name, intern_string(token));
type->base_type = TYPE_union;

lex_expect(T_open_curly);
Expand Down Expand Up @@ -4520,7 +4524,7 @@ void read_global_statement(void)
} while (lex_accept(T_comma));
lex_expect(T_close_curly);
lex_ident(T_identifier, token);
strcpy(type->type_name, token);
strcpy(type->type_name, intern_string(token));
lex_expect(T_semicolon);
} else if (lex_accept(T_struct)) {
int i = 0, size = 0, has_struct_def = 0;
Expand All @@ -4535,7 +4539,7 @@ void read_global_statement(void)
if (!tag) {
tag = add_type();
tag->base_type = TYPE_struct;
strcpy(tag->type_name, token);
strcpy(tag->type_name, intern_string(token));
}
}

Expand Down Expand Up @@ -4574,7 +4578,7 @@ void read_global_statement(void)
strcpy(token, tag->type_name);
memcpy(tag, type, sizeof(type_t));
tag->base_type = TYPE_struct;
strcpy(tag->type_name, token);
strcpy(tag->type_name, intern_string(token));
} else {
/* If it is a forward declaration, build a connection between
* structure tag and alias. In 'find_type', it will retrieve
Expand All @@ -4597,7 +4601,7 @@ void read_global_statement(void)
if (!tag) {
tag = add_type();
tag->base_type = TYPE_union;
strcpy(tag->type_name, token);
strcpy(tag->type_name, intern_string(token));
}
}

Expand Down Expand Up @@ -4640,7 +4644,7 @@ void read_global_statement(void)
strcpy(token, tag->type_name);
memcpy(tag, type, sizeof(type_t));
tag->base_type = TYPE_union;
strcpy(tag->type_name, token);
strcpy(tag->type_name, intern_string(token));
} else {
/* If it is a forward declaration, build a connection between
* union tag and alias. In 'find_type', it will retrieve
Expand Down