1+ #include "includes/hash_table.c"
12#include <stddef.h>
23#include <stdint.h>
34#include <stdio.h>
45#include <stdlib.h>
56#include <string.h>
67#include <time.h>
78
/**
 * One slot of a token table.
 *
 * key      - heap-allocated copy of the token text, or NULL when the slot
 *            is empty.
 * value    - number of times the token has been seen.
 * hash_key - index of the table slot the entry was stored in.
 */
typedef struct {
  char *key;
  size_t value;
  size_t hash_key;
} token_t;
13-
/**
 * Counters gathered during one tokenisation pass.
 *
 * collisions_count   - tokens whose home slot was already taken by a
 *                      different key.
 * global_token_count - total number of tokens parsed from the input.
 * unique_tokens      - number of distinct tokens counted as newly inserted.
 */
typedef struct {
  size_t collisions_count;
  size_t global_token_count;
  size_t unique_tokens;
} token_analysis_t;
19-
20- #define TABLE_SIZE 100000
21-
22- token_t table [TABLE_SIZE ];
23- token_t naive_table [TABLE_SIZE ];
24-
25- void init_table (token_t * table ) {
26- for (size_t i = 0 ; i < TABLE_SIZE ; ++ i ) {
27- table [i ].key = NULL ;
28- table [i ].value = 0 ;
29- }
30- }
31-
32- size_t hash (char * str ) {
33- unsigned long hash = 5381 ;
34- int c ;
35-
36- while ((c = * str ++ )) {
37- hash = ((hash << 5 ) + hash ) + c ; /* hash * 33 + c */
38- }
39-
40- return hash % TABLE_SIZE ;
41- }
42-
439char * read_file (char * path , char * mode ) {
4410 FILE * fptr ;
4511 fptr = fopen (path , mode );
@@ -66,81 +32,14 @@ char *read_file(char *path, char *mode) {
6632 return content ;
6733}
6834
69- void naive (char * input ) {
70- if (input == NULL )
71- return ;
72- char * str = strdup (input );
73- const char * delimiters = " \n\r\t" ;
74- char * token ;
75- char * rest = str ;
76- if (!str )
77- return ;
78- while ((token = strtok_r (rest , delimiters , & rest )) != NULL ) {
79- for (size_t i = 0 ; i < TABLE_SIZE ; ++ i ) {
80- if (naive_table [i ].key == NULL ||
81- strcmp (naive_table [i ].key , token ) == 0 ) {
82- naive_table [i ].key = strdup (token );
83- naive_table [i ].value += 1 ;
84- break ;
85- }
86- }
87- }
88- free (str );
89- }
90-
91- void * tokenize (char * input ) {
92- if (input == NULL )
93- return NULL ;
94- char * str = strdup (input );
95- if (!str )
96- return NULL ;
97-
98- const char * delimiters = " \n\r\t" ;
99- char * token ;
100- char * rest = str ;
101- size_t collisions_count = 0 ;
102- size_t token_count = 0 ;
103- size_t unique_tokens = 0 ;
104-
105- printf ("\nTokens:\n" );
106-
107- while ((token = strtok_r (rest , delimiters , & rest )) != NULL ) {
108- token_count ++ ;
109- size_t key = hash (token );
110-
111- if (table [key ].key == NULL ) {
112- table [key ].key = strdup (token );
113- table [key ].value = 1 ;
114- table [key ].hash_key = key ;
115- unique_tokens ++ ;
116- } else if (table [key ].key != NULL && strcmp (table [key ].key , token ) == 0 ) {
117- table [key ].value += 1 ;
118- } else {
119- key = (key + 1 ) % TABLE_SIZE ;
120- table [key ].key = strdup (token );
121- table [key ].value = 1 ;
122- table [key ].hash_key = key ;
123- collisions_count ++ ;
124- }
125- }
126-
127- free (str );
128- void * analyzer_ptr = malloc (sizeof (token_analysis_t ));
129- token_analysis_t res = {.global_token_count = token_count ,
130- .collisions_count = collisions_count ,
131- .unique_tokens = unique_tokens };
132- memcpy (analyzer_ptr , & res , sizeof (res ));
133- return analyzer_ptr ;
134- }
135-
13635int comp (const void * elem1 , const void * elem2 ) {
13736 int f = ((token_t * )elem1 )-> value ;
13837 int s = ((token_t * )elem2 )-> value ;
13938 return s - f ;
14039}
14140
142- void print_table (size_t top , token_t * table ) {
143- qsort (table , TABLE_SIZE , sizeof (token_t ), comp );
41+ void print_table (size_t top , token_t * table , size_t table_size ) {
42+ qsort (table , table_size , sizeof (token_t ), comp );
14443 printf ("Top %zu entries:\n" , top );
14544 for (size_t i = 0 ; i < top ; ++ i ) {
14645 if (table [i ].key != NULL ) {
@@ -150,32 +49,24 @@ void print_table(size_t top, token_t *table) {
15049 }
15150}
15251
153- void free_table () {
154- for (size_t i = 0 ; i < TABLE_SIZE ; i ++ ) {
155- if (table [i ].key != NULL ) {
156- free (table [i ].key );
157- table [i ].key = NULL ;
158- }
159- }
160- }
161-
162- void test_hash (char * content ) {
52+ void test_hash (char * content , hash_table * hash_table ) {
16353 clock_t start = clock ();
164- token_analysis_t * collisions = (token_analysis_t * )tokenize (content );
54+ token_analysis_t * collisions =
55+ (token_analysis_t * )tokenize (content , hash_table );
16556
166- print_table (10 , table );
57+ print_table (10 , ( token_t * ) hash_table -> table , hash_table -> capacity );
16758 clock_t end = clock ();
16859 printf ("\nCollisions: %zu\nTotal tokens parsed: %zu, Unique tokens: %zu" ,
16960 collisions -> collisions_count , collisions -> global_token_count ,
17061 collisions -> unique_tokens );
171- free (collisions );
17262 printf ("\nTime elapsed: %f" , (float )(end - start ) / CLOCKS_PER_SEC );
63+ free (collisions );
17364}
17465
175- void test_naive (char * content ) {
66+ void test_naive (char * content , hash_table * naive_table ) {
17667 clock_t start = clock ();
177- naive (content );
178- print_table (10 , naive_table );
68+ naive (content , naive_table );
69+ print_table (10 , ( token_t * ) naive_table -> table , naive_table -> capacity );
17970 clock_t end = clock ();
18071 printf ("\nTime elapsed: %f" , (float )(end - start ) / CLOCKS_PER_SEC );
18172}
@@ -186,11 +77,28 @@ int main(int argc, char **argv) {
18677 return 1 ;
18778 if (argc < 2 )
18879 return 1 ;
189- init_table (table );
190- init_table (naive_table );
191-
192- test_naive (content );
193- test_hash (content );
80+ void * table = malloc (TABLE_SIZE * sizeof (token_t ));
81+ void * naive_table = malloc (TABLE_SIZE * sizeof (token_t ));
82+ hash_table hash_table_impl = {
83+ .table = table ,
84+ .capacity = TABLE_SIZE ,
85+ .size = 0 ,
86+ };
87+
88+ // hash_table naive_hash_table = {
89+ // .table = naive_table,
90+ // .capacity = TABLE_SIZE,
91+ // .size = 0,
92+ // };
93+
94+ init_table (& hash_table_impl );
95+ // init_table(&naive_hash_table);
96+
97+ // test_naive(content, &naive_hash_table);
98+ test_hash (content , & hash_table_impl );
19499 free (content );
195- free_table ();
100+ free_table ((token_t * )hash_table_impl .table , hash_table_impl .capacity );
101+ // free_table((token_t *)naive_hash_table.table, naive_hash_table.capacity);
102+ free (hash_table_impl .table );
103+ // free(naive_hash_table.table);
196104}
0 commit comments