Skip to content

Commit 6d30044

Browse files
committed
let's up those numbers
0 parents  commit 6d30044

File tree

7 files changed

+170754
-0
lines changed

7 files changed

+170754
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Patterns must not start with "./": gitignore never matches such a prefix.
# A leading "/" anchors the pattern to the directory holding this file.
/bit
/t8.shakespeare.txt
/test.txt

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# `run` names an action, not a file: declare it phony so a stray file called
# "run" can never make the target look up to date.
.PHONY: run
run:
	cc -Wall -Wextra -ggdb -pedantic -o bit ./main.c && ./bit ./t8.shakespeare.txt

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Hash Table
2+
3+
It's a really simple implementation of a hash table in C, inspired by who? Mr. Tsoding. Here is his video:
4+
[![Hash Table](https://markdown-videos-api.jorgenkh.no/url?url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3Dn-S9DBwPGTo)](https://www.youtube.com/watch?v=n-S9DBwPGTo)
5+
6+
To test it out, clone the repo, download the Shakespeare text file (`t8.shakespeare.txt`) into the repo directory, then build and run the program.
7+
8+
```bash
9+
git clone https://github.com/SamSyntax/hashtable-c
10+
```
11+
```bash
12+
wget https://ocw.mit.edu/ans7870/6/6.006/s08/lecturenotes/files/t8.shakespeare.txt
13+
```
14+
```bash
15+
make
16+
```

bit

18.7 KB
Binary file not shown.

main.c

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
#include <stddef.h>
2+
#include <stdint.h>
3+
#include <stdio.h>
4+
#include <stdlib.h>
5+
#include <string.h>
6+
#include <time.h>
7+
8+
/* One slot of a word-count table. */
typedef struct {
  char *key;       /* heap copy of the token text; NULL marks an empty slot */
  size_t value;    /* number of times the token has been counted */
  size_t hash_key; /* table index recorded by tokenize() when storing the key */
} token_t;
13+
14+
/* Summary statistics produced by one tokenize() run. */
typedef struct {
  size_t collisions_count;   /* tokens whose hashed slot held a different key */
  size_t global_token_count; /* every token parsed, repeats included */
  size_t unique_tokens;      /* distinct tokens stored */
} token_analysis_t;
19+
20+
#define TABLE_SIZE 100000
21+
22+
token_t table[TABLE_SIZE];
23+
token_t naive_table[TABLE_SIZE];
24+
25+
/**
 * Reset every slot of a TABLE_SIZE-element table to the empty state.
 *
 * All three fields are cleared, including hash_key, so the function is also
 * correct for tables that do not live in zero-initialised static storage.
 * Keys already stored in the table are NOT freed; the pointers are simply
 * overwritten.
 *
 * @param table  Array of TABLE_SIZE slots; a NULL pointer is ignored.
 */
void init_table(token_t *table) {
  if (table == NULL)
    return;

  for (size_t i = 0; i < TABLE_SIZE; ++i) {
    table[i] = (token_t){.key = NULL, .value = 0, .hash_key = 0};
  }
}
31+
32+
size_t hash(char *str) {
33+
unsigned long hash = 5381;
34+
int c;
35+
36+
while ((c = *str++)) {
37+
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
38+
}
39+
40+
return hash % TABLE_SIZE;
41+
}
42+
43+
/**
 * Read a whole file into a freshly allocated, NUL-terminated buffer.
 *
 * @param path  Path of the file to read.
 * @param mode  fopen() mode string, e.g. "r".
 * @return Heap buffer with the file contents followed by '\0', or NULL when
 *         an argument is NULL, the file cannot be opened, its size cannot be
 *         determined, memory cannot be allocated, or fewer bytes than the
 *         measured size are read. The caller owns the buffer and must free()
 *         it.
 */
char *read_file(char *path, char *mode) {
  if (path == NULL || mode == NULL)
    return NULL;

  FILE *fptr = fopen(path, mode);
  if (!fptr)
    return NULL;

  char *content = NULL;
  long file_size = -1;

  /* Measure the file by seeking to its end. Both fseek() and ftell() can
   * fail (ftell() reports -1), and that value must never reach malloc() or
   * fread() as a size. */
  if (fseek(fptr, 0, SEEK_END) == 0)
    file_size = ftell(fptr);
  if (file_size < 0 || fseek(fptr, 0, SEEK_SET) != 0)
    goto done;

  content = malloc((size_t)file_size + 1);
  if (!content)
    goto done;

  if (fread(content, 1, (size_t)file_size, fptr) != (size_t)file_size) {
    free(content);
    content = NULL;
    goto done;
  }
  content[file_size] = '\0';

done:
  /* Single exit point: the stream is closed on every path. */
  fclose(fptr);
  return content;
}
68+
69+
void naive(char *input) {
70+
if (input == NULL)
71+
return;
72+
char *str = strdup(input);
73+
const char *delimiters = " \n\r\t";
74+
char *token;
75+
char *rest = str;
76+
if (!str)
77+
return;
78+
while ((token = strtok_r(rest, delimiters, &rest)) != NULL) {
79+
for (size_t i = 0; i < TABLE_SIZE; ++i) {
80+
if (naive_table[i].key == NULL ||
81+
strcmp(naive_table[i].key, token) == 0) {
82+
naive_table[i].key = strdup(token);
83+
naive_table[i].value += 1;
84+
break;
85+
}
86+
}
87+
}
88+
free(str);
89+
}
90+
91+
void *tokenize(char *input) {
92+
if (input == NULL)
93+
return NULL;
94+
char *str = strdup(input);
95+
if (!str)
96+
return NULL;
97+
98+
const char *delimiters = " \n\r\t";
99+
char *token;
100+
char *rest = str;
101+
size_t collisions_count = 0;
102+
size_t token_count = 0;
103+
size_t unique_tokens = 0;
104+
105+
printf("\nTokens:\n");
106+
107+
while ((token = strtok_r(rest, delimiters, &rest)) != NULL) {
108+
token_count++;
109+
size_t key = hash(token);
110+
111+
if (table[key].key == NULL) {
112+
table[key].key = strdup(token);
113+
table[key].value = 1;
114+
table[key].hash_key = key;
115+
unique_tokens++;
116+
} else if (table[key].key != NULL && strcmp(table[key].key, token) == 0) {
117+
table[key].value += 1;
118+
} else {
119+
key = (key + 1) % TABLE_SIZE;
120+
table[key].key = strdup(token);
121+
table[key].value = 1;
122+
table[key].hash_key = key;
123+
collisions_count++;
124+
}
125+
}
126+
127+
free(str);
128+
void *analyzer_ptr = malloc(sizeof(token_analysis_t));
129+
token_analysis_t res = {.global_token_count = token_count,
130+
.collisions_count = collisions_count,
131+
.unique_tokens = unique_tokens};
132+
memcpy(analyzer_ptr, &res, sizeof(res));
133+
return analyzer_ptr;
134+
}
135+
136+
int comp(const void *elem1, const void *elem2) {
137+
int f = ((token_t *)elem1)->value;
138+
int s = ((token_t *)elem2)->value;
139+
return s - f;
140+
}
141+
142+
/**
 * Print the `top` most frequent entries of a TABLE_SIZE-element table.
 *
 * WARNING: the table is sorted in place by descending count. Entries no
 * longer sit at the index they were stored under, so a table that is
 * addressed by hash() cannot be searched by hash after this call.
 *
 * @param top    Number of leading entries to print; values larger than
 *               TABLE_SIZE are clamped so the loop never reads past the array.
 * @param table  Array of TABLE_SIZE slots; a NULL pointer is ignored.
 */
void print_table(size_t top, token_t *table) {
  if (table == NULL)
    return;

  qsort(table, TABLE_SIZE, sizeof(token_t), comp);
  printf("Top %zu entries:\n", top);

  const size_t limit = top < (size_t)TABLE_SIZE ? top : (size_t)TABLE_SIZE;
  for (size_t i = 0; i < limit; ++i) {
    /* Skip empty slots: fewer than `top` keys may be stored. */
    if (table[i].key != NULL) {
      printf("Idx: %zu, Key: %4s, hash_key: %8zu => %10zu\n", i + 1,
             table[i].key, table[i].hash_key, table[i].value);
    }
  }
}
152+
153+
void free_table() {
154+
for (size_t i = 0; i < TABLE_SIZE; i++) {
155+
if (table[i].key != NULL) {
156+
free(table[i].key);
157+
table[i].key = NULL;
158+
}
159+
}
160+
}
161+
162+
void test_hash(char *content) {
163+
clock_t start = clock();
164+
token_analysis_t *collisions = (token_analysis_t *)tokenize(content);
165+
166+
print_table(10, table);
167+
clock_t end = clock();
168+
printf("\nCollisions: %zu\nTotal tokens parsed: %zu, Unique tokens: %zu",
169+
collisions->collisions_count, collisions->global_token_count,
170+
collisions->unique_tokens);
171+
free(collisions);
172+
printf("\nTime elapsed: %f", (float)(end - start) / CLOCKS_PER_SEC);
173+
}
174+
175+
/**
 * Run the linear-scan word count over `content`, print the ten most frequent
 * tokens of `naive_table`, and report the CPU time used.
 *
 * The measured interval covers counting as well as sorting and printing the
 * table.
 *
 * @param content  NUL-terminated text to analyse.
 */
void test_naive(char *content) {
  const clock_t began = clock();

  naive(content);
  print_table(10, naive_table);

  const clock_t finished = clock();
  const float seconds = (float)(finished - began) / CLOCKS_PER_SEC;
  printf("\nTime elapsed: %f", seconds);
}
182+
183+
/**
 * Entry point: read the text file named by the first argument and count its
 * words twice, first with the linear-scan table, then with the hash table.
 *
 * @return 0 on success, 1 if no file was given or it could not be read.
 */
int main(int argc, char **argv) {
  /* Validate the argument count before touching argv[1]. */
  if (argc < 2) {
    fprintf(stderr, "usage: %s <text-file>\n", argc > 0 ? argv[0] : "bit");
    return 1;
  }

  char *content = read_file(argv[1], "r");
  if (!content) {
    fprintf(stderr, "error: could not read '%s'\n", argv[1]);
    return 1;
  }

  init_table(table);
  init_table(naive_table);

  test_naive(content);
  test_hash(content);

  free(content);
  free_table();
  return 0;
}

0 commit comments

Comments
 (0)