Skip to content

Commit b71a257

Browse files
committed
Adding comparison with stdlib cpp implementation
1 parent c7e9af2 commit b71a257

File tree

4 files changed

+68
-2
lines changed

4 files changed

+68
-2
lines changed

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
11
run:
22
cc -Wall -Wextra -ggdb -pedantic -o bit ./main.c && ./bit ./t8.shakespeare.txt
3+
4+
runCpp:
5+
g++ -Wall -Wextra -ggdb -pedantic -o hashcpp ./hash.cpp && ./hashcpp ./t8.shakespeare.txt

hash.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#include <algorithm>
2+
#include <chrono>
3+
#include <cstddef>
4+
#include <cstdlib>
5+
#include <cstring>
6+
#include <fstream>
7+
#include <iomanip>
8+
#include <ios>
9+
#include <iostream>
10+
#include <iterator>
11+
#include <sstream>
12+
#include <stdio.h>
13+
#include <string>
14+
#include <unordered_map>
15+
#include <vector>
16+
17+
/**
 * Splits `input` into whitespace-separated tokens and counts each one.
 *
 * For every token found, the matching entry in `hash_table` is incremented
 * (entries are created on first sight). Existing entries are preserved, so
 * repeated calls accumulate counts. An empty or all-whitespace input leaves
 * the table untouched.
 *
 * @param input      Text to split; it is scanned in place and never copied.
 * @param hash_table Token -> occurrence count map updated by this call.
 */
void tokenize(const std::string &input,
              std::unordered_map<std::string, size_t> &hash_table) {
  // Same delimiter set that `stream >> token` skips in the default classic
  // locale: space, tab, newline, vertical tab, form feed, carriage return.
  static constexpr char kWhitespace[] = " \t\n\v\f\r";

  // One buffer reused for every token so short-lived allocations are avoided.
  std::string token;

  std::string::size_type begin = input.find_first_not_of(kWhitespace);
  while (begin != std::string::npos) {
    std::string::size_type end = input.find_first_of(kWhitespace, begin);
    if (end == std::string::npos) {
      end = input.size(); // last token runs to the end of the input
    }
    token.assign(input, begin, end - begin);
    ++hash_table[token];
    // Returns npos once `end` reaches the end of the input, ending the loop.
    begin = input.find_first_not_of(kWhitespace, end);
  }
}
27+
28+
/**
 * Benchmark driver: counts the whitespace-separated tokens of the file named
 * by argv[1] using std::unordered_map, prints the ten most frequent tokens,
 * and reports how long the counting step took (in seconds).
 *
 * Only the tokenize() call is timed; reading the file and ranking the result
 * are excluded from the measurement.
 *
 * @return 0 on success, 1 when no file argument is given or the file cannot
 *         be opened.
 */
int main(int argc, char *argv[]) {
  if (argc < 2) {
    // argv[0] may be absent when argc == 0, so fall back to a fixed name.
    std::cerr << "Usage: " << (argc > 0 ? argv[0] : "hashcpp") << " <file>\n";
    return 1;
  }

  std::ifstream ifs(argv[1]);
  if (!ifs.is_open()) {
    std::cerr << "Error: Could not open file '" << argv[1] << "'\n";
    return 1;
  }

  // Slurp the whole file up front so disk I/O stays outside the timed region.
  const std::string content((std::istreambuf_iterator<char>(ifs)),
                            (std::istreambuf_iterator<char>()));
  ifs.close();

  std::unordered_map<std::string, size_t> hash_table{};

  // steady_clock is monotonic, so the interval cannot be distorted by
  // wall-clock adjustments made while the benchmark runs.
  const auto start = std::chrono::steady_clock::now();
  tokenize(content, hash_table);
  const auto end = std::chrono::steady_clock::now();
  const std::chrono::duration<double> duration = end - start;

  std::cout << "Token counts:\n";
  std::vector<std::pair<std::string, size_t>> sorted_tokens(hash_table.begin(),
                                                            hash_table.end());

  // Only the first `top_count` entries are printed, so order just that prefix
  // instead of sorting every distinct token.
  const std::size_t top_count =
      std::min<std::size_t>(10, sorted_tokens.size());
  const auto top_end =
      sorted_tokens.begin() + static_cast<std::ptrdiff_t>(top_count);
  std::partial_sort(sorted_tokens.begin(), top_end, sorted_tokens.end(),
                    [](const auto &a, const auto &b) {
                      if (a.second != b.second)
                        return a.second > b.second; // higher count first
                      // Alphabetical tie-break keeps the output reproducible
                      // regardless of hash-table iteration order.
                      return a.first < b.first;
                    });

  std::cout << "\nTop 10 most frequent tokens:\n";
  for (std::size_t i = 0; i < top_count; ++i) {
    std::cout << sorted_tokens[i].first << ": " << sorted_tokens[i].second
              << '\n';
  }
  std::cout << std::fixed << std::setprecision(6);
  std::cout << "Time elapsed: " << duration.count() << std::endl;

  return 0;
}

hashcpp

372 KB
Binary file not shown.

main.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,11 @@ void test_naive(char *content, hash_table *naive_table) {
7272
}
7373

7474
int main(int argc, char **argv) {
75+
if (argc < 2)
76+
return 1;
7577
char *content = read_file(argv[1], "r");
7678
if (!content)
7779
return 1;
78-
if (argc < 2)
79-
return 1;
8080
void *table = malloc(TABLE_SIZE * sizeof(token_t));
8181
void *naive_table = malloc(TABLE_SIZE * sizeof(token_t));
8282
hash_table hash_table_impl = {

0 commit comments

Comments
 (0)