Skip to content

Commit d04fcb3

Browse files
authored
Merge pull request #5 from Phelsong/async
- Add byte-level comparison optimization
- Use logical cores to determine thread count for better compatibility
- Added Large File benchmark
- Single Threaded Performance Improvements, ~3x speedup
- Threaded Performance Improvements, +100% over previous / ~+40% over single threaded on large files
2 parents 51bf99b + 948263b commit d04fcb3

File tree

6 files changed

+320
-256
lines changed

6 files changed

+320
-256
lines changed

bench.mojo

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@ from sys import exit
33
from testing import assert_true
44
from time import time_function, perf_counter
55

6-
# from src.csv_reader import CsvReader
7-
from mojo_csv import CsvReader
6+
from src.csv_reader import CsvReader
7+
8+
# from mojo_csv import CsvReader
89

910

1011
fn bench_parse_micro() capturing:
@@ -72,7 +73,7 @@ fn main():
7273
for _ in range(1000):
7374
elapsed = time_function[bench_parse_mini]()
7475
time += elapsed / 1000000
75-
avg = time / 100
76+
avg = time / 1000
7677
print("average time in ms for mini file:")
7778
print(round(avg, 6))
7879
print("-------------------------")
@@ -87,18 +88,18 @@ fn main():
8788
print("-------------------------")
8889
print("running benchmark for medium csv:")
8990
time = 0
90-
for _ in range(1000):
91+
for _ in range(100):
9192
elapsed = time_function[bench_parse_medium]()
9293
time += elapsed / 1000000
93-
avg = time / 1000
94+
avg = time / 100
9495
print("average time in ms for medium file:")
9596
print(round(avg, 6))
9697
print("-------------------------")
9798
print("running benchmark for large csv:")
9899
time = 0
99-
for _ in range(1000):
100+
for _ in range(100):
100101
elapsed = time_function[bench_parse_large]()
101102
time += elapsed / 1000000
102-
avg = time / 1000
103+
avg = time / 100
103104
print("average time in ms for large file:")
104105
print(round(avg, 6))

bench_threaded.mojo

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@ from sys import exit
33
from testing import assert_true
44
from time import time_function, perf_counter
55

6-
# from src import CsvReader, ThreadedCsvReader
7-
from mojo_csv import CsvReader, ThreadedCsvReader
6+
from src import CsvReader, ThreadedCsvReader
7+
8+
# from mojo_csv import CsvReader, ThreadedCsvReader
89

910

1011
fn bench_single_threaded_medium() capturing:
@@ -43,6 +44,24 @@ fn bench_multi_threaded_small() capturing:
4344
exit()
4445

4546

47+
fn bench_single_threaded_large() capturing:
48+
try:
49+
var in_csv: Path = cwd().joinpath("tests/datablist/products-2000000.csv")
50+
var _ = CsvReader(in_csv)
51+
except:
52+
print("error in single threaded large")
53+
exit()
54+
55+
56+
fn bench_multi_threaded_large() capturing:
57+
try:
58+
var in_csv: Path = cwd().joinpath("tests/datablist/products-2000000.csv")
59+
var _ = ThreadedCsvReader(in_csv)
60+
except:
61+
print("error in multi threaded large")
62+
exit()
63+
64+
4665
fn main():
4766
print("=== CSV Reader Performance Comparison ===")
4867
print()
@@ -91,6 +110,29 @@ fn main():
91110
print("Speedup:", round(speedup_medium, 2), "x")
92111
print("-------------------------")
93112

113+
# Test large file (2M rows)
114+
print("Large file benchmark (2,000,000 rows):")
115+
print("Single-threaded:")
116+
var time_single_large: Float64 = 0
117+
for _ in range(3): # Fewer iterations for large file
118+
var elapsed = time_function[bench_single_threaded_large]()
119+
time_single_large += elapsed / 1000000
120+
var avg_single_large = time_single_large / 3
121+
print("Average time:", round(avg_single_large, 6), "ms")
122+
123+
print("Multi-threaded:")
124+
var time_multi_large: Float64 = 0
125+
for _ in range(3): # Fewer iterations for large file
126+
var elapsed = time_function[bench_multi_threaded_large]()
127+
time_multi_large += elapsed / 1000000
128+
var avg_multi_large = time_multi_large / 3
129+
print("Average time:", round(avg_multi_large, 6), "ms")
130+
131+
var speedup_large = avg_single_large / avg_multi_large
132+
print("Speedup:", round(speedup_large, 2), "x")
133+
print("-------------------------")
134+
94135
print("Summary:")
95136
print("Small file speedup:", round(speedup_small, 2), "x")
96137
print("Medium file speedup:", round(speedup_medium, 2), "x")
138+
print("Large file speedup:", round(speedup_large, 2), "x")

0 commit comments

Comments
 (0)