Skip to content

Commit 34f513c

Browse files
committed
Sped up benchmarks.
1 parent 5781bb4 commit 34f513c

File tree

1 file changed

+18 -15 lines changed

1 file changed

+18 -15 lines changed

tests/bench.py

Lines changed: 18 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -11,35 +11,38 @@
11 11   CHUNK_SIZE = 512
12 12   # END CONFIG #
13 13
14    - def bench() -> dict[str, float]:
   14 + def bench() -> dict[str, float]:
15 15       # Initialise the chunkers.
16 16       semchunk_chunker = semchunk.chunkerify(tiktoken.encoding_for_model('gpt-4'), CHUNK_SIZE)
17    -     semantic_text_splitter_chunker = TextSplitter.from_tiktoken_model('gpt-4', CHUNK_SIZE)
   17 +     sts_chunker = TextSplitter.from_tiktoken_model('gpt-4', CHUNK_SIZE)
18 18
19    -     def bench_semchunk(text: str) -> None:
20    -         semchunk_chunker(text)
   19 +     def bench_semchunk(texts: list[str]) -> None:
   20 +         semchunk_chunker(texts)
21 21
22    -     def bench_semantic_text_splitter(text: str) -> None:
23    -         semantic_text_splitter_chunker.chunks(text)
   22 +     def bench_sts(texts: list[str]) -> None:
   23 +         [sts_chunker.chunks(text) for text in texts]
24 24
25 25       libraries = {
26 26           'semchunk': bench_semchunk,
27    -         'semantic_text_splitter': bench_semantic_text_splitter,
   27 +         'semantic_text_splitter': bench_sts,
28 28       }
29 29
30 30       # Download the Gutenberg corpus.
31    -     nltk.download('gutenberg')
32    -     gutenberg = nltk.corpus.gutenberg
   31 +     try:
   32 +         gutenberg = nltk.corpus.gutenberg
   33 +
   34 +     except Exception:
   35 +         nltk.download('gutenberg')
   36 +         gutenberg = nltk.corpus.gutenberg
33 37
34 38       # Benchmark the libraries.
35 39       benchmarks = dict.fromkeys(libraries.keys(), 0)
   40 +     texts = [gutenberg.raw(fileid) for fileid in gutenberg.fileids()]
36 41
37    -     for fileid in gutenberg.fileids():
38    -         sample = gutenberg.raw(fileid)
39    -         for library, function in libraries.items():
40    -             start = time.time()
41    -             function(sample)
42    -             benchmarks[library] += time.time() - start
   42 +     for library, function in libraries.items():
   43 +         start = time.time()
   44 +         function(texts)
   45 +         benchmarks[library] = time.time() - start
43 46
44 47       return benchmarks
45 48
0 commit comments

Comments (0)