Skip to content

Commit c1f5cb9

Browse files
committed
Merge branch 'main' into dev
2 parents 6fd5b6d + d84fa73 commit c1f5cb9

File tree

1 file changed

+9
-8
lines changed

1 file changed

+9
-8
lines changed

tests/bench.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
CHUNK_SIZE = 512
1212
# END CONFIG #
1313

14-
def bench() -> dict[str, float]:
14+
def bench() -> dict[str, float]:
1515
# Initialise the chunkers.
1616
semchunk_chunker = semchunk.chunkerify(tiktoken.encoding_for_model('gpt-4'), CHUNK_SIZE)
1717
sts_chunker = TextSplitter.from_tiktoken_model('gpt-4', CHUNK_SIZE)
@@ -21,7 +21,7 @@ def bench_semchunk(texts: list[str]) -> None:
2121
semchunk_chunker(texts)
2222

2323
def bench_sts(texts: list[str]) -> None:
24-
[sts_chunker.chunks(text) for text in texts]
24+
sts_chunker.chunk_all(texts)
2525

2626
libraries = {
2727
'semchunk': bench_semchunk,
@@ -31,22 +31,23 @@ def bench_sts(texts: list[str]) -> None:
3131
# Download the Gutenberg corpus.
3232
try:
3333
gutenberg = nltk.corpus.gutenberg
34-
34+
3535
except Exception:
3636
nltk.download('gutenberg')
3737
gutenberg = nltk.corpus.gutenberg
38-
38+
3939
# Benchmark the libraries.
40-
benchmarks = dict.fromkeys(libraries.keys(), 0)
40+
benchmarks = dict.fromkeys(libraries.keys(), 0.0)
4141
texts = [gutenberg.raw(fileid) for fileid in gutenberg.fileids()]
42-
42+
4343
for library, function in libraries.items():
4444
start = time.time()
4545
function(texts)
4646
benchmarks[library] = time.time() - start
47-
47+
4848
return benchmarks
4949

5050
if __name__ == '__main__':
51+
nltk.download('gutenberg')
5152
for library, time_taken in bench().items():
52-
print(f'{library}: {time_taken:.2f}s')
53+
print(f'{library}: {time_taken:.2f}s')

0 commit comments

Comments
 (0)