Skip to content

Commit 3871735

Browse files
committed
Made overlap unit test more robust
1 parent cc4f365 commit 3871735

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

tests/test_semchunk.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
11
"""Test semchunk."""
2+
import math
3+
24
import semchunk
35

46
import tiktoken
@@ -84,7 +86,7 @@ def test_semchunk() -> None:
8486
# Test overlapping.
8587
chunker = semchunk.chunkerify(token_counter, DETERMINISTIC_TEST_CHUNK_SIZE)
8688
low_overlap_chunks = chunker(DETERMINISTIC_TEST_INPUT, overlap = 0.1)
87-
high_overlap_chunks = chunker(DETERMINISTIC_TEST_INPUT, overlap = 0.9)
89+
high_overlap_chunks = chunker(DETERMINISTIC_TEST_INPUT, overlap = math.ceil(DETERMINISTIC_TEST_CHUNK_SIZE * 0.9))
8890

8991
if name == 'word':
9092
assert len(high_overlap_chunks) == len(low_overlap_chunks)
@@ -94,7 +96,7 @@ def test_semchunk() -> None:
9496

9597
if TEST_OFFSETS:
9698
low_overlap_chunks, low_overlap_offsets = chunker(DETERMINISTIC_TEST_INPUT, overlap = 0.1, offsets = True)
97-
high_overlap_chunks, high_overlap_offsets = chunker(DETERMINISTIC_TEST_INPUT, overlap = 0.9, offsets = True)
99+
high_overlap_chunks, high_overlap_offsets = chunker(DETERMINISTIC_TEST_INPUT, overlap = math.ceil(DETERMINISTIC_TEST_CHUNK_SIZE * 0.9), offsets = True)
98100

99101
if name == 'word':
100102
assert len(high_overlap_chunks) == len(low_overlap_chunks)

0 commit comments

Comments
 (0)