Skip to content

Commit 338f32d

Browse files
committed
Address more reviewer feedback: add conditional to adaptive find, tweak
comments, drop extremely marginal "test"
1 parent c07c23e commit 338f32d

File tree

3 files changed

+8
-42
lines changed

3 files changed

+8
-42
lines changed

Lib/test/string_tests.py

Lines changed: 3 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import unittest, string, sys, struct
66
from test import support
7+
from test.support import check_sanitizer
78
from test.support import import_helper
89
from collections import UserList
910
import random
@@ -767,40 +768,16 @@ def test_replace(self):
767768
self.checkraises(TypeError, 'hello', 'replace', 42, 'h')
768769
self.checkraises(TypeError, 'hello', 'replace', 'h', 42)
769770

771+
@unittest.skipUnless(check_sanitizer(address=True), "AddressSanitizer required")
770772
def test_replacement_on_buffer_boundary(self):
771-
772773
# gh-127971: Check we don't read past the end of the buffer when a
773-
# potential match misses on the last character. Note this will likely
774-
# not cause a failure unless ASAN is enabled, and even that may be
775-
# dependent on implementation details subject to change.
774+
# potential match misses on the last character.
776775
any_3_nonblank_codepoints = '!!!'
777776
seven_codepoints = any_3_nonblank_codepoints + ' ' + any_3_nonblank_codepoints
778777
a = (' ' * 243) + seven_codepoints + (' ' * 7)
779778
b = ' ' * 6 + chr(256)
780779
a.replace(seven_codepoints, b)
781780

782-
def test_adaptive_find_on_buffer_boundary(self):
783-
784-
# gh-127971: This exercises the adaptive search algorithm to trigger a
785-
# corner-case where it might examine the character *after* the last
786-
# position that could be the start of the pattern.
787-
#
788-
# Unfortunately there is nothing to *test* to confirm whether the
789-
# character is read or not, nor in fact does it matter for correctness
790-
# with the implementation at time of writing: the adaptive algorithm is
791-
# only triggered if the input is over a certain size and with a pattern
792-
# with more than one character, so with the current implementation even
793-
# though the final character read is not necessary or significant, it
794-
# won't cause a fault.
795-
#
796-
# This test at least intentionally exercises this path, and might
797-
# possibly catch a regression if the implementation changes and breaks
798-
# those assumptions.
799-
prefix = ' ' * (1024 * 4)
800-
haystack = prefix + 'x'
801-
needle = prefix + 'y'
802-
self.assertEqual(haystack.find(needle), -1)
803-
804781
def test_replace_uses_two_way_maxcount(self):
805782
# Test that maxcount works in _two_way_count in fastsearch.h
806783
A, B = "A"*1000, "B"*1000
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
Fix off-by-one read beyond the end of a string in string search
1+
Fix off-by-one read beyond the end of a string in string search.

Objects/stringlib/fastsearch.h

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -667,28 +667,17 @@ STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
667667
return res + count;
668668
}
669669
}
670-
671-
/* Miss: check if next character is part of pattern.
672-
Note that in contrast to default_find and default_rfind we do
673-
*not* need to prevent the algorithm from reading one character
674-
beyond the last character in the input that the pattern could
675-
start in. I.e. if i == w it is safe to read ss[i + 1] since the
676-
input and pattern length requirements on when this variant
677-
algorithm will be called ensure it will always be a valid part
678-
of the input. In that case it doesn't matter what the character
679-
read is since the loop will terminate regardless. */
680-
if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
670+
/* miss: check if next character is part of pattern */
671+
if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
681672
i = i + m;
682673
}
683674
else {
684675
i = i + gap;
685676
}
686677
}
687678
else {
688-
/* Skip: check if next character is part of pattern.
689-
See comment above re safety of accessing ss[i+1] when i == w.
690-
*/
691-
if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
679+
/* skip: check if next character is part of pattern */
680+
if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
692681
i = i + m;
693682
}
694683
}

0 commit comments

Comments
 (0)