@@ -595,7 +595,7 @@ STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
595595 continue ;
596596 }
597597 /* miss: check if next character is part of pattern */
598- if (i < w && !STRINGLIB_BLOOM (mask , ss [i + 1 ])) {
598+ if (i + 1 <= w && !STRINGLIB_BLOOM (mask , ss [i + 1 ])) {
599599 i = i + m ;
600600 }
601601 else {
@@ -604,7 +604,7 @@ STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
604604 }
605605 else {
606606 /* skip: check if next character is part of pattern */
607- if (i < w && !STRINGLIB_BLOOM (mask , ss [i + 1 ])) {
607+ if (i + 1 <= w && !STRINGLIB_BLOOM (mask , ss [i + 1 ])) {
608608 i = i + m ;
609609 }
610610 }
@@ -667,7 +667,16 @@ STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
667667 return res + count ;
668668 }
669669 }
670- /* miss: check if next character is part of pattern */
670+
671+ /* Miss: check if next character is part of pattern.
672+ Note that in contrast to default_find and default_rfind we do
673+ *not* need to prevent the algorithm from reading one character
674+ beyond the last character in the input that the pattern could
675+ start in. I.e. if i == w it is safe to read ss[i + 1], because the
676+ input and pattern length requirements that must hold for this
677+ variant of the algorithm to be called ensure that it is always a
678+ valid part of the input. In that case it doesn't matter which
679+ character is read, since the loop will terminate regardless. */
671680 if (!STRINGLIB_BLOOM (mask , ss [i + 1 ])) {
672681 i = i + m ;
673682 }
@@ -676,7 +685,9 @@ STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
676685 }
677686 }
678687 else {
679- /* skip: check if next character is part of pattern */
688+ /* Skip: check if next character is part of pattern.
689+ See the comment above regarding the safety of accessing ss[i + 1] when i == w.
690+ */
680691 if (!STRINGLIB_BLOOM (mask , ss [i + 1 ])) {
681692 i = i + m ;
682693 }
0 commit comments