Tweak conditional phrasing to match loop terminating criteria, add comment…

duaneg · duaneg · commit c07c23eb921b · 2025-06-22T11:49:30.000+12:00
…explaining why a guard is not necessary in adaptive_find.
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
@@ -595,7 +595,7 @@ STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
                 continue;
             }
             /* miss: check if next character is part of pattern */
-            if (i < w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
+            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
                 i = i + m;
             }
             else {
@@ -604,7 +604,7 @@ STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
         }
         else {
             /* skip: check if next character is part of pattern */
-            if (i < w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
+            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
                 i = i + m;
             }
         }
@@ -667,7 +667,16 @@ STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
                     return res + count;
                 }
             }
-            /* miss: check if next character is part of pattern */
+
+            /* Miss: check if next character is part of pattern.
+               Note that in contrast to default_find and default_rfind we do
+               *not* need to prevent the algorithm from reading one character
+               beyond the last character in the input that the pattern could
+               start in. I.e. if i == w it is safe to read ss[i + 1] since the
+               input and pattern length requirements on when this variant
+               algorithm will be called ensure it will always be a valid part
+               of the input. In that case it doesn't matter what the character
+               read is since the loop will terminate regardless. */
             if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
                 i = i + m;
             }
@@ -676,7 +685,9 @@ STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
             }
         }
         else {
-            /* skip: check if next character is part of pattern */
+            /* Skip: check if next character is part of pattern.
+               See comment above re safety of accessing ss[i+1] when i == w.
+             */
             if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
                 i = i + m;
             }

Original file line number	Diff line number	Diff line change
`@@ -595,7 +595,7 @@ STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,`
`595`	`595`	`continue;`
`596`	`596`	`}`
`597`	`597`	`/* miss: check if next character is part of pattern */`
`598`		`- if (i < w && !STRINGLIB_BLOOM(mask, ss[i+1])) {`
	`598`	`+ if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {`
`599`	`599`	`i = i + m;`
`600`	`600`	`}`
`601`	`601`	`else {`
`@@ -604,7 +604,7 @@ STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,`
`604`	`604`	`}`
`605`	`605`	`else {`
`606`	`606`	`/* skip: check if next character is part of pattern */`
`607`		`- if (i < w && !STRINGLIB_BLOOM(mask, ss[i+1])) {`
	`607`	`+ if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {`
`608`	`608`	`i = i + m;`
`609`	`609`	`}`
`610`	`610`	`}`
`@@ -667,7 +667,16 @@ STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,`
`667`	`667`	`return res + count;`
`668`	`668`	`}`
`669`	`669`	`}`
`670`		`- /* miss: check if next character is part of pattern */`
	`670`	`+`
	`671`	`+ /* Miss: check if next character is part of pattern.`
	`672`	`+ Note that in contrast to default_find and default_rfind we do`
	`673`	`+ not need to prevent the algorithm from reading one character`
	`674`	`+ beyond the last character in the input that the pattern could`
	`675`	`+ start in. I.e. if i == w it is safe to read ss[i + 1] since the`
	`676`	`+ input and pattern length requirements on when this variant`
	`677`	`+ algorithm will be called ensure it will always be a valid part`
	`678`	`+ of the input. In that case it doesn't matter what the character`
	`679`	`+ read is since the loop will terminate regardless. */`
`671`	`680`	`if (!STRINGLIB_BLOOM(mask, ss[i+1])) {`
`672`	`681`	`i = i + m;`
`673`	`682`	`}`
`@@ -676,7 +685,9 @@ STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,`
`676`	`685`	`}`
`677`	`686`	`}`
`678`	`687`	`else {`
`679`		`- /* skip: check if next character is part of pattern */`
	`688`	`+ /* Skip: check if next character is part of pattern.`
	`689`	`+ See comment above re safety of accessing ss[i+1] when i == w.`
	`690`	`+ */`
`680`	`691`	`if (!STRINGLIB_BLOOM(mask, ss[i+1])) {`
`681`	`692`	`i = i + m;`
`682`	`693`	`}`