Skip to content

Commit 0f4ab0d

Browse files
committed
bugfix: startswith / endswith does not overflow when suffix size > str size
The previous implementation of startswith / endswith would overflow when the suffix size was greater than the str size, due to an issue with the __substrcmp(str, suffix, lower) implementation. The fix re-implements both functions following the Python 2.7 source, which relies on the _string_tailmatch convenience function. Addresses issue #2.
1 parent 38b83f0 commit 0f4ab0d

File tree

3 files changed

+105
-58
lines changed

3 files changed

+105
-58
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
LIBTOOL = libtool
22
LIBDIR = /usr/lib
33
CXX = g++
4-
CXXFLAGS = -g -O3 -Wall
4+
CXXFLAGS = -g -O3 -Wall -Wextra -Wshadow -Wconversion -Wcast-qual -Wformat=2
55

66
all: libpystring.la
77

@@ -20,5 +20,5 @@ clean:
2020
.PHONY: test
2121
test:
2222
$(RM) -fr test
23-
$(CXX) pystring.cpp test.cpp -o test
23+
$(CXX) pystring.cpp test.cpp $(CXXFLAGS) -o test
2424
./test

pystring.cpp

Lines changed: 75 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,31 @@
4242
namespace pystring
4343
{
4444

45+
// This definition comes from configure.in in the python src.
46+
// Strictly speaking this limits us to str sizes of 2**31.
47+
// Should we wish to lift this limit, we could use architecture
48+
// specific #defines and read from ssize_t (unistd.h) if the header exists.
49+
// But in the meantime, the use of int assures maximum arch compatibility.
50+
// This must also equal the size used in the end = MAX_32BIT_INT default arg.
51+
52+
typedef int Py_ssize_t;
53+
54+
/* Helper macro that normalizes Python-style slice bounds in place.
 *
 *  - `end` greater than `len` is clamped to `len`; a negative `end` has
 *    `len` added to it and is then clamped to 0.
 *  - a negative `start` has `len` added to it and is then clamped to 0.
 *    `start` is NOT clamped from above, so callers must still cope with
 *    start > len.
 *
 * `start` and `end` must be modifiable lvalues, and every argument is
 * evaluated more than once, so do not pass expressions with side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely inside an unbraced if/else.
 * Invoke it with a trailing semicolon: ADJUST_INDICES(start, end, len);
 */
#define ADJUST_INDICES(start, end, len)     \
    do {                                    \
        if ((end) > (len))                  \
            (end) = (len);                  \
        else if ((end) < 0) {               \
            (end) += (len);                 \
            if ((end) < 0)                  \
                (end) = 0;                  \
        }                                   \
        if ((start) < 0) {                  \
            (start) += (len);               \
            if ((start) < 0)                \
                (start) = 0;                \
        }                                   \
    } while (0)
68+
69+
4570
namespace {
4671

4772
//////////////////////////////////////////////////////////////////////////////////////////////
@@ -213,29 +238,6 @@ namespace pystring
213238
#define RIGHTSTRIP 1
214239
#define BOTHSTRIP 2
215240

216-
//////////////////////////////////////////////////////////////////////////////////////////////
217-
///
218-
///
219-
bool __substrcmp( const std::string & str, const std::string & str2, std::string::size_type pos )
220-
{
221-
std::string::size_type len = str.size(), len2 = str2.size();
222-
if ( pos + len2 > len )
223-
{
224-
return false;
225-
}
226-
227-
for ( std::string::size_type i = 0; i < len2; ++i )
228-
{
229-
230-
if ( str[pos + i] != str2[i] )
231-
{
232-
return false;
233-
}
234-
}
235-
236-
return true;
237-
}
238-
239241
//////////////////////////////////////////////////////////////////////////////////////////////
240242
///
241243
///
@@ -432,44 +434,61 @@ namespace pystring
432434
//////////////////////////////////////////////////////////////////////////////////////////////
433435
///
434436
///
435-
bool startswith( const std::string & str, const std::string & prefix, int start, int end )
437+
438+
namespace
436439
{
437-
int startp, endp;
440+
/* Matches the end (direction >= 0) or start (direction < 0) of self
441+
* against substr, using the start and end arguments. Returns
442+
* -1 on error, 0 if not found and 1 if found.
443+
*/
438444

439-
startp = __adjustslicepos( str.size(), start );
440-
endp = __adjustslicepos( str.size(), end );
441-
442-
if ( start > (int) str.size() ) return false;
443-
444-
if ( endp - startp < (int) prefix.size() ) return false;
445-
return __substrcmp( str, prefix, startp );
446-
445+
int _string_tailmatch(const std::string & self, const std::string & substr,
446+
Py_ssize_t start, Py_ssize_t end,
447+
int direction)
448+
{
449+
Py_ssize_t len = (Py_ssize_t) self.size();
450+
Py_ssize_t slen = (Py_ssize_t) substr.size();
451+
452+
const char* sub = substr.c_str();
453+
const char* str = self.c_str();
454+
455+
ADJUST_INDICES(start, end, len);
456+
457+
if (direction < 0) {
458+
// startswith
459+
if (start+slen > len)
460+
return 0;
461+
} else {
462+
// endswith
463+
if (end-start < slen || start > len)
464+
return 0;
465+
if (end-slen > start)
466+
start = end - slen;
467+
}
468+
if (end-start >= slen)
469+
return (!std::memcmp(str+start, sub, slen));
470+
471+
return 0;
472+
}
447473
}
448-
449-
//////////////////////////////////////////////////////////////////////////////////////////////
450-
///
451-
///
474+
452475
bool endswith( const std::string & str, const std::string & suffix, int start, int end )
453476
{
454-
int startp, endp;
455-
456-
startp = __adjustslicepos( str.size(), start );
457-
endp = __adjustslicepos( str.size(), end );
477+
int result = _string_tailmatch(str, suffix,
478+
(Py_ssize_t) start, (Py_ssize_t) end, +1);
479+
//if (result == -1) // TODO: Error condition
458480

459-
int suffixsize = (int) suffix.size();
460-
int upper = endp;
461-
int lower = ( upper - suffixsize ) > startp ? ( upper - suffixsize ) : startp;
462-
463-
if ( start > (int) str.size() ) return false;
464-
465-
466-
if ( upper - lower < suffixsize )
467-
{
468-
return false;
469-
}
470-
471-
472-
return __substrcmp(str, suffix, lower );
481+
return static_cast<bool>(result);
482+
}
483+
484+
485+
bool startswith( const std::string & str, const std::string & prefix, int start, int end )
486+
{
487+
int result = _string_tailmatch(str, prefix,
488+
(Py_ssize_t) start, (Py_ssize_t) end, -1);
489+
//if (result == -1) // TODO: Error condition
490+
491+
return static_cast<bool>(result);
473492
}
474493

475494
//////////////////////////////////////////////////////////////////////////////////////////////
@@ -735,7 +754,7 @@ namespace pystring
735754

736755
if ( table.size() != 256 )
737756
{
738-
//raise exception instead
757+
// TODO : raise exception instead
739758
return str;
740759
}
741760

test.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,39 @@ PYSTRING_TEST_APP(PyStringUnitTests)
88

99
PYSTRING_ADD_TEST(pystring, endswith)
1010
{
11+
PYSTRING_CHECK_EQUAL(pystring::endswith("", ""), true);
12+
PYSTRING_CHECK_EQUAL(pystring::endswith("", "a"), false);
13+
PYSTRING_CHECK_EQUAL(pystring::endswith("a", ""), true);
1114
PYSTRING_CHECK_EQUAL(pystring::endswith("", ".mesh"), false);
1215
PYSTRING_CHECK_EQUAL(pystring::endswith("help", ".mesh"), false);
16+
PYSTRING_CHECK_EQUAL(pystring::endswith("help", ".mesh", 0), false);
17+
PYSTRING_CHECK_EQUAL(pystring::endswith("help", ".mesh", 1), false);
18+
PYSTRING_CHECK_EQUAL(pystring::endswith("help", ".mesh", 1, 2), false);
19+
PYSTRING_CHECK_EQUAL(pystring::endswith("help", ".mesh", 1, 1), false);
20+
PYSTRING_CHECK_EQUAL(pystring::endswith("help", ".mesh", 1, -1), false);
21+
PYSTRING_CHECK_EQUAL(pystring::endswith("help", ".mesh", -1), false);
1322
PYSTRING_CHECK_EQUAL(pystring::endswith(".mesh", ".mesh"), true);
1423
PYSTRING_CHECK_EQUAL(pystring::endswith("a.mesh", ".mesh"), true);
1524
PYSTRING_CHECK_EQUAL(pystring::endswith("a.", "."), true);
25+
PYSTRING_CHECK_EQUAL(pystring::endswith("abcdef", "ef"), true);
26+
PYSTRING_CHECK_EQUAL(pystring::endswith("abcdef", "cdef"), true);
27+
PYSTRING_CHECK_EQUAL(pystring::endswith("abcdef", "cdef", 2), true);
28+
PYSTRING_CHECK_EQUAL(pystring::endswith("abcdef", "cdef", 3), false);
29+
PYSTRING_CHECK_EQUAL(pystring::endswith("abcdef", "cdef", 2, 3), false);
30+
PYSTRING_CHECK_EQUAL(pystring::endswith("abcdef", "cdef", -10), true);
31+
}
32+
33+
PYSTRING_ADD_TEST(pystring, startswith)
34+
{
35+
PYSTRING_CHECK_EQUAL(pystring::startswith("", ""), true);
36+
PYSTRING_CHECK_EQUAL(pystring::startswith("", "a"), false);
37+
PYSTRING_CHECK_EQUAL(pystring::startswith("a", ""), true);
38+
PYSTRING_CHECK_EQUAL(pystring::startswith("abc", "ab"), true);
39+
PYSTRING_CHECK_EQUAL(pystring::startswith("abc", "abc"), true);
40+
PYSTRING_CHECK_EQUAL(pystring::startswith("abc", "abcd"), false);
41+
PYSTRING_CHECK_EQUAL(pystring::startswith("abcdef", "abc"), true);
42+
PYSTRING_CHECK_EQUAL(pystring::startswith("abcdef", "abc", 1), false);
43+
PYSTRING_CHECK_EQUAL(pystring::startswith("abcdef", "bc", 1), true);
1644
}
1745

1846
PYSTRING_ADD_TEST(pystring, strip)

0 commit comments

Comments
 (0)