Skip to content

Commit 3f7231a

Browse files
authored
PYTHON-3048 Fixed bug with incorrect validation of UTF-8 regex patterns (#970)
1 parent be3008a commit 3f7231a

File tree

6 files changed

+23
-190
lines changed

6 files changed

+23
-190
lines changed

THIRD-PARTY-NOTICES

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -71,26 +71,3 @@ OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
7171
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
7272
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
7373
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
74-
75-
3) License Notice for encoding_helpers.c
76-
----------------------------------------
77-
78-
Portions Copyright 2001 Unicode, Inc.
79-
80-
Disclaimer
81-
82-
This source code is provided as is by Unicode, Inc. No claims are
83-
made as to fitness for any particular purpose. No warranties of any
84-
kind are expressed or implied. The recipient agrees to determine
85-
applicability of information provided. If this file has been
86-
purchased on magnetic or optical media from Unicode, Inc., the
87-
sole remedy for any claim will be exchange of defective media
88-
within 90 days of receipt.
89-
90-
Limitations on Rights to Redistribute This Code
91-
92-
Unicode, Inc. hereby grants the right to freely use the information
93-
supplied in this file in the creation of products supporting the
94-
Unicode Standard, and to make copies of this file in any form
95-
for internal or external distribution as long as this notice
96-
remains attached.

bson/_cbsonmodule.c

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626

2727
#include "buffer.h"
2828
#include "time64.h"
29-
#include "encoding_helpers.h"
3029

3130
#define _CBSON_MODULE
3231
#include "_cbsonmodule.h"
@@ -553,12 +552,12 @@ static int _write_regex_to_buffer(
553552
PyObject* py_flags;
554553
PyObject* py_pattern;
555554
PyObject* encoded_pattern;
555+
PyObject* decoded_pattern;
556556
long int_flags;
557557
char flags[FLAGS_SIZE];
558558
char check_utf8 = 0;
559559
const char* pattern_data;
560560
int pattern_length, flags_length;
561-
result_t status;
562561

563562
/*
564563
* Both the builtin re type and our Regex class have attributes
@@ -597,18 +596,8 @@ static int _write_regex_to_buffer(
597596
Py_DECREF(encoded_pattern);
598597
return 0;
599598
}
600-
status = cbson_check_string((const unsigned char*)pattern_data,
601-
pattern_length, check_utf8, 1);
602-
if (status == NOT_UTF_8) {
603-
PyObject* InvalidStringData = _error("InvalidStringData");
604-
if (InvalidStringData) {
605-
PyErr_SetString(InvalidStringData,
606-
"regex patterns must be valid UTF-8");
607-
Py_DECREF(InvalidStringData);
608-
}
609-
Py_DECREF(encoded_pattern);
610-
return 0;
611-
} else if (status == HAS_NULL) {
599+
600+
if (strlen(pattern_data) != (size_t) pattern_length){
612601
PyObject* InvalidDocument = _error("InvalidDocument");
613602
if (InvalidDocument) {
614603
PyErr_SetString(InvalidDocument,
@@ -619,6 +608,22 @@ static int _write_regex_to_buffer(
619608
return 0;
620609
}
621610

611+
if (check_utf8) {
612+
decoded_pattern = PyUnicode_DecodeUTF8(pattern_data, (Py_ssize_t) pattern_length, NULL);
613+
if (decoded_pattern == NULL) {
614+
PyErr_Clear();
615+
PyObject* InvalidStringData = _error("InvalidStringData");
616+
if (InvalidStringData) {
617+
PyErr_SetString(InvalidStringData,
618+
"regex patterns must be valid UTF-8");
619+
Py_DECREF(InvalidStringData);
620+
}
621+
Py_DECREF(encoded_pattern);
622+
return 0;
623+
}
624+
Py_DECREF(decoded_pattern);
625+
}
626+
622627
if (!buffer_write_bytes(buffer, pattern_data, pattern_length + 1)) {
623628
Py_DECREF(encoded_pattern);
624629
return 0;

bson/encoding_helpers.c

Lines changed: 0 additions & 118 deletions
This file was deleted.

bson/encoding_helpers.h

Lines changed: 0 additions & 29 deletions
This file was deleted.

doc/changelog.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ Bug fixes
1919

2020
- Fixed a bug where :meth:`~pymongo.collection.Collection.estimated_document_count`
2121
would fail with a "CommandNotSupportedOnView" error on views (`PYTHON-2885`_).
22+
- Fixed a bug where :class:`~bson.regex.Regex` patterns that were not valid UTF-8 could be encoded
23+
without error (`PYTHON-3048`_). :func:`bson.encode` now raises :class:`bson.errors.InvalidStringData` for such patterns.
2224

2325
Unavoidable breaking changes
2426
............................
@@ -38,6 +40,7 @@ Issues Resolved
3840
See the `PyMongo 4.2 release notes in JIRA`_ for the list of resolved issues
3941
in this release.
4042

43+
.. _PYTHON-3048: https://jira.mongodb.org/browse/PYTHON-3048
4144
.. _PYTHON-2885: https://jira.mongodb.org/browse/PYTHON-2885
4245
.. _PYTHON-3167: https://jira.mongodb.org/browse/PYTHON-3167
4346
.. _PyMongo 4.2 release notes in JIRA: https://jira.mongodb.org/secure/ReleaseNote.jspa?projectId=10004&version=33196

setup.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -255,12 +255,7 @@ def build_extension(self, ext):
255255
Extension(
256256
"bson._cbson",
257257
include_dirs=["bson"],
258-
sources=[
259-
"bson/_cbsonmodule.c",
260-
"bson/time64.c",
261-
"bson/buffer.c",
262-
"bson/encoding_helpers.c",
263-
],
258+
sources=["bson/_cbsonmodule.c", "bson/time64.c", "bson/buffer.c"],
264259
),
265260
Extension(
266261
"pymongo._cmessage",

0 commit comments

Comments
 (0)