Skip to content

Commit c1c23fb

Browse files
committed
Expose XML Expat 2.7.2 mitigation APIs
1 parent 81c975b commit c1c23fb

File tree

4 files changed

+285
-21
lines changed

4 files changed

+285
-21
lines changed

Modules/clinic/pyexpat.c.h

Lines changed: 135 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Modules/expat/expat_external.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939

4040
#ifndef Expat_External_INCLUDED
4141
# define Expat_External_INCLUDED 1
42+
/* Required so that functions in expat.h are declared */
43+
#include "expat_config.h"
4244
/* Namespace external symbols to allow multiple libexpat versions to
4345
co-exist. */
4446
#include "pyexpatns.h"

Modules/expat/pyexpatns.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@
8282
#define XmlPrologStateInit PyExpat_XmlPrologStateInit
8383
#define XmlPrologStateInitExternalEntity PyExpat_XmlPrologStateInitExternalEntity
8484
#define XML_ResumeParser PyExpat_XML_ResumeParser
85+
#define XML_SetAllocTrackerActivationThreshold PyExpat_XML_SetAllocTrackerActivationThreshold
86+
#define XML_SetAllocTrackerMaximumAmplification PyExpat_XML_SetAllocTrackerMaximumAmplification
8587
#define XML_SetAttlistDeclHandler PyExpat_XML_SetAttlistDeclHandler
8688
#define XML_SetBase PyExpat_XML_SetBase
8789
#define XML_SetBillionLaughsAttackProtectionActivationThreshold PyExpat_XML_SetBillionLaughsAttackProtectionActivationThreshold

Modules/pyexpat.c

Lines changed: 146 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@
77
#include "pycore_pyhash.h" // _Py_HashSecret
88
#include "pycore_traceback.h" // _PyTraceback_Add()
99

10+
#include <float.h> // FLT_MAX
1011
#include <stdbool.h>
1112
#include <stddef.h> // offsetof()
13+
1214
#include "expat.h"
1315
#include "pyexpat.h"
1416

@@ -138,31 +140,72 @@ set_error_attr(PyObject *err, const char *name, int value)
138140
return 1;
139141
}
140142

143+
static PyObject *
144+
format_xml_error(enum XML_Error code, int lineno, int column)
145+
{
146+
const char *errmsg = XML_ErrorString(code);
147+
PyUnicodeWriter *writer = PyUnicodeWriter_Create(strlen(errmsg) + 1);
148+
if (writer == NULL) {
149+
return NULL;
150+
}
151+
if (PyUnicodeWriter_Format(writer,
152+
"%s: line %i, column %i",
153+
errmsg, lineno, column) < 0)
154+
{
155+
PyUnicodeWriter_Discard(writer);
156+
return NULL;
157+
}
158+
return PyUnicodeWriter_Finish(writer);
159+
}
160+
161+
static PyObject *
162+
set_xml_error(pyexpat_state *state,
163+
enum XML_Error code, int lineno, int column,
164+
const char *errmsg)
165+
{
166+
PyObject *arg = errmsg == NULL
167+
? format_xml_error(code, lineno, column)
168+
: PyUnicode_FromStringAndSize(errmsg, strlen(errmsg));
169+
if (arg == NULL) {
170+
return NULL;
171+
}
172+
PyObject *res = PyObject_CallOneArg(state->error, arg);
173+
Py_DECREF(arg);
174+
if (
175+
res != NULL
176+
&& set_error_attr(res, "code", code)
177+
&& set_error_attr(res, "lineno", lineno)
178+
&& set_error_attr(res, "offset", column)
179+
) {
180+
PyErr_SetObject(state->error, res);
181+
Py_DECREF(res);
182+
}
183+
return NULL;
184+
}
185+
186+
/* Set the module's error exception for error CODE on parser object SELF.
 *
 * The error line and column are read from SELF->itself, which must not be
 * NULL.  ERRMSG is forwarded to set_xml_error(); pass NULL to get the
 * default formatted message.  STATE is the module state to raise from.
 *
 * Every argument is parenthesized and STATE is actually used, so the macro
 * does not depend on the caller having a local variable named 'state'.
 * The block declares locals named 'parser', 'lineno' and 'column'; argument
 * expressions must not refer to caller variables with those names.
 */
#define SET_XML_ERROR(STATE, SELF, CODE, ERRMSG)                            \
    do {                                                                    \
        XML_Parser parser = (SELF)->itself;                                 \
        assert(parser != NULL);                                             \
        int lineno = XML_GetErrorLineNumber(parser);                        \
        int column = XML_GetErrorColumnNumber(parser);                      \
        (void)set_xml_error((STATE), (CODE), lineno, column, (ERRMSG));     \
    } while (0)
194+
141195
/* Build and set an Expat exception, including positioning
142196
* information. Always returns NULL.
143197
*/
144198
static PyObject *
145199
set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code)
146200
{
147-
PyObject *err;
148-
PyObject *buffer;
149-
XML_Parser parser = self->itself;
150-
int lineno = XML_GetErrorLineNumber(parser);
151-
int column = XML_GetErrorColumnNumber(parser);
201+
SET_XML_ERROR(state, self, code, NULL);
202+
return NULL;
203+
}
152204

153-
buffer = PyUnicode_FromFormat("%s: line %i, column %i",
154-
XML_ErrorString(code), lineno, column);
155-
if (buffer == NULL)
156-
return NULL;
157-
err = PyObject_CallOneArg(state->error, buffer);
158-
Py_DECREF(buffer);
159-
if ( err != NULL
160-
&& set_error_attr(err, "code", code)
161-
&& set_error_attr(err, "offset", column)
162-
&& set_error_attr(err, "lineno", lineno)) {
163-
PyErr_SetObject(state->error, err);
164-
}
165-
Py_XDECREF(err);
205+
static PyObject *
206+
set_invalid_arg(pyexpat_state *state, xmlparseobject *self, const char *errmsg)
207+
{
208+
SET_XML_ERROR(state, self, XML_ERROR_INVALID_ARGUMENT, errmsg);
166209
return NULL;
167210
}
168211

@@ -1133,6 +1176,89 @@ pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
11331176
}
11341177
#endif
11351178

1179+
#if XML_COMBINED_VERSION >= 20702
1180+
/*[clinic input]
1181+
@permit_long_summary
1182+
@permit_long_docstring_body
1183+
pyexpat.xmlparser.SetAllocTrackerMaximumAmplification
1184+
1185+
cls: defining_class
1186+
max_factor: float
1187+
/
1188+
1189+
Sets the maximum amplification factor between direct input and bytes of dynamic memory allocated.
1190+
1191+
By default, parser objects have a maximum amplification factor of 100.
1192+
1193+
The amplification factor is calculated as "allocated / direct" while parsing,
1194+
where "direct" is the number of bytes read from the primary document in parsing
1195+
and "allocated" is the number of bytes of dynamic memory allocated in the parser
1196+
hierarchy.
1197+
1198+
The 'max_factor' value must be a non-NaN floating point value greater than
1199+
or equal to 1.0. Amplification factors greater than 100 can be observed
1200+
near the start of parsing even with benign files in practice. As such, the
1201+
upper bound must be carefully chosen so as to avoid false positives.
1202+
[clinic start generated code]*/
1203+
1204+
static PyObject *
1205+
pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self,
1206+
PyTypeObject *cls,
1207+
float max_factor)
1208+
/*[clinic end generated code: output=6e44bd48c9b112a0 input=18e8d07329c0efda]*/
1209+
{
1210+
assert(self->itself != NULL);
1211+
if (XML_SetAllocTrackerMaximumAmplification(self->itself, max_factor) == XML_TRUE) {
1212+
Py_RETURN_NONE;
1213+
}
1214+
// XML_SetAllocTrackerMaximumAmplification() can fail if self->itself
1215+
// is not a root parser (currently, this is equivalent to be created
1216+
// by ExternalEntityParserCreate()) or if 'max_factor' is NaN or < 1.0.
1217+
//
1218+
// Expat does not provide a way to determine whether a parser is a root
1219+
// or not, nor does it provide a way to distinguish between failures in
1220+
// XML_SetAllocTrackerMaximumAmplification() (see gh-90949), we manually
1221+
// detect the factor out-of-range issue here so that users have a better
1222+
// error message.
1223+
pyexpat_state *state = PyType_GetModuleState(cls);
1224+
const char *message = (isnan(max_factor) || max_factor < 1.0f)
1225+
? "'max_factor' must be at least 1.0"
1226+
: "parser must be a root parser";
1227+
return set_invalid_arg(state, self, message);
1228+
}
1229+
1230+
/*[clinic input]
1231+
@permit_long_summary
1232+
@permit_long_docstring_body
1233+
pyexpat.xmlparser.SetAllocTrackerActivationThreshold
1234+
1235+
cls: defining_class
1236+
threshold: unsigned_long_long
1237+
/
1238+
1239+
Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM.
1240+
1241+
By default, parser objects have an allocation activation threshold of 64 MiB.
1242+
[clinic start generated code]*/
1243+
1244+
static PyObject *
1245+
pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self,
1246+
PyTypeObject *cls,
1247+
unsigned long long threshold)
1248+
/*[clinic end generated code: output=bed7e93207ba08c5 input=8453509a137a47c0]*/
1249+
{
1250+
assert(self->itself != NULL);
1251+
if (XML_SetAllocTrackerActivationThreshold(self->itself, threshold) == XML_TRUE) {
1252+
Py_RETURN_NONE;
1253+
}
1254+
// XML_SetAllocTrackerActivationThreshold() can only fail if self->itself
1255+
// is not a root parser (currently, this is equivalent to be created
1256+
// by ExternalEntityParserCreate()).
1257+
pyexpat_state *state = PyType_GetModuleState(cls);
1258+
return set_invalid_arg(state, self, "parser must be a root parser");
1259+
}
1260+
#endif
1261+
11361262
static struct PyMethodDef xmlparse_methods[] = {
11371263
PYEXPAT_XMLPARSER_PARSE_METHODDEF
11381264
PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
@@ -1141,9 +1267,9 @@ static struct PyMethodDef xmlparse_methods[] = {
11411267
PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
11421268
PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
11431269
PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
1144-
#if XML_COMBINED_VERSION >= 19505
11451270
PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
1146-
#endif
1271+
PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF
1272+
PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF
11471273
PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF
11481274
PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF
11491275
{NULL, NULL} /* sentinel */

0 commit comments

Comments
 (0)