7
7
#include "pycore_pyhash.h" // _Py_HashSecret
8
8
#include "pycore_traceback.h" // _PyTraceback_Add()
9
9
10
+ #include <float.h> // FLT_MAX
10
11
#include <stdbool.h>
11
12
#include <stddef.h> // offsetof()
13
+
12
14
#include "expat.h"
13
15
#include "pyexpat.h"
14
16
@@ -138,31 +140,72 @@ set_error_attr(PyObject *err, const char *name, int value)
138
140
return 1 ;
139
141
}
140
142
143
+ static PyObject *
144
+ format_xml_error (enum XML_Error code , int lineno , int column )
145
+ {
146
+ const char * errmsg = XML_ErrorString (code );
147
+ PyUnicodeWriter * writer = PyUnicodeWriter_Create (strlen (errmsg ) + 1 );
148
+ if (writer == NULL ) {
149
+ return NULL ;
150
+ }
151
+ if (PyUnicodeWriter_Format (writer ,
152
+ "%s: line %i, column %i" ,
153
+ errmsg , lineno , column ) < 0 )
154
+ {
155
+ PyUnicodeWriter_Discard (writer );
156
+ return NULL ;
157
+ }
158
+ return PyUnicodeWriter_Finish (writer );
159
+ }
160
+
161
+ static PyObject *
162
+ set_xml_error (pyexpat_state * state ,
163
+ enum XML_Error code , int lineno , int column ,
164
+ const char * errmsg )
165
+ {
166
+ PyObject * arg = errmsg == NULL
167
+ ? format_xml_error (code , lineno , column )
168
+ : PyUnicode_FromStringAndSize (errmsg , strlen (errmsg ));
169
+ if (arg == NULL ) {
170
+ return NULL ;
171
+ }
172
+ PyObject * res = PyObject_CallOneArg (state -> error , arg );
173
+ Py_DECREF (arg );
174
+ if (
175
+ res != NULL
176
+ && set_error_attr (res , "code" , code )
177
+ && set_error_attr (res , "lineno" , lineno )
178
+ && set_error_attr (res , "offset" , column )
179
+ ) {
180
+ PyErr_SetObject (state -> error , res );
181
+ Py_DECREF (res );
182
+ }
183
+ return NULL ;
184
+ }
185
+
186
/* Set an Expat error as the current exception, taking the error position
 * from the parser stored in SELF->itself.
 *
 *   STATE   module state forwarded to set_xml_error()
 *   SELF    pointer to an object with an 'itself' XML_Parser member
 *   CODE    enum XML_Error value to report
 *   ERRMSG  message forwarded to set_xml_error() (may be NULL)
 *
 * Every argument is evaluated exactly once.  The temporaries carry a
 * trailing underscore so they cannot shadow names used in the arguments.
 */
#define SET_XML_ERROR(STATE, SELF, CODE, ERRMSG)                            \
    do {                                                                    \
        XML_Parser parser_ = (SELF)->itself;                                \
        assert(parser_ != NULL);                                            \
        int lineno_ = XML_GetErrorLineNumber(parser_);                      \
        int column_ = XML_GetErrorColumnNumber(parser_);                    \
        (void)set_xml_error((STATE), (CODE), lineno_, column_, (ERRMSG));   \
    } while (0)
194
+
141
195
/* Build and set an Expat exception, including positioning
142
196
* information. Always returns NULL.
143
197
*/
144
198
static PyObject *
145
199
set_error (pyexpat_state * state , xmlparseobject * self , enum XML_Error code )
146
200
{
147
- PyObject * err ;
148
- PyObject * buffer ;
149
- XML_Parser parser = self -> itself ;
150
- int lineno = XML_GetErrorLineNumber (parser );
151
- int column = XML_GetErrorColumnNumber (parser );
201
+ SET_XML_ERROR (state , self , code , NULL );
202
+ return NULL ;
203
+ }
152
204
153
- buffer = PyUnicode_FromFormat ("%s: line %i, column %i" ,
154
- XML_ErrorString (code ), lineno , column );
155
- if (buffer == NULL )
156
- return NULL ;
157
- err = PyObject_CallOneArg (state -> error , buffer );
158
- Py_DECREF (buffer );
159
- if ( err != NULL
160
- && set_error_attr (err , "code" , code )
161
- && set_error_attr (err , "offset" , column )
162
- && set_error_attr (err , "lineno" , lineno )) {
163
- PyErr_SetObject (state -> error , err );
164
- }
165
- Py_XDECREF (err );
205
+ static PyObject *
206
+ set_invalid_arg (pyexpat_state * state , xmlparseobject * self , const char * errmsg )
207
+ {
208
+ SET_XML_ERROR (state , self , XML_ERROR_INVALID_ARGUMENT , errmsg );
166
209
return NULL ;
167
210
}
168
211
@@ -1133,6 +1176,89 @@ pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
1133
1176
}
1134
1177
#endif
1135
1178
1179
+ #if XML_COMBINED_VERSION >= 20702
1180
+ /*[clinic input]
1181
+ @permit_long_summary
1182
+ @permit_long_docstring_body
1183
+ pyexpat.xmlparser.SetAllocTrackerMaximumAmplification
1184
+
1185
+ cls: defining_class
1186
+ max_factor: float
1187
+ /
1188
+
1189
+ Sets the maximum amplification factor between direct input and bytes of dynamic memory allocated.
1190
+
1191
+ By default, parsers objects have a maximum amplification factor of 100.
1192
+
1193
+ The amplification factor is calculated as "allocated / direct" while parsing,
1194
+ where "direct" is the number of bytes read from the primary document in parsing
1195
+ and "allocated" is the number of bytes of dynamic memory allocated in the parser
1196
+ hierarchy.
1197
+
1198
+ The 'max_factor' value must be a non-NaN floating point value greater than
1199
+ or equal to 1.0. Amplifications factors greater than 100 can been observed
1200
+ near the start of parsing even with benign files in practice. As such, the
1201
+ upper bound must be carefully chosen so to avoid false positives.
1202
+ [clinic start generated code]*/
1203
+
1204
+ static PyObject *
1205
+ pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl (xmlparseobject * self ,
1206
+ PyTypeObject * cls ,
1207
+ float max_factor )
1208
+ /*[clinic end generated code: output=6e44bd48c9b112a0 input=18e8d07329c0efda]*/
1209
+ {
1210
+ assert (self -> itself != NULL );
1211
+ if (XML_SetAllocTrackerMaximumAmplification (self -> itself , max_factor ) == XML_TRUE ) {
1212
+ Py_RETURN_NONE ;
1213
+ }
1214
+ // XML_SetAllocTrackerMaximumAmplification() can fail if self->itself
1215
+ // is not a root parser (currently, this is equivalent to be created
1216
+ // by ExternalEntityParserCreate()) or if 'max_factor' is NaN or < 1.0.
1217
+ //
1218
+ // Expat does not provide a way to determine whether a parser is a root
1219
+ // or not, nor does it provide a way to distinguish between failures in
1220
+ // XML_SetAllocTrackerMaximumAmplification() (see gh-90949), we manually
1221
+ // detect the factor out-of-range issue here so that users have a better
1222
+ // error message.
1223
+ pyexpat_state * state = PyType_GetModuleState (cls );
1224
+ const char * message = (isnan (max_factor ) || max_factor < 1.0f )
1225
+ ? "'max_factor' must be at least 1.0"
1226
+ : "parser must be a root parser" ;
1227
+ return set_invalid_arg (state , self , message );
1228
+ }
1229
+
1230
+ /*[clinic input]
1231
+ @permit_long_summary
1232
+ @permit_long_docstring_body
1233
+ pyexpat.xmlparser.SetAllocTrackerActivationThreshold
1234
+
1235
+ cls: defining_class
1236
+ threshold: unsigned_long_long
1237
+ /
1238
+
1239
+ Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM.
1240
+
1241
+ By default, parsers objects have an allocation activation threshold of 64 MiB.
1242
+ [clinic start generated code]*/
1243
+
1244
+ static PyObject *
1245
+ pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl (xmlparseobject * self ,
1246
+ PyTypeObject * cls ,
1247
+ unsigned long long threshold )
1248
+ /*[clinic end generated code: output=bed7e93207ba08c5 input=8453509a137a47c0]*/
1249
+ {
1250
+ assert (self -> itself != NULL );
1251
+ if (XML_SetAllocTrackerActivationThreshold (self -> itself , threshold ) == XML_TRUE ) {
1252
+ Py_RETURN_NONE ;
1253
+ }
1254
+ // XML_SetAllocTrackerActivationThreshold() can only fail if self->itself
1255
+ // is not a root parser (currently, this is equivalent to be created
1256
+ // by ExternalEntityParserCreate()).
1257
+ pyexpat_state * state = PyType_GetModuleState (cls );
1258
+ return set_invalid_arg (state , self , "parser must be a root parser" );
1259
+ }
1260
+ #endif
1261
+
1136
1262
static struct PyMethodDef xmlparse_methods [] = {
1137
1263
PYEXPAT_XMLPARSER_PARSE_METHODDEF
1138
1264
PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
@@ -1141,9 +1267,9 @@ static struct PyMethodDef xmlparse_methods[] = {
1141
1267
PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
1142
1268
PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
1143
1269
PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
1144
- #if XML_COMBINED_VERSION >= 19505
1145
1270
PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
1146
- #endif
1271
+ PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF
1272
+ PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF
1147
1273
PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF
1148
1274
PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF
1149
1275
{NULL , NULL } /* sentinel */
0 commit comments