Skip to content

Commit 9f8b1cc

Browse files
committed
fix UBSan failures for PyExpat
1 parent 13cb8ca commit 9f8b1cc

File tree

2 files changed

+173
-49
lines changed

2 files changed

+173
-49
lines changed

Lib/test/test_pyexpat.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,14 @@ def test7(self):
436436
"<!--abc-->", "4", "<!--def-->", "5", "</a>"],
437437
"buffered text not properly split")
438438

439+
def test_noop(self):
440+
def handler(*args):
441+
parser.CharacterDataHandler = None
442+
443+
parser = expat.ParserCreate()
444+
parser.CharacterDataHandler = handler
445+
parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", True)
446+
439447

440448
# Test handling of exception from callback:
441449
class HandlerExceptionTest(unittest.TestCase):

Modules/pyexpat.c

Lines changed: 165 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -98,26 +98,102 @@ typedef struct {
9898

9999
#define CHARACTER_DATA_BUFFER_SIZE 8192
100100

101-
typedef const void *xmlhandler;
101+
typedef union xmlhandler_union {
102+
// - XML_StartCdataSectionHandler
103+
// - XML_EndCdataSectionHandler
104+
// - XML_EndDoctypeDeclHandler
105+
void (*parser_only)(
106+
void *
107+
);
108+
// XML_NotStandaloneHandler
109+
int (*not_standalone)(
110+
void *
111+
);
112+
// - XML_EndElementHandler
113+
// - XML_EndNamespaceDeclHandler
114+
// - XML_CommentHandler
115+
void (*parser_and_data)(
116+
void *, const XML_Char *
117+
);
118+
// - XML_CharacterDataHandler
119+
// - XML_DefaultHandler
120+
// - XML_DefaultHandlerExpand
121+
// - XML_SkippedEntityHandler
122+
// - noop_character_data_handler
123+
void (*parser_and_data_and_int)(
124+
void *, const XML_Char *, int
125+
);
126+
// - XML_ProcessingInstructionHandler
127+
// - XML_StartNamespaceDeclHandler
128+
void (*parser_and_data_and_data)(
129+
void *, const XML_Char *, const XML_Char *
130+
);
131+
// XML_StartElementHandler
132+
void (*start_element)(
133+
void *, const XML_Char *, const XML_Char **
134+
);
135+
// XML_ElementDeclHandler
136+
void (*element_decl)(
137+
void *, const XML_Char *, XML_Content *
138+
);
139+
// XML_XmlDeclHandler
140+
void (*xml_decl)(
141+
void *, const XML_Char *, const XML_Char *, int
142+
);
143+
// XML_StartDoctypeDeclHandler
144+
void (*start_doctype_decl)(
145+
void *,
146+
const XML_Char *, const XML_Char *, const XML_Char *,
147+
int
148+
);
149+
// XML_NotationDeclHandler
150+
void (*notation_decl)(
151+
void *,
152+
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *
153+
);
154+
// XML_AttlistDeclHandler
155+
void (*attlist_decl)(
156+
void *,
157+
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *,
158+
int
159+
);
160+
// XML_UnparsedEntityDeclHandler
161+
void (*unparsed_entity_decl)(
162+
void *,
163+
const XML_Char *, const XML_Char *,
164+
const XML_Char *, const XML_Char *, const XML_Char *
165+
);
166+
// XML_EntityDeclHandler
167+
void (*entity_decl)(
168+
void *,
169+
const XML_Char *, int,
170+
const XML_Char *, int,
171+
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *
172+
);
173+
// XML_ExternalEntityRefHandler
174+
int (*external_entity_ref)(
175+
XML_Parser,
176+
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *
177+
);
178+
} xmlhandler_union;
179+
180+
typedef xmlhandler_union *xmlhandler;
102181
typedef void (*xmlhandlersetter)(XML_Parser self, xmlhandler handler);
103182

104183
struct HandlerInfo {
105184
const char *name;
106185
xmlhandlersetter setter;
107-
xmlhandler handler;
186+
xmlhandler_union handler;
108187
PyGetSetDef getset;
109188
};
110189

111190
static struct HandlerInfo handler_info[64];
112191

113-
// gh-111178: Use _Py_NO_SANITIZE_UNDEFINED, rather than using the exact
114-
// handler API for each handler.
115-
static inline void _Py_NO_SANITIZE_UNDEFINED
192+
static inline void
116193
CALL_XML_HANDLER_SETTER(const struct HandlerInfo *handler_info,
117194
XML_Parser xml_parser, xmlhandler xml_handler)
118195
{
119-
xmlhandlersetter setter = (xmlhandlersetter)handler_info->setter;
120-
setter(xml_parser, xml_handler);
196+
handler_info->setter(xml_parser, xml_handler);
121197
}
122198

123199
/* Set an integer attribute on the error object; return true on success,
@@ -1063,7 +1139,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10631139
if (handler != NULL) {
10641140
new_parser->handlers[i] = Py_NewRef(handler);
10651141
struct HandlerInfo info = handler_info[i];
1066-
CALL_XML_HANDLER_SETTER(&info, new_parser->itself, info.handler);
1142+
CALL_XML_HANDLER_SETTER(&info, new_parser->itself, &info.handler);
10671143
}
10681144
}
10691145

@@ -1315,15 +1391,23 @@ xmlparse_dealloc(PyObject *op)
13151391
Py_DECREF(tp);
13161392
}
13171393

1394+
static Py_ssize_t
1395+
xmlparse_handler_get_index(void *closure)
1396+
{
1397+
struct HandlerInfo *info = (struct HandlerInfo *)closure;
1398+
Py_ssize_t ind = (Py_ssize_t)(info - handler_info);
1399+
assert(ind >= 0);
1400+
assert(ind < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1401+
assert(info->name == handler_info[ind].name);
1402+
return ind;
1403+
}
13181404

13191405
static PyObject *
13201406
xmlparse_handler_getter(PyObject *op, void *closure)
13211407
{
13221408
xmlparseobject *self = xmlparseobject_CAST(op);
1323-
struct HandlerInfo *hi = (struct HandlerInfo *)closure;
1324-
assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1325-
int handlernum = (int)(hi - handler_info);
1326-
PyObject *result = self->handlers[handlernum];
1409+
Py_ssize_t ind = xmlparse_handler_get_index(closure);
1410+
PyObject *result = self->handlers[ind];
13271411
if (result == NULL) {
13281412
result = Py_None;
13291413
}
@@ -1334,9 +1418,7 @@ static int
13341418
xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure)
13351419
{
13361420
xmlparseobject *self = xmlparseobject_CAST(op);
1337-
struct HandlerInfo *hi = (struct HandlerInfo *)closure;
1338-
assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1339-
int handlernum = (int)(hi - handler_info);
1421+
Py_ssize_t handlernum = xmlparse_handler_get_index(closure);
13401422
if (v == NULL) {
13411423
PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
13421424
return -1;
@@ -1365,13 +1447,18 @@ xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure)
13651447
elaborate system of handlers and state could remove the
13661448
C handler more effectively. */
13671449
if (handlernum == CharacterData && self->in_callback) {
1368-
c_handler = noop_character_data_handler;
1450+
// The cast to union type '(union T)' is NOT a cast in the regular
1451+
// sense but a constructor as it does not produce an lvalue. It
1452+
// is a valid construction as per ISO C11, §6.5.2.5.
1453+
*c_handler = (xmlhandler_union){
1454+
.parser_and_data_and_int = noop_character_data_handler
1455+
};
13691456
}
13701457
v = NULL;
13711458
}
13721459
else if (v != NULL) {
13731460
Py_INCREF(v);
1374-
c_handler = handler_info[handlernum].handler;
1461+
c_handler = &handler_info[handlernum].handler;
13751462
}
13761463
Py_XSETREF(self->handlers[handlernum], v);
13771464
CALL_XML_HANDLER_SETTER(&handler_info[handlernum], self->itself, c_handler);
@@ -2222,40 +2309,69 @@ clear_handlers(xmlparseobject *self, int initial)
22222309
}
22232310
}
22242311

2225-
static struct HandlerInfo handler_info[] = {
2226-
2227-
// The cast to `xmlhandlersetter` is needed as the signature of XML
2228-
// handler functions is not compatible with `xmlhandlersetter` since
2229-
// their second parameter is narrower than a `const void *`.
2230-
#define HANDLER_INFO(name) \
2231-
{#name, (xmlhandlersetter)XML_Set##name, my_##name},
2232-
2233-
HANDLER_INFO(StartElementHandler)
2234-
HANDLER_INFO(EndElementHandler)
2235-
HANDLER_INFO(ProcessingInstructionHandler)
2236-
HANDLER_INFO(CharacterDataHandler)
2237-
HANDLER_INFO(UnparsedEntityDeclHandler)
2238-
HANDLER_INFO(NotationDeclHandler)
2239-
HANDLER_INFO(StartNamespaceDeclHandler)
2240-
HANDLER_INFO(EndNamespaceDeclHandler)
2241-
HANDLER_INFO(CommentHandler)
2242-
HANDLER_INFO(StartCdataSectionHandler)
2243-
HANDLER_INFO(EndCdataSectionHandler)
2244-
HANDLER_INFO(DefaultHandler)
2245-
HANDLER_INFO(DefaultHandlerExpand)
2246-
HANDLER_INFO(NotStandaloneHandler)
2247-
HANDLER_INFO(ExternalEntityRefHandler)
2248-
HANDLER_INFO(StartDoctypeDeclHandler)
2249-
HANDLER_INFO(EndDoctypeDeclHandler)
2250-
HANDLER_INFO(EntityDeclHandler)
2251-
HANDLER_INFO(XmlDeclHandler)
2252-
HANDLER_INFO(ElementDeclHandler)
2253-
HANDLER_INFO(AttlistDeclHandler)
2312+
/*
2313+
* The handler type TYPE is typically useful to detect issues at compile time.
2314+
*/
2315+
#define SETTER_WRAPPER(NAME, MEMBER) \
2316+
static inline void \
2317+
my_Set ## NAME (XML_Parser parser, xmlhandler handle) \
2318+
{ \
2319+
(void)XML_Set ## NAME (parser, handle ? handle->MEMBER : NULL); \
2320+
}
2321+
SETTER_WRAPPER(StartElementHandler, start_element)
2322+
SETTER_WRAPPER(EndElementHandler, parser_and_data)
2323+
SETTER_WRAPPER(ProcessingInstructionHandler, parser_and_data_and_data)
2324+
SETTER_WRAPPER(CharacterDataHandler, parser_and_data_and_int)
2325+
SETTER_WRAPPER(UnparsedEntityDeclHandler, unparsed_entity_decl)
2326+
SETTER_WRAPPER(NotationDeclHandler, notation_decl)
2327+
SETTER_WRAPPER(StartNamespaceDeclHandler, parser_and_data_and_data)
2328+
SETTER_WRAPPER(EndNamespaceDeclHandler, parser_and_data)
2329+
SETTER_WRAPPER(CommentHandler, parser_and_data)
2330+
SETTER_WRAPPER(StartCdataSectionHandler, parser_only)
2331+
SETTER_WRAPPER(EndCdataSectionHandler, parser_only)
2332+
SETTER_WRAPPER(DefaultHandler, parser_and_data_and_int)
2333+
SETTER_WRAPPER(DefaultHandlerExpand, parser_and_data_and_int)
2334+
SETTER_WRAPPER(NotStandaloneHandler, not_standalone)
2335+
SETTER_WRAPPER(ExternalEntityRefHandler, external_entity_ref)
2336+
SETTER_WRAPPER(StartDoctypeDeclHandler, start_doctype_decl)
2337+
SETTER_WRAPPER(EndDoctypeDeclHandler, parser_only)
2338+
SETTER_WRAPPER(EntityDeclHandler, entity_decl)
2339+
SETTER_WRAPPER(XmlDeclHandler, xml_decl)
2340+
SETTER_WRAPPER(ElementDeclHandler, element_decl)
2341+
SETTER_WRAPPER(AttlistDeclHandler, attlist_decl)
22542342
#if XML_COMBINED_VERSION >= 19504
2255-
HANDLER_INFO(SkippedEntityHandler)
2343+
SETTER_WRAPPER(SkippedEntityHandler, parser_and_data_and_int)
22562344
#endif
2345+
#undef SETTER_WRAPPER
22572346

2347+
static struct HandlerInfo handler_info[] = {
2348+
#define HANDLER_INFO(IND, NAME, MEMBER) \
2349+
[IND] = {#NAME, my_Set##NAME, {.MEMBER = my_##NAME}},
2350+
2351+
HANDLER_INFO(StartElement, StartElementHandler, start_element)
2352+
HANDLER_INFO(EndElement, EndElementHandler, parser_and_data)
2353+
HANDLER_INFO(ProcessingInstruction, ProcessingInstructionHandler, parser_and_data_and_data)
2354+
HANDLER_INFO(CharacterData, CharacterDataHandler, parser_and_data_and_int)
2355+
HANDLER_INFO(UnparsedEntityDecl, UnparsedEntityDeclHandler, unparsed_entity_decl)
2356+
HANDLER_INFO(NotationDecl, NotationDeclHandler, notation_decl)
2357+
HANDLER_INFO(StartNamespaceDecl, StartNamespaceDeclHandler, parser_and_data_and_data)
2358+
HANDLER_INFO(EndNamespaceDecl, EndNamespaceDeclHandler, parser_and_data)
2359+
HANDLER_INFO(Comment, CommentHandler, parser_and_data)
2360+
HANDLER_INFO(StartCdataSection, StartCdataSectionHandler, parser_only)
2361+
HANDLER_INFO(EndCdataSection, EndCdataSectionHandler, parser_only)
2362+
HANDLER_INFO(Default, DefaultHandler, parser_and_data_and_int)
2363+
HANDLER_INFO(DefaultHandlerExpand, DefaultHandlerExpand, parser_and_data_and_int)
2364+
HANDLER_INFO(NotStandalone, NotStandaloneHandler, not_standalone)
2365+
HANDLER_INFO(ExternalEntityRef, ExternalEntityRefHandler, external_entity_ref)
2366+
HANDLER_INFO(StartDoctypeDecl, StartDoctypeDeclHandler, start_doctype_decl)
2367+
HANDLER_INFO(EndDoctypeDecl, EndDoctypeDeclHandler, parser_only)
2368+
HANDLER_INFO(EntityDecl, EntityDeclHandler, entity_decl)
2369+
HANDLER_INFO(XmlDecl, XmlDeclHandler, xml_decl)
2370+
HANDLER_INFO(ElementDecl, ElementDeclHandler, element_decl)
2371+
HANDLER_INFO(AttlistDecl, AttlistDeclHandler, attlist_decl)
2372+
#if XML_COMBINED_VERSION >= 19504
2373+
HANDLER_INFO(SkippedEntity, SkippedEntityHandler, parser_and_data_and_int)
2374+
#endif
22582375
#undef HANDLER_INFO
2259-
2260-
{NULL, NULL, NULL} /* sentinel */
2376+
{NULL, NULL, {NULL}} /* sentinel */
22612377
};

0 commit comments

Comments
 (0)