Skip to content

Commit 459d493

Browse files
maurycy and vstinner authored
pythongh-140149: Use PyBytesWriter in _build_concatenated_bytes() (python#140150)
Use PyBytesWriter in _build_concatenated_bytes() in Parser/action_helpers.c. This makes concatenation of bytes literals in the parser about 3x faster. Co-authored-by: Victor Stinner <[email protected]>
1 parent 5f357f3 commit 459d493

File tree

2 files changed

+35
-6
lines changed

2 files changed

+35
-6
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Speed up parsing of concatenated bytes literals by using the PyBytesWriter API
2+
and a single memory allocation (about 3x faster).

Parser/action_helpers.c

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1612,19 +1612,46 @@ _build_concatenated_bytes(Parser *p, asdl_expr_seq *strings, int lineno,
16121612
Py_ssize_t len = asdl_seq_LEN(strings);
16131613
assert(len > 0);
16141614

1615-
PyObject* res = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
1616-
16171615
/* Bytes literals never get a kind, but just for consistency
16181616
since they are represented as Constant nodes, we'll mirror
16191617
the same behavior as unicode strings for determining the
16201618
kind. */
1621-
PyObject* kind = asdl_seq_GET(strings, 0)->v.Constant.kind;
1619+
PyObject *kind = asdl_seq_GET(strings, 0)->v.Constant.kind;
1620+
1621+
Py_ssize_t total = 0;
1622+
for (Py_ssize_t i = 0; i < len; i++) {
1623+
expr_ty elem = asdl_seq_GET(strings, i);
1624+
PyObject *bytes = elem->v.Constant.value;
1625+
Py_ssize_t part = PyBytes_GET_SIZE(bytes);
1626+
if (part > PY_SSIZE_T_MAX - total) {
1627+
PyErr_NoMemory();
1628+
return NULL;
1629+
}
1630+
total += part;
1631+
}
1632+
1633+
PyBytesWriter *writer = PyBytesWriter_Create(total);
1634+
if (writer == NULL) {
1635+
return NULL;
1636+
}
1637+
char *out = PyBytesWriter_GetData(writer);
1638+
16221639
for (Py_ssize_t i = 0; i < len; i++) {
16231640
expr_ty elem = asdl_seq_GET(strings, i);
1624-
PyBytes_Concat(&res, elem->v.Constant.value);
1641+
PyObject *bytes = elem->v.Constant.value;
1642+
Py_ssize_t part = PyBytes_GET_SIZE(bytes);
1643+
if (part > 0) {
1644+
memcpy(out, PyBytes_AS_STRING(bytes), part);
1645+
out += part;
1646+
}
16251647
}
1626-
if (!res || _PyArena_AddPyObject(arena, res) < 0) {
1627-
Py_XDECREF(res);
1648+
1649+
PyObject *res = PyBytesWriter_Finish(writer);
1650+
if (res == NULL) {
1651+
return NULL;
1652+
}
1653+
if (_PyArena_AddPyObject(arena, res) < 0) {
1654+
Py_DECREF(res);
16281655
return NULL;
16291656
}
16301657
return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena);

0 commit comments

Comments
 (0)