Skip to content

Commit 33cfeb7

Browse files
committed
gh-139156: Optimize the UTF-7 encoder
Remove base64SetO and base64WhiteSpace parameters.
1 parent 49e83e3 commit 33cfeb7

File tree

3 files changed

+6
-13
lines changed

3 files changed

+6
-13
lines changed

Include/internal/pycore_unicodeobject.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,6 @@ extern int _PyUnicodeWriter_FormatV(
9292

9393
extern PyObject* _PyUnicode_EncodeUTF7(
9494
PyObject *unicode, /* Unicode object */
95-
int base64SetO, /* Encode RFC2152 Set O characters in base64 */
96-
int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
9795
const char *errors); /* error handling */
9896

9997
/* --- UTF-8 Codecs ------------------------------------------------------- */

Modules/_codecsmodule.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -671,7 +671,7 @@ _codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
671671
const char *errors)
672672
/*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
673673
{
674-
return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
674+
return codec_tuple(_PyUnicode_EncodeUTF7(str, errors),
675675
PyUnicode_GET_LENGTH(str));
676676
}
677677

Objects/unicodeobject.c

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4668,15 +4668,12 @@ char utf7_category[128] = {
46684668

46694669
/* ENCODE_DIRECT: this character should be encoded as itself. The
46704670
* answer depends on whether we are encoding set O as itself, and also
4671-
* on whether we are encoding whitespace as itself. RFC2152 makes it
4671+
* on whether we are encoding whitespace as itself. RFC 2152 makes it
46724672
* clear that the answers to these questions vary between
46734673
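* applications. The macro no longer takes those choices as parameters:
* any character in the range 1..127 whose utf7_category is not 3,
* which includes set O and whitespace, is encoded as itself. */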
46744674

4675-
#define ENCODE_DIRECT(c, directO, directWS) \
4676-
((c) < 128 && (c) > 0 && \
4677-
((utf7_category[(c)] == 0) || \
4678-
(directWS && (utf7_category[(c)] == 2)) || \
4679-
(directO && (utf7_category[(c)] == 1))))
4675+
#define ENCODE_DIRECT(c) \
4676+
((c) < 128 && (c) > 0 && ((utf7_category[(c)] != 3)))
46804677

46814678
PyObject *
46824679
PyUnicode_DecodeUTF7(const char *s,
@@ -4893,8 +4890,6 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
48934890

48944891
PyObject *
48954892
_PyUnicode_EncodeUTF7(PyObject *str,
4896-
int base64SetO,
4897-
int base64WhiteSpace,
48984893
const char *errors)
48994894
{
49004895
Py_ssize_t len = PyUnicode_GET_LENGTH(str);
@@ -4921,7 +4916,7 @@ _PyUnicode_EncodeUTF7(PyObject *str,
49214916
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
49224917

49234918
if (inShift) {
4924-
if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) {
4919+
if (ENCODE_DIRECT(ch)) {
49254920
/* shifting out */
49264921
if (base64bits) { /* output remaining bits */
49274922
*out++ = TO_BASE64(base64buffer << (6-base64bits));
@@ -4945,7 +4940,7 @@ _PyUnicode_EncodeUTF7(PyObject *str,
49454940
*out++ = '+';
49464941
*out++ = '-';
49474942
}
4948-
else if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) {
4943+
else if (ENCODE_DIRECT(ch)) {
49494944
*out++ = (char) ch;
49504945
}
49514946
else {

0 commit comments

Comments
 (0)