From 88eba4d72c45eae88f8d2e7e45c2ae76f20d4b78 Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Wed, 28 May 2025 13:37:12 -0400 Subject: [PATCH 1/3] Add tests for invalid utf8 when formatting --- tests/simple/testdata/string_ext.textproto | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/simple/testdata/string_ext.textproto b/tests/simple/testdata/string_ext.textproto index d2455583..2aeb42a8 100644 --- a/tests/simple/testdata/string_ext.textproto +++ b/tests/simple/testdata/string_ext.textproto @@ -632,6 +632,20 @@ section: { string_value: 'xyz', } } + test: { + name: "bytes support for string with invalid utf-8 sequences" + expr: '"%s".format([b"\xF0abc\x8Cxyz"])' + value: { + string_value: '\ufffdabc\ufffdxyz', + } + } + test: { + name: "bytes support for string with multiple adjacent invalid utf-8 sequences" + expr: '"%s".format([b"\xF0\x8C\xF0"])' + value: { + string_value: '\ufffd', + } + } test: { name: "type() support for string" expr: '"%s".format([type("test string")])' From bbf70658e7192eb030ca8b67e8d1647ac66f72ab Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Wed, 28 May 2025 13:59:37 -0400 Subject: [PATCH 2/3] Fix test --- tests/simple/testdata/string_ext.textproto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/simple/testdata/string_ext.textproto b/tests/simple/testdata/string_ext.textproto index 2aeb42a8..76b2fe4a 100644 --- a/tests/simple/testdata/string_ext.textproto +++ b/tests/simple/testdata/string_ext.textproto @@ -634,13 +634,13 @@ section: { } test: { name: "bytes support for string with invalid utf-8 sequences" - expr: '"%s".format([b"\xF0abc\x8Cxyz"])' + expr: '"%s".format([b"\xF0abc\x8C\xF0xyz"])' value: { string_value: '\ufffdabc\ufffdxyz', } } test: { - name: "bytes support for string with multiple adjacent invalid utf-8 sequences" + name: "bytes support for string with only invalid utf-8 sequences" expr: '"%s".format([b"\xF0\x8C\xF0"])' value: { string_value: '\ufffd', From 375fe9db1a6f7ced4f14b2981e17ca57d7b96335 Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Thu, 29 May 2025 17:44:15 -0400 Subject: [PATCH 3/3] Escape invalid utf-8 bytes --- tests/simple/testdata/string_ext.textproto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/simple/testdata/string_ext.textproto b/tests/simple/testdata/string_ext.textproto index 76b2fe4a..c8ebf3d0 100644 --- a/tests/simple/testdata/string_ext.textproto +++ b/tests/simple/testdata/string_ext.textproto @@ -634,14 +634,14 @@ section: { } test: { name: "bytes support for string with invalid utf-8 sequences" - expr: '"%s".format([b"\xF0abc\x8C\xF0xyz"])' + expr: '"%s".format([b"\\xF0abc\\x8C\\xF0xyz"])' value: { string_value: '\ufffdabc\ufffdxyz', } } test: { name: "bytes support for string with only invalid utf-8 sequences" - expr: '"%s".format([b"\xF0\x8C\xF0"])' + expr: '"%s".format([b"\\xF0\\x8C\\xF0"])' value: { string_value: '\ufffd', }