diff --git a/src/main/java/build/buf/protovalidate/Format.java b/src/main/java/build/buf/protovalidate/Format.java index cfb42eb6..38a7008a 100644 --- a/src/main/java/build/buf/protovalidate/Format.java +++ b/src/main/java/build/buf/protovalidate/Format.java @@ -144,7 +144,9 @@ private static String formatString(Val val) { } return val.value().toString(); case Bytes: - return new String((byte[]) val.value(), StandardCharsets.UTF_8); + String byteStr = new String((byte[]) val.value(), StandardCharsets.UTF_8); + // Collapse any contiguous placeholders into one + return byteStr.replaceAll("\\ufffd+", "\ufffd"); case Double: Optional result = validateNumber(val); if (result.isPresent()) { diff --git a/src/test/java/build/buf/protovalidate/FormatTest.java b/src/test/java/build/buf/protovalidate/FormatTest.java index f63c91d4..88f51b39 100644 --- a/src/test/java/build/buf/protovalidate/FormatTest.java +++ b/src/test/java/build/buf/protovalidate/FormatTest.java @@ -30,10 +30,10 @@ import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import java.util.stream.Stream; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Named; @@ -79,34 +79,33 @@ class FormatTest { "object inside map"); @BeforeAll - static void setUp() throws Exception { - byte[] encoded = - Files.readAllBytes( - Paths.get("src/test/resources/testdata/string_ext_" + CEL_SPEC_VERSION + ".textproto")); - String data = new String(encoded, StandardCharsets.UTF_8); - SimpleTestFile.Builder bldr = SimpleTestFile.newBuilder(); - TextFormat.getParser().merge(data, bldr); - SimpleTestFile testData = bldr.build(); - - List sections = testData.getSectionList(); + private static void setUp() throws Exception { + // The test data from the cel-spec conformance tests + List celSpecSections = + loadTestData("src/test/resources/testdata/string_ext_" + CEL_SPEC_VERSION + ".textproto"); + // Our supplemental tests of functionality not in the cel conformance file, but defined in the + // spec. + List supplementalSections = + loadTestData("src/test/resources/testdata/string_ext_supplemental.textproto"); + + // Combine the test data from both files into one + List sections = + Stream.concat(celSpecSections.stream(), supplementalSections.stream()) + .collect(Collectors.toList()); // Find the format tests which test successful formatting - // Defaults to an empty list if nothing is found formatTests = sections.stream() .filter(s -> s.getName().equals("format")) - .findFirst() - .map(SimpleTestSection::getTestList) - .orElse(Collections.emptyList()); + .flatMap(s -> s.getTestList().stream()) + .collect(Collectors.toList()); // Find the format error tests which test errors during formatting - // Defaults to an empty list if nothing is found formatErrorTests = sections.stream() .filter(s -> s.getName().equals("format_errors")) - .findFirst() - .map(SimpleTestSection::getTestList) - .orElse(Collections.emptyList()); + .flatMap(s -> s.getTestList().stream()) + .collect(Collectors.toList()); env = Env.newEnv(Library.Lib(new ValidateLibrary())); } @@ -127,6 +126,17 @@ void testFormatError(SimpleTest test) { assertThat(result.getVal().type().typeEnum()).isEqualTo(TypeEnum.Err); } + // Loads test data from the given text format file + private static List loadTestData(String fileName) throws Exception { + byte[] encoded = Files.readAllBytes(Paths.get(fileName)); + String data = new String(encoded, StandardCharsets.UTF_8); + SimpleTestFile.Builder bldr = SimpleTestFile.newBuilder(); + TextFormat.getParser().merge(data, bldr); + SimpleTestFile testData = bldr.build(); + + return testData.getSectionList(); + } + // Runs a test by extending the cel environment with the specified // types, variables and declarations, then evaluating it with the cel runtime. private static Program.EvalResult evaluate(SimpleTest test) { diff --git a/src/test/resources/testdata/string_ext_supplemental.textproto b/src/test/resources/testdata/string_ext_supplemental.textproto new file mode 100644 index 00000000..6f4402fc --- /dev/null +++ b/src/test/resources/testdata/string_ext_supplemental.textproto @@ -0,0 +1,26 @@ +# proto-file: ../../../proto/cel/expr/conformance/test/simple.proto +# proto-message: cel.expr.conformance.test.SimpleTestFile + +# Ideally these tests should be in the cel-spec conformance test suite. +# Until they are added, we can use this to test for additional functionality +# listed in the spec. + +name: "string_ext_supplemental" +description: "Supplemental tests for the strings extension library." +section: { + name: "format" + test: { + name: "bytes support for string with invalid utf-8 encoding" + expr: '"%s".format([b"\xF0abc\x8C\xF0xyz"])' + value: { + string_value: '\ufffdabc\ufffdxyz', + } + } + test: { + name: "bytes support for string with only invalid utf-8 sequences" + expr: '"%s".format([b"\xF0\x8C\xF0"])' + value: { + string_value: '\ufffd', + } + } +}