Extract GherkinKeywordNormalizer, shared by SnippetGenerator and both
GenerateI18n code generators.

Notes on this revision of the patch:
 * Line breaks and generic type arguments (e.g. List<Integer>) that were lost
   from the patch text have been restored. Indentation and blank context
   lines are reconstructed from the hunk counts; apply with
   `git apply --ignore-whitespace` and verify against the tree.
 * GherkinKeywordNormalizer: case conversions use Locale.ROOT, the hand-rolled
   code point loop is replaced by String.codePoints(), Javadoc was added and
   a comment typo fixed. Its index line was dropped because the blob id no
   longer matches the changed content.
 * cucumber-java8 GenerateI18n: capitalize(String) is kept in place as
   context instead of being removed and re-added with the same tokens.

diff --git a/cucumber-core/src/main/java/io/cucumber/core/snippets/GherkinKeywordNormalizer.java b/cucumber-core/src/main/java/io/cucumber/core/snippets/GherkinKeywordNormalizer.java
new file mode 100644
--- /dev/null
+++ b/cucumber-core/src/main/java/io/cucumber/core/snippets/GherkinKeywordNormalizer.java
@@ -0,0 +1,80 @@
+package io.cucumber.core.snippets;
+
+import org.apiguardian.api.API;
+
+import java.text.Normalizer;
+import java.util.Arrays;
+import java.util.Locale;
+
+import static java.util.stream.Collectors.joining;
+
+/**
+ * Derives names that are usable in generated Java code from Gherkin dialect
+ * languages and keywords.
+ * <p>
+ * Shared by the snippet generator and the i18n code generators so they all
+ * derive the same name from the same keyword.
+ */
+@API(status = API.Status.INTERNAL)
+public final class GherkinKeywordNormalizer {
+
+    private GherkinKeywordNormalizer() {
+        /* no-op */
+    }
+
+    /**
+     * Normalizes a keyword of the given dialect.
+     *
+     * @param  language the language code of the dialect, e.g. {@code en}
+     * @param  keyword  the keyword as written in the dialect
+     * @return          the keyword without whitespace, apostrophes, commas,
+     *                  exclamation marks and soft hyphens, in NFC form; for
+     *                  the {@code em} dialect this is applied to the
+     *                  title-cased names of the keyword's code points
+     */
+    public static String normalizeKeyword(String language, String keyword) {
+        // Exception: Use the symbol names for the Emoji language.
+        // Emoji are not legal identifiers in Java.
+        if ("em".equals(language)) {
+            return normalizeEmojiKeyword(keyword);
+        }
+        return normalizeKeyword(keyword);
+    }
+
+    /**
+     * Normalizes a language code.
+     *
+     * @param  language the language code of the dialect, e.g. {@code en-tx}
+     * @return          the lower-cased language code with whitespace and
+     *                  hyphens replaced by underscores
+     */
+    public static String normalizeLanguage(String language) {
+        // Locale.ROOT keeps the result independent of the default locale.
+        return language.replaceAll("[\\s-]", "_").toLowerCase(Locale.ROOT);
+    }
+
+    private static String normalizeKeyword(String keyword) {
+        return normalize(keyword.replaceAll("[\\s',!\u00AD’]", ""));
+    }
+
+    private static String normalizeEmojiKeyword(String keyword) {
+        String titleCasedName = keyword.codePoints()
+                .mapToObj(Character::getName)
+                .map(s -> s.split(" "))
+                .flatMap(Arrays::stream)
+                // Locale.ROOT keeps "I" from turning into a dotless "ı" when
+                // the default locale is Turkish.
+                .map(s -> s.toLowerCase(Locale.ROOT))
+                .map(GherkinKeywordNormalizer::capitalize)
+                .collect(joining(" "));
+        return normalizeKeyword(titleCasedName);
+    }
+
+    private static String capitalize(String str) {
+        return str.substring(0, 1).toUpperCase(Locale.ROOT) + str.substring(1);
+    }
+
+    private static String normalize(CharSequence s) {
+        return Normalizer.normalize(s, Normalizer.Form.NFC);
+    }
+}
diff --git a/cucumber-core/src/main/java/io/cucumber/core/snippets/SnippetGenerator.java b/cucumber-core/src/main/java/io/cucumber/core/snippets/SnippetGenerator.java
index 536731968f..d4e9ed3176 100644
--- a/cucumber-core/src/main/java/io/cucumber/core/snippets/SnippetGenerator.java
+++ b/cucumber-core/src/main/java/io/cucumber/core/snippets/SnippetGenerator.java
@@ -12,20 +12,16 @@
 import io.cucumber.plugin.event.StepArgument;
 
 import java.lang.reflect.Type;
-import java.text.Normalizer;
-import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
-import java.util.stream.IntStream;
 import java.util.stream.Stream;
 
+import static io.cucumber.core.snippets.GherkinKeywordNormalizer.normalizeKeyword;
 import static io.cucumber.core.snippets.SnippetType.CAMELCASE;
-import static java.util.stream.Collectors.joining;
 
 public final class SnippetGenerator {
 
@@ -72,7 +68,7 @@ private String createSnippet(
         List<String> parameterNames = toParameterNames(expression, parameterNameGenerator);
         Map<String, Type> arguments = arguments(step, parameterNames, expression.getParameterTypes());
         return snippet.template().format(new String[] {
-                getNormalizedKeyWord(language, keyword),
+                normalizeKeyword(language, keyword),
                 snippet.escapePattern(source),
                 functionName,
                 snippet.arguments(arguments),
@@ -88,48 +84,6 @@ private List<String> toParameterNames(GeneratedExpression expression, Identifier
                 .collect(Collectors.toList());
     }
 
-    private static String capitalize(String str) {
-        return str.substring(0, 1).toUpperCase() + str.substring(1);
-    }
-
-    private static String getNormalizedKeyWord(String language, String keyword) {
-        // Exception: Use the symbol names for the Emoj language.
-        // Emoji are not legal identifiers in Java.
-        if ("em".equals(language)) {
-            return getNormalizedEmojiKeyWord(keyword);
-        }
-        return getNormalizedKeyWord(keyword);
-    }
-
-    private static String getNormalizedEmojiKeyWord(String keyword) {
-        String titleCasedName = getCodePoints(keyword).mapToObj(Character::getName)
-                .map(s -> s.split(" "))
-                .flatMap(Arrays::stream)
-                .map(String::toLowerCase)
-                .map(SnippetGenerator::capitalize)
-                .collect(joining(" "));
-        return getNormalizedKeyWord(titleCasedName);
-    }
-
-    private static IntStream getCodePoints(String s) {
-        int length = s.length();
-        List<Integer> codePoints = new ArrayList<>();
-        for (int offset = 0; offset < length;) {
-            int codepoint = s.codePointAt(offset);
-            codePoints.add(codepoint);
-            offset += Character.charCount(codepoint);
-        }
-        return codePoints.stream().mapToInt(value -> value);
-    }
-
-    private static String getNormalizedKeyWord(String keyword) {
-        return normalize(keyword.replaceAll("[\\s',!\u00AD’]", ""));
-    }
-
-    static String normalize(CharSequence s) {
-        return Normalizer.normalize(s, Normalizer.Form.NFC);
-    }
-
     private String functionName(String sentence, IdentifierGenerator functionNameGenerator) {
         String functionName = Stream.of(sentence)
                 .map(DEFAULT_ARGUMENT_PATTERN::replaceMatchesWithSpace)
diff --git a/cucumber-core/src/test/java/io/cucumber/core/snippets/GherkinKeywordNormalizerTest.java b/cucumber-core/src/test/java/io/cucumber/core/snippets/GherkinKeywordNormalizerTest.java
new file mode 100644
index 0000000000..9af5f59623
--- /dev/null
+++ b/cucumber-core/src/test/java/io/cucumber/core/snippets/GherkinKeywordNormalizerTest.java
@@ -0,0 +1,36 @@
+package io.cucumber.core.snippets;
+
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+class GherkinKeywordNormalizerTest {
+
+    @ParameterizedTest
+    @CsvSource({
+            "en, G I V E N, GIVEN",
+            "en, Given', Given",
+            "en, Hyphen\u00ADated, Hyphenated",
+            "fr, Sc\u0065\u0301nario, Scénario",
+            "ar, مثا ل ,مثال",
+            "em, \uD83C\uDFE6, Bank",
+            "em, ✅, WhiteHeavyCheckMark"
+    })
+    void shouldNormalizeKeyword(String language, String keyword, String expected) {
+        String normalizedKeyword = GherkinKeywordNormalizer.normalizeKeyword(language, keyword);
+        assertEquals(expected, normalizedKeyword);
+    }
+
+    @ParameterizedTest
+    @CsvSource({
+            "En US, en_us",
+            "en-tx, en_tx",
+            "AR, ar",
+            "cy-GB, cy_gb"
+    })
+    void normalizeLanguage(String language, String expected) {
+        String normalizedLanguage = GherkinKeywordNormalizer.normalizeLanguage(language);
+        assertEquals(expected, normalizedLanguage);
+    }
+}
diff --git a/cucumber-java/src/codegen/java/GenerateI18n.java b/cucumber-java/src/codegen/java/GenerateI18n.java
index 2cf7e1adb8..d571d1fb39 100644
--- a/cucumber-java/src/codegen/java/GenerateI18n.java
+++ b/cucumber-java/src/codegen/java/GenerateI18n.java
@@ -9,18 +9,17 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
-import java.text.Normalizer;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 
+import static io.cucumber.core.snippets.GherkinKeywordNormalizer.normalizeKeyword;
+import static io.cucumber.core.snippets.GherkinKeywordNormalizer.normalizeLanguage;
 import static java.nio.file.Files.newBufferedWriter;
 import static java.nio.file.StandardOpenOption.CREATE;
 import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING;
-import static java.util.stream.Collectors.joining;
 
 /* This class generates the cucumber-java Interfaces and package-info
  * based on the languages and keywords from the GherkinDialects
@@ -79,8 +78,8 @@ private void writeKeyWordAnnotations(GherkinDialect dialect) {
         }
 
         private void writeKeyWordAnnotation(GherkinDialect dialect, String keyword) {
-            String normalizedLanguage = getNormalizedLanguage(dialect);
-            String normalizedKeyword = getNormalizedKeyWord(dialect, keyword);
+            String normalizedLanguage = normalizeLanguage(dialect.getLanguage());
+            String normalizedKeyword = normalizeKeyword(dialect.getLanguage(), keyword);
 
             Map<String, String> binding = new LinkedHashMap<>();
             binding.put("lang", normalizedLanguage);
@@ -103,39 +102,8 @@ private void writeKeyWordAnnotation(GherkinDialect dialect, String keyword) {
             }
         }
 
-        private static String capitalize(String s) {
-            return s.substring(0, 1).toUpperCase() + s.substring(1);
-        }
-
-        private static String getNormalizedKeyWord(GherkinDialect dialect, String keyword) {
-            // Exception: Use the symbol names for the Emoj language.
-            // Emoji are not legal identifiers in Java.
-            if (dialect.getLanguage().equals("em")) {
-                return getNormalizedEmojiKeyWord(keyword);
-            }
-            return getNormalizedKeyWord(keyword);
-        }
-
-        private static String getNormalizedEmojiKeyWord(String keyword) {
-            String titleCasedName = keyword.codePoints().mapToObj(Character::getName)
-                    .map(s -> s.split(" "))
-                    .flatMap(Arrays::stream)
-                    .map(String::toLowerCase)
-                    .map(DialectWriter::capitalize)
-                    .collect(joining(" "));
-            return getNormalizedKeyWord(titleCasedName);
-        }
-
-        private static String getNormalizedKeyWord(String keyword) {
-            return normalize(keyword.replaceAll("[\\s',!\u00AD’]", ""));
-        }
-
-        private static String normalize(CharSequence s) {
-            return Normalizer.normalize(s, Normalizer.Form.NFC);
-        }
-
         private void writePackageInfo(GherkinDialect dialect) {
-            String normalizedLanguage = getNormalizedLanguage(dialect);
+            String normalizedLanguage = normalizeLanguage(dialect.getLanguage());
             String languageName = dialect.getName();
             if (!dialect.getName().equals(dialect.getNativeName())) {
                 languageName += " - " + dialect.getNativeName();
@@ -155,9 +123,5 @@ private void writePackageInfo(GherkinDialect dialect) {
             }
         }
 
-        private static String getNormalizedLanguage(GherkinDialect dialect) {
-            return dialect.getLanguage().replaceAll("[\\s-]", "_").toLowerCase();
-        }
-
     }
 }
diff --git a/cucumber-java8/src/codegen/java/GenerateI18n.java b/cucumber-java8/src/codegen/java/GenerateI18n.java
index 547b21a1c4..e8ba3d2098 100644
--- a/cucumber-java8/src/codegen/java/GenerateI18n.java
+++ b/cucumber-java8/src/codegen/java/GenerateI18n.java
@@ -9,18 +9,17 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
-import java.text.Normalizer;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 
+import static io.cucumber.core.snippets.GherkinKeywordNormalizer.normalizeKeyword;
+import static io.cucumber.core.snippets.GherkinKeywordNormalizer.normalizeLanguage;
 import static java.nio.file.Files.newBufferedWriter;
 import static java.nio.file.StandardOpenOption.CREATE;
 import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING;
-import static java.util.stream.Collectors.joining;
 import static java.util.stream.Collectors.toList;
 
 /* This class generates the cucumber-java Interfaces and package-info
@@ -69,7 +68,7 @@ void writeDialect(GherkinDialect dialect) {
         }
 
         private void writeInterface(GherkinDialect dialect) {
-            String normalizedLanguage = getNormalizedLanguage(dialect);
+            String normalizedLanguage = normalizeLanguage(dialect.getLanguage());
             String languageName = dialect.getName();
             if (!dialect.getName().equals(dialect.getNativeName())) {
                 languageName += " - " + dialect.getNativeName();
@@ -98,43 +97,12 @@ private static List<String> extractKeywords(GherkinDialect dialect) {
                     .filter(it -> !it.contains(String.valueOf('*')))
                     .filter(it -> !it.matches("^\\d.*"))
                     .distinct()
-                    .map(keyword -> getNormalizedKeyWord(dialect, keyword))
+                    .map(keyword -> normalizeKeyword(dialect.getLanguage(), keyword))
                     .collect(toList());
         }
 
         private static String capitalize(String str) {
             return str.substring(0, 1).toUpperCase() + str.substring(1);
         }
-
-        private static String getNormalizedKeyWord(GherkinDialect dialect, String keyword) {
-            // Exception: Use the symbol names for the Emoj language.
-            // Emoji are not legal identifiers in Java.
-            if (dialect.getLanguage().equals("em")) {
-                return getNormalizedEmojiKeyWord(keyword);
-            }
-            return getNormalizedKeyWord(keyword);
-        }
-
-        private static String getNormalizedEmojiKeyWord(String keyword) {
-            String titleCasedName = keyword.codePoints().mapToObj(Character::getName)
-                    .map(s -> s.split(" "))
-                    .flatMap(Arrays::stream)
-                    .map(String::toLowerCase)
-                    .map(DialectWriter::capitalize)
-                    .collect(joining(" "));
-            return getNormalizedKeyWord(titleCasedName);
-        }
-
-        private static String getNormalizedKeyWord(String keyword) {
-            return normalize(keyword.replaceAll("[\\s',!\u00AD’]", ""));
-        }
-
-        static String normalize(CharSequence s) {
-            return Normalizer.normalize(s, Normalizer.Form.NFC);
-        }
-
-        private static String getNormalizedLanguage(GherkinDialect dialect) {
-            return dialect.getLanguage().replaceAll("[\\s-]", "_").toLowerCase();
-        }
     }
 }