diff --git a/src/main/kotlin/BaseStringNotation.kt b/src/main/kotlin/BaseStringNotation.kt index 455a883..1a69813 100644 --- a/src/main/kotlin/BaseStringNotation.kt +++ b/src/main/kotlin/BaseStringNotation.kt @@ -12,7 +12,7 @@ abstract class BaseStringNotation(private val splitAt: Regex): StringNotation { protected open fun transformPartAfterParse(index: Int, part: String) = part override fun parse(sourceString: String): Word = - Word(sourceString.split(splitAt).asSequence().filter(String::isNotBlank).mapIndexed(::transformPartAfterParse)) + Word(sourceString.split(splitAt).asSequence().mapIndexed(::transformPartAfterParse)) /** * Allows to transform a part before it is being printed. The default implementation does not modify the part in any way. diff --git a/src/main/kotlin/FileSystemNotations.kt b/src/main/kotlin/FileSystemNotations.kt new file mode 100644 index 0000000..6412c86 --- /dev/null +++ b/src/main/kotlin/FileSystemNotations.kt @@ -0,0 +1,35 @@ +package de.joshuagleitze.stringnotation + +/** + * A notation for paths on a Unix file system. [Parsing][StringNotation.parse] will recognise all substrings that are + * separated by `/` as a [part][Word.parts]. [Printing][StringNotation.print] will print the parts separated by `/` + * after removing `\u0000` (ASCII: `NUL`) and `\u002F` (ASCII: slash) characters from them. + * + * [Printed][StringNotation.print] paths will not start with an additional `/`. To print an absolute path, include `""` (the empty string) as the first part in the printed word. + */ +object UnixPath: BaseStringNotation(Regex("/")) { + private val invalidChars = Regex("[\u0000/]+") + override fun transformPartToPrint(index: Int, part: String) = part.replace(invalidChars, "") + override fun printBeforeInnerPart(index: Int, part: String) = "/" +} + +/** + * A notation for paths on a Windows file system. [Parsing][StringNotation.parse] will recognise all substrings that are + * separated by `\` as a [part][Word.parts]. 
[Printing][StringNotation.print] will print the parts separated by `\` + * after removing the following characters from them: + * * ASCII control characters + * * `<`, `>`, `:`, `"`, `/`, `\`, `|`, `?`, `*` + * + * To allow printing paths that start with a drive letter, the notation will not strip a `:` from the first part if it’s the last character. + */ +object WindowsPath: BaseStringNotation(Regex("\\\\")) { + private val invalidChars = Regex("[\\p{Cntrl}<>:\"/\\\\|?*]+") + override fun transformPartToPrint(index: Int, part: String): String { + val replaced = part.replace(invalidChars, "") + return if (index == 0 && part.endsWith(":")) { + "$replaced:" + } else replaced + } + + override fun printBeforeInnerPart(index: Int, part: String) = "\\" +} \ No newline at end of file diff --git a/src/main/kotlin/JavaNotations.kt b/src/main/kotlin/JavaNotations.kt index 4d2e4a5..60062fd 100644 --- a/src/main/kotlin/JavaNotations.kt +++ b/src/main/kotlin/JavaNotations.kt @@ -11,7 +11,7 @@ import javax.lang.model.SourceVersion * Allowed characters are determined using [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart]. Keywords are detected * using [SourceVersion.isKeyword]. */ -data object JavaTypeName : StringNotation by UpperCamelCase { +object JavaTypeName: StringNotation by UpperCamelCase { override fun print(word: Word) = UpperCamelCase.print( Word(word.parts.mapIndexed { index, wordPart -> if (index == 0) wordPart.keepOnlyJavaIdentifierChars() @@ -28,7 +28,7 @@ data object JavaTypeName : StringNotation by UpperCamelCase { * Allowed characters are determined using [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart]. Keywords are detected * using [SourceVersion.isKeyword]. 
*/ -object JavaMemberName : BaseStringNotation(camelCaseSplitRegex) { +object JavaMemberName: BaseStringNotation(camelCaseSplitRegex) { override fun transformPartAfterParse(index: Int, part: String) = part.lowercase(Locale.ROOT) override fun print(word: Word) = word.parts @@ -52,7 +52,7 @@ object JavaMemberName : BaseStringNotation(camelCaseSplitRegex) { * Allowed characters are determined using [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart]. Keywords are detected * using [SourceVersion.isKeyword]. */ -object JavaPackagePart : BaseStringNotation(Regex("_|${camelCaseSplitRegex.pattern}")) { +object JavaPackagePart: BaseStringNotation(Regex("_|${camelCaseSplitRegex.pattern}")) { override fun transformPartAfterParse(index: Int, part: String) = part.lowercase(Locale.ROOT) override fun transformPartToPrint(index: Int, part: String) = part.lowercase(Locale.ROOT) @@ -67,7 +67,7 @@ object JavaPackagePart : BaseStringNotation(Regex("_|${camelCaseSplitRegex.patte * Allowed characters are determined using [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart]. Keywords are detected * using [SourceVersion.isKeyword]. */ -object JavaPackageName : BaseStringNotation(Regex("\\.")) { +object JavaPackageName: BaseStringNotation(Regex("\\.")) { override fun transformPartToPrint(index: Int, part: String) = part.lowercase(Locale.ROOT).makeValidJavaIdentifier() override fun printBeforeInnerPart(index: Int, part: String) = "." @@ -80,7 +80,7 @@ object JavaPackageName : BaseStringNotation(Regex("\\.")) { * Allowed characters are determined using [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart]. Keywords are detected * using [SourceVersion.isKeyword]. 
*/ -data object JavaConstantName : StringNotation by ScreamingSnakeCase { +data object JavaConstantName: StringNotation by ScreamingSnakeCase { override fun print(word: Word) = ScreamingSnakeCase.print(word).makeValidJavaIdentifier() } diff --git a/src/main/kotlin/Notations.kt b/src/main/kotlin/Notations.kt index 456008d..6ff649f 100644 --- a/src/main/kotlin/Notations.kt +++ b/src/main/kotlin/Notations.kt @@ -9,7 +9,7 @@ internal val camelCaseSplitRegex = Regex("(?<=.)(?=\\p{Lu})") * * @see JavaTypeName */ -object UpperCamelCase : BaseStringNotation(camelCaseSplitRegex) { +object UpperCamelCase: BaseStringNotation(camelCaseSplitRegex) { override fun transformPartAfterParse(index: Int, part: String) = part.lowercase(Locale.ROOT) public override fun transformPartToPrint(index: Int, part: String) = part.firstUpperThenLowerCase() @@ -20,7 +20,7 @@ object UpperCamelCase : BaseStringNotation(camelCaseSplitRegex) { * * @see JavaMemberName */ -object LowerCamelCase : BaseStringNotation(camelCaseSplitRegex) { +object LowerCamelCase: BaseStringNotation(camelCaseSplitRegex) { override fun transformPartAfterParse(index: Int, part: String) = part.lowercase(Locale.ROOT) override fun transformPartToPrint(index: Int, part: String) = @@ -30,7 +30,7 @@ object LowerCamelCase : BaseStringNotation(camelCaseSplitRegex) { /** * The `SCREAMING_SNAKE_CASE` notation. */ -object ScreamingSnakeCase : BaseStringNotation(Regex("_")) { +object ScreamingSnakeCase: BaseStringNotation(Regex("(? 
+ Word("C:", "bad${controlChar}File") to "C:\\badFile" + }.toTypedArray(), + Word("C:", "bad\u007FFile") to "C:\\badFile", + Word("C:", "bad") to "C:\\badFile", + ) +) diff --git a/src/test/kotlin/NotationsTest.kt b/src/test/kotlin/NotationsTest.kt index 79addde..947c50c 100644 --- a/src/test/kotlin/NotationsTest.kt +++ b/src/test/kotlin/NotationsTest.kt @@ -18,7 +18,10 @@ class LowerCamelCaseTest: BaseNotationTest( class ScreamingSnakeCaseTest: BaseNotationTest( notation = ScreamingSnakeCase, - unchangedWords = listOf("IM_IN_SCREAMING_SNAKE_CASE" to Word("im", "in", "screaming", "snake", "case")), + unchangedWords = listOf( + "IM_IN_SCREAMING_SNAKE_CASE" to Word("im", "in", "screaming", "snake", "case"), + "_I_HAVE_A_PREFIX" to Word("_i", "have", "a", "prefix") + ), parseOnlyWords = listOf("im_iN_sNAKe_cASE_with_CAPItals" to Word("im", "in", "snake", "case", "with", "capitals")), printOnlyWords = listOf(Word("im", "iN", "sNAKe", "cASE", "with", "CAPItals") to "IM_IN_SNAKE_CASE_WITH_CAPITALS") ) @@ -27,7 +30,8 @@ class SnakeCaseTest: BaseNotationTest( notation = SnakeCase, unchangedWords = listOf( "im_in_snake_case" to Word("im", "in", "snake", "case"), - "im_iN_sNAKe_cASE_with_CAPItals" to Word("im", "iN", "sNAKe", "cASE", "with", "CAPItals") + "im_iN_sNAKe_cASE_with_CAPItals" to Word("im", "iN", "sNAKe", "cASE", "with", "CAPItals"), + "_i_have_a_prefix" to Word("_i", "have", "a", "prefix") ) )