feat: notations for file system paths

jGleitz · jGleitz · commit 3fc94958c441 · 2025-06-10T13:26:57.000+02:00
diff --git a/src/main/kotlin/BaseStringNotation.kt b/src/main/kotlin/BaseStringNotation.kt
@@ -12,7 +12,7 @@ abstract class BaseStringNotation(private val splitAt: Regex): StringNotation {
 	protected open fun transformPartAfterParse(index: Int, part: String) = part
 
 	override fun parse(sourceString: String): Word =
-		Word(sourceString.split(splitAt).asSequence().filter(String::isNotBlank).mapIndexed(::transformPartAfterParse))
+		Word(sourceString.split(splitAt).asSequence().mapIndexed(::transformPartAfterParse))
 
 	/**
 	 * Allows to transform a part before it is being printed. The default implementation does not modify the part in any way.
diff --git a/src/main/kotlin/FileSystemNotations.kt b/src/main/kotlin/FileSystemNotations.kt
@@ -0,0 +1,34 @@
+package de.joshuagleitze.stringnotation
+
+/**
+ * A notation for paths on a Unix file system. [Parsing][StringNotation.parse] will recognise all substrings that are
+ * separated by `/` as a [part][Word.parts]. [Printing][StringNotation.print] will print the parts separated by `/`
+ * after removing `\u0000` (ASCII: `NUL`) and `\u002F` (ASCII: slash) characters from them.
+ *
+ * [Printed][StringNotation.print] paths will not start with an additional `/`. To print an absolute path, include `""` (the empty string) as the first part in the printed word.
+ */
+object UnixPath : BaseStringNotation(Regex("/")) {
+    private val invalidChars = Regex("[\u0000/]+") // runs of NUL or `/`; removed from a part by transformPartToPrint
+    override fun transformPartToPrint(index: Int, part: String) = part.replace(invalidChars, "")
+    override fun printBeforeInnerPart(index: Int, part: String) = "/"
+}
+
+/**
+ * A notation for paths on a Windows file system. [Parsing][StringNotation.parse] will recognise all substrings that are
+ * separated by `\` as a [part][Word.parts]. [Printing][StringNotation.print] will print the parts separated by `\`
+ * after removing the following characters from them:
+ *  * ASCII control characters
+ *  * `<`, `>`, `:`, `"`, `/`, `\`, `|`, `?`, `*`
+ *
+ * To allow printing paths that start with a drive letter, the notation will not strip a `:` from the first part if it’s the last character.
+ */
+object WindowsPath : BaseStringNotation(Regex("\\\\")) {
+    private val invalidChars = Regex("[\\p{Cntrl}<>:\"/\\\\|?*]+") // control characters plus < > : " / \ | ? *
+    override fun transformPartToPrint(index: Int, part: String): String {
+        val replaced = part.replace(invalidChars, "") // also removes every `:`, including a trailing one
+        return if (index == 0 && part.endsWith(":")) { // first part ended with `:` (e.g. a drive letter like `C:`)
+            "$replaced:" // re-append the single trailing `:` that the replacement above stripped
+        } else replaced
+    }
+    override fun printBeforeInnerPart(index: Int, part: String) = "\\"
+}
diff --git a/src/main/kotlin/JavaNotations.kt b/src/main/kotlin/JavaNotations.kt
@@ -11,7 +11,7 @@ import javax.lang.model.SourceVersion
  * Allowed characters are determined using [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart]. Keywords are detected
  * using [SourceVersion.isKeyword].
  */
-data object JavaTypeName : StringNotation by UpperCamelCase {
+object JavaTypeName : StringNotation by UpperCamelCase {
 	override fun print(word: Word) = UpperCamelCase.print(
 		Word(word.parts.mapIndexed { index, wordPart ->
 			if (index == 0) wordPart.keepOnlyJavaIdentifierChars()
diff --git a/src/main/kotlin/Notations.kt b/src/main/kotlin/Notations.kt
@@ -30,7 +30,7 @@ object LowerCamelCase : BaseStringNotation(camelCaseSplitRegex) {
 /**
  * The `SCREAMING_SNAKE_CASE` notation.
  */
-object ScreamingSnakeCase : BaseStringNotation(Regex("_")) {
+object ScreamingSnakeCase : BaseStringNotation(Regex("(?<!^)_")) {
 	override fun transformPartAfterParse(index: Int, part: String) = part.lowercase(Locale.ROOT)
 
 	override fun printBeforeInnerPart(index: Int, part: String) = "_"
@@ -41,7 +41,7 @@ object ScreamingSnakeCase : BaseStringNotation(Regex("_")) {
 /**
  * The `snake_case` notation.
  */
-object SnakeCase: BaseStringNotation(Regex("_")) {
+object SnakeCase: BaseStringNotation(Regex("(?<!^)_")) {
 	override fun printBeforeInnerPart(index: Int, part: String) = "_"
 }
 
diff --git a/src/test/kotlin/FileSystemNotationsTest.kt b/src/test/kotlin/FileSystemNotationsTest.kt
@@ -0,0 +1,28 @@
+package de.joshuagleitze.stringnotation
+
+class UnixPathTest : BaseNotationTest(
+    notation = UnixPath,
+    unchangedWords = listOf(
+        "/home/user/some/file" to Word("", "home", "user", "some", "file"), // absolute path: leading "" part
+        "a/relative/path" to Word("a", "relative", "path")
+    ),
+    printOnlyWords = listOf(
+        Word("", "home", "null\u0000") to "/home/null", // NUL is removed from the part
+        Word("", "home", "user/some/file") to "/home/usersomefile", // `/` inside a part is removed, not kept as a separator
+    )
+)
+
+class WindowsPathTest : BaseNotationTest(
+    notation = WindowsPath,
+    unchangedWords = listOf(
+        "C:\\Users\\user\\some\\file" to Word("C:", "Users", "user", "some", "file"), // drive letter keeps its `:`
+        "a\\relative\\path" to Word("a", "relative", "path")
+    ),
+    printOnlyWords = listOf(
+        *('\u0000'..'\u001F').map { controlChar -> // one case per control character U+0000..U+001F
+            Word("C:", "bad${controlChar}File") to "C:\\badFile"
+        }.toTypedArray(),
+        Word("C:", "bad\u007FFile") to "C:\\badFile", // U+007F (DEL) is expected to be removed as well
+        Word("C:", "bad<File>") to "C:\\badFile", // `<` and `>` are removed
+    )
+)
diff --git a/src/test/kotlin/NotationsTest.kt b/src/test/kotlin/NotationsTest.kt
@@ -1,42 +1,46 @@
 package de.joshuagleitze.stringnotation
 
-class UpperCamelCaseTest: BaseNotationTest(
-	notation = UpperCamelCase,
-	unchangedWords = listOf("ImInUpperCamelCase" to Word("im", "in", "upper", "camel", "case")),
-	printOnlyWords = listOf(
-		Word("removes", "upperCase") to "RemovesUppercase"
-	)
+class UpperCamelCaseTest : BaseNotationTest(
+    notation = UpperCamelCase,
+    unchangedWords = listOf("ImInUpperCamelCase" to Word("im", "in", "upper", "camel", "case")),
+    printOnlyWords = listOf(
+        Word("removes", "upperCase") to "RemovesUppercase"
+    )
 )
 
-class LowerCamelCaseTest: BaseNotationTest(
-	notation = LowerCamelCase,
-	unchangedWords = listOf("imInLowerCamelCase" to Word("im", "in", "lower", "camel", "case")),
-	printOnlyWords = listOf(
-		Word("removes", "upperCase") to "removesUppercase"
-	)
+class LowerCamelCaseTest : BaseNotationTest(
+    notation = LowerCamelCase,
+    unchangedWords = listOf("imInLowerCamelCase" to Word("im", "in", "lower", "camel", "case")),
+    printOnlyWords = listOf(
+        Word("removes", "upperCase") to "removesUppercase"
+    )
 )
 
-class ScreamingSnakeCaseTest: BaseNotationTest(
-	notation = ScreamingSnakeCase,
-	unchangedWords = listOf("IM_IN_SCREAMING_SNAKE_CASE" to Word("im", "in", "screaming", "snake", "case")),
-	parseOnlyWords = listOf("im_iN_sNAKe_cASE_with_CAPItals" to Word("im", "in", "snake", "case", "with", "capitals")),
-	printOnlyWords = listOf(Word("im", "iN", "sNAKe", "cASE", "with", "CAPItals") to "IM_IN_SNAKE_CASE_WITH_CAPITALS")
+class ScreamingSnakeCaseTest : BaseNotationTest(
+    notation = ScreamingSnakeCase,
+    unchangedWords = listOf(
+        "IM_IN_SCREAMING_SNAKE_CASE" to Word("im", "in", "screaming", "snake", "case"),
+        "_I_HAVE_A_PREFIX" to Word("_i", "have", "a", "prefix")
+    ),
+    parseOnlyWords = listOf("im_iN_sNAKe_cASE_with_CAPItals" to Word("im", "in", "snake", "case", "with", "capitals")),
+    printOnlyWords = listOf(Word("im", "iN", "sNAKe", "cASE", "with", "CAPItals") to "IM_IN_SNAKE_CASE_WITH_CAPITALS")
 )
 
-class SnakeCaseTest: BaseNotationTest(
-	notation = SnakeCase,
-	unchangedWords = listOf(
-		"im_in_snake_case" to Word("im", "in", "snake", "case"),
-		"im_iN_sNAKe_cASE_with_CAPItals" to Word("im", "iN", "sNAKe", "cASE", "with", "CAPItals")
-	)
+class SnakeCaseTest : BaseNotationTest(
+    notation = SnakeCase,
+    unchangedWords = listOf(
+        "im_in_snake_case" to Word("im", "in", "snake", "case"),
+        "im_iN_sNAKe_cASE_with_CAPItals" to Word("im", "iN", "sNAKe", "cASE", "with", "CAPItals"),
+        "_i_have_a_prefix" to Word("_i", "have", "a", "prefix")
+    )
 )
 
-class NormalWordsTest: BaseNotationTest(
-	notation = NormalWords,
-	unchangedWords = listOf("I’m using normal words noTation!" to Word("I’m", "using", "normal", "words", "noTation!")),
-	parseOnlyWords = listOf(
-		"I’m     using tabs\nand\r other fancy    whitespace!" to Word(
-			"I’m", "using", "tabs", "and", "other", "fancy", "whitespace!"
-		)
-	)
+class NormalWordsTest : BaseNotationTest(
+    notation = NormalWords,
+    unchangedWords = listOf("I’m using normal words noTation!" to Word("I’m", "using", "normal", "words", "noTation!")),
+    parseOnlyWords = listOf(
+        "I’m     using tabs\nand\r other fancy    whitespace!" to Word(
+            "I’m", "using", "tabs", "and", "other", "fancy", "whitespace!"
+        )
+    )
 )