feat: notations for file system paths

jGleitz · jGleitz · commit 71b1c227acf6 · 2025-06-10T13:34:57.000+02:00
diff --git a/src/main/kotlin/BaseStringNotation.kt b/src/main/kotlin/BaseStringNotation.kt
@@ -12,7 +12,7 @@ abstract class BaseStringNotation(private val splitAt: Regex): StringNotation {
 	protected open fun transformPartAfterParse(index: Int, part: String) = part
 
 	override fun parse(sourceString: String): Word =
-		Word(sourceString.split(splitAt).asSequence().filter(String::isNotBlank).mapIndexed(::transformPartAfterParse))
+		Word(sourceString.split(splitAt).asSequence().mapIndexed(::transformPartAfterParse))
 
 	/**
 	 * Allows to transform a part before it is being printed. The default implementation does not modify the part in any way.
diff --git a/src/main/kotlin/FileSystemNotations.kt b/src/main/kotlin/FileSystemNotations.kt
@@ -0,0 +1,35 @@
+package de.joshuagleitze.stringnotation
+
+/**
+ * A notation for paths on a Unix file system. [Parsing][StringNotation.parse] will recognise all substrings that are
+ * separated by `/` as a [part][Word.parts]. [Printing][StringNotation.print] will print the parts separated by `/`
+ * after removing `\u0000` (ASCII: `NUL`) and `\u002F` (ASCII: slash) characters from them.
+ *
+ * [Printed][StringNotation.print] paths will not start with an additional `/`. To print an absolute path, include `""` (the empty string) as the first part in the printed word.
+ */
+object UnixPath: BaseStringNotation(Regex("/")) {
+	private val invalidChars = Regex("[\u0000/]+")
+	override fun transformPartToPrint(index: Int, part: String) = part.replace(invalidChars, "")
+	override fun printBeforeInnerPart(index: Int, part: String) = "/"
+}
+
+/**
+ * A notation for paths on a Windows file system. [Parsing][StringNotation.parse] will recognise all substrings that are
+ * separated by `\` as a [part][Word.parts]. [Printing][StringNotation.print] will print the parts separated by `\`
+ * after removing the following characters from them:
+ *  * ASCII control characters
+ *  * `<`, `>`, `:`, `"`, `/`, `\`, `|`, `?`, `*`
+ *
+ * To allow printing paths that start with a drive letter, a `:` is kept on the first part if it is that part's last character; any other `:` is still removed.
+ */
+object WindowsPath: BaseStringNotation(Regex("\\\\")) {
+	private val invalidChars = Regex("[\\p{Cntrl}<>:\"/\\\\|?*]+")
+	override fun transformPartToPrint(index: Int, part: String): String {
+		val replaced = part.replace(invalidChars, "")
+		return if (index == 0 && part.endsWith(":")) {
+			"$replaced:"
+		} else replaced
+	}
+
+	override fun printBeforeInnerPart(index: Int, part: String) = "\\"
+}
diff --git a/src/main/kotlin/JavaNotations.kt b/src/main/kotlin/JavaNotations.kt
@@ -11,7 +11,7 @@ import javax.lang.model.SourceVersion
  * Allowed characters are determined using [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart]. Keywords are detected
  * using [SourceVersion.isKeyword].
  */
-data object JavaTypeName : StringNotation by UpperCamelCase {
+object JavaTypeName: StringNotation by UpperCamelCase {
 	override fun print(word: Word) = UpperCamelCase.print(
 		Word(word.parts.mapIndexed { index, wordPart ->
 			if (index == 0) wordPart.keepOnlyJavaIdentifierChars()
@@ -28,7 +28,7 @@ data object JavaTypeName : StringNotation by UpperCamelCase {
  * Allowed characters are determined using [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart]. Keywords are detected
  * using [SourceVersion.isKeyword].
  */
-object JavaMemberName : BaseStringNotation(camelCaseSplitRegex) {
+object JavaMemberName: BaseStringNotation(camelCaseSplitRegex) {
 	override fun transformPartAfterParse(index: Int, part: String) = part.lowercase(Locale.ROOT)
 
 	override fun print(word: Word) = word.parts
@@ -52,7 +52,7 @@ object JavaMemberName : BaseStringNotation(camelCaseSplitRegex) {
  * Allowed characters are determined using [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart]. Keywords are detected
  * using [SourceVersion.isKeyword].
  */
-object JavaPackagePart : BaseStringNotation(Regex("_|${camelCaseSplitRegex.pattern}")) {
+object JavaPackagePart: BaseStringNotation(Regex("_|${camelCaseSplitRegex.pattern}")) {
 	override fun transformPartAfterParse(index: Int, part: String) = part.lowercase(Locale.ROOT)
 
 	override fun transformPartToPrint(index: Int, part: String) = part.lowercase(Locale.ROOT)
@@ -67,7 +67,7 @@ object JavaPackagePart : BaseStringNotation(Regex("_|${camelCaseSplitRegex.patte
  * Allowed characters are determined using [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart]. Keywords are detected
  * using [SourceVersion.isKeyword].
  */
-object JavaPackageName : BaseStringNotation(Regex("\\.")) {
+object JavaPackageName: BaseStringNotation(Regex("\\.")) {
 	override fun transformPartToPrint(index: Int, part: String) = part.lowercase(Locale.ROOT).makeValidJavaIdentifier()
 
 	override fun printBeforeInnerPart(index: Int, part: String) = "."
@@ -80,7 +80,7 @@ object JavaPackageName : BaseStringNotation(Regex("\\.")) {
  * Allowed characters are determined using [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart]. Keywords are detected
  * using [SourceVersion.isKeyword].
  */
-data object JavaConstantName : StringNotation by ScreamingSnakeCase {
+data object JavaConstantName: StringNotation by ScreamingSnakeCase {
 	override fun print(word: Word) = ScreamingSnakeCase.print(word).makeValidJavaIdentifier()
 }
 
diff --git a/src/main/kotlin/Notations.kt b/src/main/kotlin/Notations.kt
@@ -9,7 +9,7 @@ internal val camelCaseSplitRegex = Regex("(?<=.)(?=\\p{Lu})")
  *
  * @see JavaTypeName
  */
-object UpperCamelCase : BaseStringNotation(camelCaseSplitRegex) {
+object UpperCamelCase: BaseStringNotation(camelCaseSplitRegex) {
 	override fun transformPartAfterParse(index: Int, part: String) = part.lowercase(Locale.ROOT)
 
 	public override fun transformPartToPrint(index: Int, part: String) = part.firstUpperThenLowerCase()
@@ -20,7 +20,7 @@ object UpperCamelCase : BaseStringNotation(camelCaseSplitRegex) {
  *
  * @see JavaMemberName
  */
-object LowerCamelCase : BaseStringNotation(camelCaseSplitRegex) {
+object LowerCamelCase: BaseStringNotation(camelCaseSplitRegex) {
 	override fun transformPartAfterParse(index: Int, part: String) = part.lowercase(Locale.ROOT)
 
 	override fun transformPartToPrint(index: Int, part: String) =
@@ -30,7 +30,7 @@ object LowerCamelCase : BaseStringNotation(camelCaseSplitRegex) {
 /**
  * The `SCREAMING_SNAKE_CASE` notation.
  */
-object ScreamingSnakeCase : BaseStringNotation(Regex("_")) {
+object ScreamingSnakeCase: BaseStringNotation(Regex("(?<!^)_")) {
 	override fun transformPartAfterParse(index: Int, part: String) = part.lowercase(Locale.ROOT)
 
 	override fun printBeforeInnerPart(index: Int, part: String) = "_"
@@ -41,7 +41,7 @@ object ScreamingSnakeCase : BaseStringNotation(Regex("_")) {
 /**
  * The `snake_case` notation.
  */
-object SnakeCase: BaseStringNotation(Regex("_")) {
+object SnakeCase: BaseStringNotation(Regex("(?<!^)_")) {
 	override fun printBeforeInnerPart(index: Int, part: String) = "_"
 }
 
diff --git a/src/test/kotlin/BaseNotationTest.kt b/src/test/kotlin/BaseNotationTest.kt
@@ -63,7 +63,8 @@ data class NotationTestData(val word: Word, val string: String, var minimumJavaV
 
 infix fun Word.to(string: String) = NotationTestData(this, string)
 infix fun String.to(word: Word) = NotationTestData(word, this)
-infix fun NotationTestData.ifJvmVersionIsAtLeast(minimumJavaVersion: Int) = this.apply { this.minimumJavaVersion = minimumJavaVersion }
+infix fun NotationTestData.ifJvmVersionIsAtLeast(minimumJavaVersion: Int) =
+	this.apply { this.minimumJavaVersion = minimumJavaVersion }
 
 val currentJavaVersion by lazy {
 	System.getProperty("java.runtime.version")
diff --git a/src/test/kotlin/FileSystemNotationsTest.kt b/src/test/kotlin/FileSystemNotationsTest.kt
@@ -0,0 +1,28 @@
+package de.joshuagleitze.stringnotation
+
+class UnixPathTest: BaseNotationTest(
+	notation = UnixPath,
+	unchangedWords = listOf(
+		"/home/user/some/file" to Word("", "home", "user", "some", "file"),
+		"a/relative/path" to Word("a", "relative", "path")
+	),
+	printOnlyWords = listOf(
+		Word("", "home", "null\u0000") to "/home/null",
+		Word("", "home", "user/some/file") to "/home/usersomefile",
+	)
+)
+
+class WindowsPathTest: BaseNotationTest(
+	notation = WindowsPath,
+	unchangedWords = listOf(
+		"C:\\Users\\user\\some\\file" to Word("C:", "Users", "user", "some", "file"),
+		"a\\relative\\path" to Word("a", "relative", "path")
+	),
+	printOnlyWords = listOf(
+		*('\u0000'..'\u001F').map { controlChar ->
+			Word("C:", "bad${controlChar}File") to "C:\\badFile"
+		}.toTypedArray(),
+		Word("C:", "bad\u007FFile") to "C:\\badFile",
+		Word("C:", "bad<File>") to "C:\\badFile",
+	)
+)
diff --git a/src/test/kotlin/NotationsTest.kt b/src/test/kotlin/NotationsTest.kt
@@ -18,7 +18,10 @@ class LowerCamelCaseTest: BaseNotationTest(
 
 class ScreamingSnakeCaseTest: BaseNotationTest(
 	notation = ScreamingSnakeCase,
-	unchangedWords = listOf("IM_IN_SCREAMING_SNAKE_CASE" to Word("im", "in", "screaming", "snake", "case")),
+	unchangedWords = listOf(
+		"IM_IN_SCREAMING_SNAKE_CASE" to Word("im", "in", "screaming", "snake", "case"),
+		"_I_HAVE_A_PREFIX" to Word("_i", "have", "a", "prefix")
+	),
 	parseOnlyWords = listOf("im_iN_sNAKe_cASE_with_CAPItals" to Word("im", "in", "snake", "case", "with", "capitals")),
 	printOnlyWords = listOf(Word("im", "iN", "sNAKe", "cASE", "with", "CAPItals") to "IM_IN_SNAKE_CASE_WITH_CAPITALS")
 )
@@ -27,7 +30,8 @@ class SnakeCaseTest: BaseNotationTest(
 	notation = SnakeCase,
 	unchangedWords = listOf(
 		"im_in_snake_case" to Word("im", "in", "snake", "case"),
-		"im_iN_sNAKe_cASE_with_CAPItals" to Word("im", "iN", "sNAKe", "cASE", "with", "CAPItals")
+		"im_iN_sNAKe_cASE_with_CAPItals" to Word("im", "iN", "sNAKe", "cASE", "with", "CAPItals"),
+		"_i_have_a_prefix" to Word("_i", "have", "a", "prefix")
 	)
 )
 

Original file line number	Diff line number	Diff line change
`@@ -18,7 +18,10 @@ class LowerCamelCaseTest: BaseNotationTest(`
`18`	`18`
`19`	`19`	`class ScreamingSnakeCaseTest: BaseNotationTest(`
`20`	`20`	`notation = ScreamingSnakeCase,`
`21`		`- unchangedWords = listOf("IM_IN_SCREAMING_SNAKE_CASE" to Word("im", "in", "screaming", "snake", "case")),`
	`21`	`+ unchangedWords = listOf(`
	`22`	`+ "IM_IN_SCREAMING_SNAKE_CASE" to Word("im", "in", "screaming", "snake", "case"),`
	`23`	`+ "_I_HAVE_A_PREFIX" to Word("_i", "have", "a", "prefix")`
	`24`	`+ ),`
`22`	`25`	`parseOnlyWords = listOf("im_iN_sNAKe_cASE_with_CAPItals" to Word("im", "in", "snake", "case", "with", "capitals")),`
`23`	`26`	`printOnlyWords = listOf(Word("im", "iN", "sNAKe", "cASE", "with", "CAPItals") to "IM_IN_SNAKE_CASE_WITH_CAPITALS")`
`24`	`27`	`)`
`@@ -27,7 +30,8 @@ class SnakeCaseTest: BaseNotationTest(`
`27`	`30`	`notation = SnakeCase,`
`28`	`31`	`unchangedWords = listOf(`
`29`	`32`	`"im_in_snake_case" to Word("im", "in", "snake", "case"),`
`30`		`- "im_iN_sNAKe_cASE_with_CAPItals" to Word("im", "iN", "sNAKe", "cASE", "with", "CAPItals")`
	`33`	`+ "im_iN_sNAKe_cASE_with_CAPItals" to Word("im", "iN", "sNAKe", "cASE", "with", "CAPItals"),`
	`34`	`+ "_i_have_a_prefix" to Word("_i", "have", "a", "prefix")`
`31`	`35`	`)`
`32`	`36`	`)`
`33`	`37`