Skip to content

Commit 314264c

Browse files
authored
fix: optimize splitOnWordBoundaries (#999)
1 parent d835b64 commit 314264c

File tree

1 file changed

+16
-9
lines changed
  • codegen/smithy-kotlin-codegen/src/main/kotlin/software/amazon/smithy/kotlin/codegen/utils

1 file changed

+16
-9
lines changed

codegen/smithy-kotlin-codegen/src/main/kotlin/software/amazon/smithy/kotlin/codegen/utils/CaseUtils.kt

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
*/
55
package software.amazon.smithy.kotlin.codegen.utils
66

7+
// Whole words with unconventional casing (e.g. `IPv4`, `MiB`, `GiB`, `TtL`) that must not be split; entries are stored lowercase and compared case-insensitively
8+
private val completeWords = listOf("ipv4", "ipv6", "sigv4", "mib", "gib", "kib", "ttl", "iot", "s3")
9+
710
/**
811
* Split a string on word boundaries
912
*/
@@ -13,12 +16,12 @@ fun String.splitOnWordBoundaries(): List<String> {
1316
// https://github.com/aws/aws-sdk-java-v2/blob/2.20.162/utils/src/main/java/software/amazon/awssdk/utils/internal/CodegenNamingUtils.java#L36
1417
// but this has some edge cases it doesn't handle well
1518
val out = mutableListOf<String>()
16-
// These are whole words but cased differently, e.g. `IPv4`, `MiB`, `GiB`, `TtL`
17-
val completeWords = listOf("ipv4", "ipv6", "sigv4", "mib", "gib", "kib", "ttl", "iot", "s3")
1819
var currentWord = ""
20+
var computeWordInProgress = true
1921

2022
// emit the current word and update from the next character
2123
val emit = { next: Char ->
24+
computeWordInProgress = true
2225
if (currentWord.isNotEmpty()) {
2326
out += currentWord.lowercase()
2427
}
@@ -33,9 +36,13 @@ fun String.splitOnWordBoundaries(): List<String> {
3336
forEachIndexed { index, nextChar ->
3437
val peek = getOrNull(index + 1)
3538
val doublePeek = getOrNull(index + 2)
36-
val completeWordInProgress = completeWords.any {
37-
(currentWord + substring(index)).lowercase().startsWith(it)
38-
} && !completeWords.contains(currentWord.lowercase())
39+
val completeWordInProgress = {
40+
val result = computeWordInProgress && currentWord.isNotEmpty() && completeWords.any {
41+
(currentWord + substring(index)).startsWith(it, ignoreCase = true)
42+
} && !completeWords.contains(currentWord.lowercase())
43+
computeWordInProgress = result
44+
result
45+
}
3946

4047
when {
4148
// [C] in these docs indicates the value of `nextChar`
@@ -46,16 +53,16 @@ fun String.splitOnWordBoundaries(): List<String> {
4653
currentWord.isEmpty() -> currentWord += nextChar.toString()
4754

4855
// Abc[D]ef or Ab2[D]ef
49-
!completeWordInProgress && loweredFollowedByUpper(currentWord, nextChar) -> emit(nextChar)
56+
!completeWordInProgress() && loweredFollowedByUpper(currentWord, nextChar) -> emit(nextChar)
5057

5158
// s3[k]ey
52-
!completeWordInProgress && allLowerCase && digitFollowedByLower(currentWord, nextChar) -> emit(nextChar)
59+
!completeWordInProgress() && allLowerCase && digitFollowedByLower(currentWord, nextChar) -> emit(nextChar)
5360

5461
// emit complete words
55-
!completeWordInProgress && completeWords.contains(currentWord.lowercase()) -> emit(nextChar)
62+
!completeWordInProgress() && completeWords.contains(currentWord.lowercase()) -> emit(nextChar)
5663

5764
// DB[P]roxy, or `IAM[U]ser` but not AC[L]s
58-
!completeWordInProgress && endOfAcronym(currentWord, nextChar, peek, doublePeek) -> emit(nextChar)
65+
!completeWordInProgress() && endOfAcronym(currentWord, nextChar, peek, doublePeek) -> emit(nextChar)
5966

6067
// If we haven't found a word boundary, push it and keep going
6168
else -> currentWord += nextChar.toString()

0 commit comments

Comments
 (0)