@@ -15,11 +15,8 @@ import Foundation
1515
/// Options that control how a string is turned into a Swift identifier.
struct SwiftNameOptions: OptionSet {
    /// The bit mask that backs this option set.
    let rawValue: Int32

    /// The empty option set (no bits set).
    static let none: Self = []

    /// Requests a capitalized result; the idiomatic strategy uppercases the
    /// first letter of the identifier when this is set and lowercases it otherwise.
    static let capitalize = Self(rawValue: 1 << 0)

    /// The union of every option declared on this type.
    static let all: Self = [.capitalize]
}
2522
@@ -36,7 +33,7 @@ extension String {
3633 ///
3734 /// In addition to replacing illegal characters, it also
3835 /// ensures that the identifier starts with a letter and not a number.
39- func safeForSwiftCode_defensive( options: SwiftNameOptions ) -> String {
36+ func safeForSwiftCode_defensive( options: SwiftNameOptions ) -> String {
4037 guard !isEmpty else { return " _empty " }
4138
4239 let firstCharSet : CharacterSet = . letters. union ( . init( charactersIn: " _ " ) )
@@ -84,10 +81,7 @@ extension String {
8481 /// matching `safeForSwiftCode_defensive`.
8582 func safeForSwiftCode_idiomatic( options: SwiftNameOptions ) -> String {
8683 let capitalize = options. contains ( . capitalize)
87- if isEmpty {
88- return capitalize ? " _Empty_ " : " _empty_ "
89- }
90-
84+ if isEmpty { return capitalize ? " _Empty_ " : " _empty_ " }
9185 // Detect cases like HELLO_WORLD, sometimes used for constants.
9286 let isAllUppercase = allSatisfy {
9387 // Must check that no characters are lowercased, as non-letter characters
@@ -96,22 +90,24 @@ extension String {
9690 }
9791
9892 // 1. Leave leading underscores as-is
99- // 2. In the middle: word separators: ["_", "-", <space>] -> remove and capitalize next word
100- // 3. In the middle: period: [".", "/" ] -> replace with "_"
93+ // 2. In the middle: word separators: ["_", "-", "/", <space>] -> remove and capitalize next word
94+ // 3. In the middle: period: ["."] -> replace with "_"
10195 // 4. In the middle: drop ["{", "}"] -> replace with ""
102-
96+
10397 var buffer : [ Character ] = [ ]
10498 buffer. reserveCapacity ( count)
105-
106- enum State {
99+ enum State : Equatable {
107100 case modifying
108- case fallback
109101 case preFirstWord
102+ struct AccumulatingFirstWordContext : Equatable { var isAccumulatingInitialUppercase : Bool }
103+ case accumulatingFirstWord( AccumulatingFirstWordContext )
110104 case accumulatingWord
111105 case waitingForWordStarter
106+ case fallback
112107 }
113108 var state : State = . preFirstWord
114- for char in self {
109+ for index in self [ ... ] . indices {
110+ let char = self [ index]
115111 let _state = state
116112 state = . modifying
117113 switch _state {
@@ -124,30 +120,93 @@ extension String {
124120 // Prefix with an underscore if the first character is a number.
125121 buffer. append ( " _ " )
126122 buffer. append ( char)
127- state = . accumulatingWord
123+ state = . accumulatingFirstWord ( . init ( isAccumulatingInitialUppercase : false ) )
128124 } else if char. isLetter {
129125 // First character in the identifier.
130126 buffer. append ( contentsOf: capitalize ? char. uppercased ( ) : char. lowercased ( ) )
131- state = . accumulatingWord
127+ state = . accumulatingFirstWord(
128+ . init( isAccumulatingInitialUppercase: !capitalize && char. isUppercase)
129+ )
132130 } else {
133131 // Illegal character, fall back to the defensive strategy.
134132 state = . fallback
135133 }
136- case . accumulatingWord :
134+ case . accumulatingFirstWord ( var context ) :
137135 if char. isLetter || char. isNumber {
138136 if isAllUppercase {
139137 buffer. append ( contentsOf: char. lowercased ( ) )
138+ } else if context. isAccumulatingInitialUppercase {
139+ // Example: "HTTPProxy"/"HTTP_Proxy"/"HTTP_proxy" should all
140+ // become "httpProxy" when capitalize == false.
141+ // This means treating the first word differently.
142+ // Here we are on the second or later character of the first word (the first
143+ // character is handled in `.preFirstWord`).
144+ // If the first character was uppercase, and we're in lowercasing mode,
145+ // we need to lowercase every consecutive uppercase character while there's
146+ // another uppercase character after it.
147+ if char. isLowercase {
148+ // No accumulating anymore, just append it and turn off accumulation.
149+ buffer. append ( char)
150+ context. isAccumulatingInitialUppercase = false
151+ } else {
152+ let suffix = suffix ( from: self . index ( after: index) )
153+ if suffix. count >= 2 {
154+ let next = suffix. first!
155+ let secondNext = suffix. dropFirst ( ) . first!
156+ if next. isUppercase && secondNext. isLowercase {
157+ // Finished lowercasing.
158+ context. isAccumulatingInitialUppercase = false
159+ buffer. append ( contentsOf: char. lowercased ( ) )
160+ } else if Self . wordSeparators. contains ( next) {
161+ // Finished lowercasing.
162+ context. isAccumulatingInitialUppercase = false
163+ buffer. append ( contentsOf: char. lowercased ( ) )
164+ } else if next. isUppercase {
165+ // Keep lowercasing.
166+ buffer. append ( contentsOf: char. lowercased ( ) )
167+ } else {
168+ // Append as-is, stop accumulating.
169+ context. isAccumulatingInitialUppercase = false
170+ buffer. append ( char)
171+ }
172+ } else {
173+ // This is the last or second to last character,
174+ // since we were accumulating capitals, lowercase it.
175+ buffer. append ( contentsOf: char. lowercased ( ) )
176+ context. isAccumulatingInitialUppercase = false
177+ }
178+ }
140179 } else {
141180 buffer. append ( char)
142181 }
182+ state = . accumulatingFirstWord( context)
183+ } else if [ " _ " , " - " , " " , " / " ] . contains ( char) {
184+ // In the middle of an identifier, these are considered
185+ // word separators, so we remove the character and end the current word.
186+ state = . waitingForWordStarter
187+ } else if [ " . " ] . contains ( char) {
188+ // In the middle of an identifier, these get replaced with
189+ // an underscore, but continue the current word.
190+ buffer. append ( " _ " )
191+ state = . accumulatingFirstWord( . init( isAccumulatingInitialUppercase: false ) )
192+ } else if [ " { " , " } " ] . contains ( char) {
193+ // In the middle of an identifier, curly braces are dropped.
194+ state = . accumulatingFirstWord( . init( isAccumulatingInitialUppercase: false ) )
195+ } else {
196+ // Illegal character, fall back to the defensive strategy.
197+ state = . fallback
198+ }
199+ case . accumulatingWord:
200+ if char. isLetter || char. isNumber {
201+ if isAllUppercase { buffer. append ( contentsOf: char. lowercased ( ) ) } else { buffer. append ( char) }
143202 state = . accumulatingWord
144- } else if [ " _ " , " - " , " " ] . contains ( char) {
145- // In the middle of an identifier, dashes, underscores, and spaces are considered
203+ } else if Self . wordSeparators . contains ( char) {
204+ // In the middle of an identifier, these are considered
146205 // word separators, so we remove the character and end the current word.
147206 state = . waitingForWordStarter
148- } else if [ " . " , " / " ] . contains ( char) {
149- // In the middle of an identifier, a period or a slash gets replaced with
150- // an underscore, but continues the current word.
207+ } else if [ " . " ] . contains ( char) {
208+ // In the middle of an identifier, these get replaced with
209+ // an underscore, but continue the current word.
151210 buffer. append ( " _ " )
152211 state = . accumulatingWord
153212 } else if [ " { " , " } " ] . contains ( char) {
@@ -170,24 +229,18 @@ extension String {
170229 // Illegal character, fall back to the defensive strategy.
171230 state = . fallback
172231 }
173- case . modifying, . fallback:
174- preconditionFailure ( " Logic error in \( #function) , string: ' \( self ) ' " )
232+ case . modifying, . fallback: preconditionFailure ( " Logic error in \( #function) , string: ' \( self ) ' " )
175233 }
176234 precondition ( state != . modifying, " Logic error in \( #function) , string: ' \( self ) ' " )
177- if case . fallback = state {
178- return safeForSwiftCode_defensive ( options: options)
179- }
180- }
181- if buffer. isEmpty || state == . preFirstWord {
182- return safeForSwiftCode_defensive ( options: options)
235+ if case . fallback = state { return safeForSwiftCode_defensive ( options: options) }
183236 }
237+ if buffer. isEmpty || state == . preFirstWord { return safeForSwiftCode_defensive ( options: options) }
184238 // Check for keywords
185239 let newString = String ( buffer)
186- if Self . keywords. contains ( newString) {
187- return " _ \( newString) "
188- }
240+ if Self . keywords. contains ( newString) { return " _ \( newString) " }
189241 return newString
190242 }
/// Characters that `safeForSwiftCode_idiomatic` treats as word separators in the
/// middle of an identifier: the separator itself is not appended to the output
/// and the current word ends.
243+ private static let wordSeparators : Set < Character > = [ " _ " , " - " , " " , " / " ]
191244
192245 /// A list of Swift keywords.
193246 ///
0 commit comments