Skip to content

Commit 7e2d80c

Browse files
authored
Merge pull request #1043 from z2oh/jeremy/5.10/split-windows-command-line
[5.10] Add logic to split command line arguments on Windows
2 parents 78c59db + 73266b5 commit 7e2d80c

File tree

2 files changed

+320
-0
lines changed

2 files changed

+320
-0
lines changed

Sources/SKCore/CompilationDatabase.swift

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,11 @@ extension CompilationDatabase.Command: Codable {
221221
if let arguments = try container.decodeIfPresent([String].self, forKey: .arguments) {
222222
self.commandLine = arguments
223223
} else if let command = try container.decodeIfPresent(String.self, forKey: .command) {
224+
#if os(Windows)
225+
self.commandLine = splitWindowsCommandLine(command, initialCommandName: true)
226+
#else
224227
self.commandLine = splitShellEscapedCommand(command)
228+
#endif
225229
} else {
226230
throw CompilationDatabaseDecodingError.missingCommandOrArguments
227231
}
@@ -355,3 +359,216 @@ public func splitShellEscapedCommand(_ cmd: String) -> [String] {
355359
var parser = Parser(cmd[...])
356360
return parser.parse()
357361
}
362+
363+
// MARK: - Windows
364+
365+
fileprivate extension Character {
  /// Whether this character is one of the two characters that separate arguments on a Windows command line: a space
  /// or a horizontal tab.
  ///
  /// This is deliberately narrower than the Unicode-aware notion of whitespace: newlines and other Unicode spaces are
  /// treated as ordinary argument characters.
  var isWhitespace: Bool {
    switch self {
    case " ", "\t":
      return true
    default:
      return false
    }
  }

  /// Whether this character is an argument separator (`isWhitespace`) or a NUL character.
  var isWhitespaceOrNull: Bool {
    return self.isWhitespace || self == "\0"
  }

  /// Whether this character needs special handling while splitting a Windows command line, i.e. it cannot simply be
  /// appended to the argument that is currently being built.
  ///
  /// Separators (space, tab and NUL) and the double quote are always special. NUL is included so that this predicate
  /// agrees with `isWhitespaceOrNull`, which is what the parser uses both to skip separators between arguments and to
  /// detect the end of an argument.
  ///
  /// - Parameter inCommandName: Whether the initial command name is being parsed. In that mode a backslash is an
  ///   ordinary path character and is not special; otherwise it is special because it may escape a double quote.
  /// - Returns: `true` if the character is a separator, a double quote, or (outside the command name) a backslash.
  func isWindowsSpecialChar(inCommandName: Bool) -> Bool {
    if isWhitespaceOrNull {
      return true
    }
    if self == #"""# {
      return true
    }
    if !inCommandName && self == #"\"# {
      return true
    }
    return false
  }
}
392+
393+
/// Splits a Windows command line into its individual arguments, unfolding quoting and backslash escaping.
///
/// The parser walks `content` one `Character` at a time. Create it with the full command line and call `parse()`.
fileprivate struct WindowsCommandParser {
  /// The content of the entire command that shall be parsed.
  private let content: String

  /// Whether we are parsing the initial command name. In this mode `\` is not treated as escaping the quote
  /// character.
  private var parsingCommandName: Bool

  /// An index into `content`, pointing to the character that we are currently parsing.
  private var currentCharacterIndex: String.Index

  /// The split command line arguments.
  private var result: [String] = []

  /// The character that is currently being parsed.
  ///
  /// `nil` if we have reached the end of `content`.
  private var currentCharacter: Character? {
    guard currentCharacterIndex < content.endIndex else {
      return nil
    }
    return content[currentCharacterIndex]
  }

  /// The character after `currentCharacter`.
  ///
  /// `nil` if `currentCharacter` is the last character of `content` or if we have already reached the end.
  private var peek: Character? {
    // Advancing past `endIndex` traps, so bail out before computing the next index.
    guard currentCharacterIndex < content.endIndex else {
      return nil
    }
    let nextIndex = content.index(after: currentCharacterIndex)
    guard nextIndex < content.endIndex else {
      return nil
    }
    return content[nextIndex]
  }

  /// Create a parser for the command line `string`.
  ///
  /// - Parameters:
  ///   - string: The command line to split.
  ///   - initialCommandName: Whether `string` starts with a command name, in which `\` must not be treated as an
  ///     escape character.
  init(_ string: String, initialCommandName: Bool) {
    self.content = string
    self.currentCharacterIndex = self.content.startIndex
    self.parsingCommandName = initialCommandName
  }

  /// Designated entry point to split a Windows command line invocation.
  ///
  /// - Returns: The arguments in the order in which they occur in the command line.
  mutating func parse() -> [String] {
    while let currentCharacter {
      if currentCharacter.isWhitespaceOrNull {
        // Consume any whitespace separating arguments.
        consume()
      } else {
        result.append(parseSingleArgument())
      }
    }
    return result
  }

  /// Consume the current character and return it.
  ///
  /// It is a programmer error to call this after the end of `content` has been reached.
  @discardableResult
  private mutating func consume() -> Character {
    guard let character = currentCharacter else {
      preconditionFailure("Nothing to consume")
    }
    currentCharacterIndex = content.index(after: currentCharacterIndex)
    return character
  }

  /// Consume the current character, asserting that it is `expectedCharacter`
  private mutating func consume(expect expectedCharacter: Character) {
    assert(currentCharacter == expectedCharacter)
    consume()
  }

  /// Parses a single argument, consuming its characters and returns the parsed arguments with all escaping unfolded
  /// (e.g. `\"` gets returned as `"`)
  ///
  /// Afterwards the parser points to the character after the argument.
  mutating func parseSingleArgument() -> String {
    var str = ""
    while let currentCharacter {
      if !currentCharacter.isWindowsSpecialChar(inCommandName: parsingCommandName) {
        str.append(consume())
        continue
      }
      if currentCharacter.isWhitespaceOrNull {
        // The first separator ends the command name; everything after it follows the regular argument rules.
        parsingCommandName = false
        return str
      } else if currentCharacter == "\"" {
        str += parseQuoted()
      } else if currentCharacter == #"\"# {
        assert(!parsingCommandName, "else we'd have treated it as a normal char")
        str.append(parseBackslash())
      } else {
        preconditionFailure("unexpected special character")
      }
    }
    return str
  }

  /// Assuming that we are positioned at a `"`, parse a quoted string and return the string contents without the
  /// quotes.
  ///
  /// If the closing quote is missing, everything up to the end of `content` is returned.
  mutating func parseQuoted() -> String {
    // Discard the opening quote. It's not part of the unescaped text.
    consume(expect: "\"")

    var str = ""
    while let currentCharacter {
      switch currentCharacter {
      case "\"":
        if peek == "\"" {
          // Two adjacent quotes inside a quoted string are an escaped single quote. For example
          // `" a "" b "`
          // represents the string
          // ` a " b `
          consume(expect: "\"")
          consume(expect: "\"")
          str += "\""
        } else {
          // We have found the closing quote. Discard it and return.
          consume(expect: "\"")
          return str
        }
      case "\\" where !parsingCommandName:
        str.append(parseBackslash())
      default:
        str.append(consume())
      }
    }
    return str
  }

  /// Backslashes are interpreted in a rather complicated way in the Windows-style
  /// command line, because backslashes are used both to separate path and to
  /// escape double quote. This method consumes runs of backslashes as well as the
  /// following double quote if it's escaped.
  ///
  ///  * If an even number of backslashes is followed by a double quote, one
  ///    backslash is output for every pair of backslashes, and the last double
  ///    quote remains unconsumed. The double quote will later be interpreted as
  ///    the start or end of a quoted string in the main loop outside of this
  ///    function.
  ///
  ///  * If an odd number of backslashes is followed by a double quote, one
  ///    backslash is output for every pair of backslashes, and a double quote is
  ///    output for the last pair of backslash-double quote. The double quote is
  ///    consumed in this case.
  ///
  ///  * Otherwise, backslashes are interpreted literally.
  mutating func parseBackslash() -> String {
    var str: String = ""

    let firstNonBackslashIndex = content[currentCharacterIndex...].firstIndex(where: { $0 != "\\" }) ?? content.endIndex
    let numberOfBackslashes = content.distance(from: currentCharacterIndex, to: firstNonBackslashIndex)

    if firstNonBackslashIndex != content.endIndex && content[firstNonBackslashIndex] == "\"" {
      str += String(repeating: "\\", count: numberOfBackslashes / 2)
      if numberOfBackslashes.isMultiple(of: 2) {
        // We have an even number of backslashes. Just add the escaped backslashes to `str` and return to parse the
        // quote in the outer function.
        currentCharacterIndex = firstNonBackslashIndex
      } else {
        // We have an odd number of backslashes. The last backslash escapes the quote.
        str += "\""
        currentCharacterIndex = content.index(after: firstNonBackslashIndex)
      }
      return str
    }

    // The sequence of backslashes is not followed by quotes. Interpret them literally.
    str += String(repeating: "\\", count: numberOfBackslashes)
    currentCharacterIndex = firstNonBackslashIndex
    return str
  }
}
564+
565+
/// Split a Windows command line into its arguments, unfolding quotes and backslash escapes.
///
/// The input may be a full command line that begins with an executable pathname. That leading pathname has to be
/// handled differently from the arguments that follow it: when CreateProcess or cmd.exe scans the pathname, `\` is
/// not treated as escaping the quote character, whereas libc does treat it as an escape when scanning the rest of
/// the command line.
///
/// - Parameters:
///   - cmd: The command line to split.
///   - initialCommandName: Whether `cmd` starts with an executable pathname that must be parsed without backslash
///     escaping.
/// - Returns: The individual arguments of `cmd`.
public func splitWindowsCommandLine(_ cmd: String, initialCommandName: Bool) -> [String] {
  var commandLineParser = WindowsCommandParser(cmd, initialCommandName: initialCommandName)
  let arguments = commandLineParser.parse()
  return arguments
}

Tests/SKCoreTests/CompilationDatabaseTests.swift

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,80 @@ final class CompilationDatabaseTests: XCTestCase {
5757
check("\"a\"bcd\"ef\"\"\"\"g\"", ["abcdefg"])
5858
check("a'\\b \"c\"'", ["a\\b \"c\""])
5959
}
60+
61+
/// Unquoted input: empty strings, single arguments (including non-ASCII characters) and whitespace-separated
/// arguments must split identically under Unix and Windows rules.
func testSplitShellEscapedCommandBasic() {
  // Empty and whitespace-only input yields no arguments.
  assertEscapedCommand("", [])
  assertEscapedCommand(" ", [])

  // A single argument is returned unchanged, also when it contains multi-byte characters.
  assertEscapedCommand("a", ["a"])
  assertEscapedCommand("abc", ["abc"])
  assertEscapedCommand("a😀c", ["a😀c"])
  assertEscapedCommand("😀c", ["😀c"])

  // Arguments are separated by a single space ...
  assertEscapedCommand("abc def", ["abc", "def"])
  // ... and a run of several spaces must not produce empty arguments.
  assertEscapedCommand("abc    def", ["abc", "def"])
}
71+
72+
/// Double-quoted input. `windows` is `nil` where Unix and Windows rules agree on the result.
func testSplitShellEscapedCommandDoubleQuotes() {
  typealias QuoteCase = (command: String, unix: [String], windows: [String]?, line: UInt)

  // Each row records its own line so that a failure is reported at the row rather than inside the loop.
  let quoteCases: [QuoteCase] = [
    ("\"", [""], nil, #line),
    (#""a"#, ["a"], nil, #line),
    ("\"\"", [""], nil, #line),
    (#""a""#, ["a"], nil, #line),
    (#""a\"""#, [#"a""#], nil, #line),
    (#""a b c ""#, ["a b c "], nil, #line),
    (#""a " "#, ["a "], nil, #line),
    (#""a " b"#, ["a ", "b"], nil, #line),
    (#""a "b"#, ["a b"], nil, #line),
    (#"a"x ""b"#, ["ax b"], [#"ax "b"#], #line),
    (#""a"bcd"ef""""g""#, ["abcdefg"], [#"abcdef""g"#], #line),
  ]

  for quoteCase in quoteCases {
    assertEscapedCommand(quoteCase.command, quoteCase.unix, windows: quoteCase.windows, line: quoteCase.line)
  }
}
6086

87+
/// Single-quoted input. Unix rules treat `'` as a quoting character, whereas the Windows expectations keep it as an
/// ordinary character, so every row carries a separate Windows result.
func testSplitShellEscapedCommandSingleQuotes() {
  typealias QuoteCase = (command: String, unix: [String], windows: [String], line: UInt)

  // Each row records its own line so that a failure is reported at the row rather than inside the loop.
  let quoteCases: [QuoteCase] = [
    ("'", [""], ["'"], #line),
    ("'a", ["a"], ["'a"], #line),
    ("''", [""], ["''"], #line),
    ("'a'", ["a"], ["'a'"], #line),
    (#"'a\"'"#, [#"a\""#], [#"'a"'"#], #line),
    (#"'a b c '"#, ["a b c "], ["'a", "b", "c", "'"], #line),
    (#"'a ' "#, ["a "], ["'a", "'"], #line),
    (#"'a ' b"#, ["a ", "b"], ["'a", "'", "b"], #line),
    (#"'a 'b"#, ["a b"], ["'a", "'b"], #line),
    (#"a'x ''b"#, ["ax b"], ["a'x", "''b"], #line),
  ]

  for quoteCase in quoteCases {
    assertEscapedCommand(quoteCase.command, quoteCase.unix, windows: quoteCase.windows, line: quoteCase.line)
  }
}
99+
100+
/// Backslash handling. `windows` is `nil` where Unix and Windows rules agree on the result.
func testSplitShellEscapedCommandBackslash() {
  typealias BackslashCase = (command: String, unix: [String], windows: [String]?, line: UInt)

  // Each row records its own line so that a failure is reported at the row rather than inside the loop.
  let backslashCases: [BackslashCase] = [
    (#"a\\"#, [#"a\"#], [#"a\\"#], #line),
    (#"a'\b "c"'"#, ["a\\b \"c\""], [#"a'\b"#, #"c'"#], #line),
    (#"\""#, ["\""], nil, #line),
    (#"\\""#, [#"\"#], nil, #line),
    (#"\\\""#, [#"\""#], nil, #line),
    (#"\\ "#, [#"\"#], [#"\\"#], #line),
    (#"\\\ "#, [#"\ "#], [#"\\\"#], #line),
  ]

  for backslashCase in backslashCases {
    assertEscapedCommand(
      backslashCase.command,
      backslashCase.unix,
      windows: backslashCase.windows,
      line: backslashCase.line
    )
  }
}
110+
111+
/// Command lines that start with a Windows path, split both with and without treating the first argument as the
/// command name.
func testSplitShellEscapedCommandWindowsCommand() {
  typealias CommandCase = (command: String, unix: [String], windows: [String], initialCommandName: Bool, line: UInt)

  // Each row records its own line so that a failure is reported at the row rather than inside the loop.
  let commandCases: [CommandCase] = [
    (#"C:\swift.exe"#, [#"C:swift.exe"#], [#"C:\swift.exe"#], true, #line),
    (#"C:\ swift.exe"#, [#"C: swift.exe"#], [#"C:\"#, #"swift.exe"#], true, #line),
    (#"C:\ swift.exe"#, [#"C: swift.exe"#], [#"C:\"#, #"swift.exe"#], false, #line),
    (#"C:\"swift.exe""#, [#"C:"swift.exe"#], [#"C:\swift.exe"#], true, #line),
    (#"C:\"swift.exe""#, [#"C:"swift.exe"#], [#"C:"swift.exe"#], false, #line),
  ]

  for commandCase in commandCases {
    assertEscapedCommand(
      commandCase.command,
      commandCase.unix,
      windows: commandCase.windows,
      initialCommandName: commandCase.initialCommandName,
      line: commandCase.line
    )
  }
}
128+
129+
/// Two adjacent double quotes inside a quoted string.
///
/// Under Windows rules `""` inside a quoted string is an escaped `"`. Under Unix rules the first quote closes the
/// string and the second one opens a new one, so the two quoted segments `" test with "` and `" quote"` are simply
/// concatenated, which leaves two spaces between `with` and `quote`.
func testSplitShellEscapedCommandWindowsTwoDoubleQuotes() {
  assertEscapedCommand(#"" test with "" quote""#, [" test with  quote"], windows: [#" test with " quote"#])

  // The `""` escape also applies while the initial command name is being parsed.
  assertEscapedCommand(
    #"" test with "" quote""#,
    [" test with  quote"],
    windows: [#" test with " quote"#],
    initialCommandName: true
  )
}
133+
61134
func testEncodeCompDBCommand() throws {
62135
// Requires JSONEncoder.OutputFormatting.sortedKeys
63136
func check(_ cmd: CompilationDatabase.Command, _ expected: String, file: StaticString = #filePath, line: UInt = #line) throws {
@@ -332,3 +405,33 @@ private func checkCompilationDatabaseBuildSystem(_ compdb: ByteString, file: Sta
332405
let buildSystem = CompilationDatabaseBuildSystem(projectRoot: try AbsolutePath(validating: "/a"), fileSystem: fs)
333406
try block(buildSystem)
334407
}
408+
409+
/// Check that `str` splits into the expected command line components under both Unix and Windows rules.
///
/// - Parameters:
///   - str: The command line to split.
///   - expected: The components expected when splitting with Unix rules. Also the expectation for Windows rules
///     unless `windows` is given.
///   - windows: The components expected when splitting with Windows rules, if they differ from `expected`.
///   - initialCommandName: Forwarded to the Windows split function.
///   - file: The file to which assertion failures are attributed.
///   - line: The line to which assertion failures are attributed.
private func assertEscapedCommand(
  _ str: String,
  _ expected: [String],
  windows: [String]? = nil,
  initialCommandName: Bool = false,
  file: StaticString = #filePath,
  line: UInt = #line
) {
  let unixComponents = splitShellEscapedCommand(str)
  XCTAssertEqual(unixComponents, expected, "Splitting Unix command line arguments", file: file, line: line)

  let windowsComponents = splitWindowsCommandLine(str, initialCommandName: initialCommandName)
  let expectedWindowsComponents = windows ?? expected
  XCTAssertEqual(
    windowsComponents,
    expectedWindowsComponents,
    "Splitting Windows command line arguments",
    file: file,
    line: line
  )
}

0 commit comments

Comments
 (0)