Skip to content

Commit 73266b5

Browse files
committed
Add logic to split command line arguments on Windows
Previously, we were splitting command line arguments on Windows using the same rules as on Unix, which was incorrect, most importantly because backslashes in the first component of a Windows command line invocation are not escaping anything but interpreted verbatim. Fixes apple#1020 rdar://120809063
1 parent 78c59db commit 73266b5

File tree

2 files changed

+320
-0
lines changed

2 files changed

+320
-0
lines changed

Sources/SKCore/CompilationDatabase.swift

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,11 @@ extension CompilationDatabase.Command: Codable {
221221
if let arguments = try container.decodeIfPresent([String].self, forKey: .arguments) {
222222
self.commandLine = arguments
223223
} else if let command = try container.decodeIfPresent(String.self, forKey: .command) {
224+
#if os(Windows)
225+
self.commandLine = splitWindowsCommandLine(command, initialCommandName: true)
226+
#else
224227
self.commandLine = splitShellEscapedCommand(command)
228+
#endif
225229
} else {
226230
throw CompilationDatabaseDecodingError.missingCommandOrArguments
227231
}
@@ -355,3 +359,216 @@ public func splitShellEscapedCommand(_ cmd: String) -> [String] {
355359
var parser = Parser(cmd[...])
356360
return parser.parse()
357361
}
362+
363+
// MARK: - Windows
364+
365+
fileprivate extension Character {
  /// Whether the character separates arguments on a Windows command line, i.e. whether it is a space or a tab.
  ///
  /// This intentionally shadows the standard library's `Character.isWhitespace` within this file so that only the
  /// two characters listed here count as whitespace.
  var isWhitespace: Bool {
    switch self {
    case " ", "\t":
      return true
    default:
      return false
    }
  }

  /// Whether the character is whitespace (see `isWhitespace`) or the NUL character.
  var isWhitespaceOrNull: Bool {
    return self.isWhitespace || self == "\0"
  }

  /// Whether the character needs special handling while splitting a Windows command line.
  ///
  /// Special characters are whitespace and NUL (which terminate an argument), the double quote (which starts or ends
  /// a quoted string) and, outside of the command name, the backslash (which may escape a double quote).
  ///
  /// NUL is included so that this classification agrees with the argument loop, which uses `isWhitespaceOrNull` to
  /// decide where an argument ends. Otherwise a NUL in the middle of an argument would be copied into the argument
  /// while a NUL between arguments would be dropped.
  ///
  /// - Parameter inCommandName: `true` while the initial command name is being parsed. In that mode a backslash is
  ///   an ordinary character.
  func isWindowsSpecialChar(inCommandName: Bool) -> Bool {
    if isWhitespaceOrNull {
      return true
    }
    if self == "\"" {
      return true
    }
    if !inCommandName && self == "\\" {
      return true
    }
    return false
  }
}
392+
393+
/// Splits a Windows command line invocation into its arguments.
///
/// The parser walks `content` one character at a time. Whitespace separates arguments, double quotes group
/// characters (including whitespace) into a single argument and, outside of the initial command name, backslashes
/// may escape double quotes.
fileprivate struct WindowsCommandParser {
  /// The content of the entire command that shall be parsed.
  private let content: String

  /// Whether we are parsing the initial command name. In this mode `\` is not treated as escaping the quote
  /// character.
  private var parsingCommandName: Bool

  /// An index into `content`, pointing to the character that we are currently parsing.
  private var currentCharacterIndex: String.Index

  /// The split command line arguments.
  private var result: [String] = []

  /// The character that is currently being parsed.
  ///
  /// `nil` if we have reached the end of `content`.
  private var currentCharacter: Character? {
    guard currentCharacterIndex < content.endIndex else {
      return nil
    }
    return content[currentCharacterIndex]
  }

  /// The character after `currentCharacter`.
  ///
  /// `nil` if `currentCharacter` is the last character of `content` or if we have already reached the end.
  private var peek: Character? {
    // Advancing past `endIndex` traps, so bail out before computing the next index.
    guard currentCharacterIndex < content.endIndex else {
      return nil
    }
    let nextIndex = content.index(after: currentCharacterIndex)
    guard nextIndex < content.endIndex else {
      return nil
    }
    return content[nextIndex]
  }

  /// Creates a parser for `string`.
  ///
  /// - Parameters:
  ///   - string: The command line to split.
  ///   - initialCommandName: Whether the first argument of `string` is a command name, in which backslashes are
  ///     interpreted verbatim.
  init(_ string: String, initialCommandName: Bool) {
    self.content = string
    self.currentCharacterIndex = self.content.startIndex
    self.parsingCommandName = initialCommandName
  }

  /// Designated entry point to split a Windows command line invocation.
  ///
  /// - Returns: The arguments in the order in which they occur in the command line.
  mutating func parse() -> [String] {
    while let currentCharacter {
      if currentCharacter.isWhitespaceOrNull {
        // Consume any whitespace separating arguments.
        consume()
      } else {
        result.append(parseSingleArgument())
      }
    }
    return result
  }

  /// Consume the current character and return it.
  ///
  /// It is a programmer error to call this when the end of `content` has been reached.
  @discardableResult
  private mutating func consume() -> Character {
    guard let character = currentCharacter else {
      preconditionFailure("Nothing to consume")
    }
    currentCharacterIndex = content.index(after: currentCharacterIndex)
    return character
  }

  /// Consume the current character, asserting that it is `expectedCharacter`
  private mutating func consume(expect expectedCharacter: Character) {
    assert(currentCharacter == expectedCharacter)
    consume()
  }

  /// Parses a single argument, consuming its characters and returns the parsed arguments with all escaping unfolded
  /// (e.g. `\"` gets returned as `"`)
  ///
  /// Afterwards the parser points to the character after the argument. Once the argument has been terminated by
  /// whitespace, the parser no longer considers itself to be inside the command name.
  mutating func parseSingleArgument() -> String {
    var str = ""
    while let currentCharacter {
      if !currentCharacter.isWindowsSpecialChar(inCommandName: parsingCommandName) {
        str.append(consume())
        continue
      }
      if currentCharacter.isWhitespaceOrNull {
        // The separator itself is left for `parse()` to consume.
        parsingCommandName = false
        return str
      } else if currentCharacter == "\"" {
        str += parseQuoted()
      } else if currentCharacter == #"\"# {
        assert(!parsingCommandName, "else we'd have treated it as a normal char")
        str.append(parseBackslash())
      } else {
        preconditionFailure("unexpected special character")
      }
    }
    return str
  }

  /// Assuming that we are positioned at a `"`, parse a quoted string and return the string contents without the
  /// quotes.
  ///
  /// If the closing quote is missing, everything up to the end of `content` is returned.
  mutating func parseQuoted() -> String {
    // Discard the opening quote. It's not part of the unescaped text.
    consume(expect: "\"")

    var str = ""
    while let currentCharacter {
      switch currentCharacter {
      case "\"":
        if peek == "\"" {
          // Two adjacent quotes inside a quoted string are an escaped single quote. For example
          // `" a "" b "`
          // represents the string
          // ` a " b `
          consume(expect: "\"")
          consume(expect: "\"")
          str += "\""
        } else {
          // We have found the closing quote. Discard it and return.
          consume(expect: "\"")
          return str
        }
      case "\\" where !parsingCommandName:
        str.append(parseBackslash())
      default:
        str.append(consume())
      }
    }
    return str
  }

  /// Backslashes are interpreted in a rather complicated way in the Windows-style
  /// command line, because backslashes are used both to separate path and to
  /// escape double quote. This method consumes runs of backslashes as well as the
  /// following double quote if it's escaped.
  ///
  ///  * If an even number of backslashes is followed by a double quote, one
  ///    backslash is output for every pair of backslashes, and the last double
  ///    quote remains unconsumed. The double quote will later be interpreted as
  ///    the start or end of a quoted string in the main loop outside of this
  ///    function.
  ///
  ///  * If an odd number of backslashes is followed by a double quote, one
  ///    backslash is output for every pair of backslashes, and a double quote is
  ///    output for the last pair of backslash-double quote. The double quote is
  ///    consumed in this case.
  ///
  ///  * Otherwise, backslashes are interpreted literally.
  mutating func parseBackslash() -> String {
    var str: String = ""

    let firstNonBackslashIndex = content[currentCharacterIndex...].firstIndex(where: { $0 != "\\" }) ?? content.endIndex
    let numberOfBackslashes = content.distance(from: currentCharacterIndex, to: firstNonBackslashIndex)

    if firstNonBackslashIndex != content.endIndex && content[firstNonBackslashIndex] == "\"" {
      str += String(repeating: "\\", count: numberOfBackslashes / 2)
      if numberOfBackslashes.isMultiple(of: 2) {
        // We have an even number of backslashes. Just add the escaped backslashes to `str` and return to parse the
        // quote in the outer function.
        currentCharacterIndex = firstNonBackslashIndex
      } else {
        // We have an odd number of backslashes. The last backslash escapes the quote.
        str += "\""
        currentCharacterIndex = content.index(after: firstNonBackslashIndex)
      }
      return str
    }

    // The sequence of backslashes is not followed by quotes. Interpret them literally.
    str += String(repeating: "\\", count: numberOfBackslashes)
    currentCharacterIndex = firstNonBackslashIndex
    return str
  }
}
564+
565+
/// Splits `cmd` into its arguments using Windows command line rules.
///
/// Sometimes, this function will be handling a full command line including an
/// executable pathname at the start. In that situation, the initial pathname
/// needs different handling from the following arguments, because when
/// CreateProcess or cmd.exe scans the pathname, it doesn't treat \ as
/// escaping the quote character, whereas when libc scans the rest of the
/// command line, it does.
///
/// - Parameters:
///   - cmd: The command line to split.
///   - initialCommandName: Whether `cmd` starts with an executable pathname that needs the handling described above.
/// - Returns: The arguments of `cmd` with quoting and escaping removed.
public func splitWindowsCommandLine(_ cmd: String, initialCommandName: Bool) -> [String] {
  var commandLineParser = WindowsCommandParser(cmd, initialCommandName: initialCommandName)
  let arguments = commandLineParser.parse()
  return arguments
}

Tests/SKCoreTests/CompilationDatabaseTests.swift

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,80 @@ final class CompilationDatabaseTests: XCTestCase {
5757
check("\"a\"bcd\"ef\"\"\"\"g\"", ["abcdefg"])
5858
check("a'\\b \"c\"'", ["a\\b \"c\""])
5959
}
60+
61+
/// Commands without any quoting or escaping are expected to split identically under Unix and Windows rules.
func testSplitShellEscapedCommandBasic() {
  // Empty and whitespace-only commands contain no arguments.
  assertEscapedCommand("", [])
  assertEscapedCommand(" ", [])

  // A single argument, including arguments with non-ASCII characters.
  assertEscapedCommand("a", ["a"])
  assertEscapedCommand("abc", ["abc"])
  assertEscapedCommand("a😀c", ["a😀c"])
  assertEscapedCommand("😀c", ["😀c"])

  // Two arguments separated by a single space and by a run of spaces. The run of spaces must not produce empty
  // arguments.
  assertEscapedCommand("abc def", ["abc", "def"])
  assertEscapedCommand("abc    def", ["abc", "def"])
}
71+
72+
/// Double-quoted strings: the quotes are removed and whitespace inside them does not split the argument.
func testSplitShellEscapedCommandDoubleQuotes() {
  // Unterminated and empty quoted strings still produce an argument.
  assertEscapedCommand("\"", [""])
  assertEscapedCommand(#""a"#, ["a"])
  assertEscapedCommand("\"\"", [""])
  assertEscapedCommand(#""a""#, ["a"])

  // A backslash-escaped quote inside a quoted string.
  assertEscapedCommand(#""a\"""#, [#"a""#])

  // Whitespace inside quotes is kept, whitespace after the closing quote separates arguments.
  assertEscapedCommand(#""a b c ""#, ["a b c "])
  assertEscapedCommand(#""a " "#, ["a "])
  assertEscapedCommand(#""a " b"#, ["a ", "b"])

  // Quoted and unquoted text that is directly adjacent forms a single argument.
  assertEscapedCommand(#""a "b"#, ["a b"])

  // Under Windows rules two adjacent quotes inside a quoted string are an escaped quote, so the results differ.
  assertEscapedCommand(
    #"a"x ""b"#,
    ["ax b"],
    windows: [#"ax "b"#]
  )
  assertEscapedCommand(
    #""a"bcd"ef""""g""#,
    ["abcdefg"],
    windows: [#"abcdef""g"#]
  )
}
6086

87+
/// Single quotes: the Unix expectations treat them as quoting characters, the Windows splitter treats them as
/// ordinary characters, so every case needs a separate `windows` expectation.
func testSplitShellEscapedCommandSingleQuotes() {
  // Unterminated and empty single-quoted strings.
  assertEscapedCommand("'", [""], windows: ["'"])
  assertEscapedCommand("'a", ["a"], windows: ["'a"])
  assertEscapedCommand("''", [""], windows: ["''"])
  assertEscapedCommand("'a'", ["a"], windows: ["'a'"])

  // The Windows splitter still unescapes `\"` because the single quotes do not start a quoted string for it.
  assertEscapedCommand(
    #"'a\"'"#,
    [#"a\""#],
    windows: [#"'a"'"#]
  )

  // Whitespace between single quotes splits arguments under Windows rules.
  assertEscapedCommand(
    #"'a b c '"#,
    ["a b c "],
    windows: ["'a", "b", "c", "'"]
  )
  assertEscapedCommand(
    #"'a ' "#,
    ["a "],
    windows: ["'a", "'"]
  )
  assertEscapedCommand(
    #"'a ' b"#,
    ["a ", "b"],
    windows: ["'a", "'", "b"]
  )
  assertEscapedCommand(
    #"'a 'b"#,
    ["a b"],
    windows: ["'a", "'b"]
  )
  assertEscapedCommand(
    #"a'x ''b"#,
    ["ax b"],
    windows: ["a'x", "''b"]
  )
}
99+
100+
/// Backslashes: under Windows rules a run of backslashes is only special when it is followed by a double quote.
func testSplitShellEscapedCommandBackslash() {
  // Backslashes that are not followed by a double quote are kept verbatim by the Windows splitter.
  assertEscapedCommand(
    #"a\\"#,
    [#"a\"#],
    windows: [#"a\\"#]
  )
  assertEscapedCommand(
    #"a'\b "c"'"#,
    ["a\\b \"c\""],
    windows: [#"a'\b"#, #"c'"#]
  )

  // Runs of one, two and three backslashes followed by a double quote are expected to split identically under both
  // rule sets.
  assertEscapedCommand(#"\""#, ["\""])
  assertEscapedCommand(#"\\""#, [#"\"#])
  assertEscapedCommand(#"\\\""#, [#"\""#])

  // Runs of backslashes followed by a space.
  assertEscapedCommand(
    #"\\ "#,
    [#"\"#],
    windows: [#"\\"#]
  )
  assertEscapedCommand(
    #"\\\ "#,
    [#"\ "#],
    windows: [#"\\\"#]
  )
}
110+
111+
/// Command names: with `initialCommandName` set, the Windows splitter keeps backslashes in the first argument verbatim
/// instead of letting them escape a double quote.
func testSplitShellEscapedCommandWindowsCommand() {
  // A backslash used as a path separator in the command name.
  assertEscapedCommand(
    #"C:\swift.exe"#,
    [#"C:swift.exe"#],
    windows: [#"C:\swift.exe"#],
    initialCommandName: true
  )

  // A backslash followed by a space gives the same Windows split with and without a command name.
  assertEscapedCommand(
    #"C:\ swift.exe"#,
    [#"C: swift.exe"#],
    windows: [#"C:\"#, #"swift.exe"#],
    initialCommandName: true
  )
  assertEscapedCommand(
    #"C:\ swift.exe"#,
    [#"C: swift.exe"#],
    windows: [#"C:\"#, #"swift.exe"#],
    initialCommandName: false
  )

  // A backslash followed by a double quote only escapes the quote outside of the command name.
  assertEscapedCommand(
    #"C:\"swift.exe""#,
    [#"C:"swift.exe"#],
    windows: [#"C:\swift.exe"#],
    initialCommandName: true
  )
  assertEscapedCommand(
    #"C:\"swift.exe""#,
    [#"C:"swift.exe"#],
    windows: [#"C:"swift.exe"#],
    initialCommandName: false
  )
}
128+
129+
/// Two adjacent double quotes inside a quoted string are an escaped quote under Windows rules. The Unix expectations
/// treat them as the end of one quoted string directly followed by the start of another.
func testSplitShellEscapedCommandWindowsTwoDoubleQuotes() {
  // The Unix expectation concatenates the segments ` test with ` and ` quote`, which yields two spaces between
  // `with` and `quote`.
  assertEscapedCommand(
    #"" test with "" quote""#,
    [" test with  quote"],
    windows: [#" test with " quote"#]
  )

  // The same construct without surrounding whitespace.
  assertEscapedCommand(
    #""x""y""#,
    ["xy"],
    windows: [#"x"y"#]
  )
}
133+
61134
func testEncodeCompDBCommand() throws {
62135
// Requires JSONEncoder.OutputFormatting.sortedKeys
63136
func check(_ cmd: CompilationDatabase.Command, _ expected: String, file: StaticString = #filePath, line: UInt = #line) throws {
@@ -332,3 +405,33 @@ private func checkCompilationDatabaseBuildSystem(_ compdb: ByteString, file: Sta
332405
let buildSystem = CompilationDatabaseBuildSystem(projectRoot: try AbsolutePath(validating: "/a"), fileSystem: fs)
333406
try block(buildSystem)
334407
}
408+
409+
/// Checks how `str` is split into command line arguments by both the Unix and the Windows splitter.
///
/// The Unix split is always compared against `expected`. The Windows split is compared against `windows` if it is
/// specified and against `expected` otherwise, i.e. by default both splitters are required to agree.
///
/// - Parameters:
///   - str: The command line to split.
///   - expected: The arguments that splitting with Unix rules must produce.
///   - windows: The arguments that splitting with Windows rules must produce, if they differ from `expected`.
///   - initialCommandName: Passed through to the Windows split function.
///   - file: The file to which assertion failures are attributed.
///   - line: The line to which assertion failures are attributed.
private func assertEscapedCommand(
  _ str: String,
  _ expected: [String],
  windows: [String]? = nil,
  initialCommandName: Bool = false,
  file: StaticString = #filePath,
  line: UInt = #line
) {
  let unixSplit = splitShellEscapedCommand(str)
  XCTAssertEqual(unixSplit, expected, "Splitting Unix command line arguments", file: file, line: line)

  let windowsSplit = splitWindowsCommandLine(str, initialCommandName: initialCommandName)
  let expectedWindowsSplit = windows ?? expected
  XCTAssertEqual(
    windowsSplit,
    expectedWindowsSplit,
    "Splitting Windows command line arguments",
    file: file,
    line: line
  )
}

0 commit comments

Comments
 (0)