@@ -221,7 +221,11 @@ extension CompilationDatabase.Command: Codable {
221
221
if let arguments = try container. decodeIfPresent ( [ String ] . self, forKey: . arguments) {
222
222
self . commandLine = arguments
223
223
} else if let command = try container. decodeIfPresent ( String . self, forKey: . command) {
224
+ #if os(Windows)
225
+ self . commandLine = splitWindowsCommandLine ( command, initialCommandName: true )
226
+ #else
224
227
self . commandLine = splitShellEscapedCommand ( command)
228
+ #endif
225
229
} else {
226
230
throw CompilationDatabaseDecodingError . missingCommandOrArguments
227
231
}
@@ -355,3 +359,216 @@ public func splitShellEscapedCommand(_ cmd: String) -> [String] {
355
359
var parser = Parser ( cmd [ ... ] )
356
360
return parser. parse ( )
357
361
}
362
+
363
+ // MARK: - Windows
364
+
365
fileprivate extension Character {
  /// Whether this character is a space or a horizontal tab, the two characters that separate arguments on a Windows
  /// command line.
  ///
  /// - Note: Inside this file this declaration shadows the standard library's `Character.isWhitespace`, which also
  ///   matches newlines and other Unicode whitespace.
  var isWhitespace: Bool {
    switch self {
    case " ", "\t":
      return true
    default:
      return false
    }
  }

  /// Whether this character is a space, a horizontal tab or the NUL character.
  var isWhitespaceOrNull: Bool {
    return self.isWhitespace || self == "\0"
  }

  /// Whether this character needs special treatment while splitting a Windows command line, i.e. whether it cannot
  /// simply be appended to the argument that is currently being built.
  ///
  /// Spaces, tabs, NUL and `"` are always special. `\` is only special when `inCommandName` is `false`, because a
  /// backslash does not escape a quote while the initial command name is being parsed.
  ///
  /// - Parameter inCommandName: Whether the character belongs to the initial command name of the command line.
  /// - Returns: `true` if the character terminates an argument, starts or ends a quoted section, or starts a
  ///   backslash run that needs to be interpreted.
  func isWindowsSpecialChar(inCommandName: Bool) -> Bool {
    // NUL has to be special as well: the argument parser ends an argument on whitespace *or* NUL and the top-level
    // loop skips both between arguments. Treating NUL as an ordinary character here would glue it into the argument.
    if isWhitespaceOrNull || self == "\"" {
      return true
    }
    return !inCommandName && self == "\\"
  }
}
392
+
393
/// Splits a Windows command line into its arguments, unfolding quotes and backslash escapes.
///
/// Create the parser with the full command string and call `parse()` to obtain the arguments.
fileprivate struct WindowsCommandParser {
  /// The content of the entire command that shall be parsed.
  private let content: String

  /// Whether we are parsing the initial command name. In this mode `\` is not treated as escaping the quote
  /// character.
  private var parsingCommandName: Bool

  /// An index into `content`, pointing to the character that we are currently parsing.
  private var currentCharacterIndex: String.Index

  /// The split command line arguments.
  private var result: [String] = []

  /// The character that is currently being parsed.
  ///
  /// `nil` if we have reached the end of `content`.
  private var currentCharacter: Character? {
    guard currentCharacterIndex < content.endIndex else {
      return nil
    }
    return content[currentCharacterIndex]
  }

  /// The character after `currentCharacter`.
  ///
  /// `nil` if `currentCharacter` is the last character of `content` or if we have already reached the end of
  /// `content`.
  private var peek: Character? {
    // `index(after:)` must not be called on `endIndex`, so bail out before advancing.
    guard currentCharacterIndex < content.endIndex else {
      return nil
    }
    let nextIndex = content.index(after: currentCharacterIndex)
    guard nextIndex < content.endIndex else {
      return nil
    }
    return content[nextIndex]
  }

  /// Creates a parser for `string`.
  ///
  /// - Parameters:
  ///   - string: The command line that shall be split.
  ///   - initialCommandName: Whether `string` starts with a command name, in which `\` does not escape quotes.
  init(_ string: String, initialCommandName: Bool) {
    self.content = string
    self.currentCharacterIndex = self.content.startIndex
    self.parsingCommandName = initialCommandName
  }

  /// Designated entry point to split a Windows command line invocation.
  ///
  /// - Returns: The arguments of the command line with all quoting and escaping unfolded.
  mutating func parse() -> [String] {
    while let currentCharacter {
      if currentCharacter.isWhitespaceOrNull {
        // Consume any whitespace separating arguments.
        consume()
      } else {
        result.append(parseSingleArgument())
      }
    }
    return result
  }

  /// Consume the current character and return it.
  ///
  /// It is a programmer error to call this once the end of `content` has been reached.
  @discardableResult
  private mutating func consume() -> Character {
    guard let character = currentCharacter else {
      preconditionFailure("Nothing to consume")
    }
    currentCharacterIndex = content.index(after: currentCharacterIndex)
    return character
  }

  /// Consume the current character, asserting that it is `expectedCharacter`.
  private mutating func consume(expect expectedCharacter: Character) {
    assert(currentCharacter == expectedCharacter)
    consume()
  }

  /// Parses a single argument, consuming its characters and returns the parsed argument with all escaping unfolded
  /// (e.g. `\"` gets returned as `"`).
  ///
  /// Afterwards the parser points to the character after the argument.
  mutating func parseSingleArgument() -> String {
    var str = ""
    while let currentCharacter {
      if !currentCharacter.isWindowsSpecialChar(inCommandName: parsingCommandName) {
        str.append(consume())
        continue
      }
      if currentCharacter.isWhitespaceOrNull {
        // The first argument is complete, so everything that follows is no longer part of the command name.
        parsingCommandName = false
        return str
      } else if currentCharacter == "\"" {
        str += parseQuoted()
      } else if currentCharacter == #"\"# {
        assert(!parsingCommandName, "else we'd have treated it as a normal char")
        str.append(parseBackslash())
      } else {
        preconditionFailure("unexpected special character")
      }
    }
    return str
  }

  /// Assuming that we are positioned at a `"`, parse a quoted string and return the string contents without the
  /// quotes.
  ///
  /// If the closing quote is missing, everything up to the end of `content` is returned.
  mutating func parseQuoted() -> String {
    // Discard the opening quote. It's not part of the unescaped text.
    consume(expect: "\"")

    var str = ""
    while let currentCharacter {
      switch currentCharacter {
      case "\"":
        if peek == "\"" {
          // Two adjacent quotes inside a quoted string are an escaped single quote. For example
          //  `" a "" b "`
          // represents the string
          //  ` a " b `
          consume(expect: "\"")
          consume(expect: "\"")
          str += "\""
        } else {
          // We have found the closing quote. Discard it and return.
          consume(expect: "\"")
          return str
        }
      case "\\" where !parsingCommandName:
        str.append(parseBackslash())
      default:
        str.append(consume())
      }
    }
    return str
  }

  /// Backslashes are interpreted in a rather complicated way in the Windows-style
  /// command line, because backslashes are used both to separate path components and to
  /// escape double quotes. This method consumes runs of backslashes as well as the
  /// following double quote if it's escaped.
  ///
  ///  * If an even number of backslashes is followed by a double quote, one
  ///    backslash is output for every pair of backslashes, and the last double
  ///    quote remains unconsumed. The double quote will later be interpreted as
  ///    the start or end of a quoted string in the main loop outside of this
  ///    function.
  ///
  ///  * If an odd number of backslashes is followed by a double quote, one
  ///    backslash is output for every pair of backslashes, and a double quote is
  ///    output for the last pair of backslash-double quote. The double quote is
  ///    consumed in this case.
  ///
  ///  * Otherwise, backslashes are interpreted literally.
  mutating func parseBackslash() -> String {
    var str: String = ""

    let firstNonBackslashIndex = content[currentCharacterIndex...].firstIndex(where: { $0 != "\\" }) ?? content.endIndex
    let numberOfBackslashes = content.distance(from: currentCharacterIndex, to: firstNonBackslashIndex)

    if firstNonBackslashIndex != content.endIndex && content[firstNonBackslashIndex] == "\"" {
      str += String(repeating: "\\", count: numberOfBackslashes / 2)
      if numberOfBackslashes.isMultiple(of: 2) {
        // We have an even number of backslashes. Just add the escaped backslashes to `str` and return to parse the
        // quote in the outer function.
        currentCharacterIndex = firstNonBackslashIndex
      } else {
        // We have an odd number of backslashes. The last backslash escapes the quote.
        str += "\""
        currentCharacterIndex = content.index(after: firstNonBackslashIndex)
      }
      return str
    }

    // The sequence of backslashes is not followed by quotes. Interpret them literally.
    str += String(repeating: "\\", count: numberOfBackslashes)
    currentCharacterIndex = firstNonBackslashIndex
    return str
  }
}
564
+
565
/// Splits a Windows command line into its individual arguments.
///
/// Sometimes `cmd` is a full command line that begins with an executable pathname. That initial pathname needs
/// different handling from the arguments that follow it: when CreateProcess or cmd.exe scans the pathname, `\` is
/// not treated as escaping the quote character, whereas when libc scans the rest of the command line, it is.
///
/// - Parameters:
///   - cmd: The command line to split.
///   - initialCommandName: Whether `cmd` starts with an executable pathname that must be parsed without treating
///     `\` as an escape for `"`.
/// - Returns: The arguments produced by `WindowsCommandParser` for `cmd`.
public func splitWindowsCommandLine(_ cmd: String, initialCommandName: Bool) -> [String] {
  var windowsParser = WindowsCommandParser(cmd, initialCommandName: initialCommandName)
  let arguments = windowsParser.parse()
  return arguments
}
0 commit comments