Skip to content

Commit 899f5b1

Browse files
author
Iakov Senatov
committed
tgz/tbz2: extract to /tmp first then scan filesystem — no more hangs
1 parent 8932719 commit 899f5b1

File tree

1 file changed

+185
-37
lines changed

1 file changed

+185
-37
lines changed

GUI/Sources/FindFiles/Engine/FindFilesArchiveSearcher.swift

Lines changed: 185 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,11 @@ enum FindFilesArchiveSearcher {
660660

661661
// MARK: - TAR Search (CLI)
662662

663+
/// Reports whether `ext` is one of the extensions treated as a compressed tar
/// ("compound") archive.
///
/// The comparison is exact: no case folding or trimming is performed here.
/// - Parameter ext: Path extension to classify, without the leading dot.
/// - Returns: `true` when `ext` is in the compound-archive extension list.
private static func isCompoundTarArchive(_ ext: String) -> Bool {
    switch ext {
    case "tgz", "gz", "gzip",
         "bz2", "bzip2", "tbz", "tbz2",
         "xz", "txz",
         "lzma", "tlz",
         "z":
        return true
    default:
        return false
    }
}
667+
663668
@concurrent static func searchInsideTar(
664669
archiveURL: URL,
665670
criteria: FindFilesCriteria,
@@ -668,22 +673,166 @@ enum FindFilesArchiveSearcher {
668673
continuation: AsyncStream<FindFilesResult>.Continuation,
669674
recursionDepth: Int = 0
670675
) async -> ArchiveSearchDelta {
671-
var delta = ArchiveSearchDelta()
672676
let ext = archiveURL.pathExtension.lowercased()
673-
// Use -tvf for verbose output with size and date
674-
var args = ["-tvf"]
677+
log.info("[TAR] Starting search in \(archiveURL.lastPathComponent) (ext=\(ext))")
678+
679+
// For compound archives (tgz, tbz2, txz etc.) — extract to temp first, then scan the extracted files
680+
// This is more reliable than piped decompression which can hang
681+
if isCompoundTarArchive(ext) {
682+
log.info("[TAR] Compound archive detected, extracting to temp...")
683+
return await searchInsideCompoundTar(
684+
archiveURL: archiveURL, criteria: criteria, nameRegex: nameRegex,
685+
contentPattern: contentPattern, continuation: continuation,
686+
recursionDepth: recursionDepth
687+
)
688+
}
689+
690+
// Pure .tar — list directly
691+
return await listPureTar(
692+
archiveURL: archiveURL, originalArchiveURL: archiveURL,
693+
criteria: criteria, nameRegex: nameRegex, continuation: continuation
694+
)
695+
}
696+
697+
/// Extracts a compressed tar archive (tgz/tbz2/txz, …) into a fresh temporary
/// directory and then scans the extracted files on disk.
///
/// - Parameters:
///   - archiveURL: Archive to extract; also used as the base of the virtual result URLs.
///   - criteria: Search criteria forwarded to the filesystem scan.
///   - nameRegex: Optional file-name pattern forwarded to the filesystem scan.
///   - contentPattern: Not used by this function; kept so the signature mirrors the caller.
///   - continuation: Stream that receives every matching result.
///   - recursionDepth: Not used by this function; kept so the signature mirrors the caller.
/// - Returns: The delta produced by the scan, or an empty delta when the temp directory
///   cannot be created, tar cannot be launched, times out, or exits with a non-zero status.
@concurrent private static func searchInsideCompoundTar(
    archiveURL: URL,
    criteria: FindFilesCriteria,
    nameRegex: NSRegularExpression?,
    contentPattern: NSRegularExpression?,
    continuation: AsyncStream<FindFilesResult>.Continuation,
    recursionDepth: Int
) async -> ArchiveSearchDelta {
    // Unique directory per extraction so concurrent searches never collide.
    let tempDir = FileManager.default.temporaryDirectory
        .appendingPathComponent("MiMiNav_tar_\(UUID().uuidString)", isDirectory: true)

    do {
        try FileManager.default.createDirectory(at: tempDir, withIntermediateDirectories: true)
        // NOTE(review): presumably records the directory for later cleanup — confirm in registerTempDir.
        registerTempDir(tempDir)
        log.info("[TAR] Created temp dir: \(tempDir.path)")
    } catch {
        log.error("[TAR] Failed to create temp dir: \(error)")
        return ArchiveSearchDelta()
    }

    // Extract archive to temp directory using tar -xf, prefixed with the
    // decompression flag that matches the extension (none for unlisted extensions).
    let ext = archiveURL.pathExtension.lowercased()
    var extractArgs = ["-xf", archiveURL.path, "-C", tempDir.path]
    switch ext {
    case "gz", "gzip", "tgz": extractArgs.insert("-z", at: 0)
    case "bz2", "bzip2", "tbz", "tbz2": extractArgs.insert("-j", at: 0)
    case "xz", "txz": extractArgs.insert("-J", at: 0)
    case "z": extractArgs.insert("-Z", at: 0)
    default: break
    }

    log.info("[TAR] Extracting with: tar \(extractArgs.joined(separator: " "))")

    let extractProcess = Process()
    extractProcess.executableURL = URL(fileURLWithPath: "/usr/bin/tar")
    extractProcess.arguments = extractArgs
    // Nothing in this function reads tar's output. Attaching unread pipes would let
    // tar block once a pipe buffer fills (e.g. many warnings on stderr), which would
    // surface as an extraction timeout — so discard both streams instead.
    extractProcess.standardOutput = FileHandle.nullDevice
    extractProcess.standardError = FileHandle.nullDevice
    extractProcess.standardInput = FileHandle.nullDevice

    do {
        try extractProcess.run()
    } catch {
        log.error("[TAR] Extract launch failed: \(error)")
        return ArchiveSearchDelta()
    }

    // Wait with timeout
    let completed = await waitForProcess(extractProcess, timeout: tarTimeout)
    guard completed else {
        log.warning("[TAR] Extract timeout for \(archiveURL.lastPathComponent)")
        return ArchiveSearchDelta()
    }

    guard extractProcess.terminationStatus == 0 else {
        log.warning("[TAR] Extract failed (exit \(extractProcess.terminationStatus)): \(archiveURL.lastPathComponent)")
        return ArchiveSearchDelta()
    }

    log.info("[TAR] Extraction complete, scanning temp dir...")

    // Now scan extracted files directly from filesystem
    return await scanExtractedTarContents(
        tempDir: tempDir, originalArchiveURL: archiveURL,
        criteria: criteria, nameRegex: nameRegex, continuation: continuation
    )
}
765+
766+
/// Walks the extracted archive tree under `tempDir` and yields a result for every
/// regular file whose name satisfies the search criteria.
///
/// Hidden entries are not reported because the enumerator is created with
/// `.skipsHiddenFiles`.
///
/// - Parameters:
///   - tempDir: Directory the archive was extracted into.
///   - originalArchiveURL: The archive the files came from; result URLs are built
///     by appending each file's archive-relative path to it.
///   - criteria: Search criteria passed to `FindFilesNameMatcher`.
///   - nameRegex: Optional file-name pattern passed to `FindFilesNameMatcher`.
///   - continuation: Stream that receives every matching result.
/// - Returns: A delta whose `matchesFound` counts the yielded results. The scan
///   stops early, returning the partial delta, when the task is cancelled.
@concurrent private static func scanExtractedTarContents(
    tempDir: URL,
    originalArchiveURL: URL,
    criteria: FindFilesCriteria,
    nameRegex: NSRegularExpression?,
    continuation: AsyncStream<FindFilesResult>.Continuation
) async -> ArchiveSearchDelta {
    var delta = ArchiveSearchDelta()
    let fm = FileManager.default
    let resourceKeys: [URLResourceKey] = [.isRegularFileKey, .fileSizeKey, .contentModificationDateKey]

    guard let enumerator = fm.enumerator(
        at: tempDir,
        includingPropertiesForKeys: resourceKeys,
        options: [.skipsHiddenFiles]
    ) else {
        log.error("[TAR] Failed to enumerate temp dir")
        return delta
    }

    // Collect all file URLs first (enumerator is not async-safe)
    var fileURLs: [URL] = []
    while let fileURL = enumerator.nextObject() as? URL {
        fileURLs.append(fileURL)
    }

    // The enumerator may report symlink-resolved paths (e.g. "/private/var/…" for a
    // root given as "/var/…"), so accept both spellings of the extraction root.
    let extractionRoots = [tempDir.path, tempDir.resolvingSymlinksInPath().path]

    var fileCount = 0
    for fileURL in fileURLs {
        guard !Task.isCancelled else { return delta }

        let resourceValues = try? fileURL.resourceValues(forKeys: Set(resourceKeys))
        guard resourceValues?.isRegularFile == true else { continue }

        fileCount += 1
        let fileName = fileURL.lastPathComponent

        guard FindFilesNameMatcher.matches(fileName: fileName, regex: nameRegex, criteria: criteria) else {
            continue
        }

        // Path inside the archive. Fall back to the bare file name rather than
        // leaking an absolute temp path into the virtual archive URL.
        let relativePath = archiveRelativePath(of: fileURL, extractionRoots: extractionRoots) ?? fileName

        let virtualURL = originalArchiveURL.appendingPathComponent(relativePath)
        let result = FindFilesResult(
            fileURL: virtualURL,
            isInsideArchive: true,
            archivePath: originalArchiveURL.path,
            knownSize: Int64(resourceValues?.fileSize ?? 0),
            knownDate: resourceValues?.contentModificationDate
        )
        continuation.yield(result)
        delta.matchesFound += 1
    }

    log.info("[TAR] Scanned \(fileCount) files, found \(delta.matchesFound) matches")
    return delta
}

/// Returns `fileURL`'s path relative to whichever extraction root is a directory
/// prefix of it, trying the path as given first and its symlink-resolved form
/// second; `nil` when no root matches.
private static func archiveRelativePath(of fileURL: URL, extractionRoots: [String]) -> String? {
    func stripRoot(from path: String) -> String? {
        for root in extractionRoots {
            let prefix = root.hasSuffix("/") ? root : root + "/"
            // Anchor to the start of the path; never touch later occurrences.
            if path.hasPrefix(prefix) {
                return String(path.dropFirst(prefix.count))
            }
        }
        return nil
    }
    return stripRoot(from: fileURL.path)
        ?? stripRoot(from: fileURL.resolvingSymlinksInPath().path)
}
822+
823+
/// List pure .tar file (no compression) using tar -tvf
824+
@concurrent private static func listPureTar(
825+
archiveURL: URL,
826+
originalArchiveURL: URL,
827+
criteria: FindFilesCriteria,
828+
nameRegex: NSRegularExpression?,
829+
continuation: AsyncStream<FindFilesResult>.Continuation
830+
) async -> ArchiveSearchDelta {
831+
var delta = ArchiveSearchDelta()
683832

684833
let listProcess = Process()
685834
listProcess.executableURL = URL(fileURLWithPath: "/usr/bin/tar")
686-
listProcess.arguments = args
835+
listProcess.arguments = ["-tvf", archiveURL.path]
687836
let pipe = Pipe()
688837
listProcess.standardOutput = pipe
689838
listProcess.standardError = Pipe()
@@ -692,55 +841,36 @@ enum FindFilesArchiveSearcher {
692841
do {
693842
try listProcess.run()
694843
} catch {
695-
log.error("[ArchiveSearcher] tar launch failed: \(archiveURL.lastPathComponent) \(error)")
844+
log.error("[TAR] List launch failed: \(error)")
696845
return delta
697846
}
698847

699-
// Wait with timeout to prevent hanging on corrupted archives
700-
let completed = await withCheckedContinuation { cont in
701-
DispatchQueue.global().async {
702-
let deadline = DispatchTime.now() + tarTimeout
703-
while listProcess.isRunning {
704-
if DispatchTime.now() >= deadline {
705-
log.warning("[ArchiveSearcher] tar timeout: \(archiveURL.lastPathComponent)")
706-
kill(listProcess.processIdentifier, SIGKILL)
707-
cont.resume(returning: false)
708-
return
709-
}
710-
Thread.sleep(forTimeInterval: 0.1)
711-
}
712-
cont.resume(returning: true)
713-
}
848+
let completed = await waitForProcess(listProcess, timeout: tarTimeout)
849+
guard completed else {
850+
log.warning("[TAR] List timeout for \(archiveURL.lastPathComponent)")
851+
return delta
714852
}
715853

716-
guard completed else { return delta }
717-
718854
guard listProcess.terminationStatus == 0 else {
719-
log.warning("[ArchiveSearcher] tar exit \(listProcess.terminationStatus): \(archiveURL.lastPathComponent)")
855+
log.warning("[TAR] List failed (exit \(listProcess.terminationStatus))")
720856
return delta
721857
}
722858

723859
let data = pipe.fileHandleForReading.readDataToEndOfFile()
724860
guard let listing = String(data: data, encoding: .utf8) else { return delta }
725861

726862
let lines = listing.components(separatedBy: .newlines)
727-
728863
for line in lines {
729864
guard !Task.isCancelled else { return delta }
730-
// Parse verbose tar output: -rw-r--r-- 0 user staff 1234 Feb 15 10:30 2025 path/to/file.txt
731-
// or: -rw-r--r-- user/staff 1234 2025-02-15 10:30 path/to/file.txt (GNU tar)
732-
let parsed = parseTarVerboseLine(line)
733-
guard let entry = parsed else { continue }
734-
guard !entry.isDirectory else { continue }
865+
guard let entry = parseTarVerboseLine(line), !entry.isDirectory else { continue }
735866

736867
let fileName = (entry.name as NSString).lastPathComponent
737-
738868
if FindFilesNameMatcher.matches(fileName: fileName, regex: nameRegex, criteria: criteria) {
739-
let virtualURL = archiveURL.appendingPathComponent(entry.name)
869+
let virtualURL = originalArchiveURL.appendingPathComponent(entry.name)
740870
let result = FindFilesResult(
741871
fileURL: virtualURL,
742872
isInsideArchive: true,
743-
archivePath: archiveURL.path,
873+
archivePath: originalArchiveURL.path,
744874
knownSize: entry.size,
745875
knownDate: entry.modificationDate
746876
)
@@ -751,6 +881,24 @@ enum FindFilesArchiveSearcher {
751881
return delta
752882
}
753883

884+
/// Polls `process` on a global dispatch queue until it stops running or `timeout`
/// seconds have elapsed.
///
/// When the deadline passes while the process is still running, the process is
/// sent `SIGKILL` and the function reports failure without waiting further.
///
/// - Parameters:
///   - process: A process that has already been launched.
///   - timeout: Maximum time to wait, in seconds.
/// - Returns: `true` if the process stopped before the deadline, `false` if it was killed.
private static func waitForProcess(_ process: Process, timeout: TimeInterval) async -> Bool {
    await withCheckedContinuation { continuation in
        DispatchQueue.global().async {
            let giveUpAt = DispatchTime.now() + timeout
            var finishedInTime = true

            while process.isRunning {
                guard DispatchTime.now() < giveUpAt else {
                    kill(process.processIdentifier, SIGKILL)
                    finishedInTime = false
                    break
                }
                // Poll interval between liveness checks.
                Thread.sleep(forTimeInterval: 0.1)
            }

            // Single resume point: the continuation is resumed exactly once.
            continuation.resume(returning: finishedInTime)
        }
    }
}
901+
754902
// MARK: - Parse TAR Verbose Line
755903

756904
/// Parsed entry from tar -tv output

0 commit comments

Comments
 (0)