@@ -660,6 +660,11 @@ enum FindFilesArchiveSearcher {
660660
661661 // MARK: - TAR Search (CLI)
662662
/// Returns `true` when `ext` is one of the extensions this searcher treats as a
/// compressed tar container (gzip, bzip2, xz, lzma and compress variants).
///
/// The comparison is an exact, case-sensitive match, so callers are expected to
/// pass an already lower-cased extension without a leading dot.
///
/// - Parameter ext: File extension to classify.
/// - Returns: `true` if the extension is in the compound-archive list.
private static func isCompoundTarArchive(_ ext: String) -> Bool {
    switch ext {
    case "tgz", "gz", "gzip",              // gzip family
         "bz2", "bzip2", "tbz", "tbz2",    // bzip2 family
         "xz", "txz",                      // xz family
         "lzma", "tlz",                    // lzma family
         "z":                              // compress
        return true
    default:
        return false
    }
}
667+
663668 @concurrent static func searchInsideTar(
664669 archiveURL: URL ,
665670 criteria: FindFilesCriteria ,
@@ -668,22 +673,166 @@ enum FindFilesArchiveSearcher {
668673 continuation: AsyncStream < FindFilesResult > . Continuation ,
669674 recursionDepth: Int = 0
670675 ) async -> ArchiveSearchDelta {
671- var delta = ArchiveSearchDelta ( )
672676 let ext = archiveURL. pathExtension. lowercased ( )
673- // Use -tvf for verbose output with size and date
674- var args = [ " -tvf " ]
677+ log. info ( " [TAR] Starting search in \( archiveURL. lastPathComponent) (ext= \( ext) ) " )
678+
679+ // For compound archives (tgz, tbz2, txz etc.) — extract to temp first, then list pure .tar
680+ // This is more reliable than piped decompression which can hang
681+ if isCompoundTarArchive ( ext) {
682+ log. info ( " [TAR] Compound archive detected, extracting to temp... " )
683+ return await searchInsideCompoundTar (
684+ archiveURL: archiveURL, criteria: criteria, nameRegex: nameRegex,
685+ contentPattern: contentPattern, continuation: continuation,
686+ recursionDepth: recursionDepth
687+ )
688+ }
689+
690+ // Pure .tar — list directly
691+ return await listPureTar (
692+ archiveURL: archiveURL, originalArchiveURL: archiveURL,
693+ criteria: criteria, nameRegex: nameRegex, continuation: continuation
694+ )
695+ }
696+
/// Searches a compressed tar archive (tgz/tbz2/txz, ...) by extracting it into a
/// fresh temporary directory and then scanning the extracted files on disk.
///
/// Steps:
/// 1. Create a unique `MiMiNav_tar_<UUID>` directory under the system temp
///    directory and hand it to `registerTempDir`.
/// 2. Run `/usr/bin/tar -xf <archive> -C <tempDir>` with the decompression flag
///    matching the archive extension.
/// 3. Wait for tar with `tarTimeout`; give up on timeout or non-zero exit.
/// 4. Delegate to `scanExtractedTarContents` to match the extracted files.
///
/// - Parameters:
///   - archiveURL: Location of the compressed archive on disk.
///   - criteria: Search criteria forwarded to the name matcher.
///   - nameRegex: Optional pre-compiled name pattern forwarded to the name matcher.
///   - contentPattern: Accepted for signature parity with the other searchers;
///     not used by this function.
///   - continuation: Stream continuation that receives every match.
///   - recursionDepth: Accepted for signature parity; not used by this function.
/// - Returns: The delta produced by scanning the extracted files, or an empty
///   delta if any step fails.
@concurrent private static func searchInsideCompoundTar(
    archiveURL: URL,
    criteria: FindFilesCriteria,
    nameRegex: NSRegularExpression?,
    contentPattern: NSRegularExpression?,
    continuation: AsyncStream<FindFilesResult>.Continuation,
    recursionDepth: Int
) async -> ArchiveSearchDelta {
    let tempDir = FileManager.default.temporaryDirectory
        .appendingPathComponent("MiMiNav_tar_\(UUID().uuidString)", isDirectory: true)

    do {
        try FileManager.default.createDirectory(at: tempDir, withIntermediateDirectories: true)
        // NOTE(review): presumably registerTempDir schedules the directory for later
        // cleanup; nothing in this function removes it — confirm.
        registerTempDir(tempDir)
        log.info("[TAR] Created temp dir: \(tempDir.path)")
    } catch {
        log.error("[TAR] Failed to create temp dir: \(error)")
        return ArchiveSearchDelta()
    }

    // Extract archive to temp directory using tar -xf
    let ext = archiveURL.pathExtension.lowercased()
    var extractArgs = ["-xf", archiveURL.path, "-C", tempDir.path]
    switch ext {
    case "gz", "gzip", "tgz": extractArgs.insert("-z", at: 0)
    case "bz2", "bzip2", "tbz", "tbz2": extractArgs.insert("-j", at: 0)
    case "xz", "txz": extractArgs.insert("-J", at: 0)
    case "z": extractArgs.insert("-Z", at: 0)
    default: break
    }

    log.info("[TAR] Extracting with: tar \(extractArgs.joined(separator: " "))")

    let extractProcess = Process()
    extractProcess.executableURL = URL(fileURLWithPath: "/usr/bin/tar")
    extractProcess.arguments = extractArgs
    // The output of the extraction is never read. Attaching unread pipes would let
    // tar block forever once the pipe buffer fills (e.g. many warnings on stderr),
    // turning a noisy archive into a timeout. Discard the output instead.
    extractProcess.standardOutput = FileHandle.nullDevice
    extractProcess.standardError = FileHandle.nullDevice
    extractProcess.standardInput = FileHandle.nullDevice

    do {
        try extractProcess.run()
    } catch {
        log.error("[TAR] Extract launch failed: \(error)")
        return ArchiveSearchDelta()
    }

    // Wait with timeout
    let completed = await waitForProcess(extractProcess, timeout: tarTimeout)
    guard completed else {
        log.warning("[TAR] Extract timeout for \(archiveURL.lastPathComponent)")
        return ArchiveSearchDelta()
    }

    guard extractProcess.terminationStatus == 0 else {
        log.warning("[TAR] Extract failed (exit \(extractProcess.terminationStatus)): \(archiveURL.lastPathComponent)")
        return ArchiveSearchDelta()
    }

    log.info("[TAR] Extraction complete, scanning temp dir...")

    // Now scan extracted files directly from filesystem
    return await scanExtractedTarContents(
        tempDir: tempDir, originalArchiveURL: archiveURL,
        criteria: criteria, nameRegex: nameRegex, continuation: continuation
    )
}
765+
/// Walks the directory an archive was extracted into and reports every regular
/// file whose name matches the search criteria.
///
/// Each match is yielded as a `FindFilesResult` whose URL is the original
/// archive URL with the file's archive-relative path appended, so results point
/// into the archive rather than into the temporary directory.
///
/// Hidden (dot-prefixed) entries are included, so compressed archives report the
/// same entries as the listing-based pure `.tar` search.
///
/// - Parameters:
///   - tempDir: Root of the extracted archive contents.
///   - originalArchiveURL: Archive the contents came from; used for result URLs.
///   - criteria: Search criteria forwarded to the name matcher.
///   - nameRegex: Optional pre-compiled name pattern forwarded to the name matcher.
///   - continuation: Stream continuation that receives every match.
/// - Returns: A delta whose `matchesFound` counts the yielded results. Returns
///   early with the partial delta if the task is cancelled.
@concurrent private static func scanExtractedTarContents(
    tempDir: URL,
    originalArchiveURL: URL,
    criteria: FindFilesCriteria,
    nameRegex: NSRegularExpression?,
    continuation: AsyncStream<FindFilesResult>.Continuation
) async -> ArchiveSearchDelta {
    var delta = ArchiveSearchDelta()
    let resourceKeys: [URLResourceKey] = [.isRegularFileKey, .fileSizeKey, .contentModificationDateKey]

    guard let enumerator = FileManager.default.enumerator(
        at: tempDir,
        includingPropertiesForKeys: resourceKeys,
        options: []
    ) else {
        log.error("[TAR] Failed to enumerate temp dir")
        return delta
    }

    // Collect all file URLs first (enumerator is not async-safe)
    var fileURLs: [URL] = []
    while let fileURL = enumerator.nextObject() as? URL {
        fileURLs.append(fileURL)
    }

    // The enumerator may hand back URLs that spell the temp root differently from
    // `tempDir` (e.g. /private/var vs /var), so compare symlink-resolved paths and
    // strip the root only when it is a real prefix.
    let resolvedRoot = tempDir.resolvingSymlinksInPath().path
    let rootPrefix = resolvedRoot.hasSuffix("/") ? resolvedRoot : resolvedRoot + "/"

    var fileCount = 0
    for fileURL in fileURLs {
        guard !Task.isCancelled else { return delta }

        let resourceValues = try? fileURL.resourceValues(forKeys: Set(resourceKeys))
        guard resourceValues?.isRegularFile == true else { continue }

        fileCount += 1
        let fileName = fileURL.lastPathComponent
        guard FindFilesNameMatcher.matches(fileName: fileName, regex: nameRegex, criteria: criteria) else {
            continue
        }

        // Path of the file relative to the extraction root; falls back to the bare
        // file name if the root prefix unexpectedly does not match.
        let resolvedPath = fileURL.resolvingSymlinksInPath().path
        let relativePath = resolvedPath.hasPrefix(rootPrefix)
            ? String(resolvedPath.dropFirst(rootPrefix.count))
            : fileName

        let virtualURL = originalArchiveURL.appendingPathComponent(relativePath)
        let result = FindFilesResult(
            fileURL: virtualURL,
            isInsideArchive: true,
            archivePath: originalArchiveURL.path,
            knownSize: Int64(resourceValues?.fileSize ?? 0),
            knownDate: resourceValues?.contentModificationDate
        )
        continuation.yield(result)
        delta.matchesFound += 1
    }

    log.info("[TAR] Scanned \(fileCount) files, found \(delta.matchesFound) matches")
    return delta
}
822+
823+ /// List pure .tar file (no compression) using tar -tvf
824+ @concurrent private static func listPureTar(
825+ archiveURL: URL ,
826+ originalArchiveURL: URL ,
827+ criteria: FindFilesCriteria ,
828+ nameRegex: NSRegularExpression ? ,
829+ continuation: AsyncStream < FindFilesResult > . Continuation
830+ ) async -> ArchiveSearchDelta {
831+ var delta = ArchiveSearchDelta ( )
683832
684833 let listProcess = Process ( )
685834 listProcess. executableURL = URL ( fileURLWithPath: " /usr/bin/tar " )
686- listProcess. arguments = args
835+ listProcess. arguments = [ " -tvf " , archiveURL . path ]
687836 let pipe = Pipe ( )
688837 listProcess. standardOutput = pipe
689838 listProcess. standardError = Pipe ( )
@@ -692,55 +841,36 @@ enum FindFilesArchiveSearcher {
692841 do {
693842 try listProcess. run ( )
694843 } catch {
695- log. error ( " [ArchiveSearcher] tar launch failed: \( archiveURL . lastPathComponent ) — \( error) " )
844+ log. error ( " [TAR] List launch failed: \( error) " )
696845 return delta
697846 }
698847
699- // Wait with timeout to prevent hanging on corrupted archives
700- let completed = await withCheckedContinuation { cont in
701- DispatchQueue . global ( ) . async {
702- let deadline = DispatchTime . now ( ) + tarTimeout
703- while listProcess. isRunning {
704- if DispatchTime . now ( ) >= deadline {
705- log. warning ( " [ArchiveSearcher] tar timeout: \( archiveURL. lastPathComponent) " )
706- kill ( listProcess. processIdentifier, SIGKILL)
707- cont. resume ( returning: false )
708- return
709- }
710- Thread . sleep ( forTimeInterval: 0.1 )
711- }
712- cont. resume ( returning: true )
713- }
848+ let completed = await waitForProcess ( listProcess, timeout: tarTimeout)
849+ guard completed else {
850+ log. warning ( " [TAR] List timeout for \( archiveURL. lastPathComponent) " )
851+ return delta
714852 }
715853
716- guard completed else { return delta }
717-
718854 guard listProcess. terminationStatus == 0 else {
719- log. warning ( " [ArchiveSearcher] tar exit \( listProcess. terminationStatus) : \( archiveURL . lastPathComponent ) " )
855+ log. warning ( " [TAR] List failed ( exit \( listProcess. terminationStatus) ) " )
720856 return delta
721857 }
722858
723859 let data = pipe. fileHandleForReading. readDataToEndOfFile ( )
724860 guard let listing = String ( data: data, encoding: . utf8) else { return delta }
725861
726862 let lines = listing. components ( separatedBy: . newlines)
727-
728863 for line in lines {
729864 guard !Task. isCancelled else { return delta }
730- // Parse verbose tar output: -rw-r--r-- 0 user staff 1234 Feb 15 10:30 2025 path/to/file.txt
731- // or: -rw-r--r-- user/staff 1234 2025-02-15 10:30 path/to/file.txt (GNU tar)
732- let parsed = parseTarVerboseLine ( line)
733- guard let entry = parsed else { continue }
734- guard !entry. isDirectory else { continue }
865+ guard let entry = parseTarVerboseLine ( line) , !entry. isDirectory else { continue }
735866
736867 let fileName = ( entry. name as NSString ) . lastPathComponent
737-
738868 if FindFilesNameMatcher . matches ( fileName: fileName, regex: nameRegex, criteria: criteria) {
739- let virtualURL = archiveURL . appendingPathComponent ( entry. name)
869+ let virtualURL = originalArchiveURL . appendingPathComponent ( entry. name)
740870 let result = FindFilesResult (
741871 fileURL: virtualURL,
742872 isInsideArchive: true ,
743- archivePath: archiveURL . path,
873+ archivePath: originalArchiveURL . path,
744874 knownSize: entry. size,
745875 knownDate: entry. modificationDate
746876 )
@@ -751,6 +881,24 @@ enum FindFilesArchiveSearcher {
751881 return delta
752882 }
753883
/// Waits for `process` to stop running, giving up after `timeout` seconds.
///
/// The wait is a poll loop on a global dispatch queue that checks
/// `process.isRunning` every 0.1 seconds. If the deadline passes while the
/// process is still running, it is sent `SIGKILL`; the function does not wait
/// for the killed process to actually exit before returning.
///
/// - Parameters:
///   - process: An already launched process.
///   - timeout: Maximum time to wait, in seconds.
/// - Returns: `true` if the process stopped on its own before the deadline,
///   `false` if the deadline was hit and the process was killed.
private static func waitForProcess(_ process: Process, timeout: TimeInterval) async -> Bool {
    await withCheckedContinuation { cont in
        DispatchQueue.global().async {
            let deadline = DispatchTime.now() + timeout
            var finishedInTime = true

            while process.isRunning {
                guard DispatchTime.now() < deadline else {
                    // Deadline reached: force-terminate and report the timeout.
                    kill(process.processIdentifier, SIGKILL)
                    finishedInTime = false
                    break
                }
                Thread.sleep(forTimeInterval: 0.1)
            }

            // Single resume point keeps the continuation resumed exactly once.
            cont.resume(returning: finishedInTime)
        }
    }
}
901+
754902 // MARK: - Parse TAR Verbose Line
755903
756904 /// Parsed entry from tar -tv output
0 commit comments