@@ -13,24 +13,39 @@ module Codec.Archive.Tar.Pack.Tests
13
13
import Control.DeepSeq
14
14
import qualified Data.ByteString as B
15
15
import qualified Data.ByteString.Lazy as BL
16
+ import Data.Char
16
17
import Data.FileEmbed
17
18
import qualified Codec.Archive.Tar as Tar
18
19
import qualified Codec.Archive.Tar.Pack as Pack
19
- import Codec.Archive.Tar.Types (GenEntries (.. ), Entries , simpleEntry , toTarPath )
20
+ import Codec.Archive.Tar.Types (GenEntries (.. ), Entries , simpleEntry , toTarPath , GenEntry ( entryTarPath ) )
20
21
import qualified Codec.Archive.Tar.Unpack as Unpack
21
22
import qualified Codec.Archive.Tar.Write as Write
22
23
import Control.Exception
23
24
import Data.List.NonEmpty (NonEmpty (.. ))
25
+ import GHC.IO.Encoding
24
26
import System.Directory
25
27
import System.FilePath
28
+ import qualified System.FilePath.Posix as Posix
26
29
import qualified System.Info
27
30
import System.IO.Temp
31
+ import System.IO.Unsafe
28
32
import Test.Tasty.QuickCheck
29
33
34
+ supportsUnicode :: Bool
35
+ supportsUnicode = unsafePerformIO $ do
36
+ -- Normally getFileSystemEncoding returns a Unicode encoding,
37
+ -- but if it is ASCII, we should not generate Unicode filenames.
38
+ enc <- getFileSystemEncoding
39
+ pure $ case textEncodingName enc of
40
+ " ASCII" -> False
41
+ " ANSI_X3.4-1968" -> False
42
+ _ -> True
43
+ {-# NOINLINE supportsUnicode #-}
44
+
30
45
-- | Write a single file, deeply buried within nested folders;
31
46
-- pack and unpack; read back and compare results.
32
- prop_roundtrip :: [ASCIIString ] -> ASCIIString -> Property
33
- prop_roundtrip xss ( ASCIIString cnt)
47
+ prop_roundtrip :: [String ] -> String -> Property
48
+ prop_roundtrip xss cnt
34
49
| x : xs <- filter (not . null ) $ map mkFilePath xss
35
50
= ioProperty $ withSystemTempDirectory " tar-test" $ \ baseDir -> do
36
51
file : dirs <- pure $ trimUpToMaxPathLength baseDir (x : xs)
@@ -39,38 +54,82 @@ prop_roundtrip xss (ASCIIString cnt)
39
54
absDir = baseDir </> relDir
40
55
relFile = relDir </> file
41
56
absFile = absDir </> file
42
- createDirectoryIfMissing True absDir
43
- writeFile absFile cnt
44
- -- Forcing the result, otherwise lazy IO misbehaves.
45
- ! entries <- Pack. pack baseDir [relFile] >>= evaluate . force
46
-
47
- -- Try hard to clean up
48
- removeFile absFile
49
- writeFile absFile " <should be overwritten>"
50
- case dirs of
51
- [] -> pure ()
52
- d : _ -> removeDirectoryRecursive (baseDir </> d)
53
-
54
- -- Unpack back
55
- Unpack. unpack baseDir (foldr Next Done entries :: Entries IOException )
56
- cnt' <- readFile absFile
57
- pure $ cnt === cnt'
57
+ errMsg = " relDir = " ++ relDir ++
58
+ " \n absDir = " ++ absDir ++
59
+ " \n relFile = " ++ relFile ++
60
+ " \n absFile = " ++ absFile
61
+
62
+ -- Not all filesystems allow paths to contain arbitrary Unicode.
63
+ -- E. g., at the moment of writing Apple FS does not support characters
64
+ -- introduced in Unicode 15.0.
65
+ canCreateDirectory <- try (createDirectoryIfMissing True absDir)
66
+ case canCreateDirectory of
67
+ Left (e :: IOException ) -> discard
68
+ Right () -> do
69
+ canWriteFile <- try (writeFile absFile cnt)
70
+ case canWriteFile of
71
+ Left (e :: IOException ) -> discard
72
+ Right () -> counterexample errMsg <$> do
73
+
74
+ -- Forcing the result, otherwise lazy IO misbehaves.
75
+ ! entries <- Pack. pack baseDir [relFile] >>= evaluate . force
76
+
77
+ let fileNames
78
+ = map (map (\ c -> if c == Posix. pathSeparator then pathSeparator else c))
79
+ $ Tar. foldEntries ((:) . entryTarPath) [] undefined
80
+ -- decodeLongNames produces FilePath with POSIX path separators
81
+ $ Tar. decodeLongNames $ foldr Next Done entries
82
+
83
+ if [relFile] /= fileNames then pure ([relFile] === fileNames) else do
84
+
85
+ -- Try hard to clean up
86
+ removeFile absFile
87
+ writeFile absFile " <should be overwritten>"
88
+ case dirs of
89
+ [] -> pure ()
90
+ d : _ -> removeDirectoryRecursive (baseDir </> d)
91
+
92
+ -- Unpack back
93
+ Unpack. unpack baseDir (foldr Next Done entries :: Entries IOException )
94
+ exist <- doesFileExist absFile
95
+ if exist then do
96
+ cnt' <- readFile absFile >>= evaluate . force
97
+ pure $ cnt === cnt'
98
+ else do
99
+ -- Forcing the result, otherwise lazy IO misbehaves.
100
+ recFiles <- Pack. getDirectoryContentsRecursive baseDir >>= evaluate . force
101
+ pure $ counterexample (" File " ++ absFile ++ " does not exist; instead found\n " ++ unlines recFiles) False
58
102
59
103
| otherwise = discard
60
104
61
- mkFilePath :: ASCIIString -> FilePath
62
- mkFilePath (ASCIIString xs) = makeValid $
63
- filter (\ c -> not $ isPathSeparator c || c `elem` [' ' , ' .' , ' :' ]) xs
105
+ mkFilePath :: String -> FilePath
106
+ mkFilePath xs = makeValid $ filter isGood $
107
+ map (if supportsUnicode then id else chr . (`mod` 128 ) . ord) xs
108
+ where
109
+ isGood c
110
+ = not (isPathSeparator c)
111
+ && c `notElem` [' ' , ' \n ' , ' \r ' , ' .' , ' :' ]
112
+ && generalCategory c /= Surrogate
113
+ && (supportsUnicode || isAscii c)
64
114
65
115
trimUpToMaxPathLength :: FilePath -> [FilePath ] -> [FilePath ]
66
- trimUpToMaxPathLength baseDir = go (maxPathLength - length baseDir - 1 )
116
+ trimUpToMaxPathLength baseDir = go (maxPathLength - utf8Length baseDir - 1 )
67
117
where
68
118
go :: Int -> [FilePath ] -> [FilePath ]
69
119
go cnt [] = []
70
120
go cnt (x : xs)
71
- | cnt <= 0 = []
72
- | cnt <= length x = [take cnt x]
73
- | otherwise = x : go (cnt - length x - 1 ) xs
121
+ | cnt < 4 = []
122
+ | cnt <= utf8Length x = [take (cnt `quot` 4 ) x]
123
+ | otherwise = x : go (cnt - utf8Length x - 1 ) xs
124
+
125
+ utf8Length :: String -> Int
126
+ utf8Length = sum . map charLength
127
+ where
128
+ charLength c
129
+ | c < chr 0x80 = 1
130
+ | c < chr 0x800 = 2
131
+ | c < chr 0x10000 = 3
132
+ | otherwise = 4
74
133
75
134
maxPathLength :: Int
76
135
maxPathLength = case System.Info. os of
0 commit comments