Skip to content

Commit aa683b0

Browse files
committed
Test Unicode paths
1 parent 3c5b11e commit aa683b0

File tree

2 files changed

+86
-26
lines changed

2 files changed

+86
-26
lines changed

test/Codec/Archive/Tar/Pack/Tests.hs

Lines changed: 85 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13,24 +13,39 @@ module Codec.Archive.Tar.Pack.Tests
1313
import Control.DeepSeq
1414
import qualified Data.ByteString as B
1515
import qualified Data.ByteString.Lazy as BL
16+
import Data.Char
1617
import Data.FileEmbed
1718
import qualified Codec.Archive.Tar as Tar
1819
import qualified Codec.Archive.Tar.Pack as Pack
19-
import Codec.Archive.Tar.Types (GenEntries(..), Entries, simpleEntry, toTarPath)
20+
import Codec.Archive.Tar.Types (GenEntries(..), Entries, simpleEntry, toTarPath, GenEntry (entryTarPath))
2021
import qualified Codec.Archive.Tar.Unpack as Unpack
2122
import qualified Codec.Archive.Tar.Write as Write
2223
import Control.Exception
2324
import Data.List.NonEmpty (NonEmpty(..))
25+
import GHC.IO.Encoding
2426
import System.Directory
2527
import System.FilePath
28+
import qualified System.FilePath.Posix as Posix
2629
import qualified System.Info
2730
import System.IO.Temp
31+
import System.IO.Unsafe
2832
import Test.Tasty.QuickCheck
2933

34+
supportsUnicode :: Bool
35+
supportsUnicode = unsafePerformIO $ do
36+
-- Normally getFileSystemEncoding returns a Unicode encoding,
37+
-- but if it is ASCII, we should not generate Unicode filenames.
38+
enc <- getFileSystemEncoding
39+
pure $ case textEncodingName enc of
40+
"ASCII" -> False
41+
"ANSI_X3.4-1968" -> False
42+
_ -> True
43+
{-# NOINLINE supportsUnicode #-}
44+
3045
-- | Write a single file, deeply buried within nested folders;
3146
-- pack and unpack; read back and compare results.
32-
prop_roundtrip :: [ASCIIString] -> ASCIIString -> Property
33-
prop_roundtrip xss (ASCIIString cnt)
47+
prop_roundtrip :: [String] -> String -> Property
48+
prop_roundtrip xss cnt
3449
| x : xs <- filter (not . null) $ map mkFilePath xss
3550
= ioProperty $ withSystemTempDirectory "tar-test" $ \baseDir -> do
3651
file : dirs <- pure $ trimUpToMaxPathLength baseDir (x : xs)
@@ -39,38 +54,82 @@ prop_roundtrip xss (ASCIIString cnt)
3954
absDir = baseDir </> relDir
4055
relFile = relDir </> file
4156
absFile = absDir </> file
42-
createDirectoryIfMissing True absDir
43-
writeFile absFile cnt
44-
-- Forcing the result, otherwise lazy IO misbehaves.
45-
!entries <- Pack.pack baseDir [relFile] >>= evaluate . force
46-
47-
-- Try hard to clean up
48-
removeFile absFile
49-
writeFile absFile "<should be overwritten>"
50-
case dirs of
51-
[] -> pure ()
52-
d : _ -> removeDirectoryRecursive (baseDir </> d)
53-
54-
-- Unpack back
55-
Unpack.unpack baseDir (foldr Next Done entries :: Entries IOException)
56-
cnt' <- readFile absFile
57-
pure $ cnt === cnt'
57+
errMsg = "relDir = " ++ relDir ++
58+
"\nabsDir = " ++ absDir ++
59+
"\nrelFile = " ++ relFile ++
60+
"\nabsFile = " ++ absFile
61+
62+
-- Not all filesystems allow paths to contain arbitrary Unicode.
63+
-- E. g., at the moment of writing Apple FS does not support characters
64+
-- introduced in Unicode 15.0.
65+
canCreateDirectory <- try (createDirectoryIfMissing True absDir)
66+
case canCreateDirectory of
67+
Left (e :: IOException) -> discard
68+
Right () -> do
69+
canWriteFile <- try (writeFile absFile cnt)
70+
case canWriteFile of
71+
Left (e :: IOException) -> discard
72+
Right () -> counterexample errMsg <$> do
73+
74+
-- Forcing the result, otherwise lazy IO misbehaves.
75+
!entries <- Pack.pack baseDir [relFile] >>= evaluate . force
76+
77+
let fileNames
78+
= map (map (\c -> if c == Posix.pathSeparator then pathSeparator else c))
79+
$ Tar.foldEntries ((:) . entryTarPath) [] undefined
80+
-- decodeLongNames produces FilePath with POSIX path separators
81+
$ Tar.decodeLongNames $ foldr Next Done entries
82+
83+
if [relFile] /= fileNames then pure ([relFile] === fileNames) else do
84+
85+
-- Try hard to clean up
86+
removeFile absFile
87+
writeFile absFile "<should be overwritten>"
88+
case dirs of
89+
[] -> pure ()
90+
d : _ -> removeDirectoryRecursive (baseDir </> d)
91+
92+
-- Unpack back
93+
Unpack.unpack baseDir (foldr Next Done entries :: Entries IOException)
94+
exist <- doesFileExist absFile
95+
if exist then do
96+
cnt' <- readFile absFile >>= evaluate . force
97+
pure $ cnt === cnt'
98+
else do
99+
-- Forcing the result, otherwise lazy IO misbehaves.
100+
recFiles <- Pack.getDirectoryContentsRecursive baseDir >>= evaluate . force
101+
pure $ counterexample ("File " ++ absFile ++ " does not exist; instead found\n" ++ unlines recFiles) False
58102

59103
| otherwise = discard
60104

61-
mkFilePath :: ASCIIString -> FilePath
62-
mkFilePath (ASCIIString xs) = makeValid $
63-
filter (\c -> not $ isPathSeparator c || c `elem` [' ', '.', ':']) xs
105+
mkFilePath :: String -> FilePath
106+
mkFilePath xs = makeValid $ filter isGood $
107+
map (if supportsUnicode then id else chr . (`mod` 128) . ord) xs
108+
where
109+
isGood c
110+
= not (isPathSeparator c)
111+
&& c `notElem` [' ', '\n', '\r', '.', ':']
112+
&& generalCategory c /= Surrogate
113+
&& (supportsUnicode || isAscii c)
64114

65115
trimUpToMaxPathLength :: FilePath -> [FilePath] -> [FilePath]
66-
trimUpToMaxPathLength baseDir = go (maxPathLength - length baseDir - 1)
116+
trimUpToMaxPathLength baseDir = go (maxPathLength - utf8Length baseDir - 1)
67117
where
68118
go :: Int -> [FilePath] -> [FilePath]
69119
go cnt [] = []
70120
go cnt (x : xs)
71-
| cnt <= 0 = []
72-
| cnt <= length x = [take cnt x]
73-
| otherwise = x : go (cnt - length x - 1) xs
121+
| cnt < 4 = []
122+
| cnt <= utf8Length x = [take (cnt `quot` 4) x]
123+
| otherwise = x : go (cnt - utf8Length x - 1) xs
124+
125+
utf8Length :: String -> Int
126+
utf8Length = sum . map charLength
127+
where
128+
charLength c
129+
| c < chr 0x80 = 1
130+
| c < chr 0x800 = 2
131+
| c < chr 0x10000 = 3
132+
| otherwise = 4
74133

75134
maxPathLength :: Int
76135
maxPathLength = case System.Info.os of

test/Properties.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ main =
6464
]
6565

6666
, testGroup "pack" [
67+
adjustOption (\(QuickCheckMaxRatio n) -> QuickCheckMaxRatio (max n 100)) $
6768
testProperty "roundtrip" Pack.prop_roundtrip,
6869
testProperty "symlink" Pack.unit_roundtrip_symlink,
6970
testProperty "long filepath" Pack.unit_roundtrip_long_filepath,

0 commit comments

Comments
 (0)