Skip to content

Commit f3675c2

Browse files
committed
Implement Unicode support by utilizing PosixString and friends
Fixes #78
1 parent b684654 commit f3675c2

File tree

27 files changed

+694
-623
lines changed

27 files changed

+694
-623
lines changed

.github/workflows/haskell-ci.yml

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -63,16 +63,6 @@ jobs:
6363
compilerVersion: 8.8.4
6464
setup-method: hvr-ppa
6565
allow-failure: false
66-
- compiler: ghc-8.6.5
67-
compilerKind: ghc
68-
compilerVersion: 8.6.5
69-
setup-method: hvr-ppa
70-
allow-failure: false
71-
- compiler: ghc-8.4.4
72-
compilerKind: ghc
73-
compilerVersion: 8.4.4
74-
setup-method: hvr-ppa
75-
allow-failure: false
7666
fail-fast: false
7767
steps:
7868
- name: apt
@@ -211,7 +201,7 @@ jobs:
211201
echo " ghc-options: -Werror=missing-methods" >> cabal.project
212202
cat >> cabal.project <<EOF
213203
EOF
214-
$HCPKG list --simple-output --names-only | perl -ne 'for (split /\s+/) { print "constraints: $_ installed\n" unless /^(bytestring|directory|htar|tar|unix)$/; }' >> cabal.project.local
204+
$HCPKG list --simple-output --names-only | perl -ne 'for (split /\s+/) { print "constraints: $_ installed\n" unless /^(bytestring|directory|htar|tar|unix|filepath)$/; }' >> cabal.project.local
215205
cat cabal.project
216206
cat cabal.project.local
217207
- name: dump install plan

Codec/Archive/Tar.hs

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,6 @@ module Codec.Archive.Tar (
164164
FormatError(..),
165165
) where
166166

167-
import Codec.Archive.Tar.Check
168167
import Codec.Archive.Tar.Entry
169168
import Codec.Archive.Tar.Index (hSeekEndEntryOffset)
170169
import Codec.Archive.Tar.LongNames (decodeLongNames, encodeLongNames, DecodeLongNamesError(..))
@@ -174,12 +173,13 @@ import Codec.Archive.Tar.Types (unfoldEntries, foldlEntries, foldEntries, mapEnt
174173
import Codec.Archive.Tar.Unpack (unpack, unpackAndCheck)
175174
import Codec.Archive.Tar.Write (write)
176175

177-
import Control.Applicative ((<|>))
178-
import Control.Exception (Exception, throw, catch, SomeException(..))
179176
import qualified Data.ByteString.Lazy as BL
180-
import System.IO (withFile, IOMode(..))
177+
import System.IO (IOMode(..))
181178
import Prelude hiding (read)
182179

180+
import System.OsPath (OsPath)
181+
import qualified System.File.OsPath as OSP
182+
183183
-- | Create a new @\".tar\"@ file from a directory of files.
184184
--
185185
-- It is equivalent to calling the standard @tar@ program like so:
@@ -213,11 +213,11 @@ import Prelude hiding (read)
213213
--
214214
-- * @rwxr-xr-x@ for directories
215215
--
216-
create :: FilePath -- ^ Path of the \".tar\" file to write.
217-
-> FilePath -- ^ Base directory
218-
-> [FilePath] -- ^ Files and directories to archive, relative to base dir
216+
create :: OsPath -- ^ Path of the \".tar\" file to write.
217+
-> OsPath -- ^ Base directory
218+
-> [OsPath] -- ^ Files and directories to archive, relative to base dir
219219
-> IO ()
220-
create tar base paths = BL.writeFile tar . write =<< pack base paths
220+
create tar base paths = OSP.writeFile tar . write =<< pack base paths
221221

222222
-- | Extract all the files contained in a @\".tar\"@ file.
223223
--
@@ -249,22 +249,22 @@ create tar base paths = BL.writeFile tar . write =<< pack base paths
249249
-- containing entries that point outside of the tarball (either absolute paths
250250
-- or relative paths) will be caught and an exception will be thrown.
251251
--
252-
extract :: FilePath -- ^ Destination directory
253-
-> FilePath -- ^ Tarball
252+
extract :: OsPath -- ^ Destination directory
253+
-> OsPath -- ^ Tarball
254254
-> IO ()
255-
extract dir tar = unpack dir . read =<< BL.readFile tar
255+
extract dir tar = unpack dir . read =<< OSP.readFile tar
256256

257257
-- | Append new entries to a @\".tar\"@ file from a directory of files.
258258
--
259259
-- This is much like 'create', except that all the entries are added to the
260260
-- end of an existing tar file. Or if the file does not already exists then
261261
-- it behaves the same as 'create'.
262262
--
263-
append :: FilePath -- ^ Path of the \".tar\" file to write.
264-
-> FilePath -- ^ Base directory
265-
-> [FilePath] -- ^ Files and directories to archive, relative to base dir
263+
append :: OsPath -- ^ Path of the \".tar\" file to write.
264+
-> OsPath -- ^ Base directory
265+
-> [OsPath] -- ^ Files and directories to archive, relative to base dir
266266
-> IO ()
267267
append tar base paths =
268-
withFile tar ReadWriteMode $ \hnd -> do
268+
OSP.withFile tar ReadWriteMode $ \hnd -> do
269269
_ <- hSeekEndEntryOffset hnd Nothing
270270
BL.hPut hnd . write =<< pack base paths

Codec/Archive/Tar/Check/Internal.hs

Lines changed: 87 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
{-# LANGUAGE ViewPatterns #-}
44
{-# LANGUAGE RankNTypes #-}
55
{-# LANGUAGE ScopedTypeVariables #-}
6+
{-# LANGUAGE QuasiQuotes #-}
7+
{-# LANGUAGE MultiWayIf #-}
68
{-# OPTIONS_GHC -Wno-orphans #-}
79
{-# OPTIONS_HADDOCK hide #-}
810
-----------------------------------------------------------------------------
@@ -40,15 +42,19 @@ module Codec.Archive.Tar.Check.Internal (
4042
import Codec.Archive.Tar.LongNames
4143
import Codec.Archive.Tar.Types
4244
import Control.Applicative ((<|>))
43-
import qualified Data.ByteString.Lazy.Char8 as Char8
44-
import Data.Maybe (fromMaybe)
4545
import Data.Typeable (Typeable)
4646
import Control.Exception (Exception(..))
47-
import qualified System.FilePath as FilePath.Native
48-
( splitDirectories, isAbsolute, isValid, (</>), takeDirectory, hasDrive )
4947

50-
import qualified System.FilePath.Windows as FilePath.Windows
51-
import qualified System.FilePath.Posix as FilePath.Posix
48+
import System.OsPath (OsPath)
49+
import System.OsPath.Posix (PosixPath)
50+
import qualified System.OsPath as OSP
51+
import qualified System.OsPath.Posix as PFP
52+
import qualified System.OsPath.Windows as WFP
53+
54+
import System.OsString.Posix (pstr)
55+
import System.OsString (osstr)
56+
import qualified System.OsString.Posix as PS
57+
import qualified System.OsString.Windows as WS
5258

5359

5460
--------------------------
@@ -78,57 +84,79 @@ import qualified System.FilePath.Posix as FilePath.Posix
7884
-- such as exhaustion of file handlers.
7985
checkSecurity
8086
:: Entries e
81-
-> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) FileNameError)
87+
-> GenEntries PosixPath PosixPath (Either (Either e DecodeLongNamesError) FileNameError)
8288
checkSecurity = checkEntries checkEntrySecurity . decodeLongNames
8389

8490
-- | Worker of 'Codec.Archive.Tar.Check.checkSecurity'.
8591
--
8692
-- @since 0.6.0.0
87-
checkEntrySecurity :: GenEntry FilePath FilePath -> Maybe FileNameError
93+
checkEntrySecurity :: GenEntry PosixPath PosixPath -> Maybe FileNameError
8894
checkEntrySecurity e =
8995
check (entryTarPath e) <|>
9096
case entryContent e of
9197
HardLink link ->
9298
check link
9399
SymbolicLink link ->
94-
check (FilePath.Posix.takeDirectory (entryTarPath e) FilePath.Posix.</> link)
100+
check (PFP.takeDirectory (entryTarPath e) PFP.</> link)
95101
_ -> Nothing
96102
where
103+
checkPosix :: PosixPath -> Maybe FileNameError
97104
checkPosix name
98-
| FilePath.Posix.isAbsolute name
99-
= Just $ AbsoluteFileName name
100-
| not (FilePath.Posix.isValid name)
101-
= Just $ InvalidFileName name
102-
| not (isInsideBaseDir (FilePath.Posix.splitDirectories name))
103-
= Just $ UnsafeLinkTarget name
104-
| otherwise = Nothing
105-
106-
checkNative (fromFilePathToNative -> name)
107-
| FilePath.Native.isAbsolute name || FilePath.Native.hasDrive name
105+
| PFP.isAbsolute name
108106
= Just $ AbsoluteFileName name
109-
| not (FilePath.Native.isValid name)
107+
| not (PFP.isValid name)
110108
= Just $ InvalidFileName name
111-
| not (isInsideBaseDir (FilePath.Native.splitDirectories name))
109+
| not (isInsideBaseDir (PFP.splitDirectories name))
112110
= Just $ UnsafeLinkTarget name
113111
| otherwise = Nothing
114112

115-
check name = checkPosix name <|> checkNative (fromFilePathToNative name)
116-
117-
isInsideBaseDir :: [FilePath] -> Bool
113+
checkNative :: PosixPath -> Maybe FileNameError
114+
checkNative name'
115+
| OSP.isAbsolute name || OSP.hasDrive name
116+
= Just $ AbsoluteFileName name'
117+
| not (OSP.isValid name)
118+
= Just $ InvalidFileName name'
119+
| not (isInsideBaseDir' (OSP.splitDirectories name))
120+
= Just $ UnsafeLinkTarget name'
121+
| otherwise
122+
= Nothing
123+
where
124+
name = fromPosixPath name'
125+
126+
check name = checkPosix name <|> checkNative name
127+
128+
isInsideBaseDir :: [PosixPath] -> Bool
118129
isInsideBaseDir = go 0
119130
where
120-
go :: Word -> [FilePath] -> Bool
131+
go :: Word -> [PosixPath] -> Bool
132+
go !_ [] = True
133+
go 0 (x : _)
134+
| x == [pstr|..|] = False
135+
go lvl (x : xs)
136+
| x == [pstr|..|] = go (lvl - 1) xs
137+
go lvl (x : xs)
138+
| x == [pstr|.|] = go lvl xs
139+
go lvl (_ : xs) = go (lvl + 1) xs
140+
141+
isInsideBaseDir' :: [OsPath] -> Bool
142+
isInsideBaseDir' = go 0
143+
where
144+
go :: Word -> [OsPath] -> Bool
121145
go !_ [] = True
122-
go 0 (".." : _) = False
123-
go lvl (".." : xs) = go (lvl - 1) xs
124-
go lvl ("." : xs) = go lvl xs
146+
go 0 (x : _)
147+
| x == [osstr|..|] = False
148+
go lvl (x : xs)
149+
| x == [osstr|..|] = go (lvl - 1) xs
150+
go lvl (x : xs)
151+
| x == [osstr|.|] = go lvl xs
125152
go lvl (_ : xs) = go (lvl + 1) xs
126153

127154
-- | Errors arising from tar file names being in some way invalid or dangerous
128155
data FileNameError
129-
= InvalidFileName FilePath
130-
| AbsoluteFileName FilePath
131-
| UnsafeLinkTarget FilePath
156+
= InvalidFileName PosixPath
157+
| AbsoluteFileName PosixPath
158+
| UnsafeLinkTarget PosixPath
159+
| FileNameDecodingFailure PosixPath
132160
-- ^ @since 0.6.0.0
133161
deriving (Typeable)
134162

@@ -142,6 +170,7 @@ showFileNameError mb_plat err = case err of
142170
InvalidFileName path -> "Invalid" ++ plat ++ " file name in tar archive: " ++ show path
143171
AbsoluteFileName path -> "Absolute" ++ plat ++ " file name in tar archive: " ++ show path
144172
UnsafeLinkTarget path -> "Unsafe" ++ plat ++ " link target in tar archive: " ++ show path
173+
FileNameDecodingFailure path -> "Decoding failure of path " ++ show path
145174
where plat = maybe "" (' ':) mb_plat
146175

147176

@@ -167,17 +196,17 @@ showFileNameError mb_plat err = case err of
167196
-- Not only it is faster, but also alleviates issues with lazy I/O
168197
-- such as exhaustion of file handlers.
169198
checkTarbomb
170-
:: FilePath
199+
:: PosixPath
171200
-> Entries e
172-
-> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) TarBombError)
201+
-> GenEntries PosixPath PosixPath (Either (Either e DecodeLongNamesError) TarBombError)
173202
checkTarbomb expectedTopDir
174203
= checkEntries (checkEntryTarbomb expectedTopDir)
175204
. decodeLongNames
176205

177206
-- | Worker of 'checkTarbomb'.
178207
--
179208
-- @since 0.6.0.0
180-
checkEntryTarbomb :: FilePath -> GenEntry FilePath linkTarget -> Maybe TarBombError
209+
checkEntryTarbomb :: PosixPath -> GenEntry PosixPath linkTarget -> Maybe TarBombError
181210
checkEntryTarbomb expectedTopDir entry = do
182211
case entryContent entry of
183212
-- Global extended header aka XGLTYPE aka pax_global_header
@@ -186,18 +215,18 @@ checkEntryTarbomb expectedTopDir entry = do
186215
-- Extended header referring to the next file in the archive aka XHDTYPE
187216
OtherEntryType 'x' _ _ -> Nothing
188217
_ ->
189-
case FilePath.Posix.splitDirectories (entryTarPath entry) of
218+
case PFP.splitDirectories (entryTarPath entry) of
190219
(topDir:_) | topDir == expectedTopDir -> Nothing
191220
_ -> Just $ TarBombError expectedTopDir (entryTarPath entry)
192221

193222
-- | An error that occurs if a tar file is a \"tar bomb\" that would extract
194223
-- files outside of the intended directory.
195224
data TarBombError
196225
= TarBombError
197-
FilePath -- ^ Path inside archive.
226+
PosixPath -- ^ Path inside archive.
198227
--
199228
-- @since 0.6.0.0
200-
FilePath -- ^ Expected top directory.
229+
PosixPath -- ^ Expected top directory.
201230
deriving (Typeable)
202231

203232
instance Exception TarBombError
@@ -236,43 +265,44 @@ instance Show TarBombError where
236265
-- such as exhaustion of file handlers.
237266
checkPortability
238267
:: Entries e
239-
-> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) PortabilityError)
268+
-> GenEntries PosixPath PosixPath (Either (Either e DecodeLongNamesError) PortabilityError)
240269
checkPortability = checkEntries checkEntryPortability . decodeLongNames
241270

242271
-- | Worker of 'checkPortability'.
243272
--
244273
-- @since 0.6.0.0
245-
checkEntryPortability :: GenEntry FilePath linkTarget -> Maybe PortabilityError
274+
checkEntryPortability :: GenEntry PosixPath linkTarget -> Maybe PortabilityError
246275
checkEntryPortability entry
247276
| entryFormat entry `elem` [V7Format, GnuFormat]
248277
= Just $ NonPortableFormat (entryFormat entry)
249278

250279
| not (portableFileType (entryContent entry))
251280
= Just NonPortableFileType
252281

253-
| not (all portableChar posixPath)
282+
| not (PS.all portableChar posixPath)
254283
= Just $ NonPortableEntryNameChar posixPath
255284

256-
| not (FilePath.Posix.isValid posixPath)
285+
| not (PFP.isValid posixPath)
257286
= Just $ NonPortableFileName "unix" (InvalidFileName posixPath)
258-
| not (FilePath.Windows.isValid windowsPath)
259-
= Just $ NonPortableFileName "windows" (InvalidFileName windowsPath)
287+
| not (WFP.isValid windowsPath)
288+
= Just $ NonPortableFileName "windows" (InvalidFileName posixPath)
260289

261-
| FilePath.Posix.isAbsolute posixPath
290+
| PFP.isAbsolute posixPath
262291
= Just $ NonPortableFileName "unix" (AbsoluteFileName posixPath)
263-
| FilePath.Windows.isAbsolute windowsPath
264-
= Just $ NonPortableFileName "windows" (AbsoluteFileName windowsPath)
292+
| WFP.isAbsolute windowsPath
293+
= Just $ NonPortableFileName "windows" (AbsoluteFileName posixPath)
265294

266-
| any (=="..") (FilePath.Posix.splitDirectories posixPath)
295+
| any (== [PS.pstr|..|]) (PFP.splitDirectories posixPath)
267296
= Just $ NonPortableFileName "unix" (InvalidFileName posixPath)
268-
| any (=="..") (FilePath.Windows.splitDirectories windowsPath)
269-
= Just $ NonPortableFileName "windows" (InvalidFileName windowsPath)
297+
| any (== [WS.pstr|..|]) (WFP.splitDirectories windowsPath)
298+
= Just $ NonPortableFileName "windows" (InvalidFileName posixPath)
270299

271-
| otherwise = Nothing
300+
| otherwise
301+
= Nothing
272302

273303
where
274-
posixPath = entryTarPath entry
275-
windowsPath = fromFilePathToWindowsPath posixPath
304+
posixPath = entryTarPath entry
305+
windowsPath = toWindowsPath posixPath
276306

277307
portableFileType ftype = case ftype of
278308
NormalFile {} -> True
@@ -281,14 +311,15 @@ checkEntryPortability entry
281311
Directory -> True
282312
_ -> False
283313

284-
portableChar c = c <= '\127'
314+
portableChar c = PS.toChar c <= '\127'
285315

286316
-- | Portability problems in a tar archive
287317
data PortabilityError
288318
= NonPortableFormat Format
289319
| NonPortableFileType
290-
| NonPortableEntryNameChar FilePath
320+
| NonPortableEntryNameChar PosixPath
291321
| NonPortableFileName PortabilityPlatform FileNameError
322+
| NonPortableDecodingFailure PosixPath
292323
deriving (Typeable)
293324

294325
-- | The name of a platform that portability issues arise from
@@ -306,6 +337,8 @@ instance Show PortabilityError where
306337
= "Non-portable character in archive entry name: " ++ show posixPath
307338
show (NonPortableFileName platform err)
308339
= showFileNameError (Just platform) err
340+
show (NonPortableDecodingFailure posixPath)
341+
= "Decoding failure of path " ++ show posixPath
309342

310343
--------------------------
311344
-- Utils

0 commit comments

Comments
 (0)