Skip to content

Commit f366d67

Browse files
committed
Implement Unicode support by utilizing PosixString and friends
Fixes #78
1 parent b684654 commit f366d67

File tree

20 files changed

+686
-487
lines changed

20 files changed

+686
-487
lines changed

Codec/Archive/Tar.hs

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -177,9 +177,12 @@ import Codec.Archive.Tar.Write (write)
177177
import Control.Applicative ((<|>))
178178
import Control.Exception (Exception, throw, catch, SomeException(..))
179179
import qualified Data.ByteString.Lazy as BL
180-
import System.IO (withFile, IOMode(..))
180+
import System.IO (IOMode(..))
181181
import Prelude hiding (read)
182182

183+
import System.OsPath (OsPath)
184+
import qualified System.File.OsPath as OSP
185+
183186
-- | Create a new @\".tar\"@ file from a directory of files.
184187
--
185188
-- It is equivalent to calling the standard @tar@ program like so:
@@ -213,11 +216,11 @@ import Prelude hiding (read)
213216
--
214217
-- * @rwxr-xr-x@ for directories
215218
--
216-
create :: FilePath -- ^ Path of the \".tar\" file to write.
217-
-> FilePath -- ^ Base directory
218-
-> [FilePath] -- ^ Files and directories to archive, relative to base dir
219+
create :: OsPath -- ^ Path of the \".tar\" file to write.
220+
-> OsPath -- ^ Base directory
221+
-> [OsPath] -- ^ Files and directories to archive, relative to base dir
219222
-> IO ()
220-
create tar base paths = BL.writeFile tar . write =<< pack base paths
223+
create tar base paths = OSP.writeFile tar . write =<< pack base paths
221224

222225
-- | Extract all the files contained in a @\".tar\"@ file.
223226
--
@@ -249,22 +252,22 @@ create tar base paths = BL.writeFile tar . write =<< pack base paths
249252
-- containing entries that point outside of the tarball (either absolute paths
250253
-- or relative paths) will be caught and an exception will be thrown.
251254
--
252-
extract :: FilePath -- ^ Destination directory
253-
-> FilePath -- ^ Tarball
255+
extract :: OsPath -- ^ Destination directory
256+
-> OsPath -- ^ Tarball
254257
-> IO ()
255-
extract dir tar = unpack dir . read =<< BL.readFile tar
258+
extract dir tar = unpack dir . read =<< OSP.readFile tar
256259

257260
-- | Append new entries to a @\".tar\"@ file from a directory of files.
258261
--
259262
-- This is much like 'create', except that all the entries are added to the
260263
-- end of an existing tar file. Or if the file does not already exists then
261264
-- it behaves the same as 'create'.
262265
--
263-
append :: FilePath -- ^ Path of the \".tar\" file to write.
264-
-> FilePath -- ^ Base directory
265-
-> [FilePath] -- ^ Files and directories to archive, relative to base dir
266+
append :: OsPath -- ^ Path of the \".tar\" file to write.
267+
-> OsPath -- ^ Base directory
268+
-> [OsPath] -- ^ Files and directories to archive, relative to base dir
266269
-> IO ()
267270
append tar base paths =
268-
withFile tar ReadWriteMode $ \hnd -> do
271+
OSP.withFile tar ReadWriteMode $ \hnd -> do
269272
_ <- hSeekEndEntryOffset hnd Nothing
270273
BL.hPut hnd . write =<< pack base paths

Codec/Archive/Tar/Check/Internal.hs

Lines changed: 97 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
{-# LANGUAGE ViewPatterns #-}
44
{-# LANGUAGE RankNTypes #-}
55
{-# LANGUAGE ScopedTypeVariables #-}
6+
{-# LANGUAGE QuasiQuotes #-}
7+
{-# LANGUAGE MultiWayIf #-}
68
{-# OPTIONS_GHC -Wno-orphans #-}
79
{-# OPTIONS_HADDOCK hide #-}
810
-----------------------------------------------------------------------------
@@ -50,6 +52,17 @@ import qualified System.FilePath as FilePath.Native
5052
import qualified System.FilePath.Windows as FilePath.Windows
5153
import qualified System.FilePath.Posix as FilePath.Posix
5254

55+
import System.OsPath (OsPath)
56+
import System.OsPath.Posix (PosixPath)
57+
import qualified System.OsPath as OSP
58+
import qualified System.OsPath.Posix as PFP
59+
import qualified System.OsPath.Windows as WFP
60+
61+
import System.OsString.Posix (pstr)
62+
import System.OsString (osstr)
63+
import qualified System.OsString.Posix as PS
64+
import qualified System.OsString.Windows as WS
65+
5366

5467
--------------------------
5568
-- Security
@@ -78,57 +91,79 @@ import qualified System.FilePath.Posix as FilePath.Posix
7891
-- such as exhaustion of file handlers.
7992
checkSecurity
8093
:: Entries e
81-
-> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) FileNameError)
94+
-> GenEntries PosixPath PosixPath (Either (Either e DecodeLongNamesError) FileNameError)
8295
checkSecurity = checkEntries checkEntrySecurity . decodeLongNames
8396

8497
-- | Worker of 'Codec.Archive.Tar.Check.checkSecurity'.
8598
--
8699
-- @since 0.6.0.0
87-
checkEntrySecurity :: GenEntry FilePath FilePath -> Maybe FileNameError
100+
checkEntrySecurity :: GenEntry PosixPath PosixPath -> Maybe FileNameError
88101
checkEntrySecurity e =
89102
check (entryTarPath e) <|>
90103
case entryContent e of
91104
HardLink link ->
92105
check link
93106
SymbolicLink link ->
94-
check (FilePath.Posix.takeDirectory (entryTarPath e) FilePath.Posix.</> link)
107+
check (PFP.takeDirectory (entryTarPath e) PFP.</> link)
95108
_ -> Nothing
96109
where
110+
checkPosix :: PosixPath -> Maybe FileNameError
97111
checkPosix name
98-
| FilePath.Posix.isAbsolute name
99-
= Just $ AbsoluteFileName name
100-
| not (FilePath.Posix.isValid name)
101-
= Just $ InvalidFileName name
102-
| not (isInsideBaseDir (FilePath.Posix.splitDirectories name))
103-
= Just $ UnsafeLinkTarget name
104-
| otherwise = Nothing
105-
106-
checkNative (fromFilePathToNative -> name)
107-
| FilePath.Native.isAbsolute name || FilePath.Native.hasDrive name
112+
| PFP.isAbsolute name
108113
= Just $ AbsoluteFileName name
109-
| not (FilePath.Native.isValid name)
114+
| not (PFP.isValid name)
110115
= Just $ InvalidFileName name
111-
| not (isInsideBaseDir (FilePath.Native.splitDirectories name))
116+
| not (isInsideBaseDir (PFP.splitDirectories name))
112117
= Just $ UnsafeLinkTarget name
113118
| otherwise = Nothing
114119

115-
check name = checkPosix name <|> checkNative (fromFilePathToNative name)
116-
117-
isInsideBaseDir :: [FilePath] -> Bool
120+
checkNative :: PosixPath -> Maybe FileNameError
121+
checkNative name'
122+
| (Just name) <- fromPosixPath name' =
123+
if | OSP.isAbsolute name || OSP.hasDrive name
124+
-> Just $ AbsoluteFileName name'
125+
| not (OSP.isValid name)
126+
-> Just $ InvalidFileName name'
127+
| not (isInsideBaseDir' (OSP.splitDirectories name))
128+
-> Just $ UnsafeLinkTarget name'
129+
| otherwise
130+
-> Nothing
131+
| otherwise = Just $ FileNameDecodingFailure name'
132+
133+
check name = checkPosix name <|> checkNative name
134+
135+
isInsideBaseDir :: [PosixPath] -> Bool
118136
isInsideBaseDir = go 0
119137
where
120-
go :: Word -> [FilePath] -> Bool
138+
go :: Word -> [PosixPath] -> Bool
139+
go !_ [] = True
140+
go 0 (x : _)
141+
| x == [pstr|..|] = False
142+
go lvl (x : xs)
143+
| x == [pstr|..|] = go (lvl - 1) xs
144+
go lvl (x : xs)
145+
| x == [pstr|.|] = go lvl xs
146+
go lvl (_ : xs) = go (lvl + 1) xs
147+
148+
isInsideBaseDir' :: [OsPath] -> Bool
149+
isInsideBaseDir' = go 0
150+
where
151+
go :: Word -> [OsPath] -> Bool
121152
go !_ [] = True
122-
go 0 (".." : _) = False
123-
go lvl (".." : xs) = go (lvl - 1) xs
124-
go lvl ("." : xs) = go lvl xs
153+
go 0 (x : _)
154+
| x == [osstr|..|] = False
155+
go lvl (x : xs)
156+
| x == [osstr|..|] = go (lvl - 1) xs
157+
go lvl (x : xs)
158+
| x == [osstr|.|] = go lvl xs
125159
go lvl (_ : xs) = go (lvl + 1) xs
126160

127161
-- | Errors arising from tar file names being in some way invalid or dangerous
128162
data FileNameError
129-
= InvalidFileName FilePath
130-
| AbsoluteFileName FilePath
131-
| UnsafeLinkTarget FilePath
163+
= InvalidFileName PosixPath
164+
| AbsoluteFileName PosixPath
165+
| UnsafeLinkTarget PosixPath
166+
| FileNameDecodingFailure PosixPath
132167
-- ^ @since 0.6.0.0
133168
deriving (Typeable)
134169

@@ -142,6 +177,7 @@ showFileNameError mb_plat err = case err of
142177
InvalidFileName path -> "Invalid" ++ plat ++ " file name in tar archive: " ++ show path
143178
AbsoluteFileName path -> "Absolute" ++ plat ++ " file name in tar archive: " ++ show path
144179
UnsafeLinkTarget path -> "Unsafe" ++ plat ++ " link target in tar archive: " ++ show path
180+
FileNameDecodingFailure path -> "Decoding failure of path " ++ show path
145181
where plat = maybe "" (' ':) mb_plat
146182

147183

@@ -167,17 +203,17 @@ showFileNameError mb_plat err = case err of
167203
-- Not only it is faster, but also alleviates issues with lazy I/O
168204
-- such as exhaustion of file handlers.
169205
checkTarbomb
170-
:: FilePath
206+
:: PosixPath
171207
-> Entries e
172-
-> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) TarBombError)
208+
-> GenEntries PosixPath PosixPath (Either (Either e DecodeLongNamesError) TarBombError)
173209
checkTarbomb expectedTopDir
174210
= checkEntries (checkEntryTarbomb expectedTopDir)
175211
. decodeLongNames
176212

177213
-- | Worker of 'checkTarbomb'.
178214
--
179215
-- @since 0.6.0.0
180-
checkEntryTarbomb :: FilePath -> GenEntry FilePath linkTarget -> Maybe TarBombError
216+
checkEntryTarbomb :: PosixPath -> GenEntry PosixPath linkTarget -> Maybe TarBombError
181217
checkEntryTarbomb expectedTopDir entry = do
182218
case entryContent entry of
183219
-- Global extended header aka XGLTYPE aka pax_global_header
@@ -186,18 +222,18 @@ checkEntryTarbomb expectedTopDir entry = do
186222
-- Extended header referring to the next file in the archive aka XHDTYPE
187223
OtherEntryType 'x' _ _ -> Nothing
188224
_ ->
189-
case FilePath.Posix.splitDirectories (entryTarPath entry) of
225+
case PFP.splitDirectories (entryTarPath entry) of
190226
(topDir:_) | topDir == expectedTopDir -> Nothing
191227
_ -> Just $ TarBombError expectedTopDir (entryTarPath entry)
192228

193229
-- | An error that occurs if a tar file is a \"tar bomb\" that would extract
194230
-- files outside of the intended directory.
195231
data TarBombError
196232
= TarBombError
197-
FilePath -- ^ Path inside archive.
233+
PosixPath -- ^ Path inside archive.
198234
--
199235
-- @since 0.6.0.0
200-
FilePath -- ^ Expected top directory.
236+
PosixPath -- ^ Expected top directory.
201237
deriving (Typeable)
202238

203239
instance Exception TarBombError
@@ -236,43 +272,45 @@ instance Show TarBombError where
236272
-- such as exhaustion of file handlers.
237273
checkPortability
238274
:: Entries e
239-
-> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) PortabilityError)
275+
-> GenEntries PosixPath PosixPath (Either (Either e DecodeLongNamesError) PortabilityError)
240276
checkPortability = checkEntries checkEntryPortability . decodeLongNames
241277

242278
-- | Worker of 'checkPortability'.
243279
--
244280
-- @since 0.6.0.0
245-
checkEntryPortability :: GenEntry FilePath linkTarget -> Maybe PortabilityError
281+
checkEntryPortability :: GenEntry PosixPath linkTarget -> Maybe PortabilityError
246282
checkEntryPortability entry
247-
| entryFormat entry `elem` [V7Format, GnuFormat]
248-
= Just $ NonPortableFormat (entryFormat entry)
283+
| (Just windowsPath) <- toWindowsPath posixPath =
284+
if | entryFormat entry `elem` [V7Format, GnuFormat]
285+
-> Just $ NonPortableFormat (entryFormat entry)
249286

250-
| not (portableFileType (entryContent entry))
251-
= Just NonPortableFileType
287+
| not (portableFileType (entryContent entry))
288+
-> Just NonPortableFileType
252289

253-
| not (all portableChar posixPath)
254-
= Just $ NonPortableEntryNameChar posixPath
290+
| not (PS.all portableChar posixPath)
291+
-> Just $ NonPortableEntryNameChar posixPath
255292

256-
| not (FilePath.Posix.isValid posixPath)
257-
= Just $ NonPortableFileName "unix" (InvalidFileName posixPath)
258-
| not (FilePath.Windows.isValid windowsPath)
259-
= Just $ NonPortableFileName "windows" (InvalidFileName windowsPath)
293+
| not (PFP.isValid posixPath)
294+
-> Just $ NonPortableFileName "unix" (InvalidFileName posixPath)
295+
| not (WFP.isValid windowsPath)
296+
-> Just $ NonPortableFileName "windows" (InvalidFileName posixPath)
260297

261-
| FilePath.Posix.isAbsolute posixPath
262-
= Just $ NonPortableFileName "unix" (AbsoluteFileName posixPath)
263-
| FilePath.Windows.isAbsolute windowsPath
264-
= Just $ NonPortableFileName "windows" (AbsoluteFileName windowsPath)
298+
| PFP.isAbsolute posixPath
299+
-> Just $ NonPortableFileName "unix" (AbsoluteFileName posixPath)
300+
| WFP.isAbsolute windowsPath
301+
-> Just $ NonPortableFileName "windows" (AbsoluteFileName posixPath)
265302

266-
| any (=="..") (FilePath.Posix.splitDirectories posixPath)
267-
= Just $ NonPortableFileName "unix" (InvalidFileName posixPath)
268-
| any (=="..") (FilePath.Windows.splitDirectories windowsPath)
269-
= Just $ NonPortableFileName "windows" (InvalidFileName windowsPath)
303+
| any (== [PS.pstr|..|]) (PFP.splitDirectories posixPath)
304+
-> Just $ NonPortableFileName "unix" (InvalidFileName posixPath)
305+
| any (== [WS.pstr|..|]) (WFP.splitDirectories windowsPath)
306+
-> Just $ NonPortableFileName "windows" (InvalidFileName posixPath)
270307

271-
| otherwise = Nothing
308+
| otherwise
309+
-> Nothing
310+
| otherwise = Just $ NonPortableDecodingFailure posixPath
272311

273312
where
274-
posixPath = entryTarPath entry
275-
windowsPath = fromFilePathToWindowsPath posixPath
313+
posixPath = entryTarPath entry
276314

277315
portableFileType ftype = case ftype of
278316
NormalFile {} -> True
@@ -281,14 +319,15 @@ checkEntryPortability entry
281319
Directory -> True
282320
_ -> False
283321

284-
portableChar c = c <= '\127'
322+
portableChar c = PS.toChar c <= '\127'
285323

286324
-- | Portability problems in a tar archive
287325
data PortabilityError
288326
= NonPortableFormat Format
289327
| NonPortableFileType
290-
| NonPortableEntryNameChar FilePath
328+
| NonPortableEntryNameChar PosixPath
291329
| NonPortableFileName PortabilityPlatform FileNameError
330+
| NonPortableDecodingFailure PosixPath
292331
deriving (Typeable)
293332

294333
-- | The name of a platform that portability issues arise from
@@ -306,6 +345,8 @@ instance Show PortabilityError where
306345
= "Non-portable character in archive entry name: " ++ show posixPath
307346
show (NonPortableFileName platform err)
308347
= showFileNameError (Just platform) err
348+
show (NonPortableDecodingFailure posixPath)
349+
= "Decoding failure of path " ++ show posixPath
309350

310351
--------------------------
311352
-- Utils

0 commit comments

Comments
 (0)