@@ -12,6 +12,8 @@ module Streamly.Internal.FileSystem.Path.Common
1212
1313 -- * Construction
1414 , isValid
15+ , validatePath
16+ , validateFile
1517 , fromChunk
1618 , unsafeFromChunk
1719 , fromChars
@@ -32,7 +34,6 @@ module Streamly.Internal.FileSystem.Path.Common
3234 , hasTrailingSeparator
3335 , isBranch
3436 , isRooted
35- , maybeFile
3637 , isAbsolute
3738 , isRootRelative
3839 , isRelativeWithDrive
8485import Control.Monad (when )
8586import Control.Monad.Catch (MonadThrow (.. ))
8687import Control.Monad.IO.Class (MonadIO (.. ))
87- import Data.Char (ord , isAlpha , toUpper )
88+ import Data.Char (chr , ord , isAlpha , toUpper )
8889import Data.Function ((&) )
8990import Data.Functor.Identity (Identity (.. ))
9091#ifdef DEBUG
@@ -189,8 +190,8 @@ primarySeparator Windows = windowsSeparator
189190-- @/@ or @\\@.
190191{-# INLINE isSeparator #-}
191192isSeparator :: OS -> Char -> Bool
192- isSeparator Windows c = (c == windowsSeparator) || (c == posixSeparator)
193193isSeparator Posix c = c == posixSeparator
194+ isSeparator Windows c = (c == windowsSeparator) || (c == posixSeparator)
194195
195196{-# INLINE isSeparatorWord #-}
196197isSeparatorWord :: Integral a => OS -> a -> Bool
@@ -625,6 +626,9 @@ unsafeSplitUNC arr =
625626--
626627{-# INLINE splitRoot #-}
627628splitRoot :: (Unbox a , Integral a ) => OS -> Array a -> (Array a , Array a )
629+ -- NOTE: validatePath depends on splitRoot splitting the path without removing
630+ -- any redundant chars etc. It should just split and do nothing else.
631+ -- XXX We can put an assert here "arrLen == rootLen + stemLen".
628632splitRoot Posix arr
629633 | isRooted Posix arr
630634 = unsafeSplitTopLevel Posix arr
@@ -898,8 +902,8 @@ splitTail _os _arr = undefined
898902
899903-- | Returns () if the path can be a valid file, otherwise throws an
900904-- exception.
901- maybeFile :: (MonadThrow m , Unbox a , Integral a ) => OS -> Array a -> m ()
902- maybeFile os arr = do
905+ validateFile :: (MonadThrow m , Unbox a , Integral a ) => OS -> Array a -> m ()
906+ validateFile os arr = do
903907 s1 <-
904908 Stream. toList
905909 $ Stream. take 3
@@ -1332,7 +1336,7 @@ splitAllExtensions = splitAllExtensionsBy False extensionWord
13321336-- Construction
13331337------------------------------------------------------------------------------
13341338
1335- -- | Only for Windows.
1339+ {-# INLINE isInvalidPathChar #-}
13361340isInvalidPathChar :: Integral a => OS -> a -> Bool
13371341isInvalidPathChar Posix x = x == 0
13381342isInvalidPathChar Windows x =
@@ -1347,6 +1351,11 @@ isInvalidPathChar Windows x =
13471351 124 -> True -- '|'
13481352 _ -> x <= charToWord ' \US '
13491353
1354+ countLeadingValid :: (Unbox a , Integral a ) => OS -> Array a -> Int
1355+ countLeadingValid os path =
1356+ let f = Fold. takeEndBy_ (isInvalidPathChar os) Fold. length
1357+ in foldArr f path
1358+
13501359-- XXX Supply it an array for checking and use a more efficient prefix matching
13511360-- check.
13521361
@@ -1358,12 +1367,111 @@ isInvalidPathComponent = fmap (fmap charToWord)
13581367 , " LPT1" ," LPT2" ," LPT3" ," LPT4" ," LPT5" ," LPT6" ," LPT7" ," LPT8" ," LPT9"
13591368 ]
13601369
1361- -- Note: "//share/x" works in powershell.
1370+ validatePath :: (MonadThrow m , Integral a , Unbox a ) => OS -> Array a -> m ()
1371+ validatePath Posix path =
1372+ let pathLen = Array. length path
1373+ validLen = countLeadingValid Posix path
1374+ in if pathLen == 0
1375+ then throwM $ InvalidPath " Empty path"
1376+ else if pathLen /= validLen
1377+ then throwM $ InvalidPath
1378+ $ " Null char found after " ++ show validLen ++ " characters."
1379+ else pure ()
1380+ validatePath Windows path
1381+ | Array. null path = throwM $ InvalidPath " Empty path"
1382+ | otherwise = do
1383+ -- XXX give position of the first invalid char.
1384+ if hasDrive path && postDriveSep > 1 -- "C://"
1385+ then throwM $ InvalidPath
1386+ $ " More than one separators between drive root and the path"
1387+ else if isAbsoluteUNC path
1388+ then
1389+ if postDriveSep > 1 -- "///x"
1390+ then throwM $ InvalidPath
1391+ $ " Path starts with more than two separators"
1392+ -- XXX covered by the previous check
1393+ -- else if Array.length path == postDriveSep + 2
1394+ -- then throwM $ InvalidPath $ "Only separators in share root"
1395+ else if invalidRootComponent -- "//prn/x"
1396+ then throwM $ InvalidPath
1397+ -- XXX print the invalid component name
1398+ $ " Special filename component in share root"
1399+ else if rootEndSeps /= 1 -- "//share//x"
1400+ then throwM $ InvalidPath
1401+ $ " Share name is needed and exactly one separator is needed "
1402+ ++ " between share root and the path"
1403+ else if Array. null stem -- "//share/"
1404+ then throwM $ InvalidPath
1405+ $ " the share root must be followed by a non-empty path"
1406+ else pure ()
1407+ else pure ()
1408+
1409+ if stemLen /= validStemLen -- "x/x>y"
1410+ then throwM $ InvalidPath
1411+ $ " Disallowed char found after "
1412+ ++ show (rootLen + validStemLen)
1413+ ++ " characters. The invalid char is: "
1414+ ++ show (chr (fromIntegral invalidVal))
1415+ ++ " [" ++ show invalidVal ++ " ]"
1416+ else if invalidComponent -- "x/prn/y"
1417+ -- XXX print the invalid component name
1418+ then throwM $ InvalidPath $ " Disallowed Windows filename in path"
1419+ else pure ()
1420+
1421+ where
1422+
1423+ postDrive = snd $ Array. unsafeSplitAt 2 path
1424+ postDriveSep = countLeadingBy (isSeparatorWord Windows ) postDrive
1425+
1426+ -- XXX check invalid chars in the path root as well - except . and '?'?
1427+ (root, stem) = splitRoot Windows path
1428+ rootLen = Array. length root
1429+ stemLen = Array. length stem
1430+ validStemLen = countLeadingValid Windows stem
1431+ invalidVal = fromIntegral (Array. unsafeGetIndex validStemLen stem) :: Word16
1432+
1433+ rootEndSeps = countTrailingBy (isSeparatorWord Windows ) root
1434+
1435+ -- TBD: We are not currently validating the sharenames against disallowed
1436+ -- file names. Apparently windows does not allow even sharenames with those
1437+ -- names. To match against sharenames we will have to strip the separators
1438+ -- and drive etc from the root. Or we can use the parsing routines
1439+ -- themselves to validate.
1440+ toUp w16 =
1441+ if w16 < 256
1442+ then charToWord $ toUpper (wordToChar w16)
1443+ else w16
1444+
1445+ -- Should we strip all space chars as in Data.Char.isSpace?
1446+ isSpace x = x == charToWord ' '
1447+
1448+ -- XXX instead of using a list based check, pass the array to the checker.
1449+ -- We do not need to upcase the array, it can be done in the checker. Thus
1450+ -- we do not need to create a new array, the original slice can be checked.
1451+ getBaseName x =
1452+ runIdentity
1453+ $ Stream. toList
1454+ $ fmap toUp
1455+ $ Array. read
1456+ $ Array. strip isSpace
1457+ $ fst $ Array. breakEndBy_ (== extensionWord) x
1458+
1459+ components =
1460+ runIdentity
1461+ . Stream. toList
1462+ . fmap getBaseName
1463+ . splitCompact False Windows
1464+
1465+ invalidRootComponent =
1466+ List. any (`List.elem` isInvalidPathComponent) (components root)
1467+ invalidComponent =
1468+ List. any (`List.elem` isInvalidPathComponent) (components stem)
1469+
1470+ -- Note: We can use powershell for testing path validity.
1471+ -- "//share/x" works in powershell.
13621472-- But mixed forward and backward slashes do not work, it is treated as a path
13631473-- relative to current drive e.g. "\\/share/x" is treated as "C:/share/x".
13641474
1365- -- XXX Throw exception pinpointing the failure reason.
1366-
13671475-- | Check if the filepath is valid i.e. does the operating system allow such a
13681476-- path in listing or creating files?
13691477--
@@ -1486,63 +1594,10 @@ isInvalidPathComponent = fmap (fmap charToWord)
14861594-- >>> isValidWin "\\\\??\\x"
14871595-- True
14881596isValid :: (Integral a , Unbox a ) => OS -> Array a -> Bool
1489- isValid Posix path
1490- | Array. null path = False
1491- | foldArr (Fold. elem 0 ) path = False
1492- | otherwise = True
1493- isValid Windows path
1494- | Array. null path = False
1495- | otherwise =
1496- not (foldArr (Fold. any (isInvalidPathChar Windows )) stem)
1497- && not (List. any (`List.elem` isInvalidPathComponent) (components stem))
1498- && not (hasDrive path && postDriveSep > 1 )
1499- && not (isAbsoluteUNC path
1500- && ( postDriveSep > 1
1501- || Array. length path == postDriveSep + 2
1502- || List. any (`List.elem` isInvalidPathComponent) (components root)
1503- -- | | not (hasTrailingSeparator Windows root)
1504- || rootEndSeps /= 1
1505- || Array. null stem
1506- )
1507- )
1508-
1509- where
1510-
1511- postDrive = snd $ Array. unsafeSplitAt 2 path
1512- postDriveSep = countLeadingBy (isSeparatorWord Windows ) postDrive
1513-
1514- (root, stem) = splitRoot Windows path
1515- rootEndSeps = countTrailingBy (isSeparatorWord Windows ) root
1516-
1517- -- TBD: We are not currently validating the sharenames against disallowed
1518- -- file names. Apparently windows does not allow even sharenames with those
1519- -- names. To match against sharenames we will have to strip the separators
1520- -- and drive etc from the root. Or we can use the parsing routines
1521- -- themselves to validate.
1522- toUp w16 =
1523- if w16 < 256
1524- then charToWord $ toUpper (wordToChar w16)
1525- else w16
1526-
1527- -- Should we strip all space chars as in Data.Char.isSpace?
1528- isSpace x = x == charToWord ' '
1529-
1530- -- XXX instead of using a list based check, pass the array to the checker.
1531- -- We do not need to upcase the array, it can be done in the checker. Thus
1532- -- we do not need to create a new array, the original slice can be checked.
1533- getBaseName x =
1534- runIdentity
1535- $ Stream. toList
1536- $ fmap toUp
1537- $ Array. read
1538- $ Array. strip isSpace
1539- $ fst $ Array. breakEndBy_ (== extensionWord) x
1540-
1541- components =
1542- runIdentity
1543- . Stream. toList
1544- . fmap getBaseName
1545- . splitCompact False Windows
1597+ isValid os path =
1598+ case validatePath os path of
1599+ Nothing -> False
1600+ Just _ -> True
15461601
15471602-- A chunk is essentially an untyped Array i.e. Array Word8. We can either use
15481603-- the term ByteArray for that or just Chunk. The latter is shorter and we have
@@ -1553,17 +1608,8 @@ isValid Windows path
15531608-- the definition of the 'Path' type. On Windows, the array passed must be a
15541609-- multiple of 2 bytes as the underlying representation uses 'Word16'.
15551610{-# INLINE unsafeFromChunk #-}
1556- unsafeFromChunk ::
1557- #ifdef DEBUG
1558- Unbox a =>
1559- #endif
1560- Array Word8 -> Array a
1561- unsafeFromChunk =
1562- #ifndef DEBUG
1563- Array. unsafeCast
1564- #else
1565- fromJust . fromChunk
1566- #endif
1611+ unsafeFromChunk :: Array Word8 -> Array a
1612+ unsafeFromChunk = Array. unsafeCast
15671613
15681614-- XXX Also check for invalid chars on windows.
15691615
@@ -1572,33 +1618,31 @@ unsafeFromChunk =
15721618-- representation uses 'Word16'.
15731619--
15741620-- Throws 'InvalidPath'.
1575- fromChunk :: (MonadThrow m , Unbox a ) => Array Word8 -> m (Array a )
1576- fromChunk arr =
1621+ fromChunk :: forall m a . (MonadThrow m , Unbox a , Integral a ) =>
1622+ OS -> Array Word8 -> m (Array a )
1623+ fromChunk Posix arr =
1624+ let arr1 = Array. unsafeCast arr :: Array a
1625+ in validatePath Posix arr1 >> pure arr1
1626+ fromChunk Windows arr =
15771627 case Array. cast arr of
15781628 Nothing ->
1579- -- XXX Windows only message.
15801629 throwM
15811630 $ InvalidPath
15821631 $ " Encoded path length " ++ show (Array. byteLength arr)
15831632 ++ " is not a multiple of 16-bit."
1584- Just x -> pure x
1633+ Just x -> validatePath Windows x >> pure x
15851634
15861635-- | Convert 'Path' to an array of bytes.
15871636toChunk :: Array a -> Array Word8
15881637toChunk = Array. asBytes
15891638
15901639unsafeFromChars :: (Unbox a ) =>
1591- (Char -> Bool )
1592- -> (Stream Identity Char -> Stream Identity a )
1640+ (Stream Identity Char -> Stream Identity a )
15931641 -> Stream Identity Char
15941642 -> Array a
1595- unsafeFromChars _p encode s =
1596- #ifndef DEBUG
1643+ unsafeFromChars encode s =
15971644 let n = runIdentity $ Stream. fold Fold. length s
15981645 in Array. fromPureStreamN n (encode s)
1599- #else
1600- fromJust (fromChars _p encode s)
1601- #endif
16021646
16031647-- Note: We do not sanitize the path i.e. remove duplicate separators, .
16041648-- segments, trailing separator etc because that would require unnecessary
@@ -1609,29 +1653,21 @@ unsafeFromChars _p encode s =
16091653-- XXX Writing a custom fold for parsing a Posix path may be better for
16101654-- efficient bulk parsing when needed. We need the same code to validate a
16111655-- Chunk where we do not need to create an array.
1612- fromChars :: (MonadThrow m , Unbox a ) =>
1613- ( Char -> Bool )
1656+ fromChars :: (MonadThrow m , Unbox a , Integral a ) =>
1657+ OS
16141658 -> (Stream Identity Char -> Stream Identity a )
16151659 -> Stream Identity Char
16161660 -> m (Array a )
1617- fromChars p encode s =
1618- -- XXX on windows terminate at first invalid char
1619- let lengths = Fold. tee Fold. length (Fold. takeEndBy_ p Fold. length )
1620- (n, n1) = runIdentity $ Stream. fold lengths s
1621- arr = Array. fromPureStreamN n (encode s)
1622- sample = Stream. takeWhile p s
1623- in
1624- if n <= 0
1625- then throwM $ InvalidPath " Path cannot be empty."
1626- else if n1 < n
1627- then throwM $ InvalidPath $ " Path contains a NULL char at position: "
1628- ++ show n1 ++ " after " ++ runIdentity (Stream. toList sample)
1629- else pure arr
1630-
1631- toChars :: (Monad m , Unbox a ) => (Stream m a -> Stream m Char ) -> Array a -> Stream m Char
1661+ fromChars os encode s =
1662+ let arr = unsafeFromChars encode s
1663+ in fromChunk os (Array. unsafeCast arr)
1664+
1665+ toChars :: (Monad m , Unbox a ) =>
1666+ (Stream m a -> Stream m Char ) -> Array a -> Stream m Char
16321667toChars decode arr = decode $ Array. read arr
16331668
1634- toString :: Unbox a => (Stream Identity a -> Stream Identity Char ) -> Array a -> [Char ]
1669+ toString :: Unbox a =>
1670+ (Stream Identity a -> Stream Identity Char ) -> Array a -> [Char ]
16351671toString decode = runIdentity . Stream. toList . toChars decode
16361672
16371673------------------------------------------------------------------------------
@@ -1697,6 +1733,8 @@ unsafeAppend os toStr a b =
16971733-- need a joinRoot.
16981734--
16991735-- XXX Also, we cannot append "/" to "c:/" as it will make the path invalid.
1736+ -- XXX On Windows a path starting with / is not absolute and can be appended to
1737+ -- a path/drive ending with :.
17001738
17011739-- | Note that append joins two paths using a separator between the paths.
17021740-- Using append to join a root with a path segment can change the meaning of
@@ -1791,6 +1829,14 @@ eqWindowsAbsRootStrict a b =
17911829 (fmap toDefaultSeparator $ Array. read a)
17921830 (fmap toDefaultSeparator $ Array. read b)
17931831
1832+ -- XXX Use options in the same eqPath routine instead of having different
1833+ -- routines. On posix even macos can have case insensitive comparison.
1834+ -- ALLOW_RELATIVE_PATH_EQUALITY, IGNORE_TRAILING_SEPARATOR,
1835+ -- IGNORE_CASE.
1836+ --
1837+ -- The following options can be added later: PROCESS_PARENT_REFS,
1838+ -- DONT_IGNORE_REDUNDANT_SEPARATORS, DONT_IGNORE_DOT_COMPONENTS.
1839+
17941840-- | Checks two paths for logical equality. It performs some normalizations on
17951841-- the paths before comparing them, specifically it drops redundant path
17961842-- separators between path segments and redundant "/./" components between
@@ -1961,6 +2007,7 @@ eqWindowsComponents a b =
19612007-- strict equality routine.
19622008--
19632009-- * A leading dot is ignored, thus "./x == ./x" and "./x == x".
2010+ -- * On Windows leading non-root drive prefix is ignored "C:x == C:x"
19642011-- * A trailing separator is ignored thus "x/ == x".
19652012-- * On Windows the comparison is case insensitive thus "X == x".
19662013--
0 commit comments