@@ -66,6 +66,7 @@ module Mismi.S3.Commands (
6666 , grantReadAccess
6767 , hoistUploadError
6868 , hoistDownloadError
69+ , chunkFilesBySize
6970 ) where
7071
7172import Control.Arrow ((***) )
@@ -122,7 +123,8 @@ import System.IO.Error (IOError)
122123import System.Directory (createDirectoryIfMissing , doesFileExist , getDirectoryContents )
123124import System.FilePath (FilePath , (</>) , takeDirectory )
124125import System.Posix.IO (OpenMode (.. ), openFd , closeFd , fdSeek , defaultFileFlags )
125- import System.Posix.Files (getFileStatus , isDirectory , isRegularFile )
126+ import System.Posix.Files (fileSize , getFileStatus , isDirectory , isRegularFile )
127+ import System.Posix.Types (FileOffset )
126128import qualified "unix-bytestring" System.Posix.IO.ByteString as UBS
127129
128130import System.Timeout.Lifted (timeout )
@@ -410,8 +412,7 @@ uploadWithMode m f a = do
410412 unlessM (liftIO $ doesFileExist f) . left $ UploadSourceMissing f
411413 s <- liftIO $ withFile f ReadMode $ \ h ->
412414 hFileSize h
413- let chunk = 100 * 1024 * 1024
414- case s < chunk of
415+ case s < bigChunkSize of
415416 True ->
416417 lift $ uploadSingle f a
417418 False ->
@@ -422,9 +423,15 @@ uploadWithMode m f a = do
422423 -- better default.
423424 case s > 1024 * 1024 * 1024 of
424425 True ->
425- multipartUpload f a s (2 * chunk ) 20
426+ multipartUpload f a s (2 * bigChunkSize ) 20
426427 False ->
427- multipartUpload f a s chunk 20
428+ multipartUpload f a s bigChunkSize 20
429+
430+
431+
432+ bigChunkSize :: Integer
433+ bigChunkSize = 100 * 1024 * 1024
434+
428435
429436uploadSingle :: FilePath -> Address -> AWS ()
430437uploadSingle file a = do
@@ -480,16 +487,37 @@ uploadRecursiveWithMode m src (Address buck ky) = do
480487 case es of
481488 Left _ -> left $ UploadSourceMissing src
482489 Right st -> unless (isDirectory st) . left $ UploadSourceNotDirectory src
483- files <- liftIO $ listRecursivelyLocal src
484- let prefixLen = L. length (src </> " a" ) - 1
485- outputAddrs = fmap (\ fp -> Address buck (ky // Key (T. pack $ L. drop prefixLen fp))) files
490+ files <- fmap fst <$> liftIO (listRecursivelyLocal src)
491+ let outputAddrs = fmap (\ fp -> Address buck (ky // Key (T. pack $ L. drop prefixLen fp))) files
486492 mapM_ (uncurry (uploadWithMode m)) $ L. zip files outputAddrs
493+ where
494+ prefixLen = L. length (src </> " a" ) - 1
495+
496+ -- uploadAddress :: FilePath -> Address
497+ -- uploadAddress fp = Address buck (ky // Key (T.pack $ L.drop prefixLen fp))
498+
499+ -- Take a list of files and their sizes, and return a list of list of files
500+ -- where the total size of of the files in the sub list is less than `bigChunkSize`
501+ -- and the length of the sub lists is <= `maxCount`.
502+ chunkFilesBySize :: Int -> Int64 -> [(FilePath , Int64 )] -> [[FilePath ]]
503+ chunkFilesBySize maxCount maxSize =
504+ takeFiles 0 [] . L. sortOn snd
505+ where
506+ takeFiles :: Int64 -> [FilePath ] -> [(FilePath , Int64 )] -> [[FilePath ]]
507+ takeFiles _ acc [] = [acc]
508+ takeFiles current acc ((x, s): xs) =
509+ if current + s < maxSize && L. length acc < maxCount
510+ then takeFiles (current + s) (x: acc) xs
511+ else (x: acc) : takeFiles 0 [] xs
512+
513+ -- bigChunkSize
487514
488515-- | Like `listRecursively` but for the local filesystem.
489- listRecursivelyLocal :: MonadIO m => FilePath -> m [FilePath ]
516+ -- Also returns
517+ listRecursivelyLocal :: MonadIO m => FilePath -> m [(FilePath , FileOffset )]
490518listRecursivelyLocal topdir = do
491519 entries <- liftIO $ listDirectory topdir
492- (dirs, files) <- liftIO . partitionDirsFiles $ fmap (topdir </> ) entries
520+ (dirs, files) <- liftIO . partitionDirsFilesWithSizes $ fmap (topdir </> ) entries
493521 others <- concatMapM listRecursivelyLocal dirs
494522 pure $ files <> others
495523
@@ -502,15 +530,15 @@ listDirectory path =
502530 f filename =
503531 filename /= " ." && filename /= " .."
504532
505- partitionDirsFiles :: MonadIO m => [FilePath ] -> m ([FilePath ], [FilePath ])
506- partitionDirsFiles =
533+ partitionDirsFilesWithSizes :: MonadIO m => [FilePath ] -> m ([FilePath ], [( FilePath , FileOffset ) ])
534+ partitionDirsFilesWithSizes =
507535 pworker ([] , [] )
508536 where
509537 pworker (dirs, files) [] = pure (dirs, files)
510538 pworker (dirs, files) (x: xs) = do
511539 xstat <- liftIO $ getFileStatus x
512540 pworker
513- (if isDirectory xstat then x : dirs else dirs, if isRegularFile xstat then x : files else files)
541+ (if isDirectory xstat then x : dirs else dirs, if isRegularFile xstat then (x, fileSize xstat) : files else files)
514542 xs
515543
516544write :: Address -> Text -> AWS WriteResult
0 commit comments