@@ -66,10 +66,11 @@ module Mismi.S3.Commands (
6666 , grantReadAccess
6767 , hoistUploadError
6868 , hoistDownloadError
69+ , chunkFilesBySize
6970 ) where
7071
7172import Control.Arrow ((***) )
72-
73+ import Control.Concurrent.Async.Lifted ( mapConcurrently_ )
7374import Control.Exception (ioError )
7475import qualified Control.Exception as CE
7576import Control.Lens ((.~) , (^.) , to , view )
@@ -122,7 +123,7 @@ import System.IO.Error (IOError)
122123import System.Directory (createDirectoryIfMissing , doesFileExist , getDirectoryContents )
123124import System.FilePath (FilePath , (</>) , takeDirectory )
124125import System.Posix.IO (OpenMode (.. ), openFd , closeFd , fdSeek , defaultFileFlags )
125- import System.Posix.Files (getFileStatus , isDirectory , isRegularFile )
126+ import System.Posix.Files (fileSize , getFileStatus , isDirectory , isRegularFile )
126127import qualified "unix-bytestring" System.Posix.IO.ByteString as UBS
127128
128129import System.Timeout.Lifted (timeout )
@@ -370,13 +371,13 @@ upload :: FilePath -> Address -> EitherT UploadError AWS ()
370371upload =
371372 uploadWithMode Fail
372373
373- uploadRecursive :: FilePath -> Address -> EitherT UploadError AWS ()
374+ uploadRecursive :: FilePath -> Address -> Int -> EitherT UploadError AWS ()
374375uploadRecursive =
375376 uploadRecursiveWithMode Fail
376377
377- uploadRecursiveOrFail :: FilePath -> Address -> AWS ()
378- uploadRecursiveOrFail f a =
379- eitherT hoistUploadError pure $ uploadRecursive f a
378+ uploadRecursiveOrFail :: FilePath -> Address -> Int -> AWS ()
379+ uploadRecursiveOrFail f a i =
380+ eitherT hoistUploadError pure $ uploadRecursive f a i
380381
381382uploadOrFail :: FilePath -> Address -> AWS ()
382383uploadOrFail f a =
@@ -386,9 +387,9 @@ uploadWithModeOrFail :: WriteMode -> FilePath -> Address -> AWS ()
386387uploadWithModeOrFail w f a =
387388 eitherT hoistUploadError pure $ uploadWithMode w f a
388389
389- uploadRecursiveWithModeOrFail :: WriteMode -> FilePath -> Address -> AWS ()
390- uploadRecursiveWithModeOrFail w f a =
391- eitherT hoistUploadError pure $ uploadRecursiveWithMode w f a
390+ uploadRecursiveWithModeOrFail :: WriteMode -> FilePath -> Address -> Int -> AWS ()
391+ uploadRecursiveWithModeOrFail w f a i =
392+ eitherT hoistUploadError pure $ uploadRecursiveWithMode w f a i
392393
393394hoistUploadError :: UploadError -> AWS ()
394395hoistUploadError e =
@@ -410,8 +411,7 @@ uploadWithMode m f a = do
410411 unlessM (liftIO $ doesFileExist f) . left $ UploadSourceMissing f
411412 s <- liftIO $ withFile f ReadMode $ \ h ->
412413 hFileSize h
413- let chunk = 100 * 1024 * 1024
414- case s < chunk of
414+ case s < bigChunkSize of
415415 True ->
416416 lift $ uploadSingle f a
417417 False ->
@@ -422,21 +422,27 @@ uploadWithMode m f a = do
422422 -- better default.
423423 case s > 1024 * 1024 * 1024 of
424424 True ->
425- multipartUpload f a s (2 * chunk ) 20
425+ multipartUpload f a s (2 * bigChunkSize ) 20
426426 False ->
427- multipartUpload f a s chunk 20
427+ multipartUpload f a s bigChunkSize 20
428+
429+
430+
431+ bigChunkSize :: Integer
432+ bigChunkSize = 100 * 1024 * 1024
433+
428434
429435uploadSingle :: FilePath -> Address -> AWS ()
430436uploadSingle file a = do
431437 rq <- N. chunkedFile (ChunkSize $ 1024 * 1024 ) file
432438 void . send $ f' A. putObject a rq & A. poServerSideEncryption .~ pure sse
433439
434440multipartUpload :: FilePath -> Address -> Integer -> Integer -> Int -> EitherT UploadError AWS ()
435- multipartUpload file a fileSize chunk fork = do
441+ multipartUpload file a fSize chunk fork = do
436442 e <- ask
437443 mpu <- lift $ createMultipartUpload a
438444
439- let chunks = calculateChunksCapped (fromInteger fileSize ) (fromInteger chunk) 4096 -- max 4096 prts returned
445+ let chunks = calculateChunksCapped (fromInteger fSize ) (fromInteger chunk) 4096 -- max 4096 prts returned
440446
441447 r <- liftIO $
442448 consume (forM_ chunks . writeQueue) fork $ multipartUploadWorker e mpu file a
@@ -474,22 +480,49 @@ multipartUploadWorker e mpu file a (o, c, i) =
474480 pure $! Right $! PartResponse i m
475481
476482
477- uploadRecursiveWithMode :: WriteMode -> FilePath -> Address -> EitherT UploadError AWS ()
478- uploadRecursiveWithMode m src (Address buck ky) = do
483+ uploadRecursiveWithMode :: WriteMode -> FilePath -> Address -> Int -> EitherT UploadError AWS ()
484+ uploadRecursiveWithMode mode src (Address buck ky) fork = do
479485 es <- tryIO $ getFileStatus src
480486 case es of
481487 Left _ -> left $ UploadSourceMissing src
482488 Right st -> unless (isDirectory st) . left $ UploadSourceNotDirectory src
483- files <- liftIO $ listRecursivelyLocal src
484- let prefixLen = L. length (src </> " a" ) - 1
485- outputAddrs = fmap (\ fp -> Address buck (ky // Key (T. pack $ L. drop prefixLen fp))) files
486- mapM_ (uncurry (uploadWithMode m)) $ L. zip files outputAddrs
489+ files <- liftIO (listRecursivelyLocal src)
490+ mapM_ uploadFiles $ chunkFilesBySize fork (fromIntegral bigChunkSize) files
491+ where
492+ uploadFiles :: [(FilePath , Int64 )] -> EitherT UploadError AWS ()
493+ uploadFiles [] = pure ()
494+ uploadFiles [(f,s)]
495+ | fromIntegral s < bigChunkSize = lift . uploadSingle f $ uploadAddress f
496+ | otherwise = uploadWithMode mode f $ uploadAddress f
497+ uploadFiles xs =
498+ mapConcurrently_ (\ (f, _) -> lift . uploadSingle f $ uploadAddress f) xs
499+
500+
501+ prefixLen = L. length (src </> " a" ) - 1
502+
503+ uploadAddress :: FilePath -> Address
504+ uploadAddress fp = Address buck (ky // Key (T. pack $ L. drop prefixLen fp))
505+
506+ -- Take a list of files and their sizes, and convert it to a list of tests
507+ -- where the total size of of the files in the sub list is less than `maxSize`
508+ -- and the length of the sub lists is <= `maxCount`.
509+ chunkFilesBySize :: Int -> Int64 -> [(FilePath , Int64 )] -> [[(FilePath , Int64 )]]
510+ chunkFilesBySize maxCount maxSize =
511+ takeFiles 0 [] . L. sortOn snd
512+ where
513+ takeFiles :: Int64 -> [(FilePath , Int64 )] -> [(FilePath , Int64 )] -> [[(FilePath , Int64 )]]
514+ takeFiles _ acc [] = [acc]
515+ takeFiles current acc ((x, s): xs) =
516+ if current + s < maxSize && L. length acc < maxCount
517+ then takeFiles (current + s) ((x, s): acc) xs
518+ else ((x, s): acc) : takeFiles 0 [] xs
487519
488520-- | Like `listRecursively` but for the local filesystem.
489- listRecursivelyLocal :: MonadIO m => FilePath -> m [FilePath ]
521+ -- Also returns
522+ listRecursivelyLocal :: MonadIO m => FilePath -> m [(FilePath , Int64 )]
490523listRecursivelyLocal topdir = do
491524 entries <- liftIO $ listDirectory topdir
492- (dirs, files) <- liftIO . partitionDirsFiles $ fmap (topdir </> ) entries
525+ (dirs, files) <- liftIO . partitionDirsFilesWithSizes $ fmap (topdir </> ) entries
493526 others <- concatMapM listRecursivelyLocal dirs
494527 pure $ files <> others
495528
@@ -502,16 +535,17 @@ listDirectory path =
502535 f filename =
503536 filename /= " ." && filename /= " .."
504537
505- partitionDirsFiles :: MonadIO m => [FilePath ] -> m ([FilePath ], [FilePath ])
506- partitionDirsFiles =
538+ partitionDirsFilesWithSizes :: MonadIO m => [FilePath ] -> m ([FilePath ], [( FilePath , Int64 ) ])
539+ partitionDirsFilesWithSizes =
507540 pworker ([] , [] )
508541 where
509542 pworker (dirs, files) [] = pure (dirs, files)
510543 pworker (dirs, files) (x: xs) = do
511544 xstat <- liftIO $ getFileStatus x
512- pworker
513- (if isDirectory xstat then x : dirs else dirs, if isRegularFile xstat then x : files else files)
514- xs
545+ let xsize = fromIntegral $ fileSize xstat
546+ newDirs = if isDirectory xstat then x : dirs else dirs
547+ newFiles = if isRegularFile xstat then (x, xsize) : files else files
548+ pworker (newDirs, newFiles) xs
515549
516550write :: Address -> Text -> AWS WriteResult
517551write =
0 commit comments