@@ -2081,8 +2081,7 @@ class Datasets @Inject()(
     val filenameMap = scala.collection.mutable.Map.empty[UUID, String]
     val inputFiles = scala.collection.mutable.ListBuffer.empty[models.File]
 
-    // compute list of all files and folder in dataset. This will also make sure
-    // that all files and folder names are unique.
+    // Get list of all files and folders in dataset and enforce unique names
     fileIDs match {
       case Some(fids) => {
         Logger.info("Downloading only some files")
@@ -2100,179 +2099,118 @@ class Datasets @Inject()(
     val md5Files = scala.collection.mutable.HashMap.empty[String, MessageDigest] // for the files
     val md5Bag = scala.collection.mutable.HashMap.empty[String, MessageDigest] // for the bag files
 
-    // which file we are currently processing
-
     val byteArrayOutputStream = new ByteArrayOutputStream(chunkSize)
     val zip = new ZipOutputStream(byteArrayOutputStream)
-    // zip compression level
     zip.setLevel(compression)
 
+    // Prep enumeration handlers
     var totalBytes = 0L
-    var level = 0 // dataset, file, bag
-    var file_type = 0 //
-    var count = 0 // count for files
-
-    /*
-     * Explanation for the cases
-     *
-     * the level can be:
-     * 0 (file)
-     * 1 (dataset)
-     * 2 (bag)
-     *
-     * when the level is file, the file_type can be:
-     * 0 (info)
-     * 1 (metadata)
-     * 2 (the actual files)
-     *
-     * when the level is dataset, the file_type can be:
-     * 0 (info)
-     * 1 (metadata)
-     *
-     * when the level is bag, the file_type can be:
-     * 0 - bagit.txt
-     * 1 - bag-info.txt
-     * 2 - manifest-md5.txt
-     * 3 - tagmanifest-md5.txt
-     *
-     * when the dataset is finished (in either mode) the level = -1 and file_type = -1 and
-     * the enumerator is finished
-     */
+    var level = "dataset"
+    var file_type = "metadata"
+    var file_index = 0 // count for files
 
-    var is: Option[InputStream] = addDatasetInfoToZip(dataFolder, dataset, zip)
-    // digest input stream
+    // Begin input stream with dataset info file
+    var is = addDatasetInfoToZip(dataFolder, dataset, zip)
     val md5 = MessageDigest.getInstance("MD5")
-    md5Files.put(dataFolder + "_info.json",md5)
-    is = Some(new DigestInputStream(is.get,md5))
-    file_type = 1 // next is metadata
+    md5Files.put(dataFolder + "_info.json", md5)
+    is = Some(new DigestInputStream(is.get, md5))
 
+    // Handle rest of dataset structure by individual file
     Enumerator.generateM({
       is match {
         case Some(inputStream) => {
           val buffer = new Array[Byte](chunkSize)
           val bytesRead = scala.concurrent.blocking {
            inputStream.read(buffer)
-
           }
          val chunk = bytesRead match {
            case -1 => {
              // finished individual file
              zip.closeEntry()
              inputStream.close()
 
-              (level,file_type) match {
-                // dataset, info
-                case (0,0) => {
-                  is = addDatasetInfoToZip(dataFolder,dataset,zip)
-                  val md5 = MessageDigest.getInstance("MD5")
-                  md5Files.put("_info.json",md5)
-                  is = Some(new DigestInputStream(is.get, md5))
-                  file_type = file_type + 1
+              (level, file_type) match {
+                case ("dataset", "metadata") => {
+                  is = addDatasetMetadataToZip(dataFolder, dataset, zip)
+                  is = addMD5Entry("_metadata.json", is, md5Files)
+                  level = "file"
+                  file_type = "info"
                 }
-                // dataset, metadata
-                case (0,1) => {
-                  is = addDatasetMetadataToZip(dataFolder,dataset,zip)
-                  val md5 = MessageDigest.getInstance("MD5")
-                  md5Files.put("_metadata.json",md5)
-                  is = Some(new DigestInputStream(is.get, md5))
-                  level = 1
-                  file_type = 0
-                }
-                // file info
-                case (1,0) => {
-                  is = addFileInfoToZip(filenameMap(inputFiles(count).id), inputFiles(count), zip)
-                  val md5 = MessageDigest.getInstance("MD5")
-                  md5Files.put(filenameMap(inputFiles(count).id) + "_info.json",md5)
-                  is = Some(new DigestInputStream(is.get, md5))
-                  if (count + 1 < inputFiles.size) {
-                    count += 1
-                  } else {
-                    count = 0
-                    file_type = 1
+                case ("file", "info") => {
+                  val filename = filenameMap(inputFiles(file_index).id)
+                  is = addFileInfoToZip(filename, inputFiles(file_index), zip)
+                  is = addMD5Entry(filename + "_info.json", is, md5Files)
+                  file_index += 1
+                  if (file_index >= inputFiles.size) {
+                    file_index = 0
+                    file_type = "metadata"
                   }
                 }
-                // file metadata
-                case (1,1) => {
-                  is = addFileMetadataToZip(filenameMap(inputFiles(count).id), inputFiles(count), zip)
-                  val md5 = MessageDigest.getInstance("MD5")
-                  md5Files.put(filenameMap(inputFiles(count).id) + "_metadata.json",md5)
-                  is = Some(new DigestInputStream(is.get, md5))
-                  if (count + 1 < inputFiles.size) {
-                    count += 1
-                  } else {
-                    count = 0
-                    file_type = 2
+                case ("file", "metadata") => {
+                  val filename = filenameMap(inputFiles(file_index).id)
+                  is = addFileMetadataToZip(filename, inputFiles(file_index), zip)
+                  is = addMD5Entry(filename + "_metadata.json", is, md5Files)
+                  file_index += 1
+                  if (file_index >= inputFiles.size) {
+                    file_index = 0
+                    file_type = "bytes"
                   }
                 }
-                // files
-                case (1,2) => {
-                  is = addFileToZip(filenameMap(inputFiles(count).id), inputFiles(count), zip)
-                  val md5 = MessageDigest.getInstance("MD5")
-                  md5Files.put(filenameMap(inputFiles(count).id),md5)
-                  is = Some(new DigestInputStream(is.get, md5))
-                  if (count + 1 < inputFiles.size) {
-                    count += 1
-                  } else {
-                    if (bagit) {
-                      count = 0
-                      level = 2
-                      file_type = 0
+                case ("file", "bytes") => {
+                  val filename = filenameMap(inputFiles(file_index).id)
+                  is = addFileToZip(filename, inputFiles(file_index), zip)
+                  is = addMD5Entry(filename, is, md5Files)
+                  file_index += 1
+                  if (file_index >= inputFiles.size) {
+                    if (bagit) {
+                      file_index = 0
+                      level = "bag"
+                      file_type = "bagit.txt"
                     } else {
-                      // done
-                      level = -1
-                      file_type = -1
+                      level = "done"
+                      file_type = "none"
                     }
-
                   }
                 }
-                // bagit.txt
-                case (2,0) => {
+                case ("bag", "bagit.txt") => {
                   is = addBagItTextToZip(totalBytes,filenameMap.size,zip,dataset,user)
-                  val md5 = MessageDigest.getInstance("MD5")
-                  md5Bag.put("bagit.txt",md5)
-                  is = Some(new DigestInputStream(is.get, md5))
-                  file_type = 1
+                  is = addMD5Entry("bagit.txt", is, md5Bag)
+                  file_type = "bag-info.txt"
                 }
-                // bag-info.txt
-                case (2,1) => {
+                case ("bag", "bag-info.txt") => {
                   is = addBagInfoToZip(zip)
-                  val md5 = MessageDigest.getInstance("MD5")
-                  md5Bag.put("bag-info.txt",md5)
-                  is = Some(new DigestInputStream(is.get, md5))
-                  file_type = 2
+                  is = addMD5Entry("bag-info.txt", is, md5Bag)
+                  file_type = "manifest-md5.txt"
                 }
-                // manifest-md5.txt
-                case (2,2) => {
+                case ("bag", "manifest-md5.txt") => {
                   is = addManifestMD5ToZip(md5Files.toMap[String,MessageDigest],zip)
-                  val md5 = MessageDigest.getInstance("MD5")
-                  md5Bag.put("manifest-md5.txt",md5)
-                  is = Some(new DigestInputStream(is.get, md5))
-                  file_type = 3
+                  is = addMD5Entry("manifest-md5.txt", is, md5Bag)
+                  file_type = "datacite.xml"
+                }
+                case ("bag", "datacite.xml") => {
+                  is = addBagitMetadataToZip(zip)
+                  file_type = "tagmanifest-md5.txt"
                 }
-                // tagmanifest-md5.txt
-                case (2,3) => {
+                case ("bag", "tagmanifest-md5.txt") => {
                   is = addTagManifestMD5ToZip(md5Bag.toMap[String,MessageDigest],zip)
-                  val md5 = MessageDigest.getInstance("MD5")
-                  md5Bag.put("tagmanifest-md5.txt",md5)
-                  is = Some(new DigestInputStream(is.get, md5))
-                  level = -1
-                  file_type = -1
+                  is = addMD5Entry("tagmanifest-md5.txt", is, md5Bag)
+                  level = "done"
+                  file_type = "none"
+                }
+                case ("done", "none") => {
+                  zip.close()
+                  is = None
                 }
-                // the end, or a bad case
                 case (_,_) => {
+                  Logger.error("Unexpected values in dataset zip enum. Closing out anyway.")
                   zip.close()
                   is = None
                 }
              }
-              // this is generated after all the matches
              Some(byteArrayOutputStream.toByteArray)
            }
            case read => {
              zip.write(buffer, 0, read)
              Some(byteArrayOutputStream.toByteArray)
            }
          }
-          if (level < 2) {
+          if (level == "file" || level == "dataset") {
            totalBytes += bytesRead
          }
          // reset temporary byte array
@@ -2286,6 +2224,12 @@ class Datasets @Inject()(
     })(pec)
   }
 
+  private def addMD5Entry(name: String, is: Option[InputStream],
+                          md5HashMap: scala.collection.mutable.HashMap[String, MessageDigest]) = {
+    val md5 = MessageDigest.getInstance("MD5")
+    md5HashMap.put(name, md5)
+    Some(new DigestInputStream(is.get, md5))
+  }
 
   private def addFileToZip(filename: String, file: models.File, zip: ZipOutputStream): Option[InputStream] = {
     files.getBytes(file.id) match {
@@ -2420,6 +2364,12 @@ class Datasets @Inject()(
     Some(new ByteArrayInputStream(s.getBytes("UTF-8")))
   }
 
+  private def addBagitMetadataToZip(zip: ZipOutputStream): Option[InputStream] = {
+    zip.putNextEntry(new ZipEntry("metadata/datacite.xml"))
+    var s = "Datacite v4 stuff goes here."
+    Some(new ByteArrayInputStream(s.getBytes("UTF-8")))
+  }
+
   def download(id: UUID, compression: Int, tracking: Boolean) = PermissionAction(Permission.DownloadFiles, Some(ResourceRef(ResourceRef.dataset, id))) { implicit request =>
     implicit val user = request.user
     datasets.get(id) match {
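
For orientation, the numeric (level, file_type) codes removed above are replaced by readable string pairs. The sketch below is a self-contained summary of the traversal order those pairs produce; BagStateWalk, next, and lastFile are illustrative names that are not part of the commit, and the controller itself mutates level, file_type, and file_index in place rather than returning a new state.

object BagStateWalk extends App {
  // Illustrative only: not part of the commit.
  // Phase transitions as the enumerator finishes each zip entry.
  // `lastFile` stands in for "file_index just wrapped past inputFiles.size".
  def next(state: (String, String), lastFile: Boolean, bagit: Boolean): (String, String) =
    state match {
      case ("dataset", "metadata")                => ("file", "info")
      case ("file", "info") if lastFile           => ("file", "metadata")
      case ("file", "metadata") if lastFile       => ("file", "bytes")
      case ("file", "bytes") if lastFile && bagit => ("bag", "bagit.txt")
      case ("file", "bytes") if lastFile          => ("done", "none")
      case ("file", ft)                           => ("file", ft) // more files in this phase
      case ("bag", "bagit.txt")                   => ("bag", "bag-info.txt")
      case ("bag", "bag-info.txt")                => ("bag", "manifest-md5.txt")
      case ("bag", "manifest-md5.txt")            => ("bag", "datacite.xml")
      case ("bag", "datacite.xml")                => ("bag", "tagmanifest-md5.txt")
      case _                                      => ("done", "none")
    }

  // Walk a one-file dataset with bagit enabled and print each phase once.
  var state = ("dataset", "metadata")
  while (state != ("done", "none")) {
    println(state)
    state = next(state, lastFile = true, bagit = true)
  }
}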
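The other recurring change is that every entry now goes through the new addMD5Entry helper, which wraps the entry's InputStream in a java.security.DigestInputStream so the MD5 accumulates while the bytes are copied into the zip and can later be listed in manifest-md5.txt. Below is a minimal standalone sketch of that pattern; the DigestZipDemo object and its sample data are hypothetical and not part of the commit.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}
import java.security.{DigestInputStream, MessageDigest}
import java.util.zip.{ZipEntry, ZipOutputStream}
import scala.collection.mutable

object DigestZipDemo extends App {
  // Hypothetical demo, not the controller's code. Same shape as addMD5Entry:
  // register a fresh MD5 digest under the entry name and wrap the stream so
  // every read also updates the digest.
  def addMD5Entry(name: String, is: Option[InputStream],
                  md5Map: mutable.HashMap[String, MessageDigest]): Option[InputStream] = {
    val md5 = MessageDigest.getInstance("MD5")
    md5Map.put(name, md5)
    is.map(s => new DigestInputStream(s, md5))
  }

  val md5Files = mutable.HashMap.empty[String, MessageDigest]
  val bytes = new ByteArrayOutputStream()
  val zip = new ZipOutputStream(bytes)

  // Stream one in-memory "file" into the zip while hashing it.
  zip.putNextEntry(new ZipEntry("data/hello.txt"))
  val raw: Option[InputStream] = Some(new ByteArrayInputStream("hello dataset".getBytes("UTF-8")))
  val wrapped = addMD5Entry("data/hello.txt", raw, md5Files).get

  val buffer = new Array[Byte](1024)
  var read = wrapped.read(buffer)
  while (read != -1) {
    zip.write(buffer, 0, read)
    read = wrapped.read(buffer)
  }
  zip.closeEntry()
  zip.close()

  // The accumulated digest is what a manifest-md5.txt line would record.
  val hex = md5Files("data/hello.txt").digest().map("%02x".format(_)).mkString
  println(s"$hex  data/hello.txt")
}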