@@ -2096,8 +2096,9 @@ class Datasets @Inject()(
20962096 }
20972097 }
20982098
2099- val md5Files = scala.collection.mutable.HashMap .empty[String , MessageDigest ] // for the files
2100- val md5Bag = scala.collection.mutable.HashMap .empty[String , MessageDigest ] // for the bag files
2099+ // Keep two MD5 checksum lists, one for dataset files and one for BagIt files
2100+ val md5Files = scala.collection.mutable.HashMap .empty[String , MessageDigest ]
2101+ val md5Bag = scala.collection.mutable.HashMap .empty[String , MessageDigest ]
21012102
21022103 val byteArrayOutputStream = new ByteArrayOutputStream (chunkSize)
21032104 val zip = new ZipOutputStream (byteArrayOutputStream)
@@ -2107,7 +2108,7 @@ class Datasets @Inject()(
21072108 var totalBytes = 0L
21082109 var level = " dataset"
21092110 var file_type = " metadata"
2110- var file_index = 0 // count for files
2111+ var file_index = 0
21112112
21122113 // Begin input stream with dataset info file
21132114 var is = addDatasetInfoToZip(dataFolder, dataset, zip)
@@ -2170,26 +2171,31 @@ class Datasets @Inject()(
21702171 }
21712172 }
21722173 case (" bag" , " bagit.txt" ) => {
2173- is = addBagItTextToZip(totalBytes,filenameMap.size,zip,dataset,user)
2174+ // BagIt "header" data e.g. date, author
2175+ is = addBagItTextToZip(totalBytes, filenameMap.size, zip, dataset, user)
21742176 is = addMD5Entry(" bagit.txt" , is, md5Files)
21752177 file_type = " bag-info.txt"
21762178 }
21772179 case (" bag" , " bag-info.txt" ) => {
2180+ // BagIt version & encoding
21782181 is = addBagInfoToZip(zip)
21792182 is = addMD5Entry(" bag-info.txt" , is, md5Files)
21802183 file_type = " manifest-md5.txt"
21812184 }
21822185 case (" bag" , " manifest-md5.txt" ) => {
2183- is = addManifestMD5ToZip(md5Files.toMap[String ,MessageDigest ],zip)
2186+ // List of all dataset (i.e. not BagIt) files and their checksums
2187+ is = addManifestMD5ToZip(md5Files.toMap[String ,MessageDigest ], zip)
21842188 is = addMD5Entry(" manifest-md5.txt" , is, md5Files)
21852189 file_type = " datacite.xml"
21862190 }
21872191 case (" bag" , " datacite.xml" ) => {
2188- is = addBagitMetadataToZip(zip)
2192+ // RDA-recommended DataCite xml file
2193+ is = addDataCiteMetadataToZip(zip)
21892194 file_type = " tagmanifest-md5.txt"
21902195 }
21912196 case (" bag" , " tagmanifest-md5.txt" ) => {
2192- is = addTagManifestMD5ToZip(md5Bag.toMap[String ,MessageDigest ],zip)
2197+ // List of all BagIt, xml or non-dataset files and their checksums
2198+ is = addTagManifestMD5ToZip(md5Bag.toMap[String ,MessageDigest ], zip)
21932199 is = addMD5Entry(" tagmanifest-md5.txt" , is, md5Files)
21942200 val (level, file_type) = (" done" , " none" )
21952201 }
@@ -2210,22 +2216,18 @@ class Datasets @Inject()(
22102216 Some (byteArrayOutputStream.toByteArray)
22112217 }
22122218 }
2213- if (level == " file" || level == " dataset" ){
2219+
2220+ if (level == " file" || level == " dataset" )
22142221 totalBytes += bytesRead
2215- }
2216- // reset temporary byte array
22172222 byteArrayOutputStream.reset()
22182223 Future .successful(chunk)
22192224 }
2220- case None => {
2221- Future .successful(None )
2222- }
2225+ case None => Future .successful(None )
22232226 }
22242227 })(pec)
22252228 }
22262229
2227- private def addMD5Entry (name : String , is : Option [InputStream ],
2228- md5HashMap : scala.collection.mutable.HashMap [String , MessageDigest ]) = {
2230+ private def addMD5Entry (name : String , is : Option [InputStream ], md5HashMap : scala.collection.mutable.HashMap [String , MessageDigest ]) = {
22292231 val md5 = MessageDigest .getInstance(" MD5" )
22302232 md5HashMap.put(name, md5)
22312233 Some (new DigestInputStream (is.get, md5))
@@ -2314,32 +2316,36 @@ class Datasets @Inject()(
23142316 Some (new ByteArrayInputStream (s.getBytes(" UTF-8" )))
23152317 }
23162318
/**
 * Opens the "bagit.txt" entry on `zip` and returns a stream over its text.
 *
 * The text is a list of "Label: value" lines: software agent, bagging date,
 * human-readable bag size, payload oxum ("bytes.fileCount"), the dataset id and
 * description and, when a contact is supplied, the contact's name and e-mail.
 * The bytes are NOT written to the zip here; the caller is expected to drain
 * the returned stream into the entry that was just opened.
 *
 * NOTE(review): the BagIt specification appears to place these metadata labels
 * in bag-info.txt and the version/encoding declaration in bagit.txt, i.e. the
 * opposite of what this method and addBagInfoToZip emit -- confirm.
 *
 * @param totalbytes total payload size in bytes
 * @param totalFiles number of payload files
 * @param zip        archive on which the new entry is started
 * @param dataset    dataset whose id and description are recorded
 * @param contact    optional user recorded as the bag's contact
 * @return a UTF-8 encoded stream over the entry text, wrapped in Some
 */
private def addBagItTextToZip(totalbytes: Long, totalFiles: Long, zip: ZipOutputStream, dataset: models.Dataset, contact: Option[models.User]) = {
  zip.putNextEntry(new ZipEntry("bagit.txt"))

  // "HH" is the 24-hour clock. The previous "hh" pattern produced 12-hour
  // values with no AM/PM marker, so 01:00 and 13:00 were indistinguishable.
  val baggingDate = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(Calendar.getInstance.getTime)

  val headerLines = List(
    "Bag-Software-Agent: clowder.ncsa.illinois.edu",
    "Bagging-Date: " + baggingDate,
    "Bag-Size: " + _root_.util.Formatters.humanReadableByteCount(totalbytes),
    "Payload-Oxum: " + totalbytes + "." + totalFiles,
    "Internal-Sender-Identifier: " + dataset.id,
    "Internal-Sender-Description: " + dataset.description
  )

  // Read the contact through the value bound here; the method has no `user`
  // parameter, so nothing outside this signature may be referenced.
  val contactLines = contact.toList.flatMap { person =>
    List(
      "Contact-Name: " + person.fullName,
      "Contact-Email: " + person.email.getOrElse("")
    )
  }

  val text = (headerLines ++ contactLines).map(_ + "\n").mkString
  Some(new ByteArrayInputStream(text.getBytes("UTF-8")))
}
23362338
/**
 * Opens the "bag-info.txt" entry on `zip` and returns a stream over its text,
 * which declares the BagIt version (0.97) and the tag-file character encoding
 * (UTF-8).
 *
 * The bytes are NOT written to the zip here; the caller is expected to drain
 * the returned stream into the entry that was just opened.
 *
 * @param zip archive on which the new entry is started
 * @return a UTF-8 encoded stream over the entry text, wrapped in Some
 */
private def addBagInfoToZip(zip: ZipOutputStream): Option[InputStream] = {
  zip.putNextEntry(new ZipEntry("bag-info.txt"))
  // Built as one immutable value: appending with `+=` to a `val` does not compile.
  val text =
    "BagIt-Version: 0.97\n" +
    "Tag-File-Character-Encoding: UTF-8\n"
  Some(new ByteArrayInputStream(text.getBytes("UTF-8")))
}
23422347
2348+ // List of all dataset (i.e. not BagIt) files and their checksums
23432349 private def addManifestMD5ToZip (md5map : Map [String ,MessageDigest ] ,zip : ZipOutputStream ) : Option [InputStream ] = {
23442350 zip.putNextEntry(new ZipEntry (" manifest-md5.txt" ))
23452351 var s : String = " "
@@ -2352,6 +2358,16 @@ class Datasets @Inject()(
23522358 Some (new ByteArrayInputStream (s.getBytes(" UTF-8" )))
23532359 }
23542360
/**
 * Opens the "metadata/datacite.xml" entry on `zip` and returns a stream over a
 * skeleton DataCite (kernel-4) document: an XML declaration and an empty
 * `resource` root element.
 *
 * The root element declares the DataCite default namespace and the `xsi`
 * prefix and is explicitly closed, so the output is well-formed XML. It is
 * still only a placeholder: no DataCite properties are emitted yet.
 *
 * The bytes are NOT written to the zip here; the caller is expected to drain
 * the returned stream into the entry that was just opened.
 *
 * @param zip archive on which the new entry is started
 * @return a UTF-8 encoded stream over the XML text, wrapped in Some
 */
private def addDataCiteMetadataToZip(zip: ZipOutputStream): Option[InputStream] = {
  zip.putNextEntry(new ZipEntry("metadata/datacite.xml"))
  val xml =
    "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
    "<resource xmlns=\"http://datacite.org/schema/kernel-4\"" +
    " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"" +
    " xsi:schemaLocation=\"http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd\">\n" +
    // TODO: emit the dataset's DataCite properties inside the root element.
    "</resource>\n"
  Some(new ByteArrayInputStream(xml.getBytes("UTF-8")))
}
2369+
2370+ // List of all BagIt, xml or non-dataset files and their checksums
23552371 private def addTagManifestMD5ToZip (md5map : Map [String ,MessageDigest ],zip : ZipOutputStream ) : Option [InputStream ] = {
23562372 zip.putNextEntry(new ZipEntry (" tagmanifest-md5.txt" ))
23572373 var s : String = " "
@@ -2364,11 +2380,6 @@ class Datasets @Inject()(
23642380 Some (new ByteArrayInputStream (s.getBytes(" UTF-8" )))
23652381 }
23662382
2367- private def addBagitMetadataToZip (zip : ZipOutputStream ): Option [InputStream ] = {
2368- zip.putNextEntry(new ZipEntry (" metadata/datacite.xml" ))
2369- var s = " Datacite v4 stuff goes here."
2370- Some (new ByteArrayInputStream (s.getBytes(" UTF-8" )))
2371- }
23722383
23732384 def download (id : UUID , compression : Int , tracking : Boolean ) = PermissionAction (Permission .DownloadFiles , Some (ResourceRef (ResourceRef .dataset, id))) { implicit request =>
23742385 implicit val user = request.user
0 commit comments