Skip to content

Commit c1bd2af

Browse files
committed
Add better comments
1 parent a76483b commit c1bd2af

File tree

1 file changed

+47
-36
lines changed

1 file changed

+47
-36
lines changed

app/api/Datasets.scala

Lines changed: 47 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2096,8 +2096,9 @@ class Datasets @Inject()(
20962096
}
20972097
}
20982098

2099-
val md5Files = scala.collection.mutable.HashMap.empty[String, MessageDigest] //for the files
2100-
val md5Bag = scala.collection.mutable.HashMap.empty[String, MessageDigest] //for the bag files
2099+
// Keep two maps of entry name -> MD5 digest: one for dataset files and one for BagIt files
2100+
val md5Files = scala.collection.mutable.HashMap.empty[String, MessageDigest]
2101+
val md5Bag = scala.collection.mutable.HashMap.empty[String, MessageDigest]
21012102

21022103
val byteArrayOutputStream = new ByteArrayOutputStream(chunkSize)
21032104
val zip = new ZipOutputStream(byteArrayOutputStream)
@@ -2107,7 +2108,7 @@ class Datasets @Inject()(
21072108
var totalBytes = 0L
21082109
var level = "dataset"
21092110
var file_type = "metadata"
2110-
var file_index = 0 //count for files
2111+
var file_index = 0
21112112

21122113
// Begin input stream with dataset info file
21132114
var is = addDatasetInfoToZip(dataFolder, dataset, zip)
@@ -2170,26 +2171,31 @@ class Datasets @Inject()(
21702171
}
21712172
}
21722173
case ("bag", "bagit.txt") => {
2173-
is = addBagItTextToZip(totalBytes,filenameMap.size,zip,dataset,user)
2174+
// BagIt "header" data e.g. date, author
2175+
is = addBagItTextToZip(totalBytes, filenameMap.size, zip, dataset, user)
21742176
is = addMD5Entry("bagit.txt", is, md5Files)
21752177
file_type = "bag-info.txt"
21762178
}
21772179
case ("bag", "bag-info.txt") => {
2180+
// BagIt version & encoding
21782181
is = addBagInfoToZip(zip)
21792182
is = addMD5Entry("bag-info.txt", is, md5Files)
21802183
file_type = "manifest-md5.txt"
21812184
}
21822185
case ("bag", "manifest-md5.txt") => {
2183-
is = addManifestMD5ToZip(md5Files.toMap[String,MessageDigest],zip)
2186+
// List of all dataset (i.e. not BagIt) files and their checksums
2187+
is = addManifestMD5ToZip(md5Files.toMap[String,MessageDigest], zip)
21842188
is = addMD5Entry("manifest-md5.txt", is, md5Files)
21852189
file_type = "datacite.xml"
21862190
}
21872191
case ("bag", "datacite.xml") => {
2188-
is = addBagitMetadataToZip(zip)
2192+
// RDA-recommended DataCite xml file
2193+
is = addDataCiteMetadataToZip(zip)
21892194
file_type = "tagmanifest-md5.txt"
21902195
}
21912196
case ("bag", "tagmanifest-md5.txt") => {
2192-
is = addTagManifestMD5ToZip(md5Bag.toMap[String,MessageDigest],zip)
2197+
// List of all BagIt, xml or non-dataset files and their checksums
2198+
is = addTagManifestMD5ToZip(md5Bag.toMap[String,MessageDigest], zip)
21932199
is = addMD5Entry("tagmanifest-md5.txt", is, md5Files)
21942200
val (level, file_type) = ("done", "none")
21952201
}
@@ -2210,22 +2216,18 @@ class Datasets @Inject()(
22102216
Some(byteArrayOutputStream.toByteArray)
22112217
}
22122218
}
2213-
if (level == "file" || level == "dataset"){
2219+
2220+
if (level == "file" || level == "dataset")
22142221
totalBytes += bytesRead
2215-
}
2216-
// reset temporary byte array
22172222
byteArrayOutputStream.reset()
22182223
Future.successful(chunk)
22192224
}
2220-
case None => {
2221-
Future.successful(None)
2222-
}
2225+
case None => Future.successful(None)
22232226
}
22242227
})(pec)
22252228
}
22262229

2227-
private def addMD5Entry(name: String, is: Option[InputStream],
2228-
md5HashMap: scala.collection.mutable.HashMap[String, MessageDigest]) = {
2230+
private def addMD5Entry(name: String, is: Option[InputStream], md5HashMap: scala.collection.mutable.HashMap[String, MessageDigest]) = {
22292231
val md5 = MessageDigest.getInstance("MD5")
22302232
md5HashMap.put(name, md5)
22312233
Some(new DigestInputStream(is.get, md5))
@@ -2314,32 +2316,36 @@ class Datasets @Inject()(
23142316
Some(new ByteArrayInputStream(s.getBytes("UTF-8")))
23152317
}
23162318

2317-
private def addBagItTextToZip(totalbytes: Long, totalFiles: Long, zip: ZipOutputStream, dataset: models.Dataset, user: Option[models.User]) = {
2319+
// BagIt "header" data e.g. date, author
2320+
/**
 * Opens the "bagit.txt" zip entry and builds the text that goes into it.
 *
 * The text is one "Label: value\n" line per field, in this order:
 * Bag-Software-Agent, Bagging-Date, Bag-Size, Payload-Oxum,
 * Internal-Sender-Identifier, Internal-Sender-Description and, only when a
 * contact is supplied, Contact-Name and Contact-Email.
 *
 * This method only calls `putNextEntry`; it does not write the bytes to the
 * zip itself but hands them back as a stream.
 *
 * NOTE(review): in the BagIt specification these labels belong in
 * "bag-info.txt", while "bagit.txt" carries the version/encoding declaration.
 * The entry name is left unchanged here so it keeps matching whatever name the
 * caller uses for this stream - confirm and swap both sides together.
 *
 * @param totalbytes number of bytes reported in Bag-Size and Payload-Oxum
 * @param totalFiles number of files reported in Payload-Oxum
 * @param zip        zip stream on which the "bagit.txt" entry is opened
 * @param dataset    dataset whose id and description are written
 * @param contact    optional user whose full name and email are written
 * @return a stream over the UTF-8 encoded text, wrapped in `Some`
 */
private def addBagItTextToZip(totalbytes: Long, totalFiles: Long, zip: ZipOutputStream, dataset: models.Dataset, contact: Option[User]) = {
  zip.putNextEntry(new ZipEntry("bagit.txt"))

  // "HH" is the 24-hour clock; "hh" is 12-hour and would be ambiguous without an AM/PM marker.
  val baggingDate = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(Calendar.getInstance.getTime)

  val headerLines = List(
    "Bag-Software-Agent: clowder.ncsa.illinois.edu",
    "Bagging-Date: " + baggingDate,
    "Bag-Size: " + _root_.util.Formatters.humanReadableByteCount(totalbytes),
    "Payload-Oxum: " + totalbytes + "." + totalFiles,
    "Internal-Sender-Identifier: " + dataset.id,
    "Internal-Sender-Description: " + dataset.description
  )

  // Use the bound contact rather than an out-of-scope `user`; no contact means no contact lines.
  val contactLines = contact.toList.flatMap { u =>
    List(
      "Contact-Name: " + u.fullName,
      "Contact-Email: " + u.email.getOrElse("")
    )
  }

  val s = (headerLines ++ contactLines).map(_ + "\n").mkString
  Some(new ByteArrayInputStream(s.getBytes("UTF-8")))
}
23362338

2339+
// BagIt version & encoding
23372340
/**
 * Opens the "bag-info.txt" zip entry and returns the text to place in it:
 * the BagIt version line followed by the tag-file character-encoding line.
 *
 * This method only calls `putNextEntry`; the bytes are returned as a stream
 * rather than written to the zip here.
 *
 * @param zip zip stream on which the "bag-info.txt" entry is opened
 * @return a stream over the UTF-8 encoded text, wrapped in `Some`
 */
private def addBagInfoToZip(zip : ZipOutputStream) : Option[InputStream] = {
  zip.putNextEntry(new ZipEntry("bag-info.txt"))
  // Built as a single immutable value: `s +=` on a `val` does not compile.
  val s = "BagIt-Version: 0.97\n" + "Tag-File-Character-Encoding: UTF-8\n"
  Some(new ByteArrayInputStream(s.getBytes("UTF-8")))
}
23422347

2348+
// List of all dataset (i.e. not BagIt) files and their checksums
23432349
private def addManifestMD5ToZip(md5map : Map[String,MessageDigest] ,zip : ZipOutputStream) : Option[InputStream] = {
23442350
zip.putNextEntry(new ZipEntry("manifest-md5.txt"))
23452351
var s : String = ""
@@ -2352,6 +2358,16 @@ class Datasets @Inject()(
23522358
Some(new ByteArrayInputStream(s.getBytes("UTF-8")))
23532359
}
23542360

2361+
/**
 * Opens the "metadata/datacite.xml" zip entry and returns a skeleton DataCite
 * document to place in it.
 *
 * The document is only a root `resource` element with its namespace and
 * schema-location declarations, so it is well-formed XML but carries no
 * metadata properties yet.
 *
 * This method only calls `putNextEntry`; the bytes are returned as a stream
 * rather than written to the zip here.
 *
 * @param zip zip stream on which the "metadata/datacite.xml" entry is opened
 * @return a stream over the UTF-8 encoded XML, wrapped in `Some`
 */
private def addDataCiteMetadataToZip(zip: ZipOutputStream): Option[InputStream] = {
  zip.putNextEntry(new ZipEntry("metadata/datacite.xml"))
  // The `xsi` prefix must be declared and the root element closed for the output to parse.
  // TODO: add the DataCite properties (identifier, creators, titles, ...) inside <resource>.
  val s =
    "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
    "<resource xmlns=\"http://datacite.org/schema/kernel-4\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd\">\n" +
    "</resource>\n"
  Some(new ByteArrayInputStream(s.getBytes("UTF-8")))
}
2369+
2370+
// List of all BagIt, xml or non-dataset files and their checksums
23552371
private def addTagManifestMD5ToZip(md5map : Map[String,MessageDigest],zip : ZipOutputStream) : Option[InputStream] = {
23562372
zip.putNextEntry(new ZipEntry("tagmanifest-md5.txt"))
23572373
var s : String = ""
@@ -2364,11 +2380,6 @@ class Datasets @Inject()(
23642380
Some(new ByteArrayInputStream(s.getBytes("UTF-8")))
23652381
}
23662382

2367-
// Pre-commit placeholder (every line below carries a "-" diff marker, i.e. it is removed by this commit).
// Opens the "metadata/datacite.xml" zip entry and returns a stream over a fixed
// placeholder sentence encoded as UTF-8; nothing is written to the zip here.
private def addBagitMetadataToZip(zip: ZipOutputStream): Option[InputStream] = {
2368-
zip.putNextEntry(new ZipEntry("metadata/datacite.xml"))
2369-
var s = "Datacite v4 stuff goes here."
2370-
Some(new ByteArrayInputStream(s.getBytes("UTF-8")))
2371-
}
23722383

23732384
def download(id: UUID, compression: Int, tracking: Boolean) = PermissionAction(Permission.DownloadFiles, Some(ResourceRef(ResourceRef.dataset, id))) { implicit request =>
23742385
implicit val user = request.user

0 commit comments

Comments
 (0)