Skip to content

Commit c69b2f6

Browse files
committed
add a bunch of fields to datacite
1 parent bf3b828 commit c69b2f6

File tree

1 file changed

+96
-15
lines changed

1 file changed

+96
-15
lines changed

app/api/Datasets.scala

Lines changed: 96 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2193,7 +2193,7 @@ class Datasets @Inject()(
21932193
}
21942194
case ("bag", "datacite.xml") => {
21952195
// RDA-recommended DataCite xml file
2196-
is = addDataCiteMetadataToZip(zip)
2196+
is = addDataCiteMetadataToZip(zip, dataset)
21972197
file_type = "tagmanifest-md5.txt"
21982198
}
21992199
case ("bag", "tagmanifest-md5.txt") => {
@@ -2348,7 +2348,7 @@ class Datasets @Inject()(
23482348
}
23492349

23502350
// List of all dataset (i.e. not BagIt) files and their checksums
2351-
private def addManifestMD5ToZip(md5map : Map[String,MessageDigest] ,zip : ZipOutputStream) : Option[InputStream] = {
2351+
private def addManifestMD5ToZip(md5map: Map[String,MessageDigest], zip: ZipOutputStream) : Option[InputStream] = {
23522352
zip.putNextEntry(new ZipEntry("manifest-md5.txt"))
23532353
var s : String = ""
23542354
md5map.foreach{
@@ -2360,29 +2360,110 @@ class Datasets @Inject()(
23602360
Some(new ByteArrayInputStream(s.getBytes("UTF-8")))
23612361
}
23622362

2363-
private def addDataCiteMetadataToZip(zip: ZipOutputStream): Option[InputStream] = {
2363+
/**
 * Starts the "metadata/datacite.xml" entry in the given zip and builds the DataCite
 * (metadata kernel 4) record describing the dataset.
 *
 * The XML is not written to the zip here; it is returned as a stream.
 * NOTE(review): presumably the caller copies the returned stream into the entry
 * opened by this method - confirm against the download enumerator.
 *
 * Schema reference: https://support.datacite.org/docs/schema-40
 *
 * @param zip     zip output stream in which the "metadata/datacite.xml" entry is opened
 * @param dataset dataset whose name, description, author, creation date and id are described
 * @return the UTF-8 encoded XML document wrapped in an input stream (always a `Some`)
 */
private def addDataCiteMetadataToZip(zip: ZipOutputStream, dataset: Dataset): Option[InputStream] = {
  zip.putNextEntry(new ZipEntry("metadata/datacite.xml"))

  // Dataset names, descriptions and user names are free text; escape the XML special
  // characters so that values such as "R&D <draft>" cannot produce a malformed document.
  def xmlEscape(raw: String): String =
    Option(raw).getOrElse("")
      .replace("&", "&amp;")
      .replace("<", "&lt;")
      .replace(">", "&gt;")
      .replace("\"", "&quot;")
      .replace("'", "&apos;")

  // Prep user data (DataCite v4 specifies Family, Given as name format).
  // Prefer the full user record when it can be found; otherwise fall back to the
  // author information stored on the dataset itself.
  val account = userService.get(dataset.author.id)
  val creatorName = account.map(_.fullName).getOrElse(dataset.author.fullName)
  val creatorOrcid = account.flatMap(_.profile).flatMap(_.orcidID).filter(_.nonEmpty)

  // Lowercase 'y' is the calendar year. Uppercase 'Y' is the week-based year, which
  // reports the wrong year for dates in the first/last days of a calendar year.
  val publicationYear = new SimpleDateFormat("yyyy").format(dataset.created)
  val createdDate = new SimpleDateFormat("yyyy-MM-dd").format(dataset.created)

  val xml = new StringBuilder

  // The xsi prefix must be declared for xsi:schemaLocation to be namespace-well-formed,
  // and the default namespace places every element in the DataCite kernel-4 schema.
  xml ++= "<resource xmlns=\"http://datacite.org/schema/kernel-4\"" +
    " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"" +
    " xsi:schemaLocation=\"http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd\">\n"

  // ---------- REQUIRED FIELDS ----------

  // Identifier (DOI)
  // TODO: required by the schema, but no DOI is available on the dataset yet.

  // Creators
  xml ++= "<creators>\n"
  xml ++= "\t<creator>\n"
  xml ++= "\t\t<creatorName>" + xmlEscape(creatorName) + "</creatorName>\n"
  // The identifier scheme is an attribute of nameIdentifier, not a sibling element.
  creatorOrcid.foreach { orcid =>
    xml ++= "\t\t<nameIdentifier nameIdentifierScheme=\"ORCID\">" + xmlEscape(orcid) + "</nameIdentifier>\n"
  }
  xml ++= "\t</creator>\n"
  xml ++= "</creators>\n"

  // Title (Required)
  xml ++= "<titles>\n\t<title>" + xmlEscape(dataset.name) + "</title>\n</titles>\n"

  // Publisher (Required)
  /**
   * "The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces
   * the resource. This property will be used to formulate the citation, so consider the prominence of the role."
   *
   * Not sure Clowder is right here.
   */
  xml ++= "<publisher>Clowder</publisher>\n"

  // PublicationYear (Required)
  xml ++= "<publicationYear>" + publicationYear + "</publicationYear>\n"

  // ResourceType
  /**
   * The format is open, but the preferred format is a single term of some detail so that a pair can be formed with the sub-property.
   * Text formats can be free-text OR terms from the CASRAI Publications resource type list. (14)
   * Examples:
   * Dataset/Census Data, where "Dataset" is resourceTypeGeneral value and "Census Data" is ResourceType value.
   * Text/Conference Abstract, where "Text" is resourceTypeGeneral value and "Conference Abstract" is resourceType value aligned with CASRAI Publications term.
   */
  // XML element names are case-sensitive; the schema element is "resourceType".
  xml ++= "<resourceType resourceTypeGeneral=\"Dataset\">Clowder Dataset</resourceType>\n"

  // ---------- RECOMMENDED/OPTIONAL FIELDS ----------

  // Description (R) - descriptionType is an attribute of description.
  xml ++= "<descriptions>" +
    "\n\t<description descriptionType=\"Abstract\">" + xmlEscape(dataset.description) + "</description>" +
    "\n</descriptions>\n"

  /** Contributors (R)
    "The institution or person responsible for collecting, creating, or otherwise
    contributing to the development of the dataset." List of example types.

    Should check every file in the dataset for uploaders/metadata contributors other
    than the creator and include here as well (?).
  **/

  // Date (Created) - dateType is an attribute of date.
  xml ++= "<dates>" +
    "\n\t<date dateType=\"Created\">" + createdDate + "</date>" +
    "\n</dates>\n"

  // AlternateIdentifier - must be wrapped in alternateIdentifiers and carry a type.
  xml ++= "<alternateIdentifiers>" +
    "\n\t<alternateIdentifier alternateIdentifierType=\"Clowder ID\">" +
    xmlEscape(dataset.id.stringify) + "</alternateIdentifier>" +
    "\n</alternateIdentifiers>\n"

  // Format - must be wrapped in formats.
  xml ++= "<formats>\n\t<format>application/zip</format>\n</formats>\n"

  // Subject (R)
  /**
   * Subject, keyword, classification code, or key phrase describing the resource.
   * Tags?
   */

  // RelatedIdentifier (R) - DOIs, ISBNs, URLs of related resources e.g. 'cited by XYZ'.
  // Size - free text, e.g. "126 kb", "8 files", can have many
  // Version
  // Language - e.g. "en"
  // Rights - free text e.g. "Creative Commons Attribution 3.0"
  // GeoLocation (R)
  // FundingReference

  xml ++= "</resource>"
  Some(new ByteArrayInputStream(xml.toString.getBytes("UTF-8")))
}
23882469

0 commit comments

Comments
 (0)