@@ -2193,7 +2193,7 @@ class Datasets @Inject()(
21932193 }
21942194 case (" bag" , " datacite.xml" ) => {
21952195 // RDA-recommended DataCite xml file
2196- is = addDataCiteMetadataToZip(zip)
2196+ is = addDataCiteMetadataToZip(zip, dataset )
21972197 file_type = " tagmanifest-md5.txt"
21982198 }
21992199 case (" bag" , " tagmanifest-md5.txt" ) => {
@@ -2348,7 +2348,7 @@ class Datasets @Inject()(
23482348 }
23492349
23502350 // List of all dataset (i.e. not BagIt) files and their checksums
2351- private def addManifestMD5ToZip (md5map : Map [String ,MessageDigest ] , zip : ZipOutputStream ) : Option [InputStream ] = {
2351+ private def addManifestMD5ToZip (md5map : Map [String ,MessageDigest ], zip : ZipOutputStream ) : Option [InputStream ] = {
23522352 zip.putNextEntry(new ZipEntry (" manifest-md5.txt" ))
23532353 var s : String = " "
23542354 md5map.foreach{
@@ -2360,29 +2360,110 @@ class Datasets @Inject()(
23602360 Some (new ByteArrayInputStream (s.getBytes(" UTF-8" )))
23612361 }
23622362
/**
 * Opens the `metadata/datacite.xml` entry in the bag and builds the RDA-recommended
 * DataCite (kernel-4) description of the dataset.
 *
 * This method only starts the zip entry and returns the XML as a stream; it does not
 * write the XML bytes to `zip` itself.
 *
 * Schema reference: https://support.datacite.org/docs/schema-40
 *
 * @param zip     archive being assembled; a new entry is started on it
 * @param dataset dataset whose author, name, description, creation date and id are described
 * @return the UTF-8 encoded XML document, always wrapped in `Some`
 */
private def addDataCiteMetadataToZip(zip: ZipOutputStream, dataset: Dataset): Option[InputStream] = {
  zip.putNextEntry(new ZipEntry("metadata/datacite.xml"))

  // Prefer the stored user record over the author snapshot embedded in the dataset.
  // NOTE(review): DataCite v4 specifies "Family, Given" as the creatorName format;
  // fullName is emitted as-is here.
  val creator = userService.get(dataset.author.id)
  val creatorName = creator.map(_.fullName).getOrElse(dataset.author.fullName)
  val creatorOrcid = creator.flatMap(_.profile).flatMap(_.orcidID).filter(_.nonEmpty)

  // Lower-case 'y' is the calendar year. Upper-case 'Y' is the ISO week-year, which
  // differs from the calendar year for dates close to January 1st.
  val publicationYear = new SimpleDateFormat("yyyy").format(dataset.created)
  val createdDate = new SimpleDateFormat("yyyy-MM-dd").format(dataset.created)

  val xml = new StringBuilder

  // The xsi prefix and the DataCite default namespace must be declared on the root
  // element, otherwise xsi:schemaLocation is an unbound prefix.
  xml ++= "<resource xmlns=\"http://datacite.org/schema/kernel-4\"" +
    " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"" +
    " xsi:schemaLocation=\"http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd\">\n"

  // ---------- REQUIRED FIELDS ----------
  // Identifier (DOI) - not emitted: no DOI is available for the dataset here.

  // Creators
  xml ++= "<creators>\n"
  xml ++= "\t<creator>\n"
  xml ++= "\t\t<creatorName>" + escapeDataCiteXml(creatorName) + "</creatorName>\n"
  creatorOrcid.foreach { orcid =>
    // nameIdentifierScheme is an attribute of nameIdentifier, not a sibling element.
    xml ++= "\t\t<nameIdentifier nameIdentifierScheme=\"ORCID\">" + escapeDataCiteXml(orcid) + "</nameIdentifier>\n"
  }
  xml ++= "\t</creator>\n"
  xml ++= "</creators>\n"

  // Title (Required)
  xml ++= "<titles>\n\t<title>" + escapeDataCiteXml(dataset.name) + "</title>\n</titles>\n"

  // Publisher (Required)
  // "The name of the entity that holds, archives, publishes prints, distributes, releases,
  // issues, or produces the resource. This property will be used to formulate the citation,
  // so consider the prominence of the role."
  // Not sure Clowder is right here.
  xml ++= "<publisher>Clowder</publisher>\n"

  // PublicationYear (Required)
  xml ++= "<publicationYear>" + publicationYear + "</publicationYear>\n"

  // ResourceType
  // The format is open, but the preferred format is a single term of some detail so that a
  // pair can be formed with resourceTypeGeneral, e.g. Dataset/Census Data where "Dataset" is
  // the resourceTypeGeneral value and "Census Data" is the resourceType value.
  xml ++= "<resourceType resourceTypeGeneral=\"Dataset\">Clowder Dataset</resourceType>\n"

  // ---------- RECOMMENDED/OPTIONAL FIELDS ----------

  // Description (R) - descriptionType is an attribute of description.
  xml ++= "<descriptions>" +
    "\n\t<description descriptionType=\"Abstract\">" + escapeDataCiteXml(dataset.description) + "</description>" +
    "\n</descriptions>\n"

  // Contributors (R)
  // "The institution or person responsible for collecting, creating, or otherwise
  // contributing to the development of the dataset."
  // Should check every file in the dataset for uploaders/metadata contributors other
  // than the creator and include here as well (?).

  // Date (Created) - dateType is an attribute of date.
  xml ++= "<dates>" +
    "\n\t<date dateType=\"Created\">" + createdDate + "</date>" +
    "\n</dates>\n"

  // AlternateIdentifier - must be wrapped and must carry a (free-text) type.
  xml ++= "<alternateIdentifiers>" +
    "\n\t<alternateIdentifier alternateIdentifierType=\"Clowder\">" +
    escapeDataCiteXml(dataset.id.stringify) + "</alternateIdentifier>" +
    "\n</alternateIdentifiers>\n"

  // Format - must be wrapped in formats.
  xml ++= "<formats>\n\t<format>application/zip</format>\n</formats>\n"

  // Subject (R) - subject, keyword, classification code, or key phrase. Tags?
  // RelatedIdentifier (R) - DOIs, ISBNs, URLs of related resources e.g. 'cited by XYZ'.
  // Size - free text, e.g. "126 kb", "8 files", can have many
  // Version
  // Language - e.g. "en"
  // Rights - free text e.g. "Creative Commons Attribution 3.0"
  // GeoLocation (R)
  // FundingReference

  xml ++= "</resource>"
  Some(new ByteArrayInputStream(xml.toString.getBytes("UTF-8")))
}

/** Escapes the five XML special characters so free text cannot break the document; null becomes "". */
private def escapeDataCiteXml(value: String): String =
  Option(value).getOrElse("").flatMap {
    case '&'  => "&amp;"
    case '<'  => "&lt;"
    case '>'  => "&gt;"
    case '"'  => "&quot;"
    case '\'' => "&apos;"
    case c    => c.toString
  }
23882469
0 commit comments