@@ -168,6 +168,9 @@ public class Metadata {
168168 // project manager email address, read from the "project.manager.email" configuration property
169169 private static String PM_EMAIL = DoeServletContextListener .getConfigurationProperty ("project.manager.email" );
170170
171+ // set pattern for DOI normalization
172+ private static final Pattern DOI_TRIM_PATTERN = Pattern .compile ("(10.\\ d{4,9}\\ /[-._;()\\ /:A-Za-z0-9]+)$" );
173+
171174 // create and start a ConnectorFactory for use by "autopopulate" service
172175 static {
173176 try {
@@ -2145,19 +2148,50 @@ private void sendToOsti(EntityManager em, DOECodeMetadata md) throws IOException
21452148 *
21462149 * @param md the Metadata to evaluate
21472150 */
2148- private void removeRiDups (DOECodeMetadata md ) {
2149- // remove RI duplicates
2151+ private void normalizeRelatedIdentifiers (DOECodeMetadata md ) {
21502152 List <RelatedIdentifier > currentList = md .getRelatedIdentifiers ();
21512153
2152- if (currentList != null ) {
2153- Set <RelatedIdentifier > s = new HashSet <>();
2154+ // nothing to process
2155+ if (currentList == null || currentList .isEmpty ())
2156+ return ;
2157+
2158+ // trim DOI values
2159+ for (RelatedIdentifier ri : currentList )
2160+ if (RelatedIdentifier .Type .DOI .equals (ri .getIdentifierType ()))
2161+ ri .setIdentifierValue (trimDoi (ri .getIdentifierValue ()));
21542162
2155- s .addAll (currentList );
2156- currentList .clear ();
2157- currentList .addAll (s );
2163+ // remove RI duplicates
2164+ Set <RelatedIdentifier > s = new HashSet <>();
2165+ s .addAll (currentList );
2166+ currentList .clear ();
2167+ currentList .addAll (s );
2168+ md .setRelatedIdentifiers (currentList );
2169+ }
21582170
2159- md .setRelatedIdentifiers (currentList );
2171+ /**
2172+ * Trim away unneeded DOI prefixes, etc.
2173+ *
2174+ * @param doi the DOI to trim
2175+ */
2176+ private String trimDoi (String doi ) {
2177+ // trim DOI down to 10.* variation
2178+ if (!StringUtils .isBlank (doi )) {
2179+ doi = doi .trim ();
2180+ Matcher m = DOI_TRIM_PATTERN .matcher (doi );
2181+ if (m .find ())
2182+ doi = m .group (1 );
21602183 }
2184+ return doi ;
2185+ }
2186+
2187+ /**
2188+ * Normalize any DOI information.
2189+ *
2190+ * @param md the Metadata to evaluate
2191+ */
2192+ private void normalizeDois (DOECodeMetadata md ) {
2193+ // trim main DOI
2194+ md .setDoi (trimDoi (md .getDoi ()));
21612195 }
21622196
21632197 /**
@@ -2166,7 +2200,8 @@ private void removeRiDups(DOECodeMetadata md) {
21662200 * @param md the Metadata to normalize
21672201 */
21682202 private void performDataNormalization (DOECodeMetadata md ) {
2169- removeRiDups (md );
2203+ normalizeDois (md );
2204+ normalizeRelatedIdentifiers (md );
21702205 }
21712206
21722207 /**
0 commit comments