@@ -170,6 +170,7 @@ public class Metadata {
170170
171171 // set pattern for DOI normalization
172172 private static final Pattern DOI_TRIM_PATTERN = Pattern .compile ("(10.\\ d{4,9}\\ /[-._;()\\ /:A-Za-z0-9]+)$" );
173+ private static final Pattern URL_TRIM_PATTERN = Pattern .compile ("^(.*)(?<!\\ /)\\ /?$" );
173174
174175 // create and start a ConnectorFactory for use by "autopopulate" service
175176 static {
@@ -2144,7 +2145,7 @@ private void sendToOsti(EntityManager em, DOECodeMetadata md) throws IOException
21442145 }
21452146
21462147 /**
2147- * Remove duplicate RI entries from metadata .
2148+ * Remove duplicate RI entries and normalize values .
21482149 *
21492150 * @param md the Metadata to evaluate
21502151 */
@@ -2155,10 +2156,12 @@ private void normalizeRelatedIdentifiers(DOECodeMetadata md) {
21552156 if (currentList == null || currentList .isEmpty ())
21562157 return ;
21572158
2158- // trim DOI values
2159+ // trim DOI and URL values
21592160 for (RelatedIdentifier ri : currentList )
21602161 if (RelatedIdentifier .Type .DOI .equals (ri .getIdentifierType ()))
21612162 ri .setIdentifierValue (trimDoi (ri .getIdentifierValue ()));
2163+ else if (RelatedIdentifier .Type .URL .equals (ri .getIdentifierType ()))
2164+ ri .setIdentifierValue (trimUrl (ri .getIdentifierValue ()));
21622165
21632166 // remove RI duplicates
21642167 Set <RelatedIdentifier > s = new HashSet <>();
@@ -2185,13 +2188,35 @@ private String trimDoi(String doi) {
21852188 }
21862189
21872190 /**
2188- * Normalize any DOI information.
2191+ * Trim away unneeded URL characters, etc.
2192+ *
2193+ * @param url the URL to trim
2194+ */
2195+ private String trimUrl (String url ) {
2196+ // remove extra spaces and single trailing slash, if exist
2197+ if (!StringUtils .isBlank (url )) {
2198+ url = url .trim ();
2199+ Matcher m = URL_TRIM_PATTERN .matcher (url );
2200+ if (m .find ())
2201+ url = m .group (1 );
2202+ }
2203+ return url ;
2204+ }
2205+
2206+ /**
2207+ * Normalize metadata information.
21892208 *
21902209 * @param md the Metadata to evaluate
21912210 */
2192- private void normalizeDois (DOECodeMetadata md ) {
2211+ private void normalizeMetadata (DOECodeMetadata md ) {
21932212 // trim main DOI
21942213 md .setDoi (trimDoi (md .getDoi ()));
2214+
2215+ // trim main URLs
2216+ md .setRepositoryLink (trimUrl (md .getRepositoryLink ()));
2217+ md .setLandingPage (trimUrl (md .getLandingPage ()));
2218+ md .setProprietaryUrl (trimUrl (md .getProprietaryUrl ()));
2219+ md .setDocumentationUrl (trimUrl (md .getDocumentationUrl ()));
21952220 }
21962221
21972222 /**
@@ -2200,7 +2225,7 @@ private void normalizeDois(DOECodeMetadata md) {
22002225 * @param md the Metadata to normalize
22012226 */
22022227 private void performDataNormalization (DOECodeMetadata md ) {
2203- normalizeDois (md );
2228+ normalizeMetadata (md );
22042229 normalizeRelatedIdentifiers (md );
22052230 }
22062231
0 commit comments