Skip to content

Commit 70e53cd

Browse files
authored
Pre-compile regex patterns (#14192)
1 parent e1a9244 commit 70e53cd

File tree

4 files changed (+31 -14 lines changed)

jablib/src/main/java/org/jabref/logic/bst/BstPreviewLayout.java

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import java.nio.file.Files;
55
import java.nio.file.Path;
66
import java.util.List;
7+
import java.util.regex.Pattern;
78

89
import org.jabref.logic.cleanup.ConvertToBibtexCleanup;
910
import org.jabref.logic.formatter.bibtexfields.RemoveNewlinesFormatter;
@@ -23,6 +24,11 @@ public final class BstPreviewLayout implements PreviewLayout {
2324

2425
private static final Logger LOGGER = LoggerFactory.getLogger(BstPreviewLayout.class);
2526

27+
private static final Pattern COMMENT_PATTERN = Pattern.compile("%.*");
28+
private static final Pattern BIBITEM_PATTERN = Pattern.compile("\\\\bibitem[{].*[}]");
29+
private static final Pattern LATEX_COMMAND_PATTERN = Pattern.compile("(?m)^\\\\.*$");
30+
private static final Pattern MULTIPLE_SPACES_PATTERN = Pattern.compile(" +");
31+
2632
private final String name;
2733
private String source;
2834
private BstVM bstVM;
@@ -55,23 +61,23 @@ public String generatePreview(BibEntry originalEntry, BibDatabaseContext databas
5561
if (error != null) {
5662
return error;
5763
}
58-
// ensure that the entry is of BibTeX format (and do not modify the original entry)
64+
// Ensure that the entry is of BibTeX format (and do not modify the original entry)
5965
BibEntry entry = new BibEntry(originalEntry);
6066
new ConvertToBibtexCleanup().cleanup(entry);
6167
String result = bstVM.render(List.of(entry));
6268
// Remove all comments
63-
result = result.replaceAll("%.*", "");
69+
result = COMMENT_PATTERN.matcher(result).replaceAll("");
6470
// Remove all LaTeX comments
6571
// The RemoveLatexCommandsFormatter keeps the words inside latex environments. Therefore, we remove them manually
6672
result = result.replace("\\begin{thebibliography}{1}", "");
6773
result = result.replace("\\end{thebibliography}", "");
6874
// The RemoveLatexCommandsFormatter keeps the word inside the latex command, but we want to remove that completely
69-
result = result.replaceAll("\\\\bibitem[{].*[}]", "");
75+
result = BIBITEM_PATTERN.matcher(result).replaceAll("");
7076
// We want to replace \newblock by a space instead of completely removing it
7177
result = result.replace("\\newblock", " ");
72-
// remove all latex commands statements - assumption: command in a separate line
73-
result = result.replaceAll("(?m)^\\\\.*$", "");
74-
// remove some IEEEtran.bst output (resulting from a multiline \providecommand)
78+
// Remove all latex commands statements - assumption: command in a separate line
79+
result = LATEX_COMMAND_PATTERN.matcher(result).replaceAll("");
80+
// Remove some IEEEtran.bst output (resulting from a multiline \providecommand)
7581
result = result.replace("#2}}", "");
7682
// Have quotes right - and more
7783
result = new LatexToUnicodeFormatter().format(result);
@@ -81,7 +87,7 @@ public String generatePreview(BibEntry originalEntry, BibDatabaseContext databas
8187
result = new RemoveNewlinesFormatter().format(result);
8288
result = new RemoveLatexCommandsFormatter().format(result);
8389
result = new RemoveTilde().format(result);
84-
result = result.trim().replaceAll(" +", " ");
90+
result = MULTIPLE_SPACES_PATTERN.matcher(result.trim()).replaceAll(" ");
8591
return result;
8692
}
8793

jablib/src/main/java/org/jabref/logic/formatter/bibtexfields/HtmlToUnicodeFormatter.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package org.jabref.logic.formatter.bibtexfields;
22

3+
import java.util.regex.Pattern;
4+
35
import org.jabref.architecture.AllowedToUseApacheCommonsLang3;
46
import org.jabref.logic.cleanup.Formatter;
57
import org.jabref.logic.l10n.Localization;
@@ -11,6 +13,8 @@
1113
@AllowedToUseApacheCommonsLang3("There is no equivalent in Google's Guava")
1214
public class HtmlToUnicodeFormatter extends Formatter implements LayoutFormatter {
1315

16+
private static final Pattern HTML_TAGS_PATTERN = Pattern.compile("<[^>]*>");
17+
1418
@Override
1519
public String getName() {
1620
return Localization.lang("HTML to Unicode");
@@ -34,6 +38,7 @@ public String getExampleInput() {
3438
@Override
3539
public String format(@NonNull String fieldText) {
3640
// StringEscapeUtils converts characters and regex kills tags
37-
return StringEscapeUtils.unescapeHtml4(fieldText).replaceAll("<[^>]*>", "");
41+
String plainText = StringEscapeUtils.unescapeHtml4(fieldText);
42+
return HTML_TAGS_PATTERN.matcher(plainText).replaceAll("");
3843
}
3944
}

jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import java.util.ArrayList;
88
import java.util.List;
99
import java.util.Optional;
10+
import java.util.regex.Pattern;
1011

1112
import org.jabref.logic.cleanup.FieldFormatterCleanup;
1213
import org.jabref.logic.formatter.bibtexfields.NormalizePagesFormatter;
@@ -36,6 +37,9 @@
3637
public class EuropePmcFetcher implements IdBasedParserFetcher, SearchBasedParserFetcher {
3738
private static final Logger LOGGER = LoggerFactory.getLogger(EuropePmcFetcher.class);
3839

40+
// Prefer a full ISO date if provided
41+
private static final Pattern DATE_PATTERN = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");
42+
3943
@Override
4044
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException {
4145
return new URI("https://www.ebi.ac.uk/europepmc/webservices/rest/search?query=" + identifier + "&resultType=core&format=json").toURL();
@@ -101,13 +105,12 @@ private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException {
101105
if (result.has("journalInfo") && result.getJSONObject("journalInfo").has("issn")) {
102106
entry.setField(StandardField.ISSN, result.getJSONObject("journalInfo").getString("issn"));
103107
}
104-
// Prefer a full ISO date if provided
105-
final String datePattern = "\\d{4}-\\d{2}-\\d{2}";
108+
106109
String printPubDate = result.optString("printPublicationDate");
107110
String dateOfPublication = result.optString("dateOfPublication");
108-
if (printPubDate != null && printPubDate.matches(datePattern)) {
111+
if (printPubDate != null && DATE_PATTERN.matcher(printPubDate).matches()) {
109112
entry.setField(StandardField.DATE, printPubDate);
110-
} else if (dateOfPublication != null && dateOfPublication.matches(datePattern)) {
113+
} else if (dateOfPublication != null && DATE_PATTERN.matcher(dateOfPublication).matches()) {
111114
entry.setField(StandardField.DATE, dateOfPublication);
112115
}
113116

jablib/src/main/java/org/jabref/logic/importer/fetcher/JstorFetcher.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import java.util.ArrayList;
1111
import java.util.List;
1212
import java.util.Optional;
13+
import java.util.regex.Pattern;
1314
import java.util.stream.Collectors;
1415

1516
import org.jabref.logic.importer.FetcherException;
@@ -43,6 +44,8 @@ public class JstorFetcher implements SearchBasedParserFetcher, FulltextFetcher,
4344
private static final String CITE_HOST = HOST + "/citation/text/";
4445
private static final String URL_QUERY_REGEX = "(?<=\\?).*";
4546

47+
private static final Pattern URL_QUERY_PATTERN = Pattern.compile(URL_QUERY_REGEX);
48+
4649
private final ImportFormatPreferences importFormatPreferences;
4750

4851
public JstorFetcher(ImportFormatPreferences importFormatPreferences) {
@@ -63,10 +66,10 @@ public URL getUrlForIdentifier(String identifier) throws MalformedURLException {
6366
identifier = identifier.replace("https://www.jstor.org/stable", "");
6467
identifier = identifier.replace("http://www.jstor.org/stable", "");
6568
}
66-
identifier = identifier.replaceAll(URL_QUERY_REGEX, "");
69+
identifier = URL_QUERY_PATTERN.matcher(identifier).replaceAll("");
6770

6871
if (identifier.contains("/")) {
69-
// if identifier links to a entry with a valid doi
72+
// if identifier links to an entry with a valid doi
7073
return URLUtil.create(start + identifier);
7174
}
7275
// else use default doi start.

0 commit comments

Comments
 (0)