diff --git a/CHANGELOG.md b/CHANGELOG.md index d2b106501b9..2446755fbe7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv ### Added +- We implemented escaping of delimiters when parsing KeywordLists [#12810](https://github.com/JabRef/jabref/issues/12810) - We fixed an issue where "Print preview" would throw a `NullPointerException` if no printers were available. [#13708](https://github.com/JabRef/jabref/issues/13708) - We added the option to enable the language server in the preferences. [#13697](https://github.com/JabRef/jabref/pull/13697) - We introduced an option in Preferences under (under Linked files -> Linked file name conventions) to automatically rename linked files when an entry data changes. [#11316](https://github.com/JabRef/jabref/issues/11316) diff --git a/jablib/src/main/java/org/jabref/model/entry/BibEntry.java b/jablib/src/main/java/org/jabref/model/entry/BibEntry.java index 55443db80ac..64fa3d2e706 100644 --- a/jablib/src/main/java/org/jabref/model/entry/BibEntry.java +++ b/jablib/src/main/java/org/jabref/model/entry/BibEntry.java @@ -842,7 +842,7 @@ public Optional putKeywords(KeywordList keywords, Character delimit } // Set new keyword field - String newValue = keywords.getAsString(delimiter); + String newValue = keywords.bibtexSerialize(delimiter); return this.setField(StandardField.KEYWORDS, newValue); } @@ -1051,9 +1051,7 @@ public KeywordList getFieldAsKeywords(Field field, Character keywordSeparator) { return storedList.get(); } } - - KeywordList keywords = getField(field) - .map(content -> KeywordList.parse(content, keywordSeparator)) + KeywordList keywords = getField(field).map(content -> KeywordList.parse(content, keywordSeparator)) .orElse(new KeywordList()); if (field instanceof StandardField standardField) { diff --git a/jablib/src/main/java/org/jabref/model/entry/Keyword.java b/jablib/src/main/java/org/jabref/model/entry/Keyword.java index d3df45becca..2d2a1839767 100644 --- a/jablib/src/main/java/org/jabref/model/entry/Keyword.java +++ b/jablib/src/main/java/org/jabref/model/entry/Keyword.java @@ -13,6 +13,7 @@ */ public class Keyword extends ChainNode implements Comparable { + // Note: {@link org.jabref.model.entry.KeywordList#parse(java.lang.String, java.lang.Character, java.lang.Character) offers configuration, which is not available here public static Character DEFAULT_HIERARCHICAL_DELIMITER = '>'; private final String keyword; @@ -84,9 +85,25 @@ private String getSubchainAsString(Character hierarchicalDelimiter) { .orElse(""); } + /* + * Used for BibTex export, where we need to escape the delimiter with \ + */ + public String getSubchainAsStringWithEscaping(Character delimiter) { + return getEscaped(delimiter) + + getChild().map(child -> " " + DEFAULT_HIERARCHICAL_DELIMITER + " " + child.getSubchainAsStringWithEscaping(DEFAULT_HIERARCHICAL_DELIMITER)) + .orElse(""); + } + + /* + * This ensures that delimiters within keyword values are not misinterpreted as separators. + */ + private String getEscaped(Character delimiter) { + return keyword.replace(delimiter.toString(), "\\" + delimiter); + } /** * Gets the keyword of this node in the chain. */ + public String get() { return keyword; } diff --git a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java index 76adb9add8d..1be493fc192 100644 --- a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java +++ b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java @@ -21,6 +21,7 @@ public class KeywordList implements Iterable { private final List keywordChains; + private boolean spaceAfterDelimiter; public KeywordList() { keywordChains = new ArrayList<>(); @@ -52,6 +53,52 @@ public static KeywordList parse(String keywordString, Character delimiter, Chara Objects.requireNonNull(hierarchicalDelimiter); KeywordList keywordList = new KeywordList(); + List hierarchy = new ArrayList<>(); + StringBuilder currentToken = new StringBuilder(); + boolean isEscaping = false; + + keywordList.spaceAfterDelimiter = keywordString.contains(delimiter + " "); + + for (int i = 0; i < keywordString.length(); i++) { + char currentChar = keywordString.charAt(i); + + if (isEscaping && currentChar == delimiter) { // we only escape the keyword delimiter + currentToken.append(currentChar); + isEscaping = false; + } else if (currentChar == '\\') { + isEscaping = true; + } else if (currentChar == hierarchicalDelimiter) { + hierarchy.add(currentToken.toString().trim()); + currentToken.setLength(0); + } else if (currentChar == delimiter) { + hierarchy.add(currentToken.toString()); + currentToken.setLength(0); + keywordList.add(Keyword.of(hierarchy.toArray(new String[0]))); + hierarchy.clear(); + } else { + currentToken.append(currentChar); + } + } + + // Handle the final token + if (!currentToken.isEmpty() || !hierarchy.isEmpty()) { + hierarchy.add(currentToken.toString().trim()); + keywordList.add(Keyword.of(hierarchy.toArray(new String[0]))); + } + + return keywordList; + } + + public static KeywordList oldParse(String keywordString, Character delimiter, Character hierarchicalDelimiter) { + if (StringUtil.isBlank(keywordString)) { + return new KeywordList(); + } + + Objects.requireNonNull(delimiter); + Objects.requireNonNull(hierarchicalDelimiter); + + KeywordList keywordList = new KeywordList(); + keywordList.spaceAfterDelimiter = keywordString.contains(delimiter + " "); StringTokenizer tok = new StringTokenizer(keywordString, delimiter.toString()); while (tok.hasMoreTokens()) { @@ -77,6 +124,13 @@ public static String serialize(List keywords, Character delimiter) { return keywords.stream().map(Keyword::get).collect(Collectors.joining(delimiter.toString())); } + // This method serializes Keywords supporting escaping of the delimiter for BibTeX Serialization (Issue #12810, #12532) + public String bibtexSerialize(Character delimiter) { + // If the keywords contain ", " (as in PubMed records) we keep the space. + String joiner = spaceAfterDelimiter ? delimiter + " " : delimiter.toString(); + return keywordChains.stream().map(keyword -> keyword.getSubchainAsStringWithEscaping(delimiter)).collect(Collectors.joining(joiner)); + } + public static KeywordList merge(String keywordStringA, String keywordStringB, Character delimiter) { KeywordList keywordListA = parse(keywordStringA, delimiter); KeywordList keywordListB = parse(keywordStringB, delimiter); diff --git a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java index 033d2c26615..5e53c5fe672 100644 --- a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java +++ b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java @@ -1,7 +1,12 @@ package org.jabref.model.entry; +import java.util.stream.Stream; + import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -16,6 +21,17 @@ void setUp() { keywords.add("keywordTwo"); } + private static Stream provideParseKeywordCases() { + return Stream.of( + Arguments.of("keyword\\,one, keywordTwo", new KeywordList("keyword,one", "keywordTwo")), + Arguments.of("keywordOne\\,, keywordTwo", new KeywordList("keywordOne,", "keywordTwo")), + Arguments.of("keyword\\\\, keywordTwo", new KeywordList("keyword\\", "keywordTwo")), + Arguments.of("keyword\\,one > sub", new KeywordList(Keyword.of("keyword,one", "sub"))), + Arguments.of("one\\,two\\,three, four", new KeywordList("one,two,three", "four")), + Arguments.of("keywordOne\\\\", new KeywordList("keywordOne\\")) + ); + } + @Test void parseEmptyStringReturnsEmptyList() { assertEquals(new KeywordList(), KeywordList.parse("", ',')); @@ -115,4 +131,21 @@ void mergeTwoDistinctKeywordsShouldReturnTheTwoKeywordsMerged() { void mergeTwoListsOfKeywordsShouldReturnTheKeywordsMerged() { assertEquals(new KeywordList("Figma", "Adobe", "JabRef", "Eclipse", "JetBrains"), KeywordList.merge("Figma, Adobe, JetBrains, Eclipse", "Adobe, JabRef", ',')); } + + @ParameterizedTest + @MethodSource("provideParseKeywordCases") + void parseKeywordWithEscapedDelimiterDoesNotSplitKeyword(String input, KeywordList expected) { + assertEquals(expected, KeywordList.parse(input, ',', '>')); + } + + // TODO: We need to redefine the roundtrip test depending on the context GUI or BibTex, + // we want the user to type in escaping character but see the "clean" String as in: + // keyword1\,keyword2, keyword3 --> "keyword1,keyword2", "keyword3" + @ParameterizedTest + @MethodSource("provideParseKeywordCases") + void roundTripPreservesStructure(String original) { + KeywordList parsed = KeywordList.oldParse(original, ',', '>'); + // We need to test the toString() functionality + assertEquals(original, parsed.bibtexSerialize(',')); + } } diff --git a/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java b/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java index 2651ba514f8..c6050a879ba 100644 --- a/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java +++ b/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java @@ -2,13 +2,28 @@ import java.util.HashSet; import java.util.Set; +import java.util.stream.Stream; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import static org.junit.jupiter.api.Assertions.assertEquals; class KeywordTest { + private static Stream provideParseKeywordCases() { + return Stream.of( + Arguments.of("keyword\\,one"), + Arguments.of("keywordOne\\,"), + Arguments.of("keyword\\\\"), + Arguments.of("keyword\\,one > sub"), + Arguments.of("one\\,two > three"), + Arguments.of("keywordOne\\\\") + ); + } + @Test void getPathFromRootAsStringForSimpleChain() { Keyword keywordChain = Keyword.of("A", "B", "C"); @@ -25,4 +40,12 @@ void getAllSubchainsAsStringForSimpleChain() { assertEquals(expected, keywordChain.getAllSubchainsAsString('>')); } + + @ParameterizedTest + @MethodSource("provideParseKeywordCases") + void getSubchainAsString(String input) { + Keyword keyword = KeywordList.parse(input, ',', '>').get(0); + // we are testing toString() functionality + assertEquals(input, keyword.toString()); + } }