Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
7b5505c
fixes #12810: Implement escaping for keyword separators (FKA #12888)
miguel-cordoba Jul 24, 2025
44257ad
fixes #12810: Implement escaping for keyword separators (FKA #12888)
miguel-cordoba Jul 24, 2025
e6c52dd
fixes #12810: Implement escaping for keyword separators (FKA #12888)
miguel-cordoba Jul 24, 2025
d2ad73d
removes obvious comment, improves CHANGELOG message
miguel-cordoba Jul 24, 2025
465955a
Merge branch 'main' into fix-for-issue-12810
miguel-cordoba Jul 24, 2025
4928536
removes obvious comments
miguel-cordoba Jul 24, 2025
2ccc24e
Merge remote-tracking branch 'origin/fix-for-issue-12810' into fix-fo…
miguel-cordoba Jul 24, 2025
1111794
tackle List.of() review comment
miguel-cordoba Jul 24, 2025
83198c0
undo tackle List.of() review comment
miguel-cordoba Jul 24, 2025
cc5fa8b
Merge branch 'main' into fix-for-issue-12810
miguel-cordoba Jul 25, 2025
87d1ff6
Merge branch 'main' into fix-for-issue-12810
miguel-cordoba Jul 25, 2025
3d2785b
Merge branch 'main' into fix-for-issue-12810
koppor Jul 28, 2025
1b455d3
adds tests after review
miguel-cordoba Jul 29, 2025
2b85d0c
adds trag-bot changes
miguel-cordoba Jul 29, 2025
4b31cd0
Merge branch 'main' into fix-for-issue-12810
miguel-cordoba Jul 29, 2025
d15f2ef
- extends Keyword#toString to ensure round-trip integrity
miguel-cordoba Jul 30, 2025
d77bb5e
Merge remote-tracking branch 'origin/fix-for-issue-12810' into fix-fo…
miguel-cordoba Jul 30, 2025
02c252d
- improves Changelog message
miguel-cordoba Jul 30, 2025
027dde0
Merge branch 'main' into fix-for-issue-12810
miguel-cordoba Jul 30, 2025
ea690d6
- removes autoscaping on keyword#toString -> KeywordListTest#roundTri…
miguel-cordoba Jul 30, 2025
b6f728f
Merge remote-tracking branch 'origin/fix-for-issue-12810' into fix-fo…
miguel-cordoba Jul 30, 2025
a72b614
Merge branch 'main' into fix-for-issue-12810
miguel-cordoba Jul 31, 2025
4c5181b
- removes autoscaping on keyword#toString -> KeywordListTest#roundTri…
miguel-cordoba Aug 3, 2025
b02aeea
Merge branch 'main' into fix-for-issue-12810
miguel-cordoba Aug 6, 2025
eef54b6
Merge remote-tracking branch 'origin/fix-for-issue-12810' into fix-fo…
miguel-cordoba Aug 6, 2025
ffeb78d
Working on new approach for parse/serielide depending on context (UI/…
miguel-cordoba Aug 6, 2025
1f321c2
Merge branch 'main' into fix-for-issue-12810
miguel-cordoba Aug 10, 2025
3e2636f
Merge branch 'main' into fix-for-issue-12810
miguel-cordoba Aug 13, 2025
634d302
Working on new approach for parse/serielide depending on context (UI/…
miguel-cordoba Aug 13, 2025
b2d902f
Merge remote-tracking branch 'origin/fix-for-issue-12810' into fix-fo…
miguel-cordoba Aug 13, 2025
42710fa
WIP: adds old KeyWordList#parse as #bibtexParse
miguel-cordoba Aug 13, 2025
4b2a8d8
Merge branch 'main' into fix-for-issue-12810
miguel-cordoba Aug 13, 2025
a2031ed
Merge branch 'main' into fix-for-issue-12810
miguel-cordoba Aug 24, 2025
af3c50e
WIP: adds KeywordList#bibtexSerialize (autoescapes delimiter). Needs …
miguel-cordoba Aug 24, 2025
e955fd1
WIP: extends KeywordList#parse and #bibtexSerialize
miguel-cordoba Aug 31, 2025
a3d69f2
Merge branch 'main' into fix-for-issue-12810
miguel-cordoba Aug 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv

### Added

- We implemented escaping of the keyword delimiter (using a backslash) when parsing keyword lists. [#12810](https://github.com/JabRef/jabref/issues/12810)
- We fixed an issue where "Print preview" would throw a `NullPointerException` if no printers were available. [#13708](https://github.com/JabRef/jabref/issues/13708)
- We added the option to enable the language server in the preferences. [#13697](https://github.com/JabRef/jabref/pull/13697)
- We introduced an option in Preferences (under Linked files -> Linked file name conventions) to automatically rename linked files when an entry's data changes. [#11316](https://github.com/JabRef/jabref/issues/11316)
Expand Down
6 changes: 2 additions & 4 deletions jablib/src/main/java/org/jabref/model/entry/BibEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -842,7 +842,7 @@ public Optional<FieldChange> putKeywords(KeywordList keywords, Character delimit
}

// Set new keyword field
String newValue = keywords.getAsString(delimiter);
String newValue = keywords.bibtexSerialize(delimiter);
return this.setField(StandardField.KEYWORDS, newValue);
}

Expand Down Expand Up @@ -1051,9 +1051,7 @@ public KeywordList getFieldAsKeywords(Field field, Character keywordSeparator) {
return storedList.get();
}
}

KeywordList keywords = getField(field)
.map(content -> KeywordList.parse(content, keywordSeparator))
KeywordList keywords = getField(field).map(content -> KeywordList.parse(content, keywordSeparator))
.orElse(new KeywordList());

if (field instanceof StandardField standardField) {
Expand Down
17 changes: 17 additions & 0 deletions jablib/src/main/java/org/jabref/model/entry/Keyword.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
*/
public class Keyword extends ChainNode<Keyword> implements Comparable<Keyword> {

// Note: {@link org.jabref.model.entry.KeywordList#parse(java.lang.String, java.lang.Character, java.lang.Character)} offers configuration of the hierarchical delimiter, which is not available here
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment merely states what is visible from the code and doesn't provide additional value or reasoning. It should be removed or enhanced with actual implementation rationale.

public static Character DEFAULT_HIERARCHICAL_DELIMITER = '>';
private final String keyword;

Expand Down Expand Up @@ -84,9 +85,25 @@ private String getSubchainAsString(Character hierarchicalDelimiter) {
.orElse("");
}

/*
* Used for BibTeX export, where we need to escape the delimiter with \
*/
Comment on lines +88 to +90
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The spelling 'BibTex' in the comment is incorrect according to project standards. It should be 'BibTeX' for consistency in documentation and comments.

/**
 * Renders this node and all of its descendants as a single string in which every
 * occurrence of the keyword delimiter inside a node is prefixed with a backslash.
 * Nodes are joined with {@code " > "} (the {@link #DEFAULT_HIERARCHICAL_DELIMITER}).
 *
 * @param delimiter the keyword delimiter that must be escaped inside each node
 * @return the escaped textual representation of this sub-chain
 */
public String getSubchainAsStringWithEscaping(Character delimiter) {
    // The keyword delimiter (not the hierarchical one) has to be handed down the chain:
    // otherwise child nodes would escape '>' and leave the real delimiter unescaped,
    // which splits the keyword when the serialized value is parsed again.
    return getEscaped(delimiter) +
            getChild().map(child -> " " + DEFAULT_HIERARCHICAL_DELIMITER + " " + child.getSubchainAsStringWithEscaping(delimiter))
                      .orElse("");
}

/**
 * Returns this node's keyword with a backslash inserted in front of every occurrence
 * of the given delimiter, so that a delimiter inside the keyword value is not
 * misinterpreted as a separator.
 *
 * @param delimiter the character to escape
 * @return the keyword text with each delimiter occurrence preceded by {@code \}
 */
private String getEscaped(Character delimiter) {
    String plainDelimiter = delimiter.toString();
    String escapedDelimiter = "\\" + delimiter;
    return keyword.replace(plainDelimiter, escapedDelimiter);
}
/**
 * Returns the keyword text stored in this node of the chain.
 *
 * @return the keyword of this node
 */
public String get() {
    return this.keyword;
}
Expand Down
54 changes: 54 additions & 0 deletions jablib/src/main/java/org/jabref/model/entry/KeywordList.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
public class KeywordList implements Iterable<Keyword> {

private final List<Keyword> keywordChains;
private boolean spaceAfterDelimiter;

public KeywordList() {
keywordChains = new ArrayList<>();
Expand Down Expand Up @@ -52,6 +53,52 @@ public static KeywordList parse(String keywordString, Character delimiter, Chara
Objects.requireNonNull(hierarchicalDelimiter);

KeywordList keywordList = new KeywordList();
List<String> hierarchy = new ArrayList<>();
StringBuilder currentToken = new StringBuilder();
boolean isEscaping = false;

keywordList.spaceAfterDelimiter = keywordString.contains(delimiter + " ");

for (int i = 0; i < keywordString.length(); i++) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note this is #12888 (comment) :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for your review! As I dived into the project I saw that this loop had not been addressed. So I used @ungerts proposed comment, since it is readable and solves the issue. Am I missing something?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is very OK. I like links to provide context.

Other reviewers might think: why a for loop etc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ooh ok perfect :)

char currentChar = keywordString.charAt(i);

if (isEscaping && currentChar == delimiter) { // we only escape the keyword delimiter
currentToken.append(currentChar);
isEscaping = false;
} else if (currentChar == '\\') {
isEscaping = true;
} else if (currentChar == hierarchicalDelimiter) {
hierarchy.add(currentToken.toString().trim());
currentToken.setLength(0);
} else if (currentChar == delimiter) {
hierarchy.add(currentToken.toString());
currentToken.setLength(0);
keywordList.add(Keyword.of(hierarchy.toArray(new String[0])));
hierarchy.clear();
} else {
currentToken.append(currentChar);
}
}

// Handle the final token
if (!currentToken.isEmpty() || !hierarchy.isEmpty()) {
hierarchy.add(currentToken.toString().trim());
keywordList.add(Keyword.of(hierarchy.toArray(new String[0])));
}

return keywordList;
}

public static KeywordList oldParse(String keywordString, Character delimiter, Character hierarchicalDelimiter) {
if (StringUtil.isBlank(keywordString)) {
return new KeywordList();
}

Objects.requireNonNull(delimiter);
Objects.requireNonNull(hierarchicalDelimiter);

KeywordList keywordList = new KeywordList();
keywordList.spaceAfterDelimiter = keywordString.contains(delimiter + " ");

StringTokenizer tok = new StringTokenizer(keywordString, delimiter.toString());
while (tok.hasMoreTokens()) {
Expand All @@ -77,6 +124,13 @@ public static String serialize(List<Keyword> keywords, Character delimiter) {
return keywords.stream().map(Keyword::get).collect(Collectors.joining(delimiter.toString()));
}

/**
 * Serializes the keyword chains of this list into a single string for BibTeX
 * serialization, escaping the delimiter inside keywords (Issue #12810, #12532).
 *
 * @param delimiter the keyword delimiter placed between chains and escaped within them
 * @return the chains rendered via {@code getSubchainAsStringWithEscaping} and joined by the delimiter
 */
public String bibtexSerialize(Character delimiter) {
    // If the keywords contain ", " (as in PubMed records) we keep the space.
    String separator;
    if (spaceAfterDelimiter) {
        separator = delimiter + " ";
    } else {
        separator = delimiter.toString();
    }

    StringBuilder serialized = new StringBuilder();
    boolean isFirst = true;
    for (Keyword chain : keywordChains) {
        if (!isFirst) {
            serialized.append(separator);
        }
        serialized.append(chain.getSubchainAsStringWithEscaping(delimiter));
        isFirst = false;
    }
    return serialized.toString();
}

public static KeywordList merge(String keywordStringA, String keywordStringB, Character delimiter) {
KeywordList keywordListA = parse(keywordStringA, delimiter);
KeywordList keywordListB = parse(keywordStringB, delimiter);
Expand Down
33 changes: 33 additions & 0 deletions jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
package org.jabref.model.entry;

import java.util.stream.Stream;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertEquals;

Expand All @@ -16,6 +21,17 @@ void setUp() {
keywords.add("keywordTwo");
}

/**
 * Supplies (input string, expected keyword list) pairs for the escaped-delimiter parsing tests.
 * NOTE(review): the cases ending in a backslash expect the backslash to be kept in the parsed
 * keyword - confirm that KeywordList#parse preserves a backslash that does not precede the delimiter.
 */
private static Stream<Arguments> provideParseKeywordCases() {
    Stream.Builder<Arguments> cases = Stream.builder();
    cases.add(Arguments.of("keyword\\,one, keywordTwo", new KeywordList("keyword,one", "keywordTwo")));
    cases.add(Arguments.of("keywordOne\\,, keywordTwo", new KeywordList("keywordOne,", "keywordTwo")));
    cases.add(Arguments.of("keyword\\\\, keywordTwo", new KeywordList("keyword\\", "keywordTwo")));
    cases.add(Arguments.of("keyword\\,one > sub", new KeywordList(Keyword.of("keyword,one", "sub"))));
    cases.add(Arguments.of("one\\,two\\,three, four", new KeywordList("one,two,three", "four")));
    cases.add(Arguments.of("keywordOne\\\\", new KeywordList("keywordOne\\")));
    return cases.build();
}

@Test
void parseEmptyStringReturnsEmptyList() {
assertEquals(new KeywordList(), KeywordList.parse("", ','));
Expand Down Expand Up @@ -115,4 +131,21 @@ void mergeTwoDistinctKeywordsShouldReturnTheTwoKeywordsMerged() {
void mergeTwoListsOfKeywordsShouldReturnTheKeywordsMerged() {
assertEquals(new KeywordList("Figma", "Adobe", "JabRef", "Eclipse", "JetBrains"), KeywordList.merge("Figma, Adobe, JetBrains, Eclipse", "Adobe, JabRef", ','));
}

@ParameterizedTest
@MethodSource("provideParseKeywordCases")
void parseKeywordWithEscapedDelimiterDoesNotSplitKeyword(String input, KeywordList expected) {
    // ',' separates keywords, '>' separates hierarchy levels
    KeywordList actual = KeywordList.parse(input, ',', '>');

    assertEquals(expected, actual);
}

// TODO: We need to redefine the round-trip test depending on the context (GUI or BibTeX):
// the user types the escape character but should see the "clean" string, as in:
// keyword1\,keyword2, keyword3 --> "keyword1,keyword2", "keyword3"
@ParameterizedTest
@MethodSource("provideParseKeywordCases")
void roundTripPreservesStructure(String original) {
KeywordList parsed = KeywordList.oldParse(original, ',', '>');
// We need to test the toString() functionality
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment restates what is obvious from the code and doesn't provide additional value. Such comments should be removed as they create maintenance overhead.

assertEquals(original, parsed.bibtexSerialize(','));
}
}
23 changes: 23 additions & 0 deletions jablib/src/test/java/org/jabref/model/entry/KeywordTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,28 @@

import java.util.HashSet;
import java.util.Set;
import java.util.stream.Stream;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertEquals;

class KeywordTest {

/**
 * Supplies keyword strings containing escaped delimiters and backslashes;
 * each one is parsed with ',' as keyword delimiter and '>' as hierarchical delimiter.
 */
private static Stream<Arguments> provideParseKeywordCases() {
    Stream.Builder<Arguments> cases = Stream.builder();
    cases.add(Arguments.of("keyword\\,one"));
    cases.add(Arguments.of("keywordOne\\,"));
    cases.add(Arguments.of("keyword\\\\"));
    cases.add(Arguments.of("keyword\\,one > sub"));
    cases.add(Arguments.of("one\\,two > three"));
    cases.add(Arguments.of("keywordOne\\\\"));
    return cases.build();
}

@Test
void getPathFromRootAsStringForSimpleChain() {
Keyword keywordChain = Keyword.of("A", "B", "C");
Expand All @@ -25,4 +40,12 @@ void getAllSubchainsAsStringForSimpleChain() {

assertEquals(expected, keywordChain.getAllSubchainsAsString('>'));
}

@ParameterizedTest
@MethodSource("provideParseKeywordCases")
void getSubchainAsString(String input) {
    KeywordList parsedKeywords = KeywordList.parse(input, ',', '>');
    Keyword firstKeyword = parsedKeywords.get(0);

    // the string form of the first parsed keyword is expected to equal the raw input
    String rendered = firstKeyword.toString();
    assertEquals(input, rendered);
}
}
Loading