From b53573166decf0e1acdf5967e7083a41f7e05cc0 Mon Sep 17 00:00:00 2001 From: timurturbil Date: Thu, 14 Aug 2025 12:32:38 +0300 Subject: [PATCH 1/7] support BibLaTeX datamodel validations --- .../jabref/logic/biblog/BibtexLogParser.java | 96 ++++++++++++++----- .../logic/biblog/BibtexLogParserTest.java | 39 ++++++++ 2 files changed, 109 insertions(+), 26 deletions(-) diff --git a/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java b/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java index be859a5c054..5c0e357f7b8 100644 --- a/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java +++ b/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java @@ -19,8 +19,13 @@ * Parses the contents of a .blg (BibTeX log) file to extract warning messages. */ public class BibtexLogParser { - private static final Pattern WARNING_PATTERN = Pattern.compile("^Warning--(?[a-zA-Z ]+) in (?[^\\s]+)$"); + private static final Pattern BIBTEX_WARNING_PATTERN = Pattern.compile("^Warning--(?[a-zA-Z ]+) in (?[^\\s]+)$"); + private static final Pattern BIBLATEXT_WARNING_PATTERN = Pattern.compile( + "(?:(?:\\[\\d+\\] )?Biber\\.pm:\\d+> )?WARN - Datamodel: [a-z]+ entry '(?[^']+)' \\((?[^)]+)\\): (?.+)"); + private static final String EMPTY_FIELD_PREFIX = "empty"; + private static final String INVALID_FIELD_PREFIX = "field '"; + private static final String MULTI_INVALID_FIELD_PREFIX = "field - one of '"; public List parseBiblog(@NonNull Path blgFilePath) throws IOException { List warnings = new ArrayList<>(); @@ -33,38 +38,77 @@ public List parseBiblog(@NonNull Path blgFilePath) throws IOExceptio } /** - * Parses a single line from the .blg file to identify a warning. + * Parses a single line from a .blg file to identify a warning. *

- * Currently supports parsing warnings of the format: - *

-     * Warning--[message] in [entryKey]
-     * 
+ * This method supports two warning formats: + *
    + *
  1. BibTeX Warnings: Simple warnings from the legacy BibTeX backend. + *
    {@code Warning--[message] in [entryKey]}
    * For example: {@code Warning--empty journal in Scholey_2013} + *
  2. + *
  3. BibLaTeX Datamodel Warnings: Detailed warnings from the Biber backend, including datamodel validation issues. + *
    {@code [Log line] > WARN - Datamodel: [entry type] entry '[entryKey]' ([fileName]): [message]}
    + * For example: {@code Biber.pm:123> WARN - Datamodel: article entry 'Scholey_2013' (file.bib): Invalid field 'journal'} + *
  4. + *
* - * @param line a single line from the .blg file - * @return an Optional containing a {@link BibWarning} if a match is found, or empty otherwise + * @param line The single line from the .blg file to parse. + * @return An {@link Optional} containing a {@link BibWarning} if a match is found, or an empty {@code Optional} otherwise. */ - private Optional parseWarningLine(String line) { - // TODO: Support additional warning formats - Matcher matcher = WARNING_PATTERN.matcher(line); - if (!matcher.find()) { - return Optional.empty(); + Optional parseWarningLine(String line) { + // For BibTeX warnings + Matcher bibTexMatcher = BIBTEX_WARNING_PATTERN.matcher(line); + if (bibTexMatcher.find()) { + String message = bibTexMatcher.group("message").trim(); + String entryKey = bibTexMatcher.group("entryKey"); + // Extract field name for warnings related to empty fields (e.g., "empty journal" -> fieldName = "journal") + String fieldName = null; + if (message.startsWith(EMPTY_FIELD_PREFIX)) { + fieldName = message.substring(EMPTY_FIELD_PREFIX.length()).trim(); + fieldName = FieldFactory.parseField(fieldName).getName(); + } + + return Optional.of(new BibWarning( + SeverityType.WARNING, + message, + fieldName, + entryKey + )); } - String message = matcher.group("message").trim(); - String entryKey = matcher.group("entryKey"); - // Extract field name for warnings related to empty fields (e.g., "empty journal" -> fieldName = "journal") - String fieldName = null; - if (message.startsWith(EMPTY_FIELD_PREFIX)) { - fieldName = message.substring(EMPTY_FIELD_PREFIX.length()).trim(); - fieldName = FieldFactory.parseField(fieldName).getName(); + // For BiblaTex warnings + Matcher biblaTexMatcher = BIBLATEXT_WARNING_PATTERN.matcher(line); + if (biblaTexMatcher.find()) { + String message = biblaTexMatcher.group("message").trim(); + String entryKey = biblaTexMatcher.group("entryKey"); + String fieldName = null; + + // Extract field name for warnings related to invalid fields (e.g., "Invalid 
field 'publisher' for entrytype 'article'" -> fieldName = "publisher") + String lowerCaseMessage = message.toLowerCase(); + if (lowerCaseMessage.contains(INVALID_FIELD_PREFIX)) { + int startIndex = lowerCaseMessage.indexOf(INVALID_FIELD_PREFIX) + INVALID_FIELD_PREFIX.length(); + int endIndex = lowerCaseMessage.indexOf('\'', startIndex); + if (endIndex != -1) { + fieldName = lowerCaseMessage.substring(startIndex, endIndex).trim(); + fieldName = FieldFactory.parseField(fieldName).getName(); + } + } else if (lowerCaseMessage.contains(MULTI_INVALID_FIELD_PREFIX)) { + int startIndex = lowerCaseMessage.indexOf(MULTI_INVALID_FIELD_PREFIX) + MULTI_INVALID_FIELD_PREFIX.length(); + int endIndex = lowerCaseMessage.indexOf('\'', startIndex); + if (endIndex != -1) { + fieldName = lowerCaseMessage.substring(startIndex, endIndex).trim().split(",")[0].trim(); + fieldName = FieldFactory.parseField(fieldName).getName(); + } + } + + return Optional.of(new BibWarning( + SeverityType.WARNING, + message, + fieldName, + entryKey + )); } - return Optional.of(new BibWarning( - SeverityType.WARNING, - message, - fieldName, - entryKey - )); + return Optional.empty(); } } diff --git a/jablib/src/test/java/org/jabref/logic/biblog/BibtexLogParserTest.java b/jablib/src/test/java/org/jabref/logic/biblog/BibtexLogParserTest.java index fdc2e06a0ec..cd156a32cf5 100644 --- a/jablib/src/test/java/org/jabref/logic/biblog/BibtexLogParserTest.java +++ b/jablib/src/test/java/org/jabref/logic/biblog/BibtexLogParserTest.java @@ -3,12 +3,17 @@ import java.io.IOException; import java.nio.file.Path; import java.util.List; +import java.util.Optional; +import java.util.stream.Stream; import org.jabref.model.biblog.BibWarning; import org.jabref.model.biblog.SeverityType; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import 
static org.junit.jupiter.api.Assertions.assertEquals; @@ -31,4 +36,38 @@ void parsesWarningsFromResourceFileTest() throws IOException { new BibWarning(SeverityType.WARNING, "empty year", "year", "Tan_2021") ), warnings); } + + @ParameterizedTest + @MethodSource("biblaTexValidationWarningsProvider") + void parsesBiblaTexValidationWarnings(String warningLine, Optional expectedWarning) { + assertEquals(expectedWarning, parser.parseWarningLine(warningLine)); + } + + private static Stream biblaTexValidationWarningsProvider() { + return Stream.of( + Arguments.of("[1124] Biber.pm:131> WARN - Datamodel: article entry 'Corti_2009' (chocolate.bib): Invalid field 'publisher' for entrytype 'article'", + Optional.of(new BibWarning(SeverityType.WARNING, "Invalid field 'publisher' for entrytype 'article'", "publisher", "Corti_2009"))), + + Arguments.of("[1126] Biber.pm:131> WARN - Datamodel: article entry 'Parker_2006' (Chocolate.bib): Missing mandatory field - one of 'date, year' must be defined", + Optional.of(new BibWarning(SeverityType.WARNING, "Missing mandatory field - one of 'date, year' must be defined", "date", "Parker_2006"))), + + Arguments.of("[1127] Biber.pm:131> WARN - Datamodel: article entry 'Corti_2009' (Chocolate.bib): Missing mandatory field 'author'", + Optional.of(new BibWarning(SeverityType.WARNING, "Missing mandatory field 'author'", "author", "Corti_2009"))), + + Arguments.of("[1128] Biber.pm:131> WARN - Datamodel: article entry 'Cooper_2007' (Chocolate.bib): Invalid ISSN in value of field 'issn'", + Optional.of(new BibWarning(SeverityType.WARNING, "Invalid ISSN in value of field 'issn'", "issn", "Cooper_2007"))), + + Arguments.of("[1129] Biber.pm:131> WARN - Datamodel: article entry 'Katz_2011' (Chocolate.bib): Invalid value of field 'volume' must be datatype 'integer' - ignoring field", + Optional.of(new BibWarning(SeverityType.WARNING, "Invalid value of field 'volume' must be datatype 'integer' - ignoring field", "volume", "Katz_2011"))), + + 
Arguments.of("WARN - Datamodel: article entry 'Keen_2001' (Chocolate.bib): Invalid field 'publisher' for entrytype 'article'", + Optional.of(new BibWarning(SeverityType.WARNING, "Invalid field 'publisher' for entrytype 'article'", "publisher", "Keen_2001"))), + + Arguments.of("WARN - Datamodel: article entry 'Macht_2007' (Chocolate.bib): Field 'groups' invalid in data model - ignoring", + Optional.of(new BibWarning(SeverityType.WARNING, "Field 'groups' invalid in data model - ignoring", "groups", "Macht_2007"))), + + Arguments.of("This is not a valid warning line", Optional.empty()), + Arguments.of("", Optional.empty()) + ); + } } From 073d9d3236f4a10367453a5fdaf24058c0f602a5 Mon Sep 17 00:00:00 2001 From: timurturbil Date: Thu, 14 Aug 2025 13:04:02 +0300 Subject: [PATCH 2/7] correct typo --- .../main/java/org/jabref/logic/biblog/BibtexLogParser.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java b/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java index 5c0e357f7b8..13ef84b090a 100644 --- a/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java +++ b/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java @@ -20,7 +20,7 @@ */ public class BibtexLogParser { private static final Pattern BIBTEX_WARNING_PATTERN = Pattern.compile("^Warning--(?[a-zA-Z ]+) in (?[^\\s]+)$"); - private static final Pattern BIBLATEXT_WARNING_PATTERN = Pattern.compile( + private static final Pattern BIBLATEX_WARNING_PATTERN = Pattern.compile( "(?:(?:\\[\\d+\\] )?Biber\\.pm:\\d+> )?WARN - Datamodel: [a-z]+ entry '(?[^']+)' \\((?[^)]+)\\): (?.+)"); private static final String EMPTY_FIELD_PREFIX = "empty"; @@ -77,7 +77,7 @@ Optional parseWarningLine(String line) { } // For BiblaTex warnings - Matcher biblaTexMatcher = BIBLATEXT_WARNING_PATTERN.matcher(line); + Matcher biblaTexMatcher = BIBLATEX_WARNING_PATTERN.matcher(line); if (biblaTexMatcher.find()) { String message = 
biblaTexMatcher.group("message").trim(); String entryKey = biblaTexMatcher.group("entryKey"); From 0b84b9f32c9761fe1dec24d4671288c28397cc8c Mon Sep 17 00:00:00 2001 From: timurturbil Date: Thu, 14 Aug 2025 14:50:22 +0300 Subject: [PATCH 3/7] change JavaDoc comments to markdown style and use lowercase for biblatex and bibtex in variable names instead of camel case. --- .../jabref/logic/biblog/BibtexLogParser.java | 53 +++++++++---------- .../logic/biblog/BibtexLogParserTest.java | 6 +-- 2 files changed, 28 insertions(+), 31 deletions(-) diff --git a/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java b/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java index 13ef84b090a..bce0a06b249 100644 --- a/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java +++ b/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java @@ -37,30 +37,28 @@ public List parseBiblog(@NonNull Path blgFilePath) throws IOExceptio return warnings; } - /** - * Parses a single line from a .blg file to identify a warning. - *

- * This method supports two warning formats: - *

    - *
  1. BibTeX Warnings: Simple warnings from the legacy BibTeX backend. - *
    {@code Warning--[message] in [entryKey]}
    - * For example: {@code Warning--empty journal in Scholey_2013} - *
  2. - *
  3. BibLaTeX Datamodel Warnings: Detailed warnings from the Biber backend, including datamodel validation issues. - *
    {@code [Log line] > WARN - Datamodel: [entry type] entry '[entryKey]' ([fileName]): [message]}
    - * For example: {@code Biber.pm:123> WARN - Datamodel: article entry 'Scholey_2013' (file.bib): Invalid field 'journal'} - *
  4. - *
- * - * @param line The single line from the .blg file to parse. - * @return An {@link Optional} containing a {@link BibWarning} if a match is found, or an empty {@code Optional} otherwise. - */ + /// Parses a single line from a .blg file to identify a warning. + /// + /// This method supports two warning formats: + /// + /// 1. **BibTeX Warnings:** Simple warnings from the legacy BibTeX backend. + /// `Warning--[message] in [entryKey]` + /// For example: `Warning--empty journal in Scholey_2013` + /// + /// 2. **BibLaTeX Datamodel Warnings:** Detailed warnings from the Biber backend, including datamodel validation issues. + /// `[Log line] > WARN - Datamodel: [entry type] entry '[entryKey]' ([fileName]): [message]` + /// For example: `Biber.pm:123> WARN - Datamodel: article entry 'Scholey_2013' (file.bib): Invalid field 'journal'` + /// + /// **Parameters:** + /// * `line` - The single line from the .blg file to parse. + /// + /// **Returns:** + /// * An `Optional` containing a `BibWarning` if a match is found, or an empty `Optional` otherwise. 
Optional parseWarningLine(String line) { - // For BibTeX warnings - Matcher bibTexMatcher = BIBTEX_WARNING_PATTERN.matcher(line); - if (bibTexMatcher.find()) { - String message = bibTexMatcher.group("message").trim(); - String entryKey = bibTexMatcher.group("entryKey"); + Matcher bibtexMatcher = BIBTEX_WARNING_PATTERN.matcher(line); + if (bibtexMatcher.find()) { + String message = bibtexMatcher.group("message").trim(); + String entryKey = bibtexMatcher.group("entryKey"); // Extract field name for warnings related to empty fields (e.g., "empty journal" -> fieldName = "journal") String fieldName = null; if (message.startsWith(EMPTY_FIELD_PREFIX)) { @@ -76,11 +74,10 @@ Optional parseWarningLine(String line) { )); } - // For BiblaTex warnings - Matcher biblaTexMatcher = BIBLATEX_WARNING_PATTERN.matcher(line); - if (biblaTexMatcher.find()) { - String message = biblaTexMatcher.group("message").trim(); - String entryKey = biblaTexMatcher.group("entryKey"); + Matcher biblatexMatcher = BIBLATEX_WARNING_PATTERN.matcher(line); + if (biblatexMatcher.find()) { + String message = biblatexMatcher.group("message").trim(); + String entryKey = biblatexMatcher.group("entryKey"); String fieldName = null; // Extract field name for warnings related to invalid fields (e.g., "Invalid field 'publisher' for entrytype 'article'" -> fieldName = "publisher") diff --git a/jablib/src/test/java/org/jabref/logic/biblog/BibtexLogParserTest.java b/jablib/src/test/java/org/jabref/logic/biblog/BibtexLogParserTest.java index cd156a32cf5..5b4d3d0a2df 100644 --- a/jablib/src/test/java/org/jabref/logic/biblog/BibtexLogParserTest.java +++ b/jablib/src/test/java/org/jabref/logic/biblog/BibtexLogParserTest.java @@ -38,12 +38,12 @@ void parsesWarningsFromResourceFileTest() throws IOException { } @ParameterizedTest - @MethodSource("biblaTexValidationWarningsProvider") - void parsesBiblaTexValidationWarnings(String warningLine, Optional expectedWarning) { + @MethodSource("biblatexValidationWarningsProvider") + 
void parsesBiblatexValidationWarnings(String warningLine, Optional expectedWarning) { assertEquals(expectedWarning, parser.parseWarningLine(warningLine)); } - private static Stream biblaTexValidationWarningsProvider() { + private static Stream biblatexValidationWarningsProvider() { return Stream.of( Arguments.of("[1124] Biber.pm:131> WARN - Datamodel: article entry 'Corti_2009' (chocolate.bib): Invalid field 'publisher' for entrytype 'article'", Optional.of(new BibWarning(SeverityType.WARNING, "Invalid field 'publisher' for entrytype 'article'", "publisher", "Corti_2009"))), From 4ff4893e7b233c524f9b46d4541fdf0e1bbd95e4 Mon Sep 17 00:00:00 2001 From: timurturbil Date: Wed, 20 Aug 2025 12:30:15 +0300 Subject: [PATCH 4/7] update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca43060f94d..d399eec8ca2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv - In case no citation relation information can be fetched, we show the data providers reason. [#13549](https://github.com/JabRef/jabref/pull/13549) - When relativizing file names, symlinks are now taken into account. [#12995](https://github.com/JabRef/jabref/issues/12995) - We added a new button for shortening the DOI near the DOI field in the general tab when viewing an entry. [#13639](https://github.com/JabRef/jabref/issues/13639) +- We added BibLaTeX datamodel validation support in order to improve error quality for entries' validation fields. 
[#13318](https://github.com/JabRef/jabref/issues/13318) ### Changed From f5f6bedeabc9e88e15c12fe522f983fff0d055b0 Mon Sep 17 00:00:00 2001 From: timurturbil Date: Wed, 20 Aug 2025 12:34:35 +0300 Subject: [PATCH 5/7] update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d399eec8ca2..6ad95d2558c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,7 +29,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv - In case no citation relation information can be fetched, we show the data providers reason. [#13549](https://github.com/JabRef/jabref/pull/13549) - When relativizing file names, symlinks are now taken into account. [#12995](https://github.com/JabRef/jabref/issues/12995) - We added a new button for shortening the DOI near the DOI field in the general tab when viewing an entry. [#13639](https://github.com/JabRef/jabref/issues/13639) -- We added BibLaTeX datamodel validation support in order to improve error quality for entries' validation fields. [#13318](https://github.com/JabRef/jabref/issues/13318) +- We added BibLaTeX datamodel validation support to improve the quality of error messages when validating entry fields.
[#13318](https://github.com/JabRef/jabref/issues/13318) ### Changed From 0c627f580b50605a58d8fdd9da1a8e0f2fb1868a Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Thu, 28 Aug 2025 14:27:21 +0200 Subject: [PATCH 6/7] Refine parsing logic --- .../org/jabref/logic/biblog/BibtexLogParser.java | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java b/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java index bce0a06b249..dc547d418f3 100644 --- a/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java +++ b/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java @@ -3,7 +3,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.regex.Matcher; @@ -28,13 +27,10 @@ public class BibtexLogParser { private static final String MULTI_INVALID_FIELD_PREFIX = "field - one of '"; public List parseBiblog(@NonNull Path blgFilePath) throws IOException { - List warnings = new ArrayList<>(); - List lines = Files.readAllLines(blgFilePath); - for (String line : lines) { - Optional potentialWarning = parseWarningLine(line); - potentialWarning.ifPresent(warnings::add); - } - return warnings; + return Files.lines(blgFilePath) + .map(this::parseWarningLine) + .flatMap(Optional::stream) + .toList(); } /// Parses a single line from a .blg file to identify a warning. 
From 7ee34970292e7a9330bc8d5c722b727605fbe804 Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Thu, 28 Aug 2025 14:29:30 +0200 Subject: [PATCH 7/7] Fix JavaDoc comment syntax --- .../main/java/org/jabref/logic/biblog/BibtexLogParser.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java b/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java index dc547d418f3..65b6f0d7b5a 100644 --- a/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java +++ b/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java @@ -45,11 +45,9 @@ public List parseBiblog(@NonNull Path blgFilePath) throws IOExceptio /// `[Log line] > WARN - Datamodel: [entry type] entry '[entryKey]' ([fileName]): [message]` /// For example: `Biber.pm:123> WARN - Datamodel: article entry 'Scholey_2013' (file.bib): Invalid field 'journal'` /// - /// **Parameters:** - /// * `line` - The single line from the .blg file to parse. + /// @param line The single line from the .blg file to parse. /// - /// **Returns:** - /// * An `Optional` containing a `BibWarning` if a match is found, or an empty `Optional` otherwise. + /// @return An `Optional` containing a `BibWarning` if a match is found, or an empty `Optional` otherwise. Optional parseWarningLine(String line) { Matcher bibtexMatcher = BIBTEX_WARNING_PATTERN.matcher(line); if (bibtexMatcher.find()) {