diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1c64e30e43c..d15fa930187 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,6 +32,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
 - When relativizing file names, symlinks are now taken into account. [#12995](https://github.com/JabRef/jabref/issues/12995)
 - We added a new button for shortening the DOI near the DOI field in the general tab when viewing an entry. [#13639](https://github.com/JabRef/jabref/issues/13639)
 - We added support for finding CSL-Styles based on their short title (e.g. apa instead of "american psychological association"). [#13728](https://github.com/JabRef/jabref/pull/13728)
+- We added BibLaTeX datamodel validation support in order to improve error message quality in entries' fields validation. [#13318](https://github.com/JabRef/jabref/issues/13318)
 
 ### Changed
 
diff --git a/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java b/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java
index be859a5c054..65b6f0d7b5a 100644
--- a/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java
+++ b/jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java
@@ -3,7 +3,8 @@
 import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.util.ArrayList;
 import java.util.List;
+import java.util.Locale;
 import java.util.Optional;
 import java.util.regex.Matcher;
@@ -19,52 +20,98 @@
  * Parses the contents of a .blg (BibTeX log) file to extract warning messages.
  */
 public class BibtexLogParser {
-    private static final Pattern WARNING_PATTERN = Pattern.compile("^Warning--(?<message>[a-zA-Z ]+) in (?<entryKey>[^\\s]+)$");
+    private static final Pattern BIBTEX_WARNING_PATTERN = Pattern.compile("^Warning--(?<message>[a-zA-Z ]+) in (?<entryKey>[^\\s]+)$");
+    private static final Pattern BIBLATEX_WARNING_PATTERN = Pattern.compile(
+            "(?:(?:\\[\\d+\\] )?Biber\\.pm:\\d+> )?WARN - Datamodel: [a-z]+ entry '(?<entryKey>[^']+)' \\((?<fileName>[^)]+)\\): (?<message>.+)");
     private static final String EMPTY_FIELD_PREFIX = "empty";
+    private static final String INVALID_FIELD_PREFIX = "field '";
+    private static final String MULTI_INVALID_FIELD_PREFIX = "field - one of '";
 
+    /// Reads the given .blg file line by line and collects every line recognized as a warning.
+    ///
+    /// @param blgFilePath path of the .blg file to parse
+    /// @return the warnings found in the file, in file order; empty if no line matched
+    /// @throws IOException if the file cannot be opened or read
     public List<BibWarning> parseBiblog(@NonNull Path blgFilePath) throws IOException {
-        List<BibWarning> warnings = new ArrayList<>();
-        List<String> lines = Files.readAllLines(blgFilePath);
-        for (String line : lines) {
-            Optional<BibWarning> potentialWarning = parseWarningLine(line);
-            potentialWarning.ifPresent(warnings::add);
-        }
-        return warnings;
+        // Files.lines keeps the file open until the stream is closed, so scope it with try-with-resources
+        try (var lines = Files.lines(blgFilePath)) {
+            return lines.map(this::parseWarningLine)
+                        .flatMap(Optional::stream)
+                        .toList();
+        } catch (UncheckedIOException e) {
+            // Read errors surface lazily as UncheckedIOException; unwrap to honor the declared IOException
+            throw e.getCause();
+        }
     }
 
-    /**
-     * Parses a single line from the .blg file to identify a warning.
-     * <p>
-     * Currently supports parsing warnings of the format:
-     * <pre>
-     * Warning--[message] in [entryKey]
-     * </pre>
-     * For example: {@code Warning--empty journal in Scholey_2013}
-     *
-     * @param line a single line from the .blg file
-     * @return an Optional containing a {@link BibWarning} if a match is found, or empty otherwise
-     */
-    private Optional<BibWarning> parseWarningLine(String line) {
-        // TODO: Support additional warning formats
-        Matcher matcher = WARNING_PATTERN.matcher(line);
-        if (!matcher.find()) {
-            return Optional.empty();
+    /// Parses a single line from a .blg file to identify a warning.
+    ///
+    /// This method supports two warning formats:
+    ///
+    /// 1. **BibTeX Warnings:** Simple warnings from the legacy BibTeX backend.
+    ///    `Warning--[message] in [entryKey]`
+    ///    For example: `Warning--empty journal in Scholey_2013`
+    ///
+    /// 2. **BibLaTeX Datamodel Warnings:** Detailed warnings from the Biber backend, including datamodel validation issues.
+    ///    `[Log line] > WARN - Datamodel: [entry type] entry '[entryKey]' ([fileName]): [message]`
+    ///    For example: `Biber.pm:123> WARN - Datamodel: article entry 'Scholey_2013' (file.bib): Invalid field 'journal'`
+    ///
+    /// @param line The single line from the .blg file to parse.
+    ///
+    /// @return An `Optional` containing a `BibWarning` if a match is found, or an empty `Optional` otherwise.
+    Optional<BibWarning> parseWarningLine(String line) {
+        Matcher bibtexMatcher = BIBTEX_WARNING_PATTERN.matcher(line);
+        if (bibtexMatcher.find()) {
+            String message = bibtexMatcher.group("message").trim();
+            String entryKey = bibtexMatcher.group("entryKey");
+            // Extract field name for warnings related to empty fields (e.g., "empty journal" -> fieldName = "journal")
+            String fieldName = null;
+            if (message.startsWith(EMPTY_FIELD_PREFIX)) {
+                fieldName = message.substring(EMPTY_FIELD_PREFIX.length()).trim();
+                fieldName = FieldFactory.parseField(fieldName).getName();
+            }
+
+            return Optional.of(new BibWarning(
+                    SeverityType.WARNING,
+                    message,
+                    fieldName,
+                    entryKey
+            ));
         }
 
-        String message = matcher.group("message").trim();
-        String entryKey = matcher.group("entryKey");
-        // Extract field name for warnings related to empty fields (e.g., "empty journal" -> fieldName = "journal")
-        String fieldName = null;
-        if (message.startsWith(EMPTY_FIELD_PREFIX)) {
-            fieldName = message.substring(EMPTY_FIELD_PREFIX.length()).trim();
-            fieldName = FieldFactory.parseField(fieldName).getName();
+        Matcher biblatexMatcher = BIBLATEX_WARNING_PATTERN.matcher(line);
+        if (biblatexMatcher.find()) {
+            String message = biblatexMatcher.group("message").trim();
+            String entryKey = biblatexMatcher.group("entryKey");
+            String fieldName = null;
+
+            // Extract field name for warnings related to invalid fields (e.g., "Invalid field 'publisher' for entrytype 'article'" -> fieldName = "publisher")
+            // Locale.ROOT keeps the lower-casing independent of the user's locale (e.g. Turkish 'I' -> dotless 'ı')
+            String lowerCaseMessage = message.toLowerCase(Locale.ROOT);
+            if (lowerCaseMessage.contains(INVALID_FIELD_PREFIX)) {
+                int startIndex = lowerCaseMessage.indexOf(INVALID_FIELD_PREFIX) + INVALID_FIELD_PREFIX.length();
+                int endIndex = lowerCaseMessage.indexOf('\'', startIndex);
+                if (endIndex != -1) {
+                    fieldName = lowerCaseMessage.substring(startIndex, endIndex).trim();
+                    fieldName = FieldFactory.parseField(fieldName).getName();
+                }
+            } else if (lowerCaseMessage.contains(MULTI_INVALID_FIELD_PREFIX)) {
+                int startIndex = lowerCaseMessage.indexOf(MULTI_INVALID_FIELD_PREFIX) + MULTI_INVALID_FIELD_PREFIX.length();
+                int endIndex = lowerCaseMessage.indexOf('\'', startIndex);
+                if (endIndex != -1) {
+                    fieldName = lowerCaseMessage.substring(startIndex, endIndex).trim().split(",")[0].trim();
+                    fieldName = FieldFactory.parseField(fieldName).getName();
+                }
+            }
+
+            return Optional.of(new BibWarning(
+                    SeverityType.WARNING,
+                    message,
+                    fieldName,
+                    entryKey
+            ));
         }
 
-        return Optional.of(new BibWarning(
-                SeverityType.WARNING,
-                message,
-                fieldName,
-                entryKey
-        ));
+        return Optional.empty();
     }
 }
diff --git a/jablib/src/test/java/org/jabref/logic/biblog/BibtexLogParserTest.java b/jablib/src/test/java/org/jabref/logic/biblog/BibtexLogParserTest.java
index fdc2e06a0ec..5b4d3d0a2df 100644
--- a/jablib/src/test/java/org/jabref/logic/biblog/BibtexLogParserTest.java
+++ b/jablib/src/test/java/org/jabref/logic/biblog/BibtexLogParserTest.java
@@ -3,12 +3,17 @@
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.List;
+import java.util.Optional;
+import java.util.stream.Stream;
 
 import org.jabref.model.biblog.BibWarning;
 import org.jabref.model.biblog.SeverityType;
 
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
@@ -31,4 +36,38 @@ void parsesWarningsFromResourceFileTest() throws IOException {
                 new BibWarning(SeverityType.WARNING, "empty year", "year", "Tan_2021")
         ), warnings);
     }
+
+    @ParameterizedTest
+    @MethodSource("biblatexValidationWarningsProvider")
+    void parsesBiblatexValidationWarnings(String warningLine, Optional<BibWarning> expectedWarning) {
+        assertEquals(expectedWarning, parser.parseWarningLine(warningLine));
+    }
+
+    private static Stream<Arguments> biblatexValidationWarningsProvider() {
+        return Stream.of(
+                Arguments.of("[1124] Biber.pm:131> WARN - Datamodel: article entry 'Corti_2009' (chocolate.bib): Invalid field 'publisher' for entrytype 'article'",
+                        Optional.of(new BibWarning(SeverityType.WARNING, "Invalid field 'publisher' for entrytype 'article'", "publisher", "Corti_2009"))),
+
+                Arguments.of("[1126] Biber.pm:131> WARN - Datamodel: article entry 'Parker_2006' (Chocolate.bib): Missing mandatory field - one of 'date, year' must be defined",
+                        Optional.of(new BibWarning(SeverityType.WARNING, "Missing mandatory field - one of 'date, year' must be defined", "date", "Parker_2006"))),
+
+                Arguments.of("[1127] Biber.pm:131> WARN - Datamodel: article entry 'Corti_2009' (Chocolate.bib): Missing mandatory field 'author'",
+                        Optional.of(new BibWarning(SeverityType.WARNING, "Missing mandatory field 'author'", "author", "Corti_2009"))),
+
+                Arguments.of("[1128] Biber.pm:131> WARN - Datamodel: article entry 'Cooper_2007' (Chocolate.bib): Invalid ISSN in value of field 'issn'",
+                        Optional.of(new BibWarning(SeverityType.WARNING, "Invalid ISSN in value of field 'issn'", "issn", "Cooper_2007"))),
+
+                Arguments.of("[1129] Biber.pm:131> WARN - Datamodel: article entry 'Katz_2011' (Chocolate.bib): Invalid value of field 'volume' must be datatype 'integer' - ignoring field",
+                        Optional.of(new BibWarning(SeverityType.WARNING, "Invalid value of field 'volume' must be datatype 'integer' - ignoring field", "volume", "Katz_2011"))),
+
+                Arguments.of("WARN - Datamodel: article entry 'Keen_2001' (Chocolate.bib): Invalid field 'publisher' for entrytype 'article'",
+                        Optional.of(new BibWarning(SeverityType.WARNING, "Invalid field 'publisher' for entrytype 'article'", "publisher", "Keen_2001"))),
+
+                Arguments.of("WARN - Datamodel: article entry 'Macht_2007' (Chocolate.bib): Field 'groups' invalid in data model - ignoring",
+                        Optional.of(new BibWarning(SeverityType.WARNING, "Field 'groups' invalid in data model - ignoring", "groups", "Macht_2007"))),
+
+                Arguments.of("This is not a valid warning line", Optional.empty()),
+                Arguments.of("", Optional.empty())
+        );
+    }
 }