Skip to content
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- When relativizing file names, symlinks are now taken into account. [#12995](https://github.com/JabRef/jabref/issues/12995)
- We added a new button for shortening the DOI near the DOI field in the general tab when viewing an entry. [#13639](https://github.com/JabRef/jabref/issues/13639)
- We added support for finding CSL-Styles based on their short title (e.g. apa instead of "american psychological association"). [#13728](https://github.com/JabRef/jabref/pull/13728)
- We added support for BibLaTeX datamodel validation to improve the quality of error messages when validating entry fields. [#13318](https://github.com/JabRef/jabref/issues/13318)

### Changed

Expand Down
113 changes: 74 additions & 39 deletions jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.regex.Matcher;
Expand All @@ -19,52 +18,88 @@
* Parses the contents of a .blg (BibTeX log) file to extract warning messages.
*/
public class BibtexLogParser {
private static final Pattern WARNING_PATTERN = Pattern.compile("^Warning--(?<message>[a-zA-Z ]+) in (?<entryKey>[^\\s]+)$");
private static final Pattern BIBTEX_WARNING_PATTERN = Pattern.compile("^Warning--(?<message>[a-zA-Z ]+) in (?<entryKey>[^\\s]+)$");
private static final Pattern BIBLATEX_WARNING_PATTERN = Pattern.compile(
"(?:(?:\\[\\d+\\] )?Biber\\.pm:\\d+> )?WARN - Datamodel: [a-z]+ entry '(?<entryKey>[^']+)' \\((?<fileName>[^)]+)\\): (?<message>.+)");

private static final String EMPTY_FIELD_PREFIX = "empty";
private static final String INVALID_FIELD_PREFIX = "field '";
private static final String MULTI_INVALID_FIELD_PREFIX = "field - one of '";

public List<BibWarning> parseBiblog(@NonNull Path blgFilePath) throws IOException {
List<BibWarning> warnings = new ArrayList<>();
List<String> lines = Files.readAllLines(blgFilePath);
for (String line : lines) {
Optional<BibWarning> potentialWarning = parseWarningLine(line);
potentialWarning.ifPresent(warnings::add);
}
return warnings;
return Files.lines(blgFilePath)
.map(this::parseWarningLine)
.flatMap(Optional::stream)
.toList();
}

/**
* Parses a single line from the .blg file to identify a warning.
* <p>
* Currently supports parsing warnings of the format:
* <pre>
* Warning--[message] in [entryKey]
* </pre>
* For example: {@code Warning--empty journal in Scholey_2013}
*
* @param line a single line from the .blg file
* @return an Optional containing a {@link BibWarning} if a match is found, or empty otherwise
*/
private Optional<BibWarning> parseWarningLine(String line) {
// TODO: Support additional warning formats
Matcher matcher = WARNING_PATTERN.matcher(line);
if (!matcher.find()) {
return Optional.empty();
/// Parses a single line from a .blg file to identify a warning.
///
/// This method supports two warning formats:
///
/// 1. **BibTeX Warnings:** Simple warnings from the legacy BibTeX backend.
/// `Warning--[message] in [entryKey]`
/// For example: `Warning--empty journal in Scholey_2013`
///
/// 2. **BibLaTeX Datamodel Warnings:** Detailed warnings from the Biber backend, including datamodel validation issues.
/// `[Log line] > WARN - Datamodel: [entry type] entry '[entryKey]' ([fileName]): [message]`
/// For example: `Biber.pm:123> WARN - Datamodel: article entry 'Scholey_2013' (file.bib): Invalid field 'journal'`
///
/// @param line The single line from the .blg file to parse.
///
/// @returns An `Optional` containing a `BibWarning` if a match is found, or an empty `Optional` otherwise.
Optional<BibWarning> parseWarningLine(String line) {
Matcher bibtexMatcher = BIBTEX_WARNING_PATTERN.matcher(line);
if (bibtexMatcher.find()) {
String message = bibtexMatcher.group("message").trim();
String entryKey = bibtexMatcher.group("entryKey");
// Extract field name for warnings related to empty fields (e.g., "empty journal" -> fieldName = "journal")
String fieldName = null;
if (message.startsWith(EMPTY_FIELD_PREFIX)) {
fieldName = message.substring(EMPTY_FIELD_PREFIX.length()).trim();
fieldName = FieldFactory.parseField(fieldName).getName();
}

return Optional.of(new BibWarning(
SeverityType.WARNING,
message,
fieldName,
entryKey
));
}

String message = matcher.group("message").trim();
String entryKey = matcher.group("entryKey");
// Extract field name for warnings related to empty fields (e.g., "empty journal" -> fieldName = "journal")
String fieldName = null;
if (message.startsWith(EMPTY_FIELD_PREFIX)) {
fieldName = message.substring(EMPTY_FIELD_PREFIX.length()).trim();
fieldName = FieldFactory.parseField(fieldName).getName();
Matcher biblatexMatcher = BIBLATEX_WARNING_PATTERN.matcher(line);
if (biblatexMatcher.find()) {
String message = biblatexMatcher.group("message").trim();
String entryKey = biblatexMatcher.group("entryKey");
String fieldName = null;

// Extract field name for warnings related to invalid fields (e.g., "Invalid field 'publisher' for entrytype 'article'" -> fieldName = "publisher")
String lowerCaseMessage = message.toLowerCase();
if (lowerCaseMessage.contains(INVALID_FIELD_PREFIX)) {
int startIndex = lowerCaseMessage.indexOf(INVALID_FIELD_PREFIX) + INVALID_FIELD_PREFIX.length();
int endIndex = lowerCaseMessage.indexOf('\'', startIndex);
if (endIndex != -1) {
fieldName = lowerCaseMessage.substring(startIndex, endIndex).trim();
fieldName = FieldFactory.parseField(fieldName).getName();
}
} else if (lowerCaseMessage.contains(MULTI_INVALID_FIELD_PREFIX)) {
int startIndex = lowerCaseMessage.indexOf(MULTI_INVALID_FIELD_PREFIX) + MULTI_INVALID_FIELD_PREFIX.length();
int endIndex = lowerCaseMessage.indexOf('\'', startIndex);
if (endIndex != -1) {
fieldName = lowerCaseMessage.substring(startIndex, endIndex).trim().split(",")[0].trim();
fieldName = FieldFactory.parseField(fieldName).getName();
}
}

return Optional.of(new BibWarning(
SeverityType.WARNING,
message,
fieldName,
entryKey
));
}

return Optional.of(new BibWarning(
SeverityType.WARNING,
message,
fieldName,
entryKey
));
return Optional.empty();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@
import java.io.IOException;
import java.nio.file.Path;
import java.util.List;
import java.util.Optional;
import java.util.stream.Stream;

import org.jabref.model.biblog.BibWarning;
import org.jabref.model.biblog.SeverityType;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertEquals;

Expand All @@ -31,4 +36,38 @@ void parsesWarningsFromResourceFileTest() throws IOException {
new BibWarning(SeverityType.WARNING, "empty year", "year", "Tan_2021")
), warnings);
}

/// Checks that a single log line is parsed into the expected (possibly absent) warning.
/// The cases are supplied by `biblatexValidationWarningsProvider`.
@ParameterizedTest
@MethodSource("biblatexValidationWarningsProvider")
void parsesBiblatexValidationWarnings(String warningLine, Optional<BibWarning> expectedWarning) {
    Optional<BibWarning> actualWarning = parser.parseWarningLine(warningLine);

    assertEquals(expectedWarning, actualWarning);
}

/// Supplies pairs of (log line, expected parse result) for `parsesBiblatexValidationWarnings`.
/// Covers lines with and without the `[n] Biber.pm:n> ` prefix as well as lines that are no warnings at all.
private static Stream<Arguments> biblatexValidationWarningsProvider() {
    return Stream.of(
            // Lines carrying the full Biber log prefix
            Arguments.of(
                    "[1124] Biber.pm:131> WARN - Datamodel: article entry 'Corti_2009' (chocolate.bib): Invalid field 'publisher' for entrytype 'article'",
                    expectedBiblatexWarning("Invalid field 'publisher' for entrytype 'article'", "publisher", "Corti_2009")),
            Arguments.of(
                    "[1126] Biber.pm:131> WARN - Datamodel: article entry 'Parker_2006' (Chocolate.bib): Missing mandatory field - one of 'date, year' must be defined",
                    expectedBiblatexWarning("Missing mandatory field - one of 'date, year' must be defined", "date", "Parker_2006")),
            Arguments.of(
                    "[1127] Biber.pm:131> WARN - Datamodel: article entry 'Corti_2009' (Chocolate.bib): Missing mandatory field 'author'",
                    expectedBiblatexWarning("Missing mandatory field 'author'", "author", "Corti_2009")),
            Arguments.of(
                    "[1128] Biber.pm:131> WARN - Datamodel: article entry 'Cooper_2007' (Chocolate.bib): Invalid ISSN in value of field 'issn'",
                    expectedBiblatexWarning("Invalid ISSN in value of field 'issn'", "issn", "Cooper_2007")),
            Arguments.of(
                    "[1129] Biber.pm:131> WARN - Datamodel: article entry 'Katz_2011' (Chocolate.bib): Invalid value of field 'volume' must be datatype 'integer' - ignoring field",
                    expectedBiblatexWarning("Invalid value of field 'volume' must be datatype 'integer' - ignoring field", "volume", "Katz_2011")),

            // Lines without the Biber log prefix
            Arguments.of(
                    "WARN - Datamodel: article entry 'Keen_2001' (Chocolate.bib): Invalid field 'publisher' for entrytype 'article'",
                    expectedBiblatexWarning("Invalid field 'publisher' for entrytype 'article'", "publisher", "Keen_2001")),
            Arguments.of(
                    "WARN - Datamodel: article entry 'Macht_2007' (Chocolate.bib): Field 'groups' invalid in data model - ignoring",
                    expectedBiblatexWarning("Field 'groups' invalid in data model - ignoring", "groups", "Macht_2007")),

            // Lines that are not warnings
            Arguments.of("This is not a valid warning line", Optional.empty()),
            Arguments.of("", Optional.empty())
    );
}

/// Wraps the expected warning (always of severity WARNING) in an `Optional`.
private static Optional<BibWarning> expectedBiblatexWarning(String message, String fieldName, String entryKey) {
    return Optional.of(new BibWarning(SeverityType.WARNING, message, fieldName, entryKey));
}
}
Loading