Skip to content
96 changes: 70 additions & 26 deletions jablib/src/main/java/org/jabref/logic/biblog/BibtexLogParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,13 @@
* Parses the contents of a .blg (BibTeX log) file to extract warning messages.
*/
public class BibtexLogParser {
private static final Pattern WARNING_PATTERN = Pattern.compile("^Warning--(?<message>[a-zA-Z ]+) in (?<entryKey>[^\\s]+)$");
/**
 * Matches a complete BibTeX warning line of the form {@code Warning--<message> in <entryKey>}.
 * The {@code message} group accepts only ASCII letters and spaces; the {@code entryKey} group is a
 * run of non-whitespace characters that must extend to the end of the line.
 */
private static final Pattern BIBTEX_WARNING_PATTERN = Pattern.compile("^Warning--(?<message>[a-zA-Z ]+) in (?<entryKey>[^\\s]+)$");
/**
 * Matches a Biber datamodel warning of the form
 * {@code WARN - Datamodel: <type> entry '<entryKey>' (<fileName>): <message>}, where {@code <type>} is
 * a run of lower-case ASCII letters. The line may start with a {@code Biber.pm:<n>> } prefix, which
 * in turn may be preceded by a {@code [<n>] } counter. The expression is not anchored, so it can
 * match anywhere within a line.
 */
private static final Pattern BIBLATEX_WARNING_PATTERN = Pattern.compile(
"(?:(?:\\[\\d+\\] )?Biber\\.pm:\\d+> )?WARN - Datamodel: [a-z]+ entry '(?<entryKey>[^']+)' \\((?<fileName>[^)]+)\\): (?<message>.+)");

/** Leading word of BibTeX messages that report an empty field (e.g. "empty journal"); the text after it is taken as the field name. */
private static final String EMPTY_FIELD_PREFIX = "empty";
/** Lower-case marker that directly precedes a single quoted field name in a Biber message (e.g. "Invalid field 'publisher' ..."); it is searched for in the lower-cased message. */
private static final String INVALID_FIELD_PREFIX = "field '";
/** Lower-case marker that directly precedes a quoted, comma-separated list of field names in a Biber message (e.g. "... field - one of 'date, year' ..."); it is searched for in the lower-cased message. */
private static final String MULTI_INVALID_FIELD_PREFIX = "field - one of '";

public List<BibWarning> parseBiblog(@NonNull Path blgFilePath) throws IOException {
List<BibWarning> warnings = new ArrayList<>();
Expand All @@ -33,38 +38,77 @@ public List<BibWarning> parseBiblog(@NonNull Path blgFilePath) throws IOExceptio
}

/**
* Parses a single line from the .blg file to identify a warning.
* Parses a single line from a .blg file to identify a warning.
* <p>
* Currently supports parsing warnings of the format:
* <pre>
* Warning--[message] in [entryKey]
* </pre>
* This method supports two warning formats:
* <ol>
* <li><b>BibTeX Warnings:</b> Simple warnings from the legacy BibTeX backend.
* <pre>{@code Warning--[message] in [entryKey]}</pre>
* For example: {@code Warning--empty journal in Scholey_2013}
* </li>
* <li><b>BibLaTeX Datamodel Warnings:</b> Detailed warnings from the Biber backend, including datamodel validation issues.
* <pre>{@code [Log line] > WARN - Datamodel: [entry type] entry '[entryKey]' ([fileName]): [message]}</pre>
* For example: {@code Biber.pm:123> WARN - Datamodel: article entry 'Scholey_2013' (file.bib): Invalid field 'journal'}
* </li>
* </ol>
*
* @param line a single line from the .blg file
* @return an Optional containing a {@link BibWarning} if a match is found, or empty otherwise
* @param line The single line from the .blg file to parse.
* @return An {@link Optional} containing a {@link BibWarning} if a match is found, or an empty {@code Optional} otherwise.
*/
private Optional<BibWarning> parseWarningLine(String line) {
// TODO: Support additional warning formats
Matcher matcher = WARNING_PATTERN.matcher(line);
if (!matcher.find()) {
return Optional.empty();
Optional<BibWarning> parseWarningLine(String line) {
// For BibTeX warnings
Matcher bibTexMatcher = BIBTEX_WARNING_PATTERN.matcher(line);
if (bibTexMatcher.find()) {
String message = bibTexMatcher.group("message").trim();
String entryKey = bibTexMatcher.group("entryKey");
// Extract field name for warnings related to empty fields (e.g., "empty journal" -> fieldName = "journal")
String fieldName = null;
if (message.startsWith(EMPTY_FIELD_PREFIX)) {
fieldName = message.substring(EMPTY_FIELD_PREFIX.length()).trim();
fieldName = FieldFactory.parseField(fieldName).getName();
}

return Optional.of(new BibWarning(
SeverityType.WARNING,
message,
fieldName,
entryKey
));
}

String message = matcher.group("message").trim();
String entryKey = matcher.group("entryKey");
// Extract field name for warnings related to empty fields (e.g., "empty journal" -> fieldName = "journal")
String fieldName = null;
if (message.startsWith(EMPTY_FIELD_PREFIX)) {
fieldName = message.substring(EMPTY_FIELD_PREFIX.length()).trim();
fieldName = FieldFactory.parseField(fieldName).getName();
// For BiblaTex warnings
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment adds no value as it merely restates what is obvious from the code. Should be removed or enhanced with additional context.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do not camel case latex or bibtex or biblatex in variable names.

Matcher biblaTexMatcher = BIBLATEX_WARNING_PATTERN.matcher(line);
if (biblaTexMatcher.find()) {
String message = biblaTexMatcher.group("message").trim();
String entryKey = biblaTexMatcher.group("entryKey");
String fieldName = null;

// Extract field name for warnings related to invalid fields (e.g., "Invalid field 'publisher' for entrytype 'article'" -> fieldName = "publisher")
String lowerCaseMessage = message.toLowerCase();
if (lowerCaseMessage.contains(INVALID_FIELD_PREFIX)) {
int startIndex = lowerCaseMessage.indexOf(INVALID_FIELD_PREFIX) + INVALID_FIELD_PREFIX.length();
int endIndex = lowerCaseMessage.indexOf('\'', startIndex);
if (endIndex != -1) {
fieldName = lowerCaseMessage.substring(startIndex, endIndex).trim();
fieldName = FieldFactory.parseField(fieldName).getName();
}
} else if (lowerCaseMessage.contains(MULTI_INVALID_FIELD_PREFIX)) {
int startIndex = lowerCaseMessage.indexOf(MULTI_INVALID_FIELD_PREFIX) + MULTI_INVALID_FIELD_PREFIX.length();
int endIndex = lowerCaseMessage.indexOf('\'', startIndex);
if (endIndex != -1) {
fieldName = lowerCaseMessage.substring(startIndex, endIndex).trim().split(",")[0].trim();
fieldName = FieldFactory.parseField(fieldName).getName();
}
}

return Optional.of(new BibWarning(
SeverityType.WARNING,
message,
fieldName,
entryKey
));
}

return Optional.of(new BibWarning(
SeverityType.WARNING,
message,
fieldName,
entryKey
));
return Optional.empty();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@
import java.io.IOException;
import java.nio.file.Path;
import java.util.List;
import java.util.Optional;
import java.util.stream.Stream;

import org.jabref.model.biblog.BibWarning;
import org.jabref.model.biblog.SeverityType;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertEquals;

Expand All @@ -31,4 +36,38 @@ void parsesWarningsFromResourceFileTest() throws IOException {
new BibWarning(SeverityType.WARNING, "empty year", "year", "Tan_2021")
), warnings);
}

/**
 * Checks that parsing a single log line yields exactly the warning (or the empty result)
 * paired with it by the argument provider.
 */
@ParameterizedTest
@MethodSource("biblaTexValidationWarningsProvider")
void parsesBiblaTexValidationWarnings(String warningLine, Optional<BibWarning> expectedWarning) {
    Optional<BibWarning> actualWarning = parser.parseWarningLine(warningLine);
    assertEquals(expectedWarning, actualWarning);
}

/**
 * Supplies pairs of (log line, expected parse result): Biber datamodel warnings with and without
 * the "[n] Biber.pm:n> " prefix, plus two lines that must not be recognised as warnings.
 */
private static Stream<Arguments> biblaTexValidationWarningsProvider() {
    return Stream.of(
            warningCase("[1124] Biber.pm:131> WARN - Datamodel: article entry 'Corti_2009' (chocolate.bib): ",
                    "Invalid field 'publisher' for entrytype 'article'", "publisher", "Corti_2009"),

            warningCase("[1126] Biber.pm:131> WARN - Datamodel: article entry 'Parker_2006' (Chocolate.bib): ",
                    "Missing mandatory field - one of 'date, year' must be defined", "date", "Parker_2006"),

            warningCase("[1127] Biber.pm:131> WARN - Datamodel: article entry 'Corti_2009' (Chocolate.bib): ",
                    "Missing mandatory field 'author'", "author", "Corti_2009"),

            warningCase("[1128] Biber.pm:131> WARN - Datamodel: article entry 'Cooper_2007' (Chocolate.bib): ",
                    "Invalid ISSN in value of field 'issn'", "issn", "Cooper_2007"),

            warningCase("[1129] Biber.pm:131> WARN - Datamodel: article entry 'Katz_2011' (Chocolate.bib): ",
                    "Invalid value of field 'volume' must be datatype 'integer' - ignoring field", "volume", "Katz_2011"),

            warningCase("WARN - Datamodel: article entry 'Keen_2001' (Chocolate.bib): ",
                    "Invalid field 'publisher' for entrytype 'article'", "publisher", "Keen_2001"),

            warningCase("WARN - Datamodel: article entry 'Macht_2007' (Chocolate.bib): ",
                    "Field 'groups' invalid in data model - ignoring", "groups", "Macht_2007"),

            Arguments.of("This is not a valid warning line", Optional.empty()),
            Arguments.of("", Optional.empty())
    );
}

/**
 * Builds one test case: the log line is {@code linePrefix + message}, and the expected result is a
 * WARNING-level {@link BibWarning} carrying that same message, the given field name and entry key.
 */
private static Arguments warningCase(String linePrefix, String message, String fieldName, String entryKey) {
    BibWarning expected = new BibWarning(SeverityType.WARNING, message, fieldName, entryKey);
    return Arguments.of(linePrefix + message, Optional.of(expected));
}
}
Loading