Skip to content

Commit 9b2772b

Browse files
dcarpentierodcarpentierokopporcalixtus
authored
Mode aware consistency check (#13584)
* feat: consistency check draft * Feat: mode comparison draft * fix: removed comment * Feat: mode diversification operations * fix: removed @test for testing purposes * fix: hashset constructor refactor * Update BibliographyConsistencyCheckTest.java * fix: passing whole dbContext instead of entries into execute() method * fix: passing whole database context instead of list of entries * fix: code refactor * fix: typo * fix: typo * feat: tests now are passed whole database instead of a list of entries * fix: typo * fix: consistent variable naming, bibEntryList -> bibEntriesList * Fix: removed the withoutDate entry from the test * Fix: added missing database entries to the bibDatabase * fix: refactor, using a typeset instead of two set.of(), removing redundancy * fix: now biblatex mode is handled correctly * fix: typo * fix: excluding "key" field and corrects common fields in consistency control * fix: now correct differentiation based on bibtex / biblatex mode. also maintains order (hashset vs linkedhashset) * fix: added required'date' field for withoutDate * fix: missing 'bibtex' mode determination in csvwritertest * feat: filtering logic added * fix: changes to reflect fixes on filtering logic of bibliography consistency writer * Adapt tests * feat: Align with reviewer-confirmed test expectations for required field handling * feat: update consistency check and output tests to include all entries involved in field differences - Tests now expect all entries participating in a required/optional field difference to be reported in the output (CSV/TXT) - Aligns symbol logic (x, o, ?, -) between code and tests - Improves consistency handling for bibtex and biblatex modes * fix: Double bibtex reference typo * fix: removed name redundancy * Update jablib/src/main/java/org/jabref/logic/quality/consistency/BibliographyConsistencyCheck.java Co-authored-by: Oliver Kopp <[email protected]> * fix: Added testing for filterEntiresWithFieldDifferences * fix: indentation * fix: removed "visible for testing" – violated annotation rule * fix: removed unused import * fix: updated StandardFields, now are ALL included. * fix: re-added previously removed comments * Fix field order * Remove obsolete branch * feat: duplicated test for checkComplexLibrary * fix: variable naming * fix: re-added sixth BibEntry * chore(ci): trigger pipeline * chore(ci): trigger pipeline * feat: workflow for amazon corretto 24 * Delete .github/workflows/ci.yml * fix: substituted the exception throw with a logger and a dialog message. * fix: language localization typo * fix: Modern best practices – usage of Map.of() instead of Collections.EmptyMap() for empty collections * fix: passing whole bibDatabaseContext instead of entries * Use weaker HashMap type * Refine data structure * Avoid elvis * Add small assert * Fix assert * Tweak algorithm --------- Co-authored-by: dcarpentiero <[email protected]> Co-authored-by: Oliver Kopp <[email protected]> Co-authored-by: Carl Christian Snethlage <[email protected]>
1 parent d2dfc6e commit 9b2772b

File tree

7 files changed

+418
-113
lines changed

7 files changed

+418
-113
lines changed

jabgui/src/main/java/org/jabref/gui/consistency/ConsistencyCheckAction.java

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package org.jabref.gui.consistency;
22

3-
import java.util.List;
3+
import java.util.Map;
44
import java.util.Optional;
55
import java.util.function.Supplier;
66

@@ -15,13 +15,16 @@
1515
import org.jabref.logic.l10n.Localization;
1616
import org.jabref.logic.quality.consistency.BibliographyConsistencyCheck;
1717
import org.jabref.model.database.BibDatabaseContext;
18-
import org.jabref.model.entry.BibEntry;
1918
import org.jabref.model.entry.BibEntryTypesManager;
2019

20+
import org.slf4j.Logger;
21+
import org.slf4j.LoggerFactory;
22+
2123
import static org.jabref.gui.actions.ActionHelper.needsDatabase;
2224

2325
public class ConsistencyCheckAction extends SimpleCommand {
2426

27+
private static final Logger LOGGER = LoggerFactory.getLogger(ConsistencyCheckAction.class);
2528
Supplier<LibraryTab> tabSupplier;
2629
private final DialogService dialogService;
2730
private final StateManager stateManager;
@@ -49,16 +52,18 @@ public ConsistencyCheckAction(Supplier<LibraryTab> tabSupplier,
4952
public void execute() {
5053
Task<BibliographyConsistencyCheck.Result> task = new Task<>() {
5154
@Override
52-
public BibliographyConsistencyCheck.Result call() throws Exception {
53-
55+
public BibliographyConsistencyCheck.Result call() {
5456
Optional<BibDatabaseContext> databaseContext = stateManager.getActiveDatabase();
5557
if (databaseContext.isEmpty()) {
56-
throw new IllegalStateException((Localization.lang("No library present")));
58+
LOGGER.debug("Consistency check invoked with no library opened.");
59+
dialogService.notify(Localization.lang("No library open"));
60+
return new BibliographyConsistencyCheck.Result(Map.of());
5761
}
58-
List<BibEntry> entries = databaseContext.get().getEntries();
62+
63+
BibDatabaseContext bibContext = databaseContext.get();
5964

6065
BibliographyConsistencyCheck consistencyCheck = new BibliographyConsistencyCheck();
61-
return consistencyCheck.check(entries, (count, total) ->
66+
return consistencyCheck.check(bibContext, (count, total) ->
6267
UiTaskExecutor.runInJavaFXThread(() -> {
6368
updateProgress(count, total);
6469
updateMessage(Localization.lang("%0/%1 entry types", count + 1, total));

jabkit/src/main/java/org/jabref/cli/CheckConsistency.java

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import java.io.IOException;
44
import java.io.OutputStreamWriter;
55
import java.io.Writer;
6-
import java.util.List;
76
import java.util.Optional;
87
import java.util.concurrent.Callable;
98

@@ -14,15 +13,13 @@
1413
import org.jabref.logic.quality.consistency.BibliographyConsistencyCheckResultTxtWriter;
1514
import org.jabref.logic.quality.consistency.BibliographyConsistencyCheckResultWriter;
1615
import org.jabref.model.database.BibDatabaseContext;
17-
import org.jabref.model.entry.BibEntry;
1816

1917
import org.slf4j.Logger;
2018
import org.slf4j.LoggerFactory;
21-
22-
import static picocli.CommandLine.Command;
23-
import static picocli.CommandLine.Mixin;
24-
import static picocli.CommandLine.Option;
25-
import static picocli.CommandLine.ParentCommand;
19+
import picocli.CommandLine.Command;
20+
import picocli.CommandLine.Mixin;
21+
import picocli.CommandLine.Option;
22+
import picocli.CommandLine.ParentCommand;
2623

2724
@Command(name = "check-consistency", description = "Check consistency of the library.")
2825
class CheckConsistency implements Callable<Integer> {
@@ -63,10 +60,9 @@ public Integer call() {
6360
}
6461

6562
BibDatabaseContext databaseContext = parserResult.get().getDatabaseContext();
66-
List<BibEntry> entries = databaseContext.getDatabase().getEntries();
6763

6864
BibliographyConsistencyCheck consistencyCheck = new BibliographyConsistencyCheck();
69-
BibliographyConsistencyCheck.Result result = consistencyCheck.check(entries, (count, total) -> {
65+
BibliographyConsistencyCheck.Result result = consistencyCheck.check(databaseContext, (count, total) -> {
7066
if (!sharedOptions.porcelain) {
7167
System.out.println(Localization.lang("Checking consistency for entry type %0 of %1", count + 1, total));
7268
}
Lines changed: 125 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
package org.jabref.logic.quality.consistency;
22

33
import java.util.Collection;
4-
import java.util.Comparator;
4+
import java.util.Collections;
55
import java.util.HashMap;
66
import java.util.HashSet;
7+
import java.util.LinkedHashMap;
78
import java.util.List;
89
import java.util.Map;
10+
import java.util.Optional;
911
import java.util.SequencedCollection;
1012
import java.util.Set;
1113
import java.util.function.BiConsumer;
@@ -14,27 +16,52 @@
1416
import org.jabref.logic.bibtex.comparator.BibEntryByCitationKeyComparator;
1517
import org.jabref.logic.bibtex.comparator.BibEntryByFieldsComparator;
1618
import org.jabref.logic.bibtex.comparator.FieldComparatorStack;
19+
import org.jabref.model.database.BibDatabaseContext;
20+
import org.jabref.model.database.BibDatabaseMode;
1721
import org.jabref.model.entry.BibEntry;
22+
import org.jabref.model.entry.BibEntryType;
1823
import org.jabref.model.entry.field.Field;
24+
import org.jabref.model.entry.field.InternalField;
1925
import org.jabref.model.entry.field.SpecialField;
2026
import org.jabref.model.entry.field.StandardField;
2127
import org.jabref.model.entry.field.UserSpecificCommentField;
28+
import org.jabref.model.entry.types.BiblatexEntryTypeDefinitions;
29+
import org.jabref.model.entry.types.BibtexEntryTypeDefinitions;
2230
import org.jabref.model.entry.types.EntryType;
2331

32+
import com.google.common.annotations.VisibleForTesting;
33+
2434
public class BibliographyConsistencyCheck {
2535

36+
private static final Set<EntryType> BIBLATEX_TYPES = BiblatexEntryTypeDefinitions.ALL.stream()
37+
.map(BibEntryType::getType)
38+
.collect(Collectors.toSet());
39+
40+
private static final Set<EntryType> BIBTEX_TYPES = BibtexEntryTypeDefinitions.ALL.stream()
41+
.map(BibEntryType::getType)
42+
.collect(Collectors.toSet());
43+
2644
private static final Set<Field> EXPLICITLY_EXCLUDED_FIELDS = Set.of(
27-
StandardField.COMMENT,
28-
StandardField.CROSSREF,
29-
StandardField.GROUPS,
30-
StandardField.CITES,
31-
StandardField.PDF,
32-
StandardField.REVIEW,
33-
StandardField.SORTKEY,
34-
StandardField.SORTNAME,
35-
StandardField.TYPE,
36-
StandardField.XREF
37-
);
45+
InternalField.KEY_FIELD, // Citation key
46+
StandardField.KEY,
47+
StandardField.COMMENT,
48+
StandardField.CROSSREF,
49+
StandardField.CITES,
50+
StandardField.PDF,
51+
StandardField.REVIEW,
52+
StandardField.SORTKEY,
53+
StandardField.SORTNAME,
54+
StandardField.TYPE,
55+
StandardField.XREF,
56+
57+
// JabRef-specific
58+
StandardField.GROUPS,
59+
StandardField.OWNER,
60+
StandardField.CITATIONCOUNT,
61+
StandardField.TIMESTAMP,
62+
StandardField.CREATIONDATE,
63+
StandardField.MODIFICATIONDATE
64+
);
3865

3966
private static Set<Field> filterExcludedFields(Collection<Field> fields) {
4067
return fields.stream()
@@ -45,6 +72,27 @@ private static Set<Field> filterExcludedFields(Collection<Field> fields) {
4572
.collect(Collectors.toSet());
4673
}
4774

75+
/// Filters the given entries to those that violate consistency:
76+
///
77+
/// - Fields not set (but set in other entries of the same type)
78+
/// - Required fields not set
79+
///
80+
/// Additionally, the entries are sorted
81+
@VisibleForTesting
82+
List<BibEntry> filterAndSortEntriesWithFieldDifferences(Set<BibEntry> entries, Set<Field> differingFields, Set<Field> requiredFields) {
83+
return entries.stream()
84+
.filter(entry ->
85+
// This removes entries that have all differing fields set (could be confusing to the user)
86+
!Collections.disjoint(entry.getFields(), differingFields)
87+
// This ensures that all entries with missing required fields are included
88+
|| !entry.getFields().containsAll(requiredFields))
89+
.sorted(new FieldComparatorStack<>(List.of(
90+
new BibEntryByCitationKeyComparator(),
91+
new BibEntryByFieldsComparator()
92+
)))
93+
.toList();
94+
}
95+
4896
public record Result(Map<EntryType, EntryTypeResult> entryTypeToResultMap) {
4997
}
5098

@@ -62,64 +110,94 @@ public record EntryTypeResult(Collection<Field> fields, SequencedCollection<BibE
62110
*
63111
* @implNote This class does not implement {@link org.jabref.logic.integrity.DatabaseChecker}, because it returns a list of {@link org.jabref.logic.integrity.IntegrityMessage}, which are too fine-grained.
64112
*/
65-
public Result check(List<BibEntry> entries, BiConsumer<Integer, Integer> entriesGroupingProgress) {
113+
public Result check(BibDatabaseContext bibContext, BiConsumer<Integer, Integer> entriesGroupingProgress) {
66114
// collects fields existing in any entry, scoped by entry type
67115
Map<EntryType, Set<Field>> entryTypeToFieldsInAnyEntryMap = new HashMap<>();
68116
// collects fields existing in all entries, scoped by entry type
69117
Map<EntryType, Set<Field>> entryTypeToFieldsInAllEntriesMap = new HashMap<>();
70118
// collects entries of the same type
71119
Map<EntryType, Set<BibEntry>> entryTypeToEntriesMap = new HashMap<>();
72120

73-
collectEntriesIntoMaps(entries, entryTypeToFieldsInAnyEntryMap, entryTypeToFieldsInAllEntriesMap, entryTypeToEntriesMap);
121+
collectEntriesIntoMaps(bibContext, entryTypeToFieldsInAnyEntryMap, entryTypeToFieldsInAllEntriesMap, entryTypeToEntriesMap);
122+
123+
List<BibEntryType> entryTypeDefinitions;
124+
if (bibContext.getMode() == BibDatabaseMode.BIBLATEX) {
125+
entryTypeDefinitions = BiblatexEntryTypeDefinitions.ALL;
126+
} else {
127+
entryTypeDefinitions = BibtexEntryTypeDefinitions.ALL;
128+
}
74129

75-
Map<EntryType, EntryTypeResult> resultMap = new HashMap<>();
130+
// Use LinkedHashMap to preserve the order of Bib*EntryTypeDefinitions.ALL
131+
Map<EntryType, EntryTypeResult> resultMap = new LinkedHashMap<>();
76132

77133
int counter = 0;
78134
for (Map.Entry<EntryType, Set<Field>> mapEntry : entryTypeToFieldsInAnyEntryMap.entrySet()) {
79135
entriesGroupingProgress.accept(counter++, entryTypeToFieldsInAnyEntryMap.size());
80136
EntryType entryType = mapEntry.getKey();
81-
Set<Field> fields = mapEntry.getValue();
82-
Set<Field> commonFields = entryTypeToFieldsInAllEntriesMap.get(entryType);
83-
assert commonFields != null;
84-
Set<Field> uniqueFields = new HashSet<>(fields);
85-
uniqueFields.removeAll(commonFields);
86-
87-
if (uniqueFields.isEmpty()) {
137+
Set<Field> fieldsInAnyEntry = mapEntry.getValue();
138+
Set<Field> fieldsInAllEntries = entryTypeToFieldsInAllEntriesMap.get(entryType);
139+
Set<Field> filteredFieldsInAnyEntry = filterExcludedFields(fieldsInAnyEntry);
140+
141+
Set<Field> differingFields = new HashSet<>(filteredFieldsInAnyEntry);
142+
differingFields.removeAll(fieldsInAllEntries);
143+
assert fieldsInAllEntries != null;
144+
145+
differingFields.removeAll(fieldsInAllEntries);
146+
147+
Optional<BibEntryType> typeDefOpt = entryTypeDefinitions.stream()
148+
.filter(def -> def.getType().equals(entryType))
149+
.findFirst();
150+
151+
Set<Field> requiredFields = typeDefOpt.map(typeDef ->
152+
typeDef.getRequiredFields().stream()
153+
.flatMap(orFields -> orFields.getFields().stream())
154+
.collect(Collectors.toSet())
155+
).orElse(Set.of());
156+
157+
Set<BibEntry> entries = entryTypeToEntriesMap.get(entryType);
158+
assert entries != null;
159+
assert entries.size() != 1; // Either there is no entry with different fields or more than one
160+
if (entries == null || entries.size() <= 1 || differingFields.isEmpty()) {
88161
continue;
89162
}
90163

91-
List<Comparator<BibEntry>> comparators = List.of(
92-
new BibEntryByCitationKeyComparator(),
93-
new BibEntryByFieldsComparator());
94-
FieldComparatorStack<BibEntry> comparatorStack = new FieldComparatorStack<>(comparators);
95-
96-
List<BibEntry> differingEntries = entryTypeToEntriesMap
97-
.get(entryType).stream()
98-
.filter(entry -> !filterExcludedFields(entry.getFields()).equals(commonFields))
99-
.sorted(comparatorStack)
100-
.toList();
101-
102-
resultMap.put(entryType, new EntryTypeResult(uniqueFields, differingEntries));
164+
List<BibEntry> sortedEntries = filterAndSortEntriesWithFieldDifferences(entries, differingFields, requiredFields);
165+
if (!sortedEntries.isEmpty()) {
166+
resultMap.put(entryType, new EntryTypeResult(differingFields, sortedEntries));
167+
}
103168
}
104169

105170
return new Result(resultMap);
106171
}
107172

108-
private static void collectEntriesIntoMaps(List<BibEntry> entries, Map<EntryType, Set<Field>> entryTypeToFieldsInAnyEntryMap, Map<EntryType, Set<Field>> entryTypeToFieldsInAllEntriesMap, Map<EntryType, Set<BibEntry>> entryTypeToEntriesMap) {
109-
for (BibEntry entry : entries) {
110-
EntryType entryType = entry.getType();
173+
private static void collectEntriesIntoMaps(BibDatabaseContext bibContext, Map<EntryType, Set<Field>> entryTypeToFieldsInAnyEntryMap, Map<EntryType, Set<Field>> entryTypeToFieldsInAllEntriesMap, Map<EntryType, Set<BibEntry>> entryTypeToEntriesMap) {
174+
BibDatabaseMode mode = bibContext.getMode();
175+
List<BibEntry> entries = bibContext.getEntries();
111176

112-
entryTypeToFieldsInAnyEntryMap
113-
.computeIfAbsent(entryType, _ -> new HashSet<>())
114-
.addAll(filterExcludedFields(entry.getFields()));
177+
Set<EntryType> typeSet = switch (mode) {
178+
case BIBLATEX -> BIBLATEX_TYPES;
179+
case BIBTEX -> BIBTEX_TYPES;
180+
};
115181

116-
entryTypeToFieldsInAllEntriesMap
117-
.computeIfAbsent(entryType, _ -> new HashSet<>(filterExcludedFields(entry.getFields())))
118-
.retainAll(filterExcludedFields(entry.getFields()));
119-
120-
entryTypeToEntriesMap
121-
.computeIfAbsent(entryType, _ -> new HashSet<>())
122-
.add(entry);
182+
for (BibEntry entry : entries) {
183+
if (typeSet.contains(entry.getType())) {
184+
EntryType entryType = entry.getType();
185+
186+
Set<Field> filteredFields = filterExcludedFields(entry.getFields());
187+
188+
entryTypeToFieldsInAnyEntryMap
189+
.computeIfAbsent(entryType, _ -> new HashSet<>())
190+
.addAll(filteredFields);
191+
if (entryTypeToFieldsInAllEntriesMap.containsKey(entryType)) {
192+
entryTypeToFieldsInAllEntriesMap.get(entryType).retainAll(filteredFields);
193+
} else {
194+
entryTypeToFieldsInAllEntriesMap.put(entryType, new HashSet<>(filteredFields));
195+
}
196+
197+
entryTypeToEntriesMap
198+
.computeIfAbsent(entryType, _ -> new java.util.LinkedHashSet<>())
199+
.add(entry);
200+
}
123201
}
124202
}
125203
}

0 commit comments

Comments
 (0)