Skip to content

Commit 502b655

Browse files
committed
Fix checkpointing for checksum checking
Fixes the checkpointing for checksum checking by actually committing the changes to the database. This replaces "uncacheEntity" with "commit", as testing has shown that this is necessary to ensure that the checksum status is properly updated in the "most_recent_checksum" table when the checksum process is interrupted. The provided integration test fails if the checksums of bitstreams that were checked before the interruption are not properly recorded in the database.
1 parent eba5216 commit 502b655

File tree

2 files changed

+193
-1
lines changed

2 files changed

+193
-1
lines changed

dspace-api/src/main/java/org/dspace/checker/CheckerCommand.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ public void process() throws SQLException {
131131
collector.collect(context, info);
132132
}
133133

134-
context.uncacheEntity(bitstream);
134+
context.commit();
135135
bitstream = dispatcher.next();
136136
}
137137
}
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
/**
2+
* The contents of this file are subject to the license and copyright
3+
* detailed in the LICENSE and NOTICE files at the root of the source
4+
* tree and available online at
5+
*
6+
* http://www.dspace.org/license/
7+
*/
8+
package org.dspace.checker;
9+
10+
import static java.nio.charset.StandardCharsets.UTF_8;
11+
import static org.junit.Assert.assertTrue;
12+
import static org.junit.Assert.fail;
13+
14+
import java.sql.SQLException;
15+
import java.time.Instant;
16+
import java.util.ArrayList;
17+
import java.util.List;
18+
19+
import org.apache.commons.io.IOUtils;
20+
import org.dspace.AbstractIntegrationTestWithDatabase;
21+
import org.dspace.builder.BitstreamBuilder;
22+
import org.dspace.builder.CollectionBuilder;
23+
import org.dspace.builder.CommunityBuilder;
24+
import org.dspace.builder.ItemBuilder;
25+
import org.dspace.checker.factory.CheckerServiceFactory;
26+
import org.dspace.checker.service.ChecksumHistoryService;
27+
import org.dspace.checker.service.MostRecentChecksumService;
28+
import org.dspace.content.Bitstream;
29+
import org.dspace.content.Collection;
30+
import org.dspace.content.Community;
31+
import org.dspace.content.Item;
32+
import org.dspace.core.Context;
33+
import org.junit.After;
34+
import org.junit.Before;
35+
import org.junit.Test;
36+
37+
public class ChecksumCheckerIT extends AbstractIntegrationTestWithDatabase {
38+
protected List<Bitstream> bitstreams;
39+
protected MostRecentChecksumService checksumService =
40+
CheckerServiceFactory.getInstance().getMostRecentChecksumService();
41+
42+
@Before
43+
public void setup() throws Exception {
44+
context.turnOffAuthorisationSystem();
45+
46+
Community parentCommunity = CommunityBuilder.createCommunity(context).build();
47+
Collection collection = CollectionBuilder.createCollection(context, parentCommunity)
48+
.build();
49+
Item item = ItemBuilder.createItem(context, collection).withTitle("Test item")
50+
.build();
51+
52+
int numBitstreams = 3;
53+
bitstreams = new ArrayList<>();
54+
for (int i = 0; i < numBitstreams; i++) {
55+
String content = "Test bitstream " + i;
56+
bitstreams.add(
57+
BitstreamBuilder.createBitstream(
58+
context, item, IOUtils.toInputStream(content, UTF_8)
59+
).build()
60+
);
61+
}
62+
63+
context.restoreAuthSystemState();
64+
65+
// Call the "updateMissingBitstreams" method so that the test bitstreams
66+
// already have checksums in the past when CheckerCommand runs.
67+
// Otherwise, the CheckerCommand will simply update the test
68+
// bitstreams without going through the BitstreamDispatcher.
69+
checksumService = CheckerServiceFactory.getInstance().getMostRecentChecksumService();
70+
checksumService.updateMissingBitstreams(context);
71+
72+
// The "updateMissingBitstreams" method updates the test bitstreams in
73+
// a random order. To verify that the expected bitstreams were
74+
// processed, reset the timestamps so that the bitstreams are
75+
// checked in a specific order (oldest first).
76+
Instant checksumInstant = Instant.ofEpochMilli(0);
77+
for (Bitstream bitstream: bitstreams) {
78+
MostRecentChecksum mrc = checksumService.findByBitstream(context, bitstream);
79+
mrc.setProcessStartDate(checksumInstant);
80+
mrc.setProcessEndDate(checksumInstant);
81+
checksumInstant = checksumInstant.plusSeconds(10);
82+
}
83+
context.commit();
84+
}
85+
86+
@After
87+
public void cleanUp() throws SQLException {
88+
// Need to clean up ChecksumHistory because of a referential integrity
89+
// constraint violation between the most_recent_checksum table and
90+
// bitstream tables
91+
ChecksumHistoryService checksumHistoryService = CheckerServiceFactory.getInstance().getChecksumHistoryService();
92+
93+
for (Bitstream bitstream: bitstreams) {
94+
checksumHistoryService.deleteByBitstream(context, bitstream);
95+
}
96+
}
97+
98+
@Test
99+
public void testChecksumsRecordedWhenProcesingIsInterrupted() throws SQLException {
100+
CheckerCommand checker = new CheckerCommand(context);
101+
102+
// The start date to use for the checker process
103+
Instant checkerStartDate = Instant.now();
104+
105+
// Verify that all checksums are before the checker start date
106+
for (Bitstream bitstream: bitstreams) {
107+
MostRecentChecksum checksum = checksumService.findByBitstream(context, bitstream);
108+
Instant lastChecksumDate = checksum.getProcessStartDate();
109+
assertTrue("lastChecksumDate (" + lastChecksumDate + ") <= checkerStartDate (" + checkerStartDate + ")",
110+
lastChecksumDate.isBefore(checkerStartDate));
111+
}
112+
113+
// Dispatcher that throws an exception when a third bitstream is
114+
// retrieved.
115+
BitstreamDispatcher dispatcher = new ExpectionThrowingDispatcher(
116+
context, checkerStartDate, false, 2);
117+
checker.setDispatcher(dispatcher);
118+
119+
120+
// Run the checksum checker
121+
checker.setProcessStartDate(checkerStartDate);
122+
try {
123+
checker.process();
124+
fail("SQLException should have been thrown");
125+
} catch (SQLException sqle) {
126+
// Rollback any pending transaction
127+
context.rollback();
128+
}
129+
130+
// Verify that the checksums of the first two bitstreams (that were
131+
// processed before the exception) have been successfully recorded in
132+
// the database, while the third bitstream was not updated.
133+
int bitstreamCount = 0;
134+
for (Bitstream bitstream: bitstreams) {
135+
MostRecentChecksum checksum = checksumService.findByBitstream(context, bitstream);
136+
Instant lastChecksumDate = checksum.getProcessStartDate();
137+
138+
bitstreamCount = bitstreamCount + 1;
139+
if (bitstreamCount <= 2) {
140+
assertTrue("lastChecksumDate (" + lastChecksumDate + ") <= checkerStartDate (" + checkerStartDate + ")",
141+
lastChecksumDate.isAfter(checkerStartDate));
142+
} else {
143+
assertTrue("lastChecksumDate (" + lastChecksumDate + ") >= checkerStartDate (" + checkerStartDate + ")",
144+
lastChecksumDate.isBefore(checkerStartDate));
145+
}
146+
}
147+
}
148+
149+
/**
150+
* Subclass of SimpleDispatcher that only allows a limited number of "next"
151+
* class before throwing a SQLException.
152+
*/
153+
class ExpectionThrowingDispatcher extends SimpleDispatcher {
154+
// The number of "next" calls to allow before throwing a SQLException
155+
protected int maxNextCalls;
156+
157+
// The number of "next" method calls seen so far.
158+
protected int numNextCalls = 0;
159+
160+
/**
161+
* Constructor.
162+
*
163+
* @param context Context
164+
* @param startTime timestamp for beginning of checker process
165+
* @param looping indicates whether checker should loop infinitely
166+
* through most_recent_checksum table
167+
* @param maxNextCalls the number of "next" method calls to allow before
168+
* throwing a SQLException.
169+
*/
170+
public ExpectionThrowingDispatcher(Context context, Instant startTime, boolean looping, int maxNextCalls) {
171+
super(context, startTime, looping);
172+
this.maxNextCalls = maxNextCalls;
173+
}
174+
175+
/**
176+
* Selects the next candidate bitstream.
177+
*
178+
* After "maxNextClass" number of calls, this method throws a
179+
* SQLException.
180+
*
181+
* @throws SQLException if database error
182+
*/
183+
@Override
184+
public synchronized Bitstream next() throws SQLException {
185+
numNextCalls = numNextCalls + 1;
186+
if (numNextCalls > maxNextCalls) {
187+
throw new SQLException("Max 'next' method calls exceeded");
188+
}
189+
return super.next();
190+
}
191+
}
192+
}

0 commit comments

Comments
 (0)