Skip to content

Commit 89119a6

Browse files
committed
Improved record id extraction from field 003@$0.
The new code handles record ids properly if they are at the end of the record and not terminated by a field or subfield delimiter.
1 parent 4483e5e commit 89119a6

File tree

2 files changed

+45
-14
lines changed

2 files changed

+45
-14
lines changed

src/main/java/org/culturegraph/mf/stream/converter/bib/PicaDecoder.java

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,20 @@ private boolean recordIsEmpty() {
158158
return true;
159159
}
160160

161+
/**
162+
* Searches the record for the sequence specified in {@code ID_FIELD}
163+
* and returns all characters following this sequence until the next
164+
* {@link PicaConstants#FIELD_DELIMITER},
165+
* {@link PicaConstants#SUBFIELD_DELIMITER} or the end of the record
166+
* is reached. Only the first occurrence of the sequence is processed,
167+
* later occurrences are ignored.
168+
*
169+
* If the sequence is not found in the string or if it is not followed
170+
* by any characters then {@code null} is returned.
171+
*
172+
* @return value of subfield 003@$0 or null if the
173+
* field is not found or is empty.
174+
*/
161175
private String extractRecordId() {
162176
idBuilder.setLength(0);
163177

@@ -166,29 +180,31 @@ private String extractRecordId() {
166180
for (int i = 0; i < recordLen; ++i) {
167181
if (buffer[i] == PicaConstants.FIELD_DELIMITER) {
168182
if (idBuilder.length() > 0) {
169-
return idBuilder.toString();
183+
break;
170184
}
171185
fieldPos = 0;
172186
skip = false;
173-
continue;
174-
}
175-
if (!skip) {
176-
if (fieldPos < ID_FIELD.length) {
177-
if (buffer[i] == ID_FIELD[fieldPos]) {
178-
fieldPos += 1;
179-
} else {
180-
skip = true;
181-
}
182-
} else {
183-
if (buffer[i] == PicaConstants.SUBFIELD_DELIMITER) {
184-
skip = true;
187+
} else {
188+
if (!skip) {
189+
if (fieldPos < ID_FIELD.length) {
190+
if (buffer[i] == ID_FIELD[fieldPos]) {
191+
fieldPos += 1;
192+
} else {
193+
skip = true;
194+
}
185195
} else {
196+
if (buffer[i] == PicaConstants.SUBFIELD_DELIMITER) {
197+
break;
198+
}
186199
idBuilder.append(buffer[i]);
187200
}
188201
}
189202
}
190203
}
191204

205+
if (idBuilder.length() > 0) {
206+
return idBuilder.toString();
207+
}
192208
return null;
193209
}
194210

src/test/java/org/culturegraph/mf/stream/converter/bib/PicaDecoderTest.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,9 @@ public final class PicaDecoderTest {
4141
private static final String SUBFIELD = "\u001f";
4242
private static final String FIELD_END = "\u001e";
4343

44+
private static final String FIELD_003AT0_START = "003@ " + SUBFIELD + "0";
4445
private static final String FIELD_001AT = "001@ " + SUBFIELD + "0test" + FIELD_END;
45-
private static final String FIELD_003AT = "003@ " + SUBFIELD + "0" + RECORD_ID + FIELD_END;
46+
private static final String FIELD_003AT = FIELD_003AT0_START + RECORD_ID + FIELD_END;
4647
private static final String FIELD_021A = "021A " + SUBFIELD + "a" + COMPOSED_UTF8 + FIELD_END;
4748
private static final String FIELD_028A_START = "028A ";
4849
private static final String SUBFIELD_A = SUBFIELD + "aEco";
@@ -237,6 +238,20 @@ public void testShouldNotSkipEmptyFieldsWithOnlyEmptySubfieldsIfConfigured() {
237238
verify028AEnd(ordered);
238239
ordered.verify(receiver).endRecord();
239240
}
241+
242+
@Test
243+
public void testShouldProcessRecordIdAtRecordEnd() {
244+
picaDecoder.setFixUnexpectedEOR(true);
245+
246+
picaDecoder.process(
247+
FIELD_003AT0_START +
248+
RECORD_ID);
249+
250+
final InOrder ordered = inOrder(receiver);
251+
ordered.verify(receiver).startRecord(RECORD_ID);
252+
verify003At(ordered);
253+
ordered.verify(receiver).endRecord();
254+
}
240255

241256
@Test(expected=MissingIdException.class)
242257
public void testShouldFailIfIdIsMissingByDefault() {

0 commit comments

Comments
 (0)