Skip to content

Commit 1c13d5a

Browse files
committed
Add builder for ISO 2709:2008 records.
The ISO-standard 2709:2008 defines a data format for bibliographic records. The MARC21 format is probably the best known instance of this standard. This commit adds a builder for constructing records in ISO 2709:2008 format.
1 parent fc5cd8a commit 1c13d5a

File tree

10 files changed

+1564
-0
lines changed

10 files changed

+1564
-0
lines changed
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
/*
2+
* Copyright 2014 Christoph Böhme
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.iso2709;
17+
18+
import static org.culturegraph.mf.iso2709.Util.calculateMaxValue;
19+
import static org.culturegraph.mf.iso2709.Util.padWithZeros;
20+
21+
import org.culturegraph.mf.exceptions.FormatException;
22+
23+
/**
24+
* Builds a directory in ISO2709:2008 format. For fields whose length is greater
25+
* than the maximum value that can be stored in field length multiple directory
26+
* entries are created automatically.
27+
*
28+
* @author Christoph Böhme
29+
*
30+
*/
31+
final class DirectoryBuilder {
32+
33+
private final StringBuilder directory = new StringBuilder();
34+
35+
private final int fieldStartLength;
36+
private final int fieldLengthLength;
37+
38+
private final int maxFieldStart;
39+
private final int maxFieldLength;
40+
41+
private String tag;
42+
private String implDefinedPart;
43+
private int fieldStart;
44+
private int fieldEnd;
45+
46+
public DirectoryBuilder(final RecordFormat format) {
47+
fieldStartLength = format.getFieldStartLength();
48+
fieldLengthLength = format.getFieldLengthLength();
49+
50+
maxFieldStart = calculateMaxValue(fieldStartLength);
51+
maxFieldLength = calculateMaxValue(fieldLengthLength);
52+
53+
reset();
54+
}
55+
56+
public void setTag(final String tag) {
57+
this.tag = tag;
58+
}
59+
60+
public void setImplDefinedPart(final String implDefinedPart) {
61+
this.implDefinedPart = implDefinedPart;
62+
}
63+
64+
public void setFieldStart(final int fieldStart) {
65+
assert fieldStart >= 0;
66+
this.fieldStart = fieldStart;
67+
}
68+
69+
public void setFieldEnd(final int fieldEnd) {
70+
assert fieldEnd >= 0;
71+
this.fieldEnd = fieldEnd;
72+
}
73+
74+
public void write() {
75+
assert tag != null;
76+
assert implDefinedPart != null;
77+
assert fieldEnd >= fieldStart;
78+
79+
checkAllPartsStartInAddressRange();
80+
81+
int remainingLength = fieldEnd - fieldStart;
82+
int partStart = fieldStart;
83+
while (remainingLength > maxFieldLength) {
84+
writeDirectoryEntry(partStart, 0);
85+
remainingLength -= maxFieldLength;
86+
partStart += maxFieldLength;
87+
}
88+
writeDirectoryEntry(partStart, remainingLength);
89+
}
90+
91+
private void checkAllPartsStartInAddressRange() {
92+
final int fieldLength = fieldEnd - fieldStart;
93+
final int lastPartLength = fieldLength % maxFieldLength;
94+
final int lastPartStart = fieldEnd - lastPartLength;
95+
if (lastPartStart > maxFieldStart) {
96+
throw new FormatException("the field is too long");
97+
}
98+
}
99+
100+
private void writeDirectoryEntry(final int partStart, final int partLength) {
101+
directory.append(tag);
102+
directory.append(padWithZeros(partLength, fieldLengthLength));
103+
directory.append(padWithZeros(partStart, fieldStartLength));
104+
directory.append(implDefinedPart);
105+
}
106+
107+
public void reset() {
108+
directory.setLength(0);
109+
tag = null;
110+
implDefinedPart = null;
111+
fieldStart = 0;
112+
fieldEnd = 0;
113+
}
114+
115+
public int length() {
116+
return directory.length() + 1;
117+
}
118+
119+
@Override
120+
public String toString() {
121+
return directory.toString() + Iso2709Format.FIELD_SEPARATOR;
122+
}
123+
124+
}
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/*
2+
* Copyright 2014 Christoph Böhme
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.iso2709;
17+
18+
/**
19+
* Builds a list of fields in ISO 2709:2008 format.
20+
*
21+
* @author Christoph Böhme
22+
*
23+
*/
24+
final class FieldsBuilder {
25+
26+
private final StringBuilder fields = new StringBuilder();
27+
28+
private final int identifierLength;
29+
30+
private boolean inField;
31+
private int undoMarker = -1;
32+
33+
public FieldsBuilder(final RecordFormat format) {
34+
identifierLength = format.getIdentifierLength();
35+
}
36+
37+
public int startField(final String indicators) {
38+
requireNotInField();
39+
inField = true;
40+
undoMarker = fields.length();
41+
fields.append(indicators);
42+
return undoMarker;
43+
}
44+
45+
public int endField() {
46+
requireInField();
47+
inField = false;
48+
fields.append(Iso2709Format.FIELD_SEPARATOR);
49+
return fields.length();
50+
}
51+
52+
public void undoLastField() {
53+
assert undoMarker > -1;
54+
fields.setLength(undoMarker);
55+
undoMarker = -1;
56+
}
57+
58+
public void appendValue(final String value) {
59+
requireInField();
60+
fields.append(value);
61+
}
62+
63+
public void appendSubfield(final String identifier, final String value) {
64+
requireInField();
65+
if (identifierLength > 0) {
66+
fields.append(Iso2709Format.IDENTIFIER_MARKER);
67+
fields.append(identifier);
68+
}
69+
fields.append(value);
70+
}
71+
72+
public void reset() {
73+
fields.setLength(0);
74+
undoMarker = -1;
75+
inField = false;
76+
}
77+
78+
public int length() {
79+
return fields.length() + 1;
80+
}
81+
82+
@Override
83+
public String toString() {
84+
requireNotInField();
85+
return fields.toString() + Iso2709Format.RECORD_SEPARATOR;
86+
}
87+
88+
private void requireInField() {
89+
if (!inField) {
90+
throw new IllegalStateException("need to be in field");
91+
}
92+
}
93+
94+
private void requireNotInField() {
95+
if (inField) {
96+
throw new IllegalStateException("must not be in field");
97+
}
98+
}
99+
100+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Copyright 2014 Christoph Böhme
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.iso2709;
17+
18+
/**
19+
* Constants defining the positions and lengths of the elements of an ISO
20+
* 2709:2008 record.
21+
*
22+
* @author Christoph Böhme
23+
*
24+
*/
25+
public final class Iso2709Format {
26+
27+
public static final int RECORD_LABEL_LENGTH = 24;
28+
29+
public static final int RECORD_LENGTH_START = 0;
30+
public static final int RECORD_LENGTH_LENGTH = 5;
31+
public static final int RECORD_LENGTH_END = RECORD_LENGTH_START
32+
+ RECORD_LENGTH_LENGTH;
33+
34+
public static final int RECORD_STATUS_POS = 5;
35+
36+
public static final int IMPL_CODES_START = 6;
37+
public static final int IMPL_CODES_LENGTH = 4;
38+
public static final int IMPL_CODES_END = IMPL_CODES_START
39+
+ IMPL_CODES_LENGTH;
40+
41+
public static final int INDICATOR_LENGTH_POS = 10;
42+
public static final int IDENTIFIER_LENGTH_POS = 11;
43+
44+
public static final int BASE_ADDRESS_START = 12;
45+
public static final int BASE_ADDRESS_LENGTH = 5;
46+
public static final int BASE_ADDRESS_END = BASE_ADDRESS_START
47+
+ BASE_ADDRESS_LENGTH;
48+
49+
public static final int SYSTEM_CHARS_START = 17;
50+
public static final int SYSTEM_CHARS_LENGTH = 3;
51+
public static final int SYSTEM_CHARS_END = SYSTEM_CHARS_START
52+
+ SYSTEM_CHARS_LENGTH;
53+
54+
public static final int FIELD_LENGTH_LENGTH_POS = 20;
55+
public static final int FIELD_START_LENGTH_POS = 21;
56+
public static final int IMPL_DEFINED_PART_LENGTH_POS = 22;
57+
public static final int RESERVED_CHAR_POS = 23;
58+
59+
public static final int TAG_LENGTH = 3;
60+
61+
public static final char IDENTIFIER_MARKER = Iso646Characters.IS1;
62+
public static final char FIELD_SEPARATOR = Iso646Characters.IS2;
63+
public static final char RECORD_SEPARATOR = Iso646Characters.IS3;
64+
65+
private Iso2709Format() {
66+
// No instance allowed
67+
}
68+
69+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Copyright 2014 Christoph Böhme
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.iso2709;
17+
18+
/**
 * Character constants from the ISO 646:1991 standard. The four constants
 * occupy the consecutive code points 0x1C to 0x1F, with IS4 being the lowest
 * and IS1 the highest.
 *
 * @author Christoph Böhme
 *
 */
public final class Iso646Characters {

    /** IS1, code point 0x1F. */
    public static final char IS1 = 0x1f;

    /** IS2, code point 0x1E. */
    public static final char IS2 = 0x1e;

    /** IS3, code point 0x1D. */
    public static final char IS3 = 0x1d;

    /** IS4, code point 0x1C. */
    public static final char IS4 = 0x1c;

    private Iso646Characters() {
        // Constants only, never instantiated
    }

}

0 commit comments

Comments
 (0)