Skip to content

Commit 9c28a07

Browse files
committed
Switch to enum for PicaConstants
Avoid reassignment to static fields, keep switch in parser. See #296
1 parent e3d9674 commit 9c28a07

File tree

3 files changed

+85
-74
lines changed

3 files changed

+85
-74
lines changed
Lines changed: 31 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright 2016,2019 Christoph Böhme and hbz
1+
/* Copyright 2016,2019 Christoph Böhme and others
22
*
33
* Licensed under the Apache License, Version 2.0 (the "License");
44
* you may not use this file except in compliance with the License.
@@ -13,40 +13,44 @@
1313
* limitations under the License.
1414
*/
1515

16+
package org.metafacture.biblio.pica;
17+
1618
/**
1719
* Useful constants for PICA+.
1820
* PICA+ comes with two possible serializations:
1921
* a normalized one and a non-normalized one.
2022
*
21-
* @author Christoph Böhme
22-
* @author Pascal Christoph (dr0i)
23+
* @author Christoph Böhme (initial implementation)
24+
* @author Pascal Christoph (dr0i) (add support for non-normalized pica+)
25+
* @author Fabian Steeg (fsteeg) (switch to enum)
2326
*
2427
*/
28+
enum PicaConstants {
29+
// '\0' is a placeholder where a serialization has no character for the marker
30+
RECORD_MARKER('\u001d', '\n'), //
31+
FIELD_MARKER('\u001e', '\0'), //
32+
SUBFIELD_MARKER('\u001f', '$'), //
33+
FIELD_END_MARKER('\n', '\n'), //
34+
NO_MARKER('\0', '\0');
2535

26-
package org.metafacture.biblio.pica;
27-
28-
final class PicaConstants{
29-
public static char RECORD_MARKER = '\u001d';
30-
public static char FIELD_MARKER = '\u001e';
31-
public static char SUBFIELD_MARKER = '\u001f';
32-
public static char FIELD_END_MARKER = '\n';
33-
34-
public static void setNormalizedSerialization() {
35-
RECORD_MARKER = '\u001d';
36-
FIELD_MARKER = '\u001e';
37-
SUBFIELD_MARKER = '\u001f';
38-
FIELD_END_MARKER = '\n';
39-
}
36+
char normalized;
37+
char nonNormalized;
4038

41-
public static void setNonNormalizedSerialization() {
42-
RECORD_MARKER = '\n';
43-
FIELD_MARKER = '\n'; //this is a dummy
44-
SUBFIELD_MARKER = '$';
45-
FIELD_END_MARKER = '\n';
46-
}
39+
PicaConstants(char normalized, char nonNormalized) {
40+
this.normalized = normalized;
41+
this.nonNormalized = nonNormalized;
42+
}
4743

48-
private PicaConstants() {
49-
// No instances allowed
50-
}
44+
public char get(boolean isNormalized) {
45+
return isNormalized ? normalized : nonNormalized;
46+
}
5147

52-
}
48+
public static PicaConstants from(boolean isNormalized, char ch) {
49+
for (PicaConstants value : values()) {
50+
if (ch == (isNormalized ? value.normalized : value.nonNormalized)) {
51+
return value;
52+
}
53+
}
54+
return NO_MARKER;
55+
}
56+
}

metafacture-biblio/src/main/java/org/metafacture/biblio/pica/PicaDecoder.java

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2016, 2019 Christoph Böhme and hbz
2+
* Copyright 2016, 2019 Christoph Böhme and others
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -143,7 +143,8 @@
143143
* support other pica encodings.
144144
*
145145
* @author Christoph Böhme
146-
* @author Pascal Christoph (dr0i)
146+
* @author Pascal Christoph (dr0i) (add support for non-normalized pica+)
147+
* @author Fabian Steeg (fsteeg) (switch to enum)
147148
*
148149
*/
149150
@Description("Parses pica+ records. The parser only parses single records. " +
@@ -167,37 +168,36 @@ public final class PicaDecoder
167168
private int recordLen;
168169

169170
private boolean ignoreMissingIdn;
171+
private boolean isNormalized;
170172

171173
public PicaDecoder() {
172-
makeConstants();
174+
this(true);
173175
}
174176

175177
public PicaDecoder(boolean normalized) {
176178
setNormalizedSerialization(normalized);
177-
makeConstants();
178179
}
180+
179181
/**
180-
* Controls wether the input is serialzed as normalized or non-normalized
182+
* Controls whether the input is read as normalized or non-normalized
181183
* pica+. As the default "normalized" is assumed.
182184
*
183-
* @param normalized if true, the input is treated as "normalized" pica+ ;
184-
* if false, it's treated as non-normalized serialized.
185+
* @param normalized if true, the input is treated as normalized pica+;
186+
* if false, it's treated as non-normalized.
185187
*/
186188
public void setNormalizedSerialization(boolean normalized) {
187-
if (normalized)
188-
PicaConstants.setNormalizedSerialization();
189-
else
190-
PicaConstants.setNonNormalizedSerialization();
189+
this.isNormalized = normalized;
191190
makeConstants();
192191
}
192+
193193
private void makeConstants() {
194-
START_MARKERS = "(?:^|" + PicaConstants.FIELD_MARKER + "|"
195-
+ PicaConstants.FIELD_END_MARKER + "|"
196-
+ PicaConstants.RECORD_MARKER + "|.*\n" + ")";
194+
START_MARKERS = "(?:^|" + PicaConstants.FIELD_MARKER.get(isNormalized) + "|"
195+
+ PicaConstants.FIELD_END_MARKER.get(isNormalized) + "|"
196+
+ PicaConstants.RECORD_MARKER.get(isNormalized) + "|.*\n" + ")";
197197
ID_FIELDS_PATTERN = Pattern
198198
.compile(START_MARKERS + "(?:003@|203@(?:/..+)?|107F) "
199-
+ " ?(\\" + PicaConstants.SUBFIELD_MARKER + "|"
200-
+ PicaConstants.SUBFIELD_MARKER + ")0");
199+
+ " ?(\\" + PicaConstants.SUBFIELD_MARKER.get(isNormalized) + "|"
200+
+ PicaConstants.SUBFIELD_MARKER.get(isNormalized) + ")0");
201201
idFieldMatcher = ID_FIELDS_PATTERN.matcher("");
202202
}
203203
/**
@@ -303,7 +303,7 @@ public void process(final String record) {
303303

304304
PicaParserState state = PicaParserState.FIELD_NAME;
305305
for (int i = 0; i < recordLen; ++i) {
306-
state = state.parseChar(buffer[i], parserContext);
306+
state = state.parseChar(buffer[i], parserContext, isNormalized);
307307
}
308308
state.endOfInput(parserContext);
309309

@@ -337,7 +337,7 @@ private String extractRecordId() {
337337
idBuilder.setLength(0);
338338
for (int i = idFromIndex; i < recordLen; ++i) {
339339
final char ch = buffer[i];
340-
if (isSubfieldDelimiter(ch)) {
340+
if (isMarker(ch)) {
341341
break;
342342
}
343343
idBuilder.append(ch);
@@ -353,11 +353,8 @@ private int findRecordId() {
353353
return idFieldMatcher.end();
354354
}
355355

356-
private static boolean isSubfieldDelimiter(final char ch) {
357-
return ch == PicaConstants.RECORD_MARKER
358-
|| ch == PicaConstants.FIELD_MARKER
359-
|| ch == PicaConstants.FIELD_END_MARKER
360-
|| ch == PicaConstants.SUBFIELD_MARKER;
356+
private boolean isMarker(final char ch) {
357+
return PicaConstants.from(isNormalized, ch) != PicaConstants.NO_MARKER;
361358
}
362359

363360
}

metafacture-biblio/src/main/java/org/metafacture/biblio/pica/PicaParserState.java

Lines changed: 34 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2016,2019 Christoph Böhme and hbz
2+
* Copyright 2016,2019 Christoph Böhme and others
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -25,30 +25,33 @@
2525
* The parser ignores spaces in field names. They are not included in the
2626
* field name.
2727
*
28-
* Empty subfields are skipped. For instance, parsing the following normalized
29-
* pica+ would NOT produce an empty literal: 003@ \u001f\u001e. The parser also
28+
* Empty subfields are skipped. For instance, parsing the following input
29+
* would NOT produce an empty literal: 003@ \u001f\u001e. The parser also
3030
* skips unnamed fields without any subfields.
3131
*
3232
* @author Christoph Böhme
33-
* @author Pascal Christoph (dr0i)
34-
*
33+
* @author Pascal Christoph (dr0i) (add support for non-normalized pica+)
34+
* @author Fabian Steeg (fsteeg) (switch to enum)
3535
*/
3636
enum PicaParserState {
3737

3838
FIELD_NAME {
3939
@Override
40-
protected PicaParserState parseChar(final char ch, final PicaParserContext ctx) {
40+
protected PicaParserState parseChar(final char ch, final PicaParserContext ctx, boolean normalized) {
4141
final PicaParserState next;
42-
if(ch==PicaConstants.RECORD_MARKER ||
43-
ch==PicaConstants.FIELD_MARKER ||
44-
ch==PicaConstants.FIELD_END_MARKER){
42+
switch (PicaConstants.from(normalized, ch)) {
43+
case RECORD_MARKER:
44+
case FIELD_MARKER:
45+
case FIELD_END_MARKER:
4546
ctx.emitStartEntity();
4647
ctx.emitEndEntity();
4748
next = FIELD_NAME;
48-
}else if(ch==PicaConstants.SUBFIELD_MARKER){
49+
break;
50+
case SUBFIELD_MARKER:
4951
ctx.emitStartEntity();
5052
next = SUBFIELD_NAME;
51-
}else{
53+
break;
54+
default:
5255
ctx.appendText(ch);
5356
next = this;
5457
}
@@ -63,16 +66,19 @@ protected void endOfInput(final PicaParserContext ctx) {
6366
},
6467
SUBFIELD_NAME {
6568
@Override
66-
protected PicaParserState parseChar(final char ch, final PicaParserContext ctx) {
69+
protected PicaParserState parseChar(final char ch, final PicaParserContext ctx, boolean normalized) {
6770
final PicaParserState next;
68-
if(ch==PicaConstants.RECORD_MARKER ||
69-
ch==PicaConstants.FIELD_MARKER ||
70-
ch==PicaConstants.FIELD_END_MARKER){
71+
switch (PicaConstants.from(normalized, ch)) {
72+
case RECORD_MARKER:
73+
case FIELD_MARKER:
74+
case FIELD_END_MARKER:
7175
ctx.emitEndEntity();
7276
next = FIELD_NAME;
73-
}else if(ch==PicaConstants.SUBFIELD_MARKER)
77+
break;
78+
case SUBFIELD_MARKER:
7479
next = this;
75-
else{
80+
break;
81+
default:
7682
ctx.setSubfieldName(ch);
7783
next = SUBFIELD_VALUE;
7884
}
@@ -86,18 +92,21 @@ protected void endOfInput(final PicaParserContext ctx) {
8692
},
8793
SUBFIELD_VALUE {
8894
@Override
89-
protected PicaParserState parseChar(final char ch, final PicaParserContext ctx) {
95+
protected PicaParserState parseChar(final char ch, final PicaParserContext ctx, boolean normalized) {
9096
final PicaParserState next;
91-
if(ch==PicaConstants.RECORD_MARKER ||
92-
ch==PicaConstants.FIELD_MARKER ||
93-
ch==PicaConstants.FIELD_END_MARKER){
97+
switch (PicaConstants.from(normalized, ch)) {
98+
case RECORD_MARKER:
99+
case FIELD_MARKER:
100+
case FIELD_END_MARKER:
94101
ctx.emitLiteral();
95102
ctx.emitEndEntity();
96103
next = FIELD_NAME;
97-
}else if(ch==PicaConstants.SUBFIELD_MARKER){
104+
break;
105+
case SUBFIELD_MARKER:
98106
ctx.emitLiteral();
99107
next = SUBFIELD_NAME;
100-
}else{
108+
break;
109+
default:
101110
ctx.appendText(ch);
102111
next = this;
103112
}
@@ -111,7 +120,8 @@ protected void endOfInput(final PicaParserContext ctx) {
111120
}
112121
};
113122

114-
protected abstract PicaParserState parseChar(final char ch, final PicaParserContext ctx);
123+
protected abstract PicaParserState parseChar(final char ch, final PicaParserContext ctx, final boolean normalized);
115124

116125
protected abstract void endOfInput(final PicaParserContext ctx);
126+
117127
}

0 commit comments

Comments
 (0)