Skip to content

Commit ff1b28e

Browse files
lili
authored and committed
The requests from #111 are implemented.
Field and subfield names are now checked against a pattern. The sequence of the stream events is now validated: events must be properly nested (records contain entities, entities contain literals).
1 parent 5a781f2 commit ff1b28e

File tree

2 files changed

+333
-201
lines changed

2 files changed

+333
-201
lines changed
Lines changed: 168 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -1,122 +1,168 @@
1-
/*
2-
* Copyright 2013 Deutsche Nationalbibliothek
3-
*
4-
* Licensed under the Apache License, Version 2.0 (the "License");
5-
* you may not use this file except in compliance with the License.
6-
* You may obtain a copy of the License at
7-
*
8-
* http://www.apache.org/licenses/LICENSE-2.0
9-
*
10-
* Unless required by applicable law or agreed to in writing, software
11-
* distributed under the License is distributed on an "AS IS" BASIS,
12-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13-
* See the License for the specific language governing permissions and
14-
* limitations under the License.
15-
*/
16-
package org.culturegraph.mf.stream.converter.bib;
17-
18-
import java.text.Normalizer;
19-
import java.text.Normalizer.Form;
20-
21-
import org.culturegraph.mf.framework.DefaultStreamPipe;
22-
23-
import org.culturegraph.mf.framework.DefaultStreamPipe;
24-
import org.culturegraph.mf.framework.ObjectReceiver;
25-
import org.culturegraph.mf.framework.StreamReceiver;
26-
import org.culturegraph.mf.framework.annotations.Description;
27-
import org.culturegraph.mf.framework.annotations.In;
28-
import org.culturegraph.mf.framework.annotations.Out;
29-
30-
31-
/**
32-
* Encodes an event stream in pica+ format.
33-
*
34-
* @see PicaEncoder
35-
*
36-
* @author Markus Michael Geipel, Christoph Böhme, Yining Li
37-
*
38-
*/
39-
@Description("Encodes a stream in pica+ Format")
40-
@In(StreamReceiver.class)
41-
@Out(String.class)
42-
public class PicaEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
43-
44-
private static final String FIELD_DELIMITER = "\u001e";
45-
private static final String SUB_DELIMITER = "\u001f";
46-
private static boolean idnControlSubField = false;
47-
private StringBuilder builder = new StringBuilder();
48-
private String idn="";
49-
50-
/**
51-
* For each field in the stream the method calls:
52-
* <ol>
53-
* <li>receiver.startEntity</li>
54-
* <li>receiver.literal for each subfield of the field</li>
55-
* <li>receiver.endEntity</li>
56-
* </ol>
57-
* Fields without any subfield will be skipped.<br>
58-
* <strong>Special handling of subfield 'S':</strong> the code of
59-
* "control subfields" (subfield name='S') will be appended to the
60-
* fieldName. E.g.: 041A $Saxx would be mapped to the fieldName 041Aa,
61-
* and xx will be ignored. A recovery of such field to original is not implemented.
62-
* So the encoder cannot identify a S-field. The S-field special processing
63-
* can be turn of if the call of decode with the option:
64-
* (appendcontrolsubfield="false")
65-
* which default is set to true.
66-
*
67-
* @param record
68-
*/
69-
@Override
70-
public final void startRecord(final String name) {
71-
// the name is a idn, which should be found in the encoded data under 003@.
72-
this.idn = name;
73-
}
74-
75-
public final boolean compareIdFromRecord(final String gndId) {
76-
if (this.idn.equals(gndId)) {
77-
idnControlSubField = false; //only test this context.
78-
return true;
79-
}
80-
throw new MissingIdException(gndId);
81-
}
82-
83-
84-
@Override
85-
public final void startEntity(final String name) {
86-
// Here begins a field (i.e. "028A ", which is given in the name.
87-
// It is unknown, whether there are any subfields in the field.
88-
builder.append(name.trim()+ " ");
89-
if (name.trim().equals("003@")) {
90-
//Time to check nid
91-
idnControlSubField = true;
92-
}else {
93-
//No check is necessary.
94-
idnControlSubField = false;
95-
}
96-
}
97-
98-
@Override
99-
public final void literal(final String name, final String value) {
100-
//
101-
final String value_new = Normalizer.normalize(value, Form.NFD);
102-
if (idnControlSubField == true){
103-
// it is a 003@ field, the same nid delivered with record should follow
104-
if (compareIdFromRecord(value)) idnControlSubField = false;
105-
}
106-
builder.append(SUB_DELIMITER);
107-
builder.append(name);
108-
builder.append(value_new);
109-
}
110-
111-
@Override
112-
public final void endEntity() {
113-
builder.append(FIELD_DELIMITER);
114-
}
115-
116-
@Override
117-
public final void endRecord() {
118-
getReceiver().process(builder.toString());
119-
builder = new StringBuilder();
120-
}
121-
122-
}
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.stream.converter.bib;
17+
18+
import java.text.Normalizer;
19+
import java.text.Normalizer.Form;
20+
import java.util.regex.Matcher;
21+
import java.util.regex.Pattern;
22+
23+
import org.culturegraph.mf.exceptions.FormatException;
24+
import org.culturegraph.mf.framework.DefaultStreamPipe;
25+
26+
import org.culturegraph.mf.framework.DefaultStreamPipe;
27+
import org.culturegraph.mf.framework.ObjectReceiver;
28+
import org.culturegraph.mf.framework.StreamReceiver;
29+
import org.culturegraph.mf.framework.annotations.Description;
30+
import org.culturegraph.mf.framework.annotations.In;
31+
import org.culturegraph.mf.framework.annotations.Out;
32+
33+
34+
/**
35+
* Encodes an event stream in pica+ format.
36+
*
37+
* <strong>Special handling of subfield 'S':</strong> the code of
38+
* "control subfields" (subfield name='S') will be appended to the fieldName.
39+
* E.g.: 041A $Saxx would be mapped to the fieldName 041Aa, and xx will be
40+
* ignored. A recovery of such field to original is not implemented. So the
41+
* encoder cannot identify an S-field.
42+
* The S-field special processing can be turned on if the decoder is called
43+
* with the option: (appendcontrolsubfield="true")
44+
* The default value of this option is set to "false".
45+
*
46+
* @see PicaDecoder
47+
*
48+
* @author Yining Li
49+
*
50+
*/
51+
@Description("Encodes a stream in pica+ Format")
52+
@In(StreamReceiver.class)
53+
@Out(String.class)
54+
public final class PicaEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
55+
56+
private static final String FIELD_DELIMITER = "\u001e";
57+
private static final String SUB_DELIMITER = "\u001f";
58+
private boolean idnControlSubField;
59+
private boolean recordOpen;
60+
private boolean entityOpen;
61+
private StringBuilder builder = new StringBuilder();
62+
private String id="";
63+
64+
private static final String FIELD_NAME_PATTERN_STRING = "\\d{3}.(/..)?";
65+
private static final Pattern FIELD_NAME_PATTERN = Pattern.compile(FIELD_NAME_PATTERN_STRING);
66+
private boolean ignoreRecordId;
67+
68+
/**
69+
* For each field in the stream the method calls:
70+
* <ol>
71+
* <li>receiver.startEntity</li>
72+
* <li>receiver.literal for each subfield of the field</li>
73+
* <li>receiver.endEntity</li>
74+
* </ol>
75+
* Fields without any subfield will be skipped.<br>
76+
*
77+
* @param record
78+
*/
79+
@Override
80+
public void startRecord(final String recordId) {
81+
// the name is a idn, which should be found in the encoded data under 003@.
82+
//any rest of the previous record is cleared before the new begins.
83+
builder.setLength(0);
84+
this.id = recordId;
85+
//Now an entity can be opened. But no literal is allowed.
86+
this.recordOpen = true;
87+
this.entityOpen = false;
88+
}
89+
90+
public void setIgnoreRecordId(final boolean ignoreRecordId) {
91+
this.ignoreRecordId = ignoreRecordId;
92+
}
93+
94+
public boolean getIgnoreRecordId() {
95+
return this.ignoreRecordId;
96+
}
97+
98+
protected void compareIdFromRecord(final String recordId) {
99+
if (this.id.equals(recordId)) {
100+
idnControlSubField = false; //only test this context.
101+
return;
102+
}
103+
throw new MissingIdException(recordId);
104+
}
105+
106+
107+
@Override
108+
public void startEntity(final String name) {
109+
// Here begins a field (i.e. "028A ", which is given in the name.
110+
// It is unknown, whether there are any subfields in the field.
111+
final Matcher fieldNameMatcher = FIELD_NAME_PATTERN.matcher(name);
112+
if (fieldNameMatcher.find()) {
113+
builder.append(name.trim()+ " ");
114+
}
115+
else {
116+
throw new FormatException(name);
117+
}
118+
if (name.trim().equals("003@") && !getIgnoreRecordId()) {
119+
//Time to check record Id in the following subfield.
120+
idnControlSubField = true;
121+
}else {
122+
//No check is necessary.
123+
idnControlSubField = false;
124+
}
125+
//Now literals can be opened. But no entities are allowed.
126+
if (recordOpen)
127+
this.entityOpen = true;
128+
}
129+
130+
@Override
131+
public void literal(final String name, final String value) {
132+
//A Subfield has one character or digit exactly.
133+
if (name.length()!=1){
134+
throw new FormatException(name);
135+
} else if (!entityOpen){
136+
throw new FormatException(name); //new exceptions define!!!! tODo
137+
}
138+
final String valueNew = Normalizer.normalize(value, Form.NFD);
139+
if (idnControlSubField){
140+
// it is a 003@ field, the same record id delivered with record should follow
141+
compareIdFromRecord(value);
142+
}
143+
builder.append(SUB_DELIMITER);
144+
builder.append(name);
145+
builder.append(valueNew);
146+
}
147+
148+
@Override
149+
public void endEntity() {
150+
builder.append(FIELD_DELIMITER);
151+
//Now an entity can be opened. But no literal is allowed.
152+
this.entityOpen = false;
153+
}
154+
155+
@Override
156+
public void endRecord() {
157+
getReceiver().process(builder.toString());
158+
builder.setLength(0);
159+
//Now a record can be opened. But no literal and entity are allowed.
160+
this.recordOpen = false;
161+
this.entityOpen = false;
162+
}
163+
@Override
164+
protected void onResetStream() {
165+
builder.setLength(0);
166+
}
167+
168+
}

0 commit comments

Comments
 (0)