5
5
6
6
import org .metafacture .framework .FluxCommand ;
7
7
import org .metafacture .framework .MetafactureException ;
8
- import org .metafacture .framework .XmlReceiver ;
8
+ import org .metafacture .framework .ObjectReceiver ;
9
9
import org .metafacture .framework .annotations .Description ;
10
10
import org .metafacture .framework .annotations .In ;
11
11
import org .metafacture .framework .annotations .Out ;
12
12
import org .metafacture .framework .helpers .DefaultObjectPipe ;
13
- import org .xml . sax . InputSource ;
13
+ import org .w3c . dom . Document ;
14
14
import org .xml .sax .SAXException ;
15
- import org .xml .sax .SAXNotRecognizedException ;
16
- import org .xml .sax .SAXNotSupportedException ;
17
- import org .xml .sax .XMLReader ;
18
15
16
+ import java .io .ByteArrayInputStream ;
17
+ import java .io .ByteArrayOutputStream ;
19
18
import java .io .IOException ;
20
19
import java .io .InputStream ;
20
+ import java .io .InputStreamReader ;
21
+ import java .io .Reader ;
21
22
import java .net .HttpURLConnection ;
22
23
import java .net .URL ;
24
+ import javax .xml .parsers .DocumentBuilder ;
25
+ import javax .xml .parsers .DocumentBuilderFactory ;
23
26
import javax .xml .parsers .ParserConfigurationException ;
24
- import javax .xml .parsers .SAXParserFactory ;
27
+ import javax .xml .transform .Result ;
28
+ import javax .xml .transform .Transformer ;
29
+ import javax .xml .transform .TransformerException ;
30
+ import javax .xml .transform .TransformerFactory ;
31
+ import javax .xml .transform .dom .DOMSource ;
32
+ import javax .xml .transform .stream .StreamResult ;
25
33
26
34
/**
27
35
* Opens an SRU (Search/Retrieve via URL) stream and passes a reader to the receiver.
31
39
*/
32
40
@ Description ("Opens a SRU stream and passes a reader to the receiver. The input should be the base URL of the SRU service to be retrieved from. Mandatory argument is: QUERY." )
33
41
@ In (String .class )
34
- @ Out (XmlReceiver .class )
42
+ @ Out (java . io . Reader .class )
35
43
@ FluxCommand ("open-sru" )
36
- public final class SruOpener extends DefaultObjectPipe <String , XmlReceiver > {
44
+ public final class SruOpener extends DefaultObjectPipe <String , ObjectReceiver < Reader > > {
37
45
38
46
private static final String OPERATION = "searchRetrieve" ;
39
47
private static final String RECORD_SCHEMA = "MARC21-xml" ;
@@ -43,8 +51,6 @@ public final class SruOpener extends DefaultObjectPipe<String, XmlReceiver> {
43
51
private static final int CONNECTION_TIMEOUT = 11000 ;
44
52
private static final int MAXIMUM_RECORDS = 10 ;
45
53
private static final int START_RECORD = 1 ;
46
- private final XMLReader saxReader ;
47
-
48
54
private String operation = OPERATION ;
49
55
private String query ;
50
56
private String recordSchema = RECORD_SCHEMA ;
@@ -59,17 +65,9 @@ public final class SruOpener extends DefaultObjectPipe<String, XmlReceiver> {
59
65
60
66
61
67
/**
62
- * Creates an instance of {@link SruOpener}
68
+ * Default constructor
63
69
*/
64
70
public SruOpener () {
65
- try {
66
- final SAXParserFactory parserFactory = SAXParserFactory .newInstance ();
67
- parserFactory .setNamespaceAware (true );
68
- saxReader = parserFactory .newSAXParser ().getXMLReader ();
69
- }
70
- catch (final ParserConfigurationException | SAXException e ) {
71
- throw new MetafactureException (e );
72
- }
73
71
}
74
72
75
73
/**
@@ -160,24 +158,42 @@ public void process(final String baseUrl) {
160
158
throw new IllegalArgumentException ("Missing mandatory parameter 'query'" );
161
159
}
162
160
int retrievedRecords = 0 ;
163
- while (!stopRetrieving && (totalRecords == 0 || retrievedRecords < totalRecords )) {
164
- if (totalRecords >0 ) {
165
- int yetToRetrieveRecords = retrievedRecords - totalRecords ;
166
- if (yetToRetrieveRecords > maximumRecords ) {
161
+ while (!stopRetrieving && (retrievedRecords < totalRecords )) {
162
+ if (totalRecords >0 ) {
163
+ int yetToRetrieveRecords = totalRecords - retrievedRecords ;
164
+ if (yetToRetrieveRecords < maximumRecords ) {
167
165
maximumRecords = yetToRetrieveRecords ;
168
166
}
169
167
}
170
- retrieve (srUrl , startRecord ); //todo: bis max lookup zuviel (bis der nämlich sehr klein ist => keine Ergebnisse mehr)
168
+ ByteArrayInputStream byteArrayInputStream = retrieve (srUrl , startRecord , maximumRecords );
169
+
170
+ TransformerFactory tf = TransformerFactory .newInstance ();
171
+ Transformer t = tf .newTransformer ();
172
+ DocumentBuilderFactory factory =DocumentBuilderFactory .newInstance ();
173
+ DocumentBuilder docBuilder = factory .newDocumentBuilder ();
174
+ Document xmldoc = docBuilder .parse (byteArrayInputStream );
175
+
176
+ ByteArrayOutputStream os = new ByteArrayOutputStream ();
177
+ Result result = new StreamResult (os );
178
+ t .transform (new DOMSource (xmldoc ), result );
179
+
180
+ ByteArrayInputStream inputStream = new ByteArrayInputStream (os .toByteArray ());
181
+
182
+ getReceiver ().process (
183
+ new InputStreamReader (inputStream ));
184
+ t .setOutputProperty ("omit-xml-declaration" , "yes" );
185
//todo: one lookup too many at the end (the loop only stops once a response is very small => no more results)
171
186
startRecord = startRecord + maximumRecords ;
172
187
retrievedRecords = retrievedRecords + maximumRecords ;
173
188
}
174
189
}
175
- catch (final IOException e ) {
190
+ catch (final IOException | TransformerException | SAXException | ParserConfigurationException e ) {
176
191
throw new MetafactureException (e );
177
192
}
193
+
178
194
}
179
195
180
- private void retrieve (StringBuilder srUrl , int startRecord ) throws IOException {
196
+ private ByteArrayInputStream retrieve (StringBuilder srUrl , int startRecord , int maximumRecords ) throws IOException {
181
197
final URL urlToOpen = new URL (srUrl .toString () + "&maximumRecords=" + maximumRecords +"&startRecord=" + startRecord );
182
198
final HttpURLConnection connection = (HttpURLConnection ) urlToOpen .openConnection ();
183
199
@@ -186,22 +202,16 @@ private void retrieve(StringBuilder srUrl, int startRecord) throws IOException {
186
202
connection .setRequestProperty ("User-Agent" , userAgent );
187
203
}
188
204
InputStream inputStream = getInputStream (connection );
189
- try {
190
- InputSource inputSource = new InputSource (inputStream );
191
- saxReader .parse (inputSource );
192
- // String sr = saxReader.getProperty("huhu").toString();
193
- // System.out.println(sr);
194
- }
195
- catch (final IOException | SAXException e ) {
196
- throw new MetafactureException (e );
197
- }
205
+ ByteArrayOutputStream outputStream = new ByteArrayOutputStream ();
206
+
198
207
System .out .println ("srUrl=" +srUrl );
199
208
System .out .println ("startRecord=" +startRecord );
200
209
System .out .println ("istream.length=" +inputStream .available ());
201
210
if (inputStream .available () < 768 ){ // we take it that this is a result without a record
202
211
stopRetrieving = true ;
203
212
}
204
- // getReceiver().process(saxReader);
213
+ inputStream .transferTo (outputStream );
214
+ return new ByteArrayInputStream (outputStream .toByteArray ());
205
215
}
206
216
207
217
private InputStream getInputStream (final HttpURLConnection connection ) {
@@ -214,19 +224,4 @@ private InputStream getInputStream(final HttpURLConnection connection) {
214
224
}
215
225
}
216
226
217
- private static final String SAX_PROPERTY_LEXICAL_HANDLER = "http://xml.org/sax/properties/lexical-handler" ;
218
- @ Override
219
- protected void onSetReceiver () {
220
- saxReader .setContentHandler (getReceiver ());
221
- saxReader .setDTDHandler (getReceiver ());
222
- saxReader .setEntityResolver (getReceiver ());
223
- saxReader .setErrorHandler (getReceiver ());
224
- try {
225
- saxReader .setProperty (SAX_PROPERTY_LEXICAL_HANDLER , getReceiver ());
226
- }
227
- catch (final SAXNotRecognizedException | SAXNotSupportedException e ) {
228
- throw new MetafactureException (e );
229
- }
230
- }
231
-
232
227
}
0 commit comments