Skip to content

Commit 26640fa

Browse files
committed
WIP and working
1 parent c3f3ad6 commit 26640fa

File tree

2 files changed

+66
-90
lines changed

2 files changed

+66
-90
lines changed

metafacture-io/src/main/java/org/metafacture/io/SruOpener.java

Lines changed: 46 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,31 @@
55

66
import org.metafacture.framework.FluxCommand;
77
import org.metafacture.framework.MetafactureException;
8-
import org.metafacture.framework.XmlReceiver;
8+
import org.metafacture.framework.ObjectReceiver;
99
import org.metafacture.framework.annotations.Description;
1010
import org.metafacture.framework.annotations.In;
1111
import org.metafacture.framework.annotations.Out;
1212
import org.metafacture.framework.helpers.DefaultObjectPipe;
13-
import org.xml.sax.InputSource;
13+
import org.w3c.dom.Document;
1414
import org.xml.sax.SAXException;
15-
import org.xml.sax.SAXNotRecognizedException;
16-
import org.xml.sax.SAXNotSupportedException;
17-
import org.xml.sax.XMLReader;
1815

16+
import java.io.ByteArrayInputStream;
17+
import java.io.ByteArrayOutputStream;
1918
import java.io.IOException;
2019
import java.io.InputStream;
20+
import java.io.InputStreamReader;
21+
import java.io.Reader;
2122
import java.net.HttpURLConnection;
2223
import java.net.URL;
24+
import javax.xml.parsers.DocumentBuilder;
25+
import javax.xml.parsers.DocumentBuilderFactory;
2326
import javax.xml.parsers.ParserConfigurationException;
24-
import javax.xml.parsers.SAXParserFactory;
27+
import javax.xml.transform.Result;
28+
import javax.xml.transform.Transformer;
29+
import javax.xml.transform.TransformerException;
30+
import javax.xml.transform.TransformerFactory;
31+
import javax.xml.transform.dom.DOMSource;
32+
import javax.xml.transform.stream.StreamResult;
2533

2634
/**
2735
* Opens an SRU (Search/Retrieve via URL) stream and passes a reader to the receiver.
@@ -31,9 +39,9 @@
3139
*/
3240
@Description("Opens a SRU stream and passes a reader to the receiver. The input should be the base URL of the SRU service to be retrieved from. Mandatory argument is: QUERY.")
3341
@In(String.class)
34-
@Out(XmlReceiver.class)
42+
@Out(java.io.Reader.class)
3543
@FluxCommand("open-sru")
36-
public final class SruOpener extends DefaultObjectPipe<String, XmlReceiver> {
44+
public final class SruOpener extends DefaultObjectPipe<String, ObjectReceiver<Reader>> {
3745

3846
private static final String OPERATION = "searchRetrieve";
3947
private static final String RECORD_SCHEMA = "MARC21-xml";
@@ -43,8 +51,6 @@ public final class SruOpener extends DefaultObjectPipe<String, XmlReceiver> {
4351
private static final int CONNECTION_TIMEOUT = 11000;
4452
private static final int MAXIMUM_RECORDS = 10;
4553
private static final int START_RECORD = 1;
46-
private final XMLReader saxReader;
47-
4854
private String operation = OPERATION;
4955
private String query;
5056
private String recordSchema = RECORD_SCHEMA;
@@ -59,17 +65,9 @@ public final class SruOpener extends DefaultObjectPipe<String, XmlReceiver> {
5965

6066

6167
/**
62-
* Creates an instance of {@link SruOpener}
68+
* Default constructor
6369
*/
6470
public SruOpener() {
65-
try {
66-
final SAXParserFactory parserFactory = SAXParserFactory.newInstance();
67-
parserFactory.setNamespaceAware(true);
68-
saxReader = parserFactory.newSAXParser().getXMLReader();
69-
}
70-
catch (final ParserConfigurationException | SAXException e) {
71-
throw new MetafactureException(e);
72-
}
7371
}
7472

7573
/**
@@ -160,24 +158,42 @@ public void process(final String baseUrl) {
160158
throw new IllegalArgumentException("Missing mandatory parameter 'query'");
161159
}
162160
int retrievedRecords = 0;
163-
while (!stopRetrieving && (totalRecords==0 || retrievedRecords < totalRecords)) {
164-
if (totalRecords >0) {
165-
int yetToRetrieveRecords = retrievedRecords - totalRecords;
166-
if (yetToRetrieveRecords > maximumRecords) {
161+
while (!stopRetrieving && (retrievedRecords < totalRecords)) {
162+
if (totalRecords >0) {
163+
int yetToRetrieveRecords = totalRecords - retrievedRecords;
164+
if (yetToRetrieveRecords < maximumRecords) {
167165
maximumRecords = yetToRetrieveRecords;
168166
}
169167
}
170-
retrieve(srUrl, startRecord); //todo: bis max lookup zuviel (bis der nämlich sehr klein ist => keine Ergebnisse mehr)
168+
ByteArrayInputStream byteArrayInputStream = retrieve(srUrl, startRecord, maximumRecords);
169+
170+
TransformerFactory tf = TransformerFactory.newInstance();
171+
Transformer t = tf.newTransformer();
172+
DocumentBuilderFactory factory =DocumentBuilderFactory.newInstance();
173+
DocumentBuilder docBuilder = factory.newDocumentBuilder();
174+
Document xmldoc = docBuilder.parse(byteArrayInputStream);
175+
176+
ByteArrayOutputStream os = new ByteArrayOutputStream();
177+
Result result = new StreamResult(os);
178+
t.transform(new DOMSource(xmldoc), result);
179+
180+
ByteArrayInputStream inputStream = new ByteArrayInputStream(os.toByteArray());
181+
182+
getReceiver().process(
183+
new InputStreamReader(inputStream));
184+
t.setOutputProperty("omit-xml-declaration", "yes");
185+
//todo: one lookup too many towards the end (requests continue until the response is very small => no more results)
171186
startRecord = startRecord + maximumRecords;
172187
retrievedRecords = retrievedRecords + maximumRecords;
173188
}
174189
}
175-
catch (final IOException e) {
190+
catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) {
176191
throw new MetafactureException(e);
177192
}
193+
178194
}
179195

180-
private void retrieve(StringBuilder srUrl, int startRecord) throws IOException {
196+
private ByteArrayInputStream retrieve(StringBuilder srUrl, int startRecord, int maximumRecords) throws IOException {
181197
final URL urlToOpen = new URL(srUrl.toString() + "&maximumRecords=" + maximumRecords+"&startRecord=" + startRecord);
182198
final HttpURLConnection connection = (HttpURLConnection) urlToOpen.openConnection();
183199

@@ -186,22 +202,16 @@ private void retrieve(StringBuilder srUrl, int startRecord) throws IOException {
186202
connection.setRequestProperty("User-Agent", userAgent);
187203
}
188204
InputStream inputStream = getInputStream(connection);
189-
try {
190-
InputSource inputSource = new InputSource(inputStream);
191-
saxReader.parse(inputSource);
192-
// String sr = saxReader.getProperty("huhu").toString();
193-
// System.out.println(sr);
194-
}
195-
catch (final IOException | SAXException e) {
196-
throw new MetafactureException(e);
197-
}
205+
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
206+
198207
System.out.println("srUrl="+srUrl);
199208
System.out.println("startRecord="+startRecord);
200209
System.out.println("istream.length="+inputStream.available());
201210
if (inputStream.available() < 768){ // we take it that this is a result without a record
202211
stopRetrieving = true;
203212
}
204-
// getReceiver().process(saxReader);
213+
inputStream.transferTo(outputStream);
214+
return new ByteArrayInputStream(outputStream.toByteArray());
205215
}
206216

207217
private InputStream getInputStream(final HttpURLConnection connection) {
@@ -214,19 +224,4 @@ private InputStream getInputStream(final HttpURLConnection connection) {
214224
}
215225
}
216226

217-
private static final String SAX_PROPERTY_LEXICAL_HANDLER = "http://xml.org/sax/properties/lexical-handler";
218-
@Override
219-
protected void onSetReceiver() {
220-
saxReader.setContentHandler(getReceiver());
221-
saxReader.setDTDHandler(getReceiver());
222-
saxReader.setEntityResolver(getReceiver());
223-
saxReader.setErrorHandler(getReceiver());
224-
try {
225-
saxReader.setProperty(SAX_PROPERTY_LEXICAL_HANDLER, getReceiver());
226-
}
227-
catch (final SAXNotRecognizedException | SAXNotSupportedException e) {
228-
throw new MetafactureException(e);
229-
}
230-
}
231-
232227
}
Lines changed: 20 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,6 @@
11
package org.metafacture.io;
22

33
import org.junit.Test;
4-
import org.metafacture.formatting.StreamLiteralFormatter;
5-
import org.metafacture.framework.ObjectReceiver;
6-
import org.metafacture.framework.XmlReceiver;
7-
import org.metafacture.xml.XmlDecoder;
8-
import org.metafacture.xml.XmlElementSplitter;
9-
import org.xml.sax.XMLReader;
10-
11-
import java.io.BufferedReader;
12-
import java.io.IOException;
13-
import java.io.Reader;
144

155
public class SruOpenerTest {
166

@@ -21,12 +11,14 @@ public class SruOpenerTest {
2111
@Test
2212
public void test(){
2313
SruOpener sruOpener = new SruOpener();
24-
sruOpener.setReceiver(new ObjectReceiver<XmlReceiver> () {
14+
RecordReader recordReader = new RecordReader();
15+
recordReader.setReceiver(new ObjectStdoutWriter<String>());
16+
sruOpener.setReceiver(recordReader);// {
2517

2618

27-
@Override
19+
/* @Override
2820
public void process(final XmlReceiver obj) {
29-
/* BufferedReader in = new BufferedReader(obj);
21+
BufferedReader in = new BufferedReader(obj);
3022
String line = null;
3123
StringBuilder rslt = new StringBuilder();
3224
while (true) {
@@ -38,38 +30,27 @@ public void process(final XmlReceiver obj) {
3830
}
3931
rslt.append(line);
4032
}*/
41-
StreamLiteralFormatter streamLiteralFormatter = new StreamLiteralFormatter();
33+
/* StreamLiteralFormatter streamLiteralFormatter = new StreamLiteralFormatter();
4234
ObjectStdoutWriter<String> objectStdoutWriter = new ObjectStdoutWriter<String>();
4335
XmlElementSplitter xmlElementSplitter = new XmlElementSplitter();
4436
streamLiteralFormatter.setReceiver(objectStdoutWriter);
4537
xmlElementSplitter.setReceiver(streamLiteralFormatter);
46-
xmlDecoder.setReceiver(xmlElementSplitter);
47-
48-
49-
// System.out.println(rslt.toString());
50-
resultCollector.append(obj);
51-
}
52-
53-
@Override
54-
public void resetStream() {
55-
++resultCollectorsResetStreamCount;
56-
}
38+
xmlDecoder.setReceiver(xmlElementSplitter);*/
39+
// System.out.println(rslt.toString());
40+
// resultCollector.append(obj);
41+
//}
5742

58-
@Override
59-
public void closeStream() {
60-
61-
}
62-
});
63-
64-
// sruOpener.setQuery("dnb.isil%3DDE-Sol1");
65-
sruOpener.setQuery("WVN%3D24A05");
43+
sruOpener.setQuery("dnb.isil%3DDE-Sol1");
44+
// sruOpener.setQuery("WVN%3D24A05");
6645
sruOpener.setRecordSchema("MARC21plus-xml");
6746
sruOpener.setVersion("1.1");
68-
sruOpener.setStartRecord("1890");
69-
// sruOpener.process("https://services.dnb.de/sru/dnb");
70-
sruOpener.process("https://amsquery.stadt-zuerich.ch/sru/");
71-
72-
System.out.println(resultCollector.toString());
73-
System.out.println(resultCollector.toString());
47+
// sruOpener.setStartRecord("5");
48+
sruOpener.setMaximumRecords("5");
49+
sruOpener.setTotal("6");
50+
// sruOpener.process("https://services.dnb.de/sru/dnb");
51+
sruOpener.process("https://services.dnb.de/sru/zdb");
52+
// sruOpener.process("https://amsquery.stadt-zuerich.ch/sru/");
53+
54+
// System.out.println(resultCollector.toString());
7455
}
7556
}

0 commit comments

Comments
 (0)