Skip to content

Commit 96f36a5

Browse files
committed
Add harvester2 package from https://github.com/hbz/oai-harvester2
See #360
1 parent e2c7643 commit 96f36a5

File tree

11 files changed

+1105
-5
lines changed

11 files changed

+1105
-5
lines changed

metafacture-biblio/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@ dependencies {
2121
api project(':metafacture-framework')
2222
implementation project(':metafacture-commons')
2323
implementation project(':metafacture-flowcontrol')
24-
implementation 'org.dspace:oclc-harvester2:0.1.12'
2524
implementation ('xalan:xalan:2.7.0') {
2625
exclude group: 'xalan', module: 'serializer'
2726
exclude group: 'xercesImpl', module: 'xercesImpl'
2827
exclude group: 'xml-apis', module: 'xml-apis'
2928
}
3029
implementation 'log4j:log4j:1.2.12'
30+
implementation 'org.slf4j:slf4j-api:1.7.7'
3131
testImplementation 'junit:junit:4.12'
3232
testImplementation 'org.mockito:mockito-core:2.5.5'
3333
}

metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,17 @@
1111

1212
import javax.xml.parsers.ParserConfigurationException;
1313
import javax.xml.transform.TransformerException;
14+
import javax.xml.xpath.XPathException;
1415

1516
import org.metafacture.framework.MetafactureException;
1617
import org.metafacture.framework.ObjectReceiver;
1718
import org.metafacture.framework.annotations.Description;
1819
import org.metafacture.framework.annotations.In;
1920
import org.metafacture.framework.annotations.Out;
2021
import org.metafacture.framework.helpers.DefaultObjectPipe;
22+
import org.oclc.oai.harvester2.app.RawWrite;
2123
import org.xml.sax.SAXException;
2224

23-
import ORG.oclc.oai.harvester2.app.RawWrite;
24-
2525
/**
2626
* Opens an OAI-PMH stream and passes a reader to the receiver.
2727
*
@@ -111,10 +111,10 @@ public void process(final String baseUrl) {
111111
e.printStackTrace();
112112
} catch (SAXException e) {
113113
e.printStackTrace();
114-
} catch (TransformerException e) {
115-
e.printStackTrace();
116114
} catch (NoSuchFieldException e) {
117115
e.printStackTrace();
116+
} catch (XPathException e) {
117+
e.printStackTrace();
118118
}
119119
try {
120120
getReceiver().process(
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
/**
2+
* Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the
3+
* "License"); you may not use this file except in compliance with the License. You may obtain a copy of the
4+
* License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software distributed under the License is
9+
* distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
* See the License for the specific language governing permissions and limitations under the License.
11+
*/
12+
package org.oclc.oai.harvester2.app;
13+
14+
import java.io.*;
15+
import java.lang.NoSuchFieldException;
16+
import java.util.ArrayList;
17+
import java.util.Date;
18+
import java.util.List;
19+
import java.util.HashMap;
20+
import javax.xml.parsers.ParserConfigurationException;
21+
import javax.xml.xpath.XPathException;
22+
import javax.xml.xpath.XPathExpressionException;
23+
import org.oclc.oai.harvester2.verb.*;
24+
import org.w3c.dom.Node;
25+
import org.w3c.dom.NodeList;
26+
import org.xml.sax.SAXException;
27+
28+
public class RawWrite {
29+
30+
public static void main(String[] args) {
31+
try {
32+
System.out.println(new Date());
33+
34+
HashMap options = getOptions(args);
35+
List rootArgs = (List) options.get("rootArgs");
36+
String baseURL = null;
37+
if (rootArgs.size() > 0) {
38+
baseURL = (String) rootArgs.get(0);
39+
} else {
40+
throw new IllegalArgumentException();
41+
}
42+
43+
OutputStream out = System.out;
44+
String outFileName = (String) options.get("-out");
45+
String from = (String) options.get("-from");
46+
String until = (String) options.get("-until");
47+
String metadataPrefix = (String) options.get("-metadataPrefix");
48+
if (metadataPrefix == null) metadataPrefix = "oai_dc";
49+
String resumptionToken = (String) options.get("-resumptionToken");
50+
String setSpec = (String) options.get("-setSpec");
51+
52+
if (resumptionToken != null) {
53+
if (outFileName != null)
54+
out = new FileOutputStream(outFileName, true);
55+
run(baseURL, resumptionToken, out);
56+
} else {
57+
if (outFileName != null)
58+
out = new FileOutputStream(outFileName);
59+
run(baseURL, from, until, metadataPrefix, setSpec, out);
60+
}
61+
62+
if (out != System.out) out.close();
63+
System.out.println(new Date());
64+
} catch (IllegalArgumentException e) {
65+
System.err.println("RawWrite <-from date> <-until date> <-metadataPrefix prefix> <-setSpec setName> <-resumptionToken token> <-out fileName> baseURL");
66+
} catch (Exception e) {
67+
e.printStackTrace();
68+
System.exit(-1);
69+
}
70+
}
71+
72+
public static void run(String baseURL, String resumptionToken,
73+
OutputStream out)
74+
throws IOException, ParserConfigurationException, SAXException, XPathExpressionException,
75+
NoSuchFieldException {
76+
ListRecords listRecords = new ListRecords(baseURL, resumptionToken);
77+
while (listRecords != null) {
78+
NodeList errors = listRecords.getErrors();
79+
if (errors != null && errors.getLength() > 0) {
80+
System.out.println("Found errors");
81+
int length = errors.getLength();
82+
for (int i = 0; i < length; ++i) {
83+
Node item = errors.item(i);
84+
System.out.println(item);
85+
}
86+
System.out.println("Error record: " + listRecords.toString());
87+
break;
88+
}
89+
// System.out.println(listRecords);
90+
out.write(listRecords.toString().getBytes("UTF-8"));
91+
out.write("\n".getBytes("UTF-8"));
92+
resumptionToken = listRecords.getResumptionToken();
93+
System.out.println("resumptionToken: " + resumptionToken);
94+
if (resumptionToken == null || resumptionToken.length() == 0) {
95+
listRecords = null;
96+
} else {
97+
listRecords = new ListRecords(baseURL, resumptionToken);
98+
}
99+
}
100+
out.write("</harvest>\n".getBytes("UTF-8"));
101+
}
102+
103+
public static void run(String baseURL, String from, String until,
104+
String metadataPrefix, String setSpec,
105+
OutputStream out)
106+
throws IOException, ParserConfigurationException, SAXException, XPathException,
107+
NoSuchFieldException {
108+
out.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n".getBytes("UTF-8"));
109+
out.write("<harvest>\n".getBytes("UTF-8"));
110+
out.write(new Identify(baseURL).toString().getBytes("UTF-8"));
111+
out.write("\n".getBytes("UTF-8"));
112+
out.write(new ListMetadataFormats(baseURL).toString().getBytes("UTF-8"));
113+
out.write("\n".getBytes("UTF-8"));
114+
ListSets listSets = new ListSets(baseURL);
115+
while (listSets != null) {
116+
out.write(listSets.toString().getBytes("UTF-8"));
117+
out.write("\n".getBytes("UTF-8"));
118+
String resumptionToken = listSets.getResumptionToken();
119+
System.out.println("resumptionToken: " + resumptionToken);
120+
if (resumptionToken == null || resumptionToken.length() == 0) {
121+
listSets = null;
122+
} else {
123+
listSets = new ListSets(baseURL, resumptionToken);
124+
}
125+
}
126+
ListRecords listRecords = new ListRecords(baseURL, from, until, setSpec,
127+
metadataPrefix);
128+
while (listRecords != null) {
129+
NodeList errors = listRecords.getErrors();
130+
if (errors != null && errors.getLength() > 0) {
131+
System.out.println("Found errors");
132+
int length = errors.getLength();
133+
for (int i = 0; i < length; ++i) {
134+
Node item = errors.item(i);
135+
System.out.println(item);
136+
}
137+
System.out.println("Error record: " + listRecords.toString());
138+
break;
139+
}
140+
// System.out.println(listRecords);
141+
out.write(listRecords.toString().getBytes("UTF-8"));
142+
out.write("\n".getBytes("UTF-8"));
143+
String resumptionToken = listRecords.getResumptionToken();
144+
System.out.println("resumptionToken: " + resumptionToken);
145+
if (resumptionToken == null || resumptionToken.length() == 0) {
146+
listRecords = null;
147+
} else {
148+
listRecords = new ListRecords(baseURL, resumptionToken);
149+
}
150+
}
151+
out.write("</harvest>\n".getBytes("UTF-8"));
152+
}
153+
154+
private static HashMap getOptions(String[] args) {
155+
HashMap options = new HashMap();
156+
ArrayList rootArgs = new ArrayList();
157+
options.put("rootArgs", rootArgs);
158+
159+
for (int i = 0; i < args.length; ++i) {
160+
if (args[i].charAt(0) != '-') {
161+
rootArgs.add(args[i]);
162+
} else if (i + 1 < args.length) {
163+
options.put(args[i], args[++i]);
164+
} else {
165+
throw new IllegalArgumentException();
166+
}
167+
}
168+
return options;
169+
}
170+
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/**
2+
* Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the
3+
* "License"); you may not use this file except in compliance with the License. You may obtain a copy of the
4+
* License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software distributed under the License is
9+
* distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
* See the License for the specific language governing permissions and limitations under the License.
11+
*/
12+
package org.oclc.oai.harvester2.verb;
13+
14+
import java.io.IOException;
15+
import java.net.MalformedURLException;
16+
import javax.xml.parsers.ParserConfigurationException;
17+
import javax.xml.xpath.XPathExpressionException;
18+
import org.xml.sax.SAXException;
19+
20+
/**
21+
* This class represents an GetRecord response on either the server or on the client
22+
*
23+
* @author Jeffrey A. Young, OCLC Online Computer Library Center
24+
*/
25+
public class GetRecord extends HarvesterVerb {
26+
27+
/**
28+
* Mock object constructor (for unit testing purposes)
29+
*/
30+
public GetRecord() {
31+
super();
32+
}
33+
34+
/**
35+
* Client-side GetRecord verb constructor
36+
*
37+
* @param baseURL the baseURL of the server to be queried
38+
* @param identifier
39+
* @param metadataPrefix
40+
* @exception MalformedURLException the baseURL is bad
41+
* @exception SAXException the xml response is bad
42+
* @exception IOException an I/O error occurred
43+
* @throws ParserConfigurationException
44+
* @throws XPathExpressionException
45+
*/
46+
public GetRecord(String baseURL, String identifier, String metadataPrefix)
47+
throws IOException, ParserConfigurationException, SAXException, XPathExpressionException {
48+
super(getRequestURL(baseURL, identifier, metadataPrefix));
49+
}
50+
51+
/**
52+
* Get the oai:identifier from the oai:header
53+
*
54+
* @return the oai:identifier as a String
55+
* @throws XPathExpressionException
56+
* @throws NoSuchFieldException
57+
*/
58+
public String getIdentifier() throws XPathExpressionException, NoSuchFieldException {
59+
if (SCHEMA_LOCATION_V2_0.equals(getSchemaLocation())) {
60+
return getSingleString("/oai20:OAI-PMH/oai20:GetRecord/oai20:record/oai20:header/oai20:identifier");
61+
} else if (SCHEMA_LOCATION_V1_1_GET_RECORD.equals(getSchemaLocation())) {
62+
return getSingleString("/oai11_GetRecord:GetRecord/oai11_GetRecord:record/oai11_GetRecord:header/oai11_GetRecord:identifier");
63+
} else {
64+
throw new NoSuchFieldException(getSchemaLocation());
65+
}
66+
}
67+
68+
/**
69+
* Construct the query portion of the http request
70+
*
71+
* @return a String containing the query portion of the http request
72+
*/
73+
private static String getRequestURL(String baseURL, String identifier, String metadataPrefix) {
74+
StringBuilder requestURL = new StringBuilder(baseURL);
75+
requestURL.append("?verb=GetRecord");
76+
requestURL.append("&identifier=").append(identifier);
77+
requestURL.append("&metadataPrefix=").append(metadataPrefix);
78+
return requestURL.toString();
79+
}
80+
}

0 commit comments

Comments
 (0)