16
16
require 'oai/client/metadata_format'
17
17
require 'oai/client/response'
18
18
require 'oai/client/header'
19
- require 'oai/client/record'
19
+ require 'oai/client/record'
20
20
require 'oai/client/identify'
21
21
require 'oai/client/get_record'
22
22
require 'oai/client/list_identifiers'
27
27
module OAI
28
28
29
29
# An OAI::Client provides a client api for issuing OAI-PMH verbs against
30
- # a OAI-PMH server. The 6 OAI-PMH verbs translate directly to methods you
30
+ # an OAI-PMH server. The 6 OAI-PMH verbs translate directly to methods you
31
31
# can call on a OAI::Client object. Verb arguments are passed as a hash:
32
32
#
33
33
# client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
@@ -36,9 +36,9 @@ module OAI
36
36
# puts identifier
37
37
# end
38
38
#
39
- # It is worth noting that the api uses methods and parameter names with
40
- # underscores in them rather than studly caps. So above list_identifiers
41
- # and metadata_prefix are used instead of the listIdentifiers and
39
+ # It is worth noting that the api uses methods and parameter names with
40
+ # underscores in them rather than studly caps. So above list_identifiers
41
+ # and metadata_prefix are used instead of the listIdentifiers and
42
42
# metadataPrefix used in the OAI-PMH specification.
43
43
#
44
44
# Also, the from and until arguments which specify dates should be passed
@@ -49,10 +49,10 @@ module OAI
49
49
# the OAI-PMH docs at:
50
50
#
51
51
# http://www.openarchives.org/OAI/openarchivesprotocol.html
52
-
52
+
53
53
class Client
54
54
55
- # The constructor which must be passed a valid base url for an oai
55
+ # The constructor which must be passed a valid base url for an oai
56
56
# service:
57
57
#
58
58
# client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
@@ -67,15 +67,15 @@ class Client
67
67
# back XML::Node objects
68
68
#
69
69
# client = OAI::Client.new 'http://example.com', :parser => 'libxml'
70
- #
71
- # You can configure the Faraday HTTP client by providing an alternate
70
+ #
71
+ # You can configure the Faraday HTTP client by providing an alternate
72
72
# Faraday instance:
73
73
#
74
74
# client = OAI::Client.new 'http://example.com', :http => Faraday.new { |c| }
75
75
#
76
76
# === HIGH PERFORMANCE
77
77
#
78
- # If you want to supercharge this api install libxml-ruby >= 0.3.8 and
78
+ # If you want to supercharge this api install libxml-ruby >= 0.3.8 and
79
79
# use the :parser option when you construct your OAI::Client.
80
80
#
81
81
def initialize ( base_url , options = { } )
@@ -94,7 +94,7 @@ def initialize(base_url, options={})
94
94
require 'faraday_middleware'
95
95
@http_client . use FaradayMiddleware ::FollowRedirects , :limit => count
96
96
end
97
-
97
+
98
98
# load appropriate parser
99
99
case @parser
100
100
when 'libxml'
@@ -113,33 +113,33 @@ def initialize(base_url, options={})
113
113
end
114
114
115
115
# Equivalent to an Identify request. You'll get back an OAI::IdentifyResponse
116
- # object which is essentially just a wrapper around a REXML::Document
117
- # for the response. If you created your client using the libxml
116
+ # object which is essentially just a wrapper around a REXML::Document
117
+ # for the response. If you created your client using the libxml
118
118
# parser then you will get an XML::Node object instead.
119
-
119
+
120
120
# Issues the Identify verb (no arguments) and wraps the parsed response
# document in an OAI::IdentifyResponse.
def identify
  OAI::IdentifyResponse.new(do_request('Identify'))
end
123
123
124
124
# Equivalent to a ListMetadataFormats request. A ListMetadataFormatsResponse
125
- # object is returned to you.
126
-
125
+ # object is returned to you.
126
+
127
127
# Issues the ListMetadataFormats verb.
#
# opts - Hash of verb arguments, passed through to do_request (default: {}).
#
# Returns an OAI::ListMetadataFormatsResponse built from the parsed response.
def list_metadata_formats(opts = {})
  OAI::ListMetadataFormatsResponse.new(do_request('ListMetadataFormats', opts))
end
130
130
131
131
# Equivalent to a ListIdentifiers request. Pass in :from, :until arguments
132
- # as Date or DateTime objects as appropriate depending on the granularity
132
+ # as Date or DateTime objects as appropriate depending on the granularity
133
133
# supported by the server.
134
-
134
+
135
135
# Issues the ListIdentifiers verb.
#
# opts - Hash of verb arguments, passed through to do_request (default: {}).
#
# Returns an OAI::ListIdentifiersResponse built from the parsed response.
def list_identifiers(opts = {})
  OAI::ListIdentifiersResponse.new(do_request('ListIdentifiers', opts))
end
138
138
139
- # Equivalent to a GetRecord request. You must supply an identifier
139
+ # Equivalent to a GetRecord request. You must supply an identifier
140
140
# argument. You should get back a OAI::GetRecordResponse object
141
141
# which you can extract a OAI::Record object from.
142
-
142
+
143
143
# Issues the GetRecord verb.
#
# opts - Hash of verb arguments, passed through to do_request (default: {}).
#
# Returns an OAI::GetRecordResponse built from the parsed response.
def get_record(opts = {})
  OAI::GetRecordResponse.new(do_request('GetRecord', opts))
end
@@ -150,47 +150,47 @@ def get_record(opts={})
150
150
# for record in client.list_records
151
151
# puts record.metadata
152
152
# end
153
-
153
+
154
154
# Issues the ListRecords verb.
#
# opts - Hash of verb arguments, passed through to do_request (default: {}).
#
# Returns an OAI::ListRecordsResponse built from the parsed response.
def list_records(opts = {})
  OAI::ListRecordsResponse.new(do_request('ListRecords', opts))
end
157
157
158
158
# Equivalent to the ListSets request. A ListSetsResponse object
159
- # will be returned which you can use for iterating through the
159
+ # will be returned which you can use for iterating through the
160
160
# OAI::Set objects
161
161
#
162
162
# for set in client.list_sets
163
163
# puts set
164
164
# end
165
-
165
+
166
166
# Issues the ListSets verb.
#
# opts - Hash of verb arguments, passed through to do_request (default: {}).
#
# Returns an OAI::ListSetsResponse built from the parsed response.
def list_sets(opts = {})
  OAI::ListSetsResponse.new(do_request('ListSets', opts))
end
169
169
170
- private
170
+ private
171
171
172
172
# Fires off one OAI-PMH request and returns the appropriate DOM object.
#
# verb - String OAI-PMH verb name, e.g. 'Identify'.
# opts - Hash of verb arguments, or nil for none (default: nil).
#
# Returns whatever load_document builds from the sanitised response body.
def do_request(verb, opts = nil)
  uri = build_uri(verb, opts)
  xml = strip_invalid_utf_8_chars(get(uri))
  # strip_invalid_utf_8_chars hands back nil for a nil body, so only rewrite
  # the namespace when there is a body to rewrite.
  if xml && @parser == 'libxml'
    # remove default namespace for oai-pmh since libxml
    # isn't able to use our xpaths to get at them
    # if you know a way around this please let me know
    xml = xml.gsub(
      /xmlns=\"http:\/\/www.openarchives.org\/OAI\/.\..\/\"/, '')
  end
  load_document(xml)
end
185
-
185
+
186
186
# Builds the request URI for a verb and its arguments.
#
# verb - String OAI-PMH verb name.
# opts - Hash of verb arguments or nil; normalised through validate_options.
#
# Returns a clone of @base whose query is "verb=<verb>" followed by one
# "&<name>=<value>" pair per option, with names passed through externalize
# and values through encode.
def build_uri(verb, opts)
  opts = validate_options(verb, opts)
  pairs = opts.map { |k, v| "#{externalize(k)}=#{encode(v)}" }
  uri = @base.clone
  # Assemble the full query and assign it once instead of appending to a
  # string literal and to the URI's own query string in place.
  uri.query = ["verb=#{verb}", *pairs].join('&')
  uri
end
193
-
193
+
194
194
def encode ( value )
195
195
return CGI . escape ( value ) unless value . respond_to? ( :strftime )
196
196
if value . kind_of? ( DateTime )
@@ -229,28 +229,28 @@ def get(uri)
229
229
# Writes msg followed by a newline to $stderr, but only when @debug is set.
#
# msg - the object to print; it is interpolated into the output line.
def debug(msg)
  $stderr.print("#{msg}\n") if @debug
end
232
-
232
+
233
233
# Massage the standard OAI options to make them a bit more palatable.
234
234
def validate_options ( verb , opts = { } )
235
235
raise OAI ::VerbException . new unless Const ::VERBS . keys . include? ( verb )
236
236
237
237
return { } if opts . nil?
238
238
239
239
raise OAI ::ArgumentException . new unless opts . respond_to? ( :keys )
240
-
240
+
241
241
realopts = { }
242
242
# Internalize the hash
243
243
opts . keys . each do |key |
244
244
realopts [ key . to_s . gsub ( /([A-Z])/ , '_\1' ) . downcase . intern ] = opts . delete ( key )
245
245
end
246
-
246
+
247
247
return realopts if is_resumption? ( realopts )
248
-
248
+
249
249
# add in a default metadataPrefix if none exists
250
250
if ( Const ::VERBS [ verb ] . include? ( :metadata_prefix ) )
251
251
realopts [ :metadata_prefix ] ||= 'oai_dc'
252
252
end
253
-
253
+
254
254
# Convert date-formatted strings into dates.
255
255
#realopts[:from] = parse_date(realopts[:from]) if realopts[:from]
256
256
#realopts[:until] = parse_date(realopts[:until]) if realopts[:until]
@@ -261,43 +261,43 @@ def validate_options(verb, opts = {})
261
261
end
262
262
realopts
263
263
end
264
-
264
+
265
265
# Tells whether the (already internalised) options describe a resumption
# request, i.e. one that carries a :resumption_token.
#
# opts - Hash of options keyed by underscore-style symbols.
#
# Returns true when :resumption_token is the only key, false when it is
# absent.
# Raises OAI::ArgumentException when :resumption_token is combined with any
# other option.
def is_resumption?(opts)
  return false unless opts.key?(:resumption_token)
  raise OAI::ArgumentException unless opts.size == 1

  true
end
271
-
271
+
272
272
# Convert our internal representations back into standard OAI options
273
273
# Converts an internal underscore-style option name back into the camelCase
# spelling: every "_x" (underscore plus lowercase letter) becomes "X".
#
# value - Symbol or String option name, e.g. :metadata_prefix.
#
# Returns the converted String, e.g. "metadataPrefix".
def externalize(value)
  value.to_s.gsub(/_([a-z])/) { Regexp.last_match(1).upcase }
end
276
-
276
+
277
277
# Turns a date argument into a date/time object.
#
# value - anything responding to :strftime (returned untouched), or a String
#         to be parsed.
#
# Returns value itself, or a UTC Time parsed from the String.
# Raises OAI::ArgumentException when the String cannot be parsed. This is the
# same exception class the other argument checks in this file raise.
def parse_date(value)
  return value if value.respond_to?(:strftime)

  # Date.parse is run purely as a validity check; the original note says
  # Time.parse on its own does not reject badly formatted dates.
  Date.parse(value)
  Time.parse(value).utc
rescue StandardError
  raise OAI::ArgumentException
end
285
-
285
+
286
286
# Strip out invalid UTF-8 characters. Regex from the W3C, inverted.
287
287
# http://www.w3.org/International/questions/qa-forms-utf-8.en.php
288
288
#
289
- # Regex is from WebCollab:
289
+ # Regex is from WebCollab:
290
290
# http://webcollab.sourceforge.net/unicode.html
291
291
# Replaces every byte sequence matched by the patterns below with '?'.
#
# The patterns are byte-oriented, so they are compiled with /n and run on a
# binary-encoded copy of the input: gsub on a UTF-8 string that contains
# invalid bytes would raise instead of cleaning it. The caller's string is
# never modified.
#
# xml - the response body String, or nil.
#
# Returns a new String tagged with xml's original encoding, or nil when xml
# is nil.
def strip_invalid_utf_8_chars(xml)
  return nil unless xml

  bytes = xml.dup.force_encoding(Encoding::BINARY)
  # NOTE(review): the control-character class stops at \x19, so \x1A-\x1F
  # pass through unchanged - confirm against the upstream WebCollab regex.
  cleaned = bytes.gsub(/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]
                       | [\x00-\x7F][\x80-\xBF]+
                       | ([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*
                       | [\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})
                       | [\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))
                       | (?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/nx, '?')
  # This pattern spans two lines, so it needs /x as well; without it the
  # line break and indentation are matched literally.
  cleaned = cleaned.gsub(/\xE0[\x80-\x9F][\x80-\xBF]
                        | \xED[\xA0-\xBF][\x80-\xBF]/nx, '?')
  cleaned.force_encoding(xml.encoding)
end
301
-
301
+
302
302
end
303
303
end
0 commit comments