Skip to content

Commit 8df1d9e

Browse files
committed
Fixing failing "test_xml_error"
"strip_invalid_utf_8_chars(xml)" didn't handle being passed "nil".
1 parent 1237077 commit 8df1d9e

File tree

1 file changed

+47
-47
lines changed

1 file changed

+47
-47
lines changed

lib/oai/client.rb

Lines changed: 47 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
require 'oai/client/metadata_format'
1717
require 'oai/client/response'
1818
require 'oai/client/header'
19-
require 'oai/client/record'
19+
require 'oai/client/record'
2020
require 'oai/client/identify'
2121
require 'oai/client/get_record'
2222
require 'oai/client/list_identifiers'
@@ -27,7 +27,7 @@
2727
module OAI
2828

2929
# A OAI::Client provides a client api for issuing OAI-PMH verbs against
30-
# a OAI-PMH server. The 6 OAI-PMH verbs translate directly to methods you
30+
# a OAI-PMH server. The 6 OAI-PMH verbs translate directly to methods you
3131
# can call on a OAI::Client object. Verb arguments are passed as a hash:
3232
#
3333
# client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
@@ -36,9 +36,9 @@ module OAI
3636
# puts identifier
3737
# end
3838
#
39-
# It is worth noting that the api uses methods and parameter names with
40-
# underscores in them rather than studly caps. So above list_identifiers
41-
# and metadata_prefix are used instead of the listIdentifiers and
39+
# It is worth noting that the api uses methods and parameter names with
40+
# underscores in them rather than studly caps. So above list_identifiers
41+
# and metadata_prefix are used instead of the listIdentifiers and
4242
# metadataPrefix used in the OAI-PMH specification.
4343
#
4444
# Also, the from and until arguments which specify dates should be passed
@@ -49,10 +49,10 @@ module OAI
4949
# the OAI-PMH docs at:
5050
#
5151
# http://www.openarchives.org/OAI/openarchivesprotocol.html
52-
52+
5353
class Client
5454

55-
# The constructor which must be passed a valid base url for an oai
55+
# The constructor which must be passed a valid base url for an oai
5656
# service:
5757
#
5858
# client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
@@ -67,15 +67,15 @@ class Client
6767
# back XML::Node objects
6868
#
6969
# client = OAI::Client.new 'http://example.com', :parser => 'libxml'
70-
#
71-
# You can configure the Faraday HTTP client by providing an alternate
70+
#
71+
# You can configure the Faraday HTTP client by providing an alternate
7272
# Faraday instance:
7373
#
7474
# client = OAI::Client.new 'http://example.com', :http => Faraday.new { |c| }
7575
#
7676
# === HIGH PERFORMANCE
7777
#
78-
# If you want to supercharge this api install libxml-ruby >= 0.3.8 and
78+
# If you want to supercharge this api install libxml-ruby >= 0.3.8 and
7979
# use the :parser option when you construct your OAI::Client.
8080
#
8181
def initialize(base_url, options={})
@@ -94,7 +94,7 @@ def initialize(base_url, options={})
9494
require 'faraday_middleware'
9595
@http_client.use FaradayMiddleware::FollowRedirects, :limit => count
9696
end
97-
97+
9898
# load appropriate parser
9999
case @parser
100100
when 'libxml'
@@ -113,33 +113,33 @@ def initialize(base_url, options={})
113113
end
114114

115115
# Equivalent to an Identify request. You'll get back an OAI::IdentifyResponse
116-
# object which is essentially just a wrapper around a REXML::Document
117-
# for the response. If you created your client using the libxml
116+
# object which is essentially just a wrapper around a REXML::Document
117+
# for the response. If you created your client using the libxml
118118
# parser then you will get an XML::Node object instead.
119-
119+
120120
def identify
121121
return OAI::IdentifyResponse.new(do_request('Identify'))
122122
end
123123

124124
# Equivalent to a ListMetadataFormats request. A ListMetadataFormatsResponse
125-
# object is returned to you.
126-
125+
# object is returned to you.
126+
127127
def list_metadata_formats(opts={})
128128
return OAI::ListMetadataFormatsResponse.new(do_request('ListMetadataFormats', opts))
129129
end
130130

131131
# Equivalent to a ListIdentifiers request. Pass in :from, :until arguments
132-
# as Date or DateTime objects as appropriate depending on the granularity
132+
# as Date or DateTime objects as appropriate depending on the granularity
133133
# supported by the server.
134-
134+
135135
def list_identifiers(opts={})
136-
return OAI::ListIdentifiersResponse.new(do_request('ListIdentifiers', opts))
136+
return OAI::ListIdentifiersResponse.new(do_request('ListIdentifiers', opts))
137137
end
138138

139-
# Equivalent to a GetRecord request. You must supply an identifier
139+
# Equivalent to a GetRecord request. You must supply an identifier
140140
# argument. You should get back a OAI::GetRecordResponse object
141141
# which you can extract a OAI::Record object from.
142-
142+
143143
def get_record(opts={})
144144
return OAI::GetRecordResponse.new(do_request('GetRecord', opts))
145145
end
@@ -150,47 +150,47 @@ def get_record(opts={})
150150
# for record in client.list_records
151151
# puts record.metadata
152152
# end
153-
153+
154154
def list_records(opts={})
155155
return OAI::ListRecordsResponse.new(do_request('ListRecords', opts))
156156
end
157157

158158
# Equivalent to the ListSets request. A ListSetsResponse object
159-
# will be returned which you can use for iterating through the
159+
# will be returned which you can use for iterating through the
160160
# OAI::Set objects
161161
#
162162
# for set in client.list_sets
163163
# puts set
164164
# end
165-
165+
166166
def list_sets(opts={})
167167
return OAI::ListSetsResponse.new(do_request('ListSets', opts))
168168
end
169169

170-
private
170+
private
171171

172172
def do_request(verb, opts = nil)
173173
# fire off the request and return appropriate DOM object
174174
uri = build_uri(verb, opts)
175175
xml = strip_invalid_utf_8_chars(get(uri))
176-
if @parser == 'libxml'
176+
if @parser == 'libxml'
177177
# remove default namespace for oai-pmh since libxml
178-
# isn't able to use our xpaths to get at them
178+
# isn't able to use our xpaths to get at them
179179
# if you know a way around this please let me know
180180
xml = xml.gsub(
181-
/xmlns=\"http:\/\/www.openarchives.org\/OAI\/.\..\/\"/, '')
181+
/xmlns=\"http:\/\/www.openarchives.org\/OAI\/.\..\/\"/, '')
182182
end
183183
return load_document(xml)
184184
end
185-
185+
186186
def build_uri(verb, opts)
187187
opts = validate_options(verb, opts)
188188
uri = @base.clone
189189
uri.query = "verb=" << verb
190190
opts.each_pair { |k,v| uri.query << '&' << externalize(k) << '=' << encode(v) }
191191
uri
192192
end
193-
193+
194194
def encode(value)
195195
return CGI.escape(value) unless value.respond_to?(:strftime)
196196
if value.kind_of?(DateTime)
@@ -229,28 +229,28 @@ def get(uri)
229229
def debug(msg)
230230
$stderr.print("#{msg}\n") if @debug
231231
end
232-
232+
233233
# Massage the standard OAI options to make them a bit more palatable.
234234
def validate_options(verb, opts = {})
235235
raise OAI::VerbException.new unless Const::VERBS.keys.include?(verb)
236236

237237
return {} if opts.nil?
238238

239239
raise OAI::ArgumentException.new unless opts.respond_to?(:keys)
240-
240+
241241
realopts = {}
242242
# Internalize the hash
243243
opts.keys.each do |key|
244244
realopts[key.to_s.gsub(/([A-Z])/, '_\1').downcase.intern] = opts.delete(key)
245245
end
246-
246+
247247
return realopts if is_resumption?(realopts)
248-
248+
249249
# add in a default metadataPrefix if none exists
250250
if(Const::VERBS[verb].include?(:metadata_prefix))
251251
realopts[:metadata_prefix] ||= 'oai_dc'
252252
end
253-
253+
254254
# Convert date formatted strings into dates.
255255
#realopts[:from] = parse_date(realopts[:from]) if realopts[:from]
256256
#realopts[:until] = parse_date(realopts[:until]) if realopts[:until]
@@ -261,43 +261,43 @@ def validate_options(verb, opts = {})
261261
end
262262
realopts
263263
end
264-
264+
265265
def is_resumption?(opts)
266-
if opts.keys.include?(:resumption_token)
266+
if opts.keys.include?(:resumption_token)
267267
return true if 1 == opts.keys.size
268268
raise OAI::ArgumentException.new
269269
end
270270
end
271-
271+
272272
# Convert our internal representations back into standard OAI options
273273
def externalize(value)
274274
value.to_s.gsub(/_[a-z]/) { |m| m.sub("_", '').capitalize }
275275
end
276-
276+
277277
def parse_date(value)
278278
return value if value.respond_to?(:strftime)
279-
279+
280280
Date.parse(value) # This will raise an exception for badly formatted dates
281281
Time.parse(value).utc # Sadly, this will not
282282
rescue
283-
raise OAI::ArgumentError.new
283+
raise OAI::ArgumentError.new
284284
end
285-
285+
286286
# Strip out invalid UTF-8 characters. Regex from the W3C, inverted.
287287
# http://www.w3.org/International/questions/qa-forms-utf-8.en.php
288288
#
289-
# Regex is from WebCollab:
289+
# Regex is from WebCollab:
290290
# http://webcollab.sourceforge.net/unicode.html
291291
def strip_invalid_utf_8_chars(xml)
292-
simple_bytes = xml.gsub(/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]
292+
xml && xml.gsub(/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]
293293
| [\x00-\x7F][\x80-\xBF]+
294294
| ([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*
295295
| [\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})
296296
| [\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))
297-
| (?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/x, '?')
298-
simple_bytes.gsub(/\xE0[\x80-\x9F][\x80-\xBF]
297+
| (?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/x, '?')\
298+
.gsub(/\xE0[\x80-\x9F][\x80-\xBF]
299299
| \xED[\xA0-\xBF][\x80-\xBF]/,'?')
300300
end
301-
301+
302302
end
303303
end

0 commit comments

Comments
 (0)