Skip to content

Commit 16efed8

Browse files
committed
Add an OAI set for seamless harvesting from an organization.
1 parent 372cd3b commit 16efed8

File tree

4 files changed

+83
-32
lines changed

4 files changed

+83
-32
lines changed

app/controllers/concerns/oai_concern.rb

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,14 +104,25 @@ def date_range
104104
# rubocop:disable Metrics/AbcSize
105105
# valid iff all values can be parsed and set/page are nonnegative integers
106106
# Reports whether the request parameters held by this object are well formed.
#
# The set is checked by assert_set_valid!. The page must parse as an Integer and
# from_date/until_date must parse as Dates whenever the matching instance
# variable is present. Neither set nor page may convert (via to_i) to a
# negative number.
#
# @return [Boolean] false when a check raises ArgumentError or
#   ActiveRecord::RecordNotFound, or when set/page is negative; true otherwise
def valid?
  assert_set_valid!

  Integer(page) unless @page.blank?
  Date.parse(from_date) unless @from_date.blank?
  Date.parse(until_date) unless @until_date.blank?

  !(set.to_i.negative? || page.to_i.negative?)
rescue ArgumentError, ActiveRecord::RecordNotFound
  false
end
116+
117+
# Raises unless the requested set (if any) can be resolved.
#
# A blank set is accepted. A set of the form "organization/<slug>" must match
# an Organization by slug, which is also how the OAI controller resolves the set
# when serving records; any other set must parse as an Integer.
#
# @return [Organization, Integer, nil] the resolved value (nil for a blank set)
# @raise [ActiveRecord::RecordNotFound] when no organization has the given slug
# @raise [ArgumentError] when a non-organization set is not an integer
def assert_set_valid!
  return if @set.blank?

  if @set.start_with?('organization/')
    # Use the slug lookup so a set that passes validation is also one the
    # controller can resolve.
    Organization.find_by!(slug: @set.delete_prefix('organization/'))
  else
    Integer(set)
  end
end
115126
# rubocop:enable Metrics/AbcSize
116127
end
117128

app/controllers/oai_controller.rb

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,16 @@ def error_params
110110
end
111111

112112
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
113-
def next_record_page(token, use_interstream_deltas: false)
114-
streams = if token.set.present?
113+
def next_record_page(token, use_interstream_deltas: nil)
114+
streams = if token.set&.start_with?('organization/')
115+
org_slug = token.set.split('/').last
116+
stream = Organization.find_by!(slug: org_slug).default_stream
117+
use_interstream_deltas = true if use_interstream_deltas.nil?
118+
119+
authorize! :read, stream
120+
121+
[stream]
122+
elsif token.set.present?
115123
Stream.accessible_by(current_ability).where(id: token.set)
116124
else
117125
Stream.accessible_by(current_ability).where(status: 'default')
@@ -205,31 +213,57 @@ def build_list_records_response(page, token = nil)
205213
end
206214

207215
# See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListSets
208-
# rubocop:disable Metrics/AbcSize
209-
# rubocop:disable Metrics/MethodLength
210216
# Builds the XML body of a ListSets response.
#
# Inside the ListSets element, one set is written per distinct organization
# owning the given streams, followed by one set per stream.
#
# @param streams [Enumerable] streams to advertise; each responds to #organization
# @return [String] the serialized XML document
def build_list_sets_response(streams)
  builder = Nokogiri::XML::Builder.new do |xml|
    build_oai_response(xml, list_sets_params) do
      xml.ListSets do
        organizations = streams.map(&:organization).uniq
        build_organizations_list_sets_response(xml, organizations)
        build_streams_list_sets_response(xml, streams)
      end
    end
  end

  builder.to_xml
end
226+
227+
# rubocop:disable Metrics/AbcSize
228+
# rubocop:disable Metrics/MethodLength
229+
# Writes one <set> element per stream into the XML document being built.
#
# Each set carries the stream id as setSpec, the stream display name as
# setName, and a Dublin Core description block (description, contributor,
# type and dates).
#
# @param xml [Nokogiri::XML::Builder] builder the elements are appended to
# @param streams [Enumerable] streams to describe
def build_streams_list_sets_response(xml, streams)
  streams.each do |stream|
    xml.set do
      xml.setSpec(stream.id)
      xml.setName(stream.display_name)
      xml.setDescription do
        xml[:oai_dc].dc(oai_dc_xmlns) do
          xml[:dc].description(oai_dc_description(stream))
          xml[:dc].contributor(stream.organization.slug)
          xml[:dc].type(oai_dc_type(stream))
          oai_dc_dates(stream).each { |date| xml[:dc].date(date) }
        end
      end
    end
  end
end
247+
248+
# Writes one <set> element per organization into the XML document being built.
#
# Each set uses "organization/<slug>" as its setSpec and the organization name
# as its setName. The Dublin Core description names the organization's default
# stream as the source and lists that stream's dates.
#
# @param xml [Nokogiri::XML::Builder] builder the elements are appended to
# @param organizations [Enumerable] organizations to describe
def build_organizations_list_sets_response(xml, organizations)
  organizations.each do |organization|
    # Resolve the default stream once per organization instead of once for the
    # source element and again for the dates.
    default_stream = organization.default_stream

    xml.set do
      xml.setSpec "organization/#{organization.slug}"
      xml.setName organization.name
      xml.setDescription do
        xml[:oai_dc].dc(oai_dc_xmlns) do
          xml[:dc].description "Seamless harvesting for #{organization.name}"
          xml[:dc].contributor organization.slug
          xml[:dc].type 'organization'
          xml[:dc].source "stream #{default_stream.id}"

          oai_dc_dates(default_stream).each do |date|
            xml[:dc].date date
          end
        end
      end
    end
  end
end
234268
# rubocop:enable Metrics/AbcSize
235269
# rubocop:enable Metrics/MethodLength

app/services/oai_marc_record_writer_service.rb

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,17 @@ def initialize(base_name = nil)
99
end
1010

1111
# Writes the record's augmented MARC to the OAI writer, tagged with both the
# organization-level set and the record's stream id.
#
# Any StandardError raised along the way is logged and reported to Honeybadger
# rather than propagated.
#
# @param record the record to serialize (responds to #augmented_marc,
#   #organization, #stream and #id)
# @param dump_created_at passed to the writer as the record datestamp
def write_marc_record(record, dump_created_at)
  writer = oai_writer
  marc = record.augmented_marc
  identifier = oai_id(record)
  set_specs = ["organization/#{record.organization.slug}", record.stream.id]

  writer.write(marc, identifier, sets: set_specs, datestamp: dump_created_at)
rescue StandardError => e
  message = "Error writing MARC OAI file #{base_name} id #{record.id}: #{e}"
  Rails.logger.info(message)
  Honeybadger.notify(message)
end
1819

1920
# Writes a deletion entry for the record to the OAI writer, tagged with both
# the organization-level set and the record's stream id.
#
# @param record the deleted record (responds to #organization and #stream)
# @param dump_created_at passed to the writer as the entry datestamp
def write_delete(record, dump_created_at)
  writer = oai_writer
  identifier = oai_id(record)
  set_specs = ["organization/#{record.organization.slug}", record.stream.id]

  writer.write_delete(identifier, sets: set_specs, datestamp: dump_created_at)
end
2224

2325
def finalize
@@ -60,13 +62,13 @@ def initialize(io)
6062
@bytes_written = 0
6163
end
6264

63-
def write(record, identifier, set, datestamp = Time.zone.now)
65+
def write(record, identifier, sets: [], datestamp: Time.zone.now)
6466
@bytes_written += @io.write <<-EOXML
6567
<record>
6668
<header>
6769
<identifier>#{identifier}</identifier>
6870
<datestamp>#{datestamp.strftime('%F')}</datestamp>
69-
<setSpec>#{set}</setSpec>
71+
#{sets.map { |s| "<setSpec>#{s}</setSpec>" }.join("\n")}
7072
</header>
7173
<metadata>
7274
#{Ox.dump(OxMarcXmlWriter.encode(record, include_namespace: true))}
@@ -75,13 +77,13 @@ def write(record, identifier, set, datestamp = Time.zone.now)
7577
EOXML
7678
end
7779

78-
def write_delete(identifier, set, datestamp = Time.zone.now)
80+
def write_delete(identifier, sets: [], datestamp: Time.zone.now)
7981
@bytes_written += @io.write <<-EOXML
8082
<record>
8183
<header status="deleted">
8284
<identifier>#{identifier}</identifier>
8385
<datestamp>#{datestamp.strftime('%F')}</datestamp>
84-
<setSpec>#{set}</setSpec>
86+
#{sets.map { |s| "<setSpec>#{s}</setSpec>" }.join("\n")}
8587
</header>
8688
</record>
8789
EOXML

spec/features/oai_spec.rb

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,14 @@
6767
it 'renders a name for each set' do
  visit oai_path(verb: 'ListSets')
  xml = Nokogiri::XML(page.body)

  # The organization-level set is listed ahead of the stream-level set.
  organization_set_name = xml.at_css('ListSets > set[1] > setName')
  expect(organization_set_name.text).to eq('My Org')

  stream_set_name = xml.at_css('ListSets > set[2] > setName')
  expect(stream_set_name.text).to eq('2020-05-06 - ')
end
7273

7374
it 'renders an identifier (setSpec) for each set' do
  visit oai_path(verb: 'ListSets')
  doc = Nokogiri::XML(page.body)

  # Check both sets: the organization-level set is listed first and uses the
  # "organization/<slug>" form; the stream set follows and uses the stream id.
  expect(doc.at_css('ListSets > set[1] > setSpec').text).to eq("organization/#{organization.slug}")
  expect(doc.at_css('ListSets > set[2] > setSpec').text).to eq(organization.default_stream.id.to_s)
end
7879

7980
it 'renders a description for each set' do
@@ -184,14 +185,17 @@
184185
it 'renders the set membership of each item' do
  visit oai_path(verb: 'ListRecords', metadataPrefix: 'marc21')
  xml = Nokogiri::XML(page.body)

  # Only the first record header is inspected here.
  first_header = xml.css('ListRecords > record > header').first
  set_specs = first_header.css('setSpec').map(&:text)

  expect(set_specs).to contain_exactly("organization/#{organization.slug}",
                                       organization.default_stream.id.to_s)
end
189192

190193
it 'renders records in the requested set' do
  visit oai_path(verb: 'ListRecords', metadataPrefix: 'marc21', set: organization.default_stream.id.to_s)
  doc = Nokogiri::XML(page.body)

  # `css` just returns the matching nodes and never calls a block handed to it,
  # so iterate explicitly; otherwise the expectation below is silently skipped.
  doc.css('ListRecords > record > header').each do |record_set|
    expect(record_set.css('setSpec').map(&:text)).to contain_exactly("organization/#{organization.slug}",
                                                                     organization.default_stream.id.to_s)
  end
end
197201

0 commit comments

Comments
 (0)