Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions .env.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
TIMDEX_HOST=FAKE_TIMDEX_HOST
TIMDEX_GRAPHQL=https://FAKE_TIMDEX_HOST/graphql
TIMDEX_INDEX=FAKE_TIMDEX_INDEX
ALMA_OPENURL=https://na06.alma.exlibrisgroup.com/view/uresolver/01MIT_INST/openurl?
GDT=false
MIT_PRIMO_URL=https://mit.primo.exlibrisgroup.com
PRIMO_API_KEY=FAKE_PRIMO_API_KEY
PRIMO_API_URL=https://api-na.hosted.exlibrisgroup.com/primo/v1
PRIMO_SCOPE=cdi
PRIMO_TAB=all
PRIMO_VID=01MIT_INST:MIT
SYNDETICS_PRIMO_URL=https://syndetics.com/index.php?client=primo
TIMDEX_GRAPHQL=https://FAKE_TIMDEX_HOST/graphql
TIMDEX_HOST=FAKE_TIMDEX_HOST
TIMDEX_INDEX=FAKE_TIMDEX_INDEX
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,14 @@ See `Optional Environment Variables` for more information.

### Required Environment Variables

- `ALMA_OPENURL`: The base URL for Alma openurls found in CDI records.
- `MIT_PRIMO_URL`: The base URL for MIT Libraries' Primo instance (used to generate record links).
- `PRIMO_API_KEY`: The Primo Search API key.
- `PRIMO_API_URL`: The Primo Search API base URL.
- `PRIMO_SCOPE`: The Primo Search API `scope` param (set to `cdi` for CDI-scoped results).
- `PRIMO_TAB`: The Primo Search API `tab` param (typically `all`).
- `PRIMO_VID`: The Primo Search API `vid` (or 'view ID') param.
- `SYNDETICS_PRIMO_URL`: The Syndetics API URL for Primo. This is used to construct thumbnail URLs.
- `TIMDEX_GRAPHQL`: Set this to the URL of the GraphQL endpoint. There is no default value in the application.

### Optional Environment Variables
Expand Down Expand Up @@ -121,6 +129,7 @@ may have unexpected consequences if applied to other TIMDEX UI apps.
- `GLOBAL_ALERT`: The main functionality for this comes from our theme gem, but when set the value will be rendered as
safe html above the main header of the site.
- `PLATFORM_NAME`: The value set is added to the header after the MIT Libraries logo. The logic and CSS for this comes from our theme gem.
- `PRIMO_TIMEOUT`: The number of seconds before a Primo request times out (default 6).
- `REQUESTS_PER_PERIOD` - number of requests that can be made for general throttles per `REQUEST_PERIOD`
- `REQUEST_PERIOD` - time in minutes used along with `REQUESTS_PER_PERIOD`
- `REDIRECT_REQUESTS_PER_PERIOD` - number of requests whose query string starts with our legacy redirect parameter that can be made per `REQUEST_PERIOD` before being throttled
Expand Down
320 changes: 320 additions & 0 deletions app/models/normalize_primo_record.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,320 @@
# Transforms a PNX doc from Primo Search API into a normalized record.
#
# The record is expected to be a Hash with string keys. Field values inside the `pnx` sections are
# treated as arrays of strings (they are joined or indexed with `first`). Missing sections or
# fields yield nil (or an empty array / placeholder where noted) rather than raising.
class NormalizePrimoRecord
  # Maps raw Primo resource types to display labels. Types not listed here are capitalized.
  TYPE_LABELS = {
    'BKSE' => 'eBook',
    'reference_entry' => 'Reference Entry',
    'Book_chapter' => 'Book Chapter'
  }.freeze

  # @param record [Hash] a single doc from the Primo Search API response
  # @param query [String] the search term that produced this record (used for dedup links)
  def initialize(record, query)
    @record = record
    @query = query
  end

  # @return [Hash] the normalized record, keyed by string field names
  def normalize
    {
      'title' => title,
      'creators' => creators,
      'source' => source,
      'year' => year,
      'format' => format,
      'links' => links,
      'citation' => citation,
      'container' => container_title,
      'identifier' => record_id,
      'summary' => summary,
      'numbering' => numbering,
      'chapter_numbering' => chapter_numbering,
      'thumbnail' => thumbnail,
      'publisher' => publisher,
      'location' => best_location,
      'subjects' => subjects,
      'availability' => best_availability,
      'other_availability' => other_availability?
    }
  end

  private

  # Returns the named section of the PNX record, or an empty hash when it is absent, so that
  # field lookups never hit nil.
  def pnx(section)
    @record.dig('pnx', section) || {}
  end

  # Returns the delivery section of the record, or an empty hash when it is absent.
  def delivery
    @record['delivery'] || {}
  end

  def title
    parts = pnx('display')['title']
    parts.present? ? parts.join : 'unknown title'
  end

  # Creators are listed before contributors; duplicate entries are removed.
  def creators
    names = %w[creator contributor].flat_map do |field|
      values = pnx('display')[field]
      values ? sanitize_authors(values) : []
    end

    names.map { |name| { value: name, link: author_link(name) } }.uniq
  end

  def source
    'Primo'
  end

  # Prefers the display creation date and falls back to the search creation date.
  def year
    dates = pnx('display')['creationdate']
    dates = pnx('search')['creationdate'] unless dates.present?
    dates&.join
  end

  def format
    types = pnx('display')['type']
    normalize_type(types.join) if types
  end

  # While the links object in the Primo response often contains more than the Alma openurl, that is
  # the one that is most predictably useful to us. The record_link is constructed.
  def links
    found = []

    # Use dedup URL as the full record link if available, otherwise use record link
    full_record_url = dedup_url
    full_record_url = record_link unless full_record_url.present?
    found << { 'url' => full_record_url, 'kind' => 'full record' } if full_record_url.present?

    # Resolve the openurl once: resolving it can log a warning, which should not be duplicated.
    resolver_url = openurl
    found << { 'url' => resolver_url, 'kind' => 'openurl' } if resolver_url.present?

    found
  end

  # Volume/issue numbering when a volume is present, otherwise date and pages when both exist.
  def citation
    data = pnx('addata')

    if data['volume'].present?
      volume_issue(data)
    elsif data['date'].present? && data['pages'].present?
      date_pages(data)
    end
  end

  # Journal title when present, otherwise book title.
  def container_title
    data = pnx('addata')
    titles = [data['jtitle'], data['btitle']].find(&:present?)
    titles&.join
  end

  def record_id
    pnx('control')['recordid']&.join
  end

  def summary
    pnx('display')['description']&.join(' ')
  end

  # This constructs a link to the record in Primo.
  #
  # We've altered this method slightly to address bugs introduced in the Primo VE November 2021
  # release. The search_scope param is now required for CDI fulldisplay links, and the context param
  # is now required for local (catalog) fulldisplay links.
  #
  # In order to avoid more surprises, we're adding all of the params included in the fulldisplay
  # example links provided here, even though not all of them are actually required at present:
  # https://developers.exlibrisgroup.com/primo/apis/deep-links-new-ui/
  #
  # We should keep an eye on this over subsequent Primo releases and revert it to something more
  # minimalist/sensible when Ex Libris fixes this issue.
  def record_link
    docid = record_id
    context = @record['context']
    return unless docid && context

    base = [ENV.fetch('MIT_PRIMO_URL'), '/discovery/fulldisplay?'].join
    params = {
      docid: docid,
      vid: ENV.fetch('PRIMO_VID'),
      context: context,
      search_scope: 'all',
      lang: 'en',
      tab: ENV.fetch('PRIMO_TAB')
    }.to_query
    [base, params].join
  end

  def numbering
    data = pnx('addata')
    volume_issue(data) if data['volume']
  end

  # Date and pages, reported only for records that have a book title.
  def chapter_numbering
    data = pnx('addata')
    return unless data['btitle'] && data['date'] && data['pages']

    date_pages(data)
  end

  # Formats "volume V" or "volume V issue I" from the addata section.
  def volume_issue(data)
    volume = "volume #{data['volume'].join}"
    return volume unless data['issue'].present?

    "#{volume} issue #{data['issue'].join}"
  end

  # Formats "DATE, pp. PAGES" from the addata section.
  def date_pages(data)
    "#{data['date'].join}, pp. #{data['pages'].join}"
  end

  # Splits semicolon-delimited author strings into individual names and drops any `$$Q...`
  # suffix. Returns a new array; the array passed in (which belongs to the record) is not
  # modified. Whitespace is stripped after the suffix is removed so names have no trailing space.
  def sanitize_authors(authors)
    authors.flat_map { |author| author.split(';') }
           .map { |name| name.gsub(/\$\$Q.*$/, '').strip }
  end

  def author_link(author)
    [ENV.fetch('MIT_PRIMO_URL'),
     '/discovery/search?query=creator,exact,',
     encode_author(author),
     '&tab=', ENV.fetch('PRIMO_TAB'),
     '&search_scope=all&vid=',
     ENV.fetch('PRIMO_VID')].join
  end

  def encode_author(author)
    URI.encode_uri_component(author)
  end

  def normalize_type(type)
    TYPE_LABELS[type] || type.capitalize
  end

  # It's possible we'll encounter records that use a different server,
  # so we want to test against our expected server to guard against
  # malformed URLs. This assumes all URL strings begin with https://.
  #
  # When the servers differ (or ALMA_OPENURL is not set), a warning is logged and the Alma
  # openurl from the record is returned unchanged.
  def openurl
    alma_url = delivery['almaOpenurl']
    return unless alma_url

    # Characters 8..11 follow "https://", i.e. the start of the host name.
    expected_server = ENV.fetch('ALMA_OPENURL', '')[8, 4]
    record_server = alma_url[8, 4]
    return construct_primo_openurl if expected_server && expected_server == record_server

    Rails.logger.warn "Alma openurl server mismatch. Expected #{expected_server}, but received #{record_server}. (record ID: #{record_id})"
    alma_url
  end

  def construct_primo_openurl
    alma_url = delivery['almaOpenurl']
    return unless alma_url

    # Here we are converting the Alma link resolver URL provided by the Primo
    # Search API to redirect to the Primo UI. This is done for UX purposes,
    # as the regular Alma link resolver URLs redirect to a plaintext
    # disambiguation page.
    #
    # NOTE(review): EXL_INST_ID falls back to nil, which yields an empty `institution=` param;
    # confirm this variable is documented and configured wherever the app is deployed.
    primo_openurl_base = [ENV.fetch('MIT_PRIMO_URL', nil),
                          '/discovery/openurl?institution=',
                          ENV.fetch('EXL_INST_ID', nil),
                          '&vid=',
                          ENV.fetch('PRIMO_VID', nil),
                          '&'].join
    primo_openurl = alma_url.gsub(ENV.fetch('ALMA_OPENURL', nil), primo_openurl_base)

    # The ctx params appear to break Primo openurls, so we need to remove them.
    #
    # NOTE(review): the whole URL (not only its query string) is handed to the query parser, so
    # the text before the first "=" becomes a key; confirm the re-serialized output still begins
    # with the base URL for the openurls we receive.
    params = Rack::Utils.parse_nested_query(primo_openurl)
    filtered = params.reject { |key, _value| key.start_with?('ctx') }
    URI::DEFAULT_PARSER.unescape(filtered.to_param)
  end

  def thumbnail
    isbns = pnx('addata')['isbn']
    return unless isbns

    # A record can have multiple ISBNs, so we are assuming here that
    # the thumbnail URL can be constructed from the first occurrence
    [ENV.fetch('SYNDETICS_PRIMO_URL', nil), '&isbn=', isbns.first, '/sc.jpg'].join
  end

  def publisher
    pnx('addata')['pub']&.first
  end

  # @return [Array, nil] a two-element array of "main sub" location text and call number
  def best_location
    loc = delivery['bestlocation']
    return unless loc

    ["#{loc['mainLocation']} #{loc['subLocation']}", loc['callNumber']]
  end

  def subjects
    pnx('display')['subject'] || []
  end

  def best_availability
    loc = delivery['bestlocation']
    loc['availabilityStatus'] if loc
  end

  # True when the record has a best location and more than one holding. Returns nil when the
  # record has no delivery section, no best location, or no holdings.
  def other_availability?
    holdings = delivery['holding']
    return unless delivery['bestlocation'] && holdings

    holdings.length > 1
  end

  # FRBR Group check based on:
  # https://knowledge.exlibrisgroup.com/Primo/Knowledge_Articles/Primo_Search_API_-_how_to_get_FRBR_Group_members_after_a_search
  def frbrized?
    pnx('facets')['frbrtype']&.join == '5'
  end

  # Builds a Primo search link limited to the record's FRBR group. Only produced for frbrized
  # records that carry exactly one FRBR group id.
  def dedup_url
    return unless frbrized?

    group_ids = pnx('facets')['frbrgroupid']
    return unless group_ids && group_ids.length == 1

    base = [ENV.fetch('MIT_PRIMO_URL', nil), '/discovery/search?'].join
    params = {
      query: "any,contains,#{@query}",
      tab: ENV.fetch('PRIMO_TAB', nil),
      search_scope: ENV.fetch('PRIMO_SCOPE', nil),
      sortby: 'date_d',
      vid: ENV.fetch('PRIMO_VID', nil),
      facet: "frbrgroupid,include,#{group_ids.join}"
    }.to_query
    [base, params].join
  end
end
19 changes: 19 additions & 0 deletions app/models/normalize_primo_results.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Normalizes a whole Primo Search API response, one doc at a time.
class NormalizePrimoResults
  # @param results [Hash, nil] the parsed Primo Search API response
  # @param query [String] the search term, passed through to each record normalizer
  def initialize(results, query)
    @results = results
    @query = query
  end

  # @return [Array<Hash>] normalized records; empty when the response is nil or has no 'docs'
  def normalize
    docs = @results&.dig('docs')
    return [] unless docs

    docs.filter_map { |doc| NormalizePrimoRecord.new(doc, @query).normalize }
  end

  # @return [Object] the value at info.total in the response, or 0 when it is absent
  def total_results
    reported = @results&.dig('info', 'total')
    reported || 0
  end
end
Loading