Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ may have unexpected consequences if applied to other TIMDEX UI apps.
- `REQUEST_PERIOD` - time in minutes used along with `REQUESTS_PER_PERIOD`
- `REDIRECT_REQUESTS_PER_PERIOD` - number of requests whose query string starts with our legacy redirect parameter that can be made per `REDIRECT_REQUEST_PERIOD`
- `REDIRECT_REQUEST_PERIOD` - time in minutes used along with `REDIRECT_REQUESTS_PER_PERIOD`
- `RESULTS_PER_PAGE`: The number of results to display per page. Use an even number, since the combined results view splits this count evenly between the two sources. Defaults to 20 if unset.
- `SENTRY_DSN`: Client key for Sentry exception logging.
- `SENTRY_ENV`: Sentry environment for the application. Defaults to 'unknown' if unset.
- `TACOS_SOURCE`: If set, this value is sent to TACOS (as the `sourceSystem` value) to distinguish which application
Expand Down
92 changes: 59 additions & 33 deletions app/controllers/search_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ def load_geodata_results
@errors = extract_errors(response)
return unless @errors.nil?

@pagination = Analyzer.new(@enhanced_query, response, :timdex).pagination
hits = response.dig(:data, 'search', 'hits') || 0
@pagination = Analyzer.new(@enhanced_query, hits, :timdex).pagination
raw_results = extract_results(response)
@results = NormalizeTimdexResults.new(raw_results, @enhanced_query[:q]).normalize
@filters = extract_filters(response)
Expand All @@ -87,43 +88,58 @@ def load_timdex_results
end

def load_all_results
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really like the smaller methods here. It's much easier to read with well named methods.

# Parallel fetching from both APIs
primo_thread = Thread.new { fetch_primo_data }
timdex_thread = Thread.new { fetch_timdex_data }
# Fetch results from both APIs in parallel
primo_data, timdex_data = fetch_all_data

# Wait for both threads to complete
primo_data = primo_thread.value
timdex_data = timdex_thread.value

# Collect any errors from either API
all_errors = []
all_errors.concat(primo_data[:errors]) if primo_data[:errors]
all_errors.concat(timdex_data[:errors]) if timdex_data[:errors]
@errors = all_errors.any? ? all_errors : nil
# Combine errors from both APIs
@errors = combine_errors(primo_data[:errors], timdex_data[:errors])

# Zipper merge results from both APIs
primo_results = primo_data[:results] || []
timdex_results = timdex_data[:results] || []
@results = primo_results.zip(timdex_results).flatten.compact
@results = merge_results(primo_data[:results], timdex_data[:results])

# For now, just use primo pagination as a placeholder
@pagination = primo_data[:pagination] || {}
# Use Analyzer for combined pagination calculation
@pagination = Analyzer.new(@enhanced_query, timdex_data[:hits], :all,
primo_data[:hits]).pagination

# Handle primo continuation for high page numbers
@show_primo_continuation = primo_data[:show_continuation] || false
end

# Queries the Primo and TIMDEX APIs concurrently.
#
# Both requests are started before either is joined, so total latency is
# roughly that of the slower API rather than the sum of both.
#
# @return [Array(Hash, Hash)] the Primo data hash followed by the TIMDEX data hash
def fetch_all_data
  workers = [
    Thread.new { fetch_primo_data },
    Thread.new { fetch_timdex_data }
  ]

  # Thread#value joins the thread and re-raises any exception it swallowed.
  workers.map(&:value)
end

# Merges error lists from any number of sources into one flat array.
#
# @param error_sources [Array<Array, nil>] per-source error lists; nils are ignored
# @return [Array, nil] the combined errors, or nil when there are none, so
#   callers can use a simple truthiness check (e.g. on @errors)
def combine_errors(*error_sources)
  merged = error_sources.compact.flatten
  return nil unless merged.any?

  merged
end

# Zipper-merges the two result lists so they alternate:
# primo[0], timdex[0], primo[1], timdex[1], ...
#
# Bug fix: Array#zip pads with nil when the ARGUMENT is shorter but silently
# truncates when the RECEIVER is shorter ([].zip([a, b]) == []). The original
# always zipped from primo_results, so whenever Primo returned fewer results
# than TIMDEX (including the empty list produced past PRIMO_MAX_OFFSET), the
# surplus TIMDEX results were dropped. We now zip from the longer list and
# restore primo-first ordering within each pair.
#
# @param primo_results [Array, nil] normalized Primo results
# @param timdex_results [Array, nil] normalized TIMDEX results
# @return [Array] interleaved results with nil padding removed
def merge_results(primo_results, timdex_results)
  primo = primo_results || []
  timdex = timdex_results || []

  if primo.length >= timdex.length
    primo.zip(timdex).flatten(1).compact
  else
    # Zip from the longer list, then reverse each pair to keep primo first.
    timdex.zip(primo).map(&:reverse).flatten(1).compact
  end
end

def fetch_primo_data
current_page = @enhanced_query[:page] || 1
per_page = @enhanced_query[:per_page] || 20
per_page = if @active_tab == 'all'
ENV.fetch('RESULTS_PER_PAGE', '20').to_i / 2
else
ENV.fetch('RESULTS_PER_PAGE', '20').to_i
end
offset = (current_page - 1) * per_page

# Check if we're beyond Primo API limits before making the request.
return { results: [], pagination: {}, errors: nil, show_continuation: true } if offset >= Analyzer::PRIMO_MAX_OFFSET
if offset >= Analyzer::PRIMO_MAX_OFFSET
return { results: [], pagination: {}, errors: nil, show_continuation: true, hits: 0 }
end

primo_response = query_primo
primo_response = query_primo(per_page, offset)
hits = primo_response.dig('info', 'total') || 0
results = NormalizePrimoResults.new(primo_response, @enhanced_query[:q]).normalize
pagination = Analyzer.new(@enhanced_query, primo_response, :primo).pagination
pagination = Analyzer.new(@enhanced_query, hits , :primo).pagination

# Handle empty results from Primo API. Sometimes Primo will return no results at a given offset,
# despite claiming in the initial query that more are available. This happens randomly and
Expand All @@ -142,23 +158,37 @@ def fetch_primo_data
end
end

{ results: results, pagination: pagination, errors: errors, show_continuation: show_continuation }
{ results: results, pagination: pagination, errors: errors, show_continuation: show_continuation,
hits: hits }
rescue StandardError => e
{ results: [], pagination: {}, errors: handle_primo_errors(e), show_continuation: false }
{ results: [], pagination: {}, errors: handle_primo_errors(e), show_continuation: false, hits: 0 }
end

def fetch_timdex_data
query = QueryBuilder.new(@enhanced_query).query
# For all tab, modify query to use half page size
if @active_tab == 'all'
per_page = ENV.fetch('RESULTS_PER_PAGE', '20').to_i / 2
page = @enhanced_query[:page] || 1
from_offset = ((page - 1) * per_page).to_s

query_builder = QueryBuilder.new(@enhanced_query)
query = query_builder.query
query['from'] = from_offset
else
query = QueryBuilder.new(@enhanced_query).query
end

response = query_timdex(query)
errors = extract_errors(response)

if errors.nil?
pagination = Analyzer.new(@enhanced_query, response, :timdex).pagination
hits = response.dig(:data, 'search', 'hits') || 0
pagination = Analyzer.new(@enhanced_query, hits, :timdex).pagination
raw_results = extract_results(response)
results = NormalizeTimdexResults.new(raw_results, @enhanced_query[:q]).normalize
{ results: results, pagination: pagination, errors: nil }
{ results: results, pagination: pagination, errors: nil, hits: hits }
else
{ results: [], pagination: {}, errors: errors }
{ results: [], pagination: {}, errors: errors, hits: 0 }
end
end

Expand Down Expand Up @@ -191,16 +221,12 @@ def query_timdex(query)
end
end

def query_primo
def query_primo(per_page, offset)
# We generate unique cache keys to avoid naming collisions.
cache_key = generate_cache_key(@enhanced_query)

Rails.cache.fetch("#{cache_key}/primo", expires_in: 12.hours) do
primo_search = PrimoSearch.new
per_page = @enhanced_query[:per_page] || 20
current_page = @enhanced_query[:page] || 1
offset = (current_page - 1) * per_page

primo_search.search(@enhanced_query[:q], per_page, offset)
end
end
Expand Down
66 changes: 35 additions & 31 deletions app/models/analyzer.rb
Original file line number Diff line number Diff line change
@@ -1,54 +1,58 @@
class Analyzer
attr_accessor :pagination

RESULTS_PER_PAGE = 20

# Primo API theoretical maximum recommended offset is 2000 records (per Ex Libris documentation)
# but in practice, the API often can't deliver results beyond ~960 records for large result sets,
# likely due to performance constraints.
PRIMO_MAX_OFFSET = 960

def initialize(enhanced_query, response, source)
# Initializes pagination analysis for search results.
#
# @param enhanced_query [Hash] Query parameters including :page (current page number)
# @param hits [Integer] Number of hits from primary source (TIMDEX for :all, source-specific otherwise)
# @param source [Symbol] Source tab (:primo, :timdex, or :all)
# @param secondary_hits [Integer, nil] Optional hit count from secondary source (Primo hits for :all)
def initialize(enhanced_query, hits, source, secondary_hits = nil)
@source = source
@enhanced_query = enhanced_query
@pagination = {}
@pagination[:hits] = hits(response)
@pagination[:start] = ((enhanced_query[:page] - 1) * RESULTS_PER_PAGE) + 1
@pagination[:end] = [enhanced_query[:page] * RESULTS_PER_PAGE, @pagination[:hits]].min
@pagination[:prev] = enhanced_query[:page] - 1 if enhanced_query[:page] > 1
@pagination[:next] = next_page(enhanced_query[:page], @pagination[:hits]) if next_page(
enhanced_query[:page], @pagination[:hits]
)
@pagination[:per_page] = RESULTS_PER_PAGE
set_pagination(hits, secondary_hits)
end

private

def hits(response)
return 0 if response.nil?

if @source == :primo
primo_hits(response)
elsif @source == :timdex
timdex_hits(response)
# Sets the pagination hash with hit counts and per_page values.
#
# @param hits [Integer] Hit count from primary source
# @param secondary_hits [Integer, nil] Optional hit count from secondary source
def set_pagination(hits, secondary_hits = nil)
if @source == :all
@pagination[:hits] = (secondary_hits || 0) + (hits || 0)
@pagination[:per_page] = ENV.fetch('RESULTS_PER_PAGE', '20').to_i
calculate_pagination_values
else
0
@pagination[:hits] = hits || 0
@pagination[:per_page] = ENV.fetch('RESULTS_PER_PAGE', '20').to_i
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No change requested at this time, but we should consider a future refactor to set this once at Rails startup rather than the constant Env.fetch logic. I doubt it would change anything performance-wise, but it would be a lot cleaner code to stop repeating this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean like setting it as a global constant? That would be a big improvement, especially if we ever want to change the default number of results.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking more Rails custom config or something similar:
https://guides.rubyonrails.org/configuring.html#custom-configuration

So maybe we'd end up with Rails.configuration.timdexui.per_page = ENV.fetch('RESULTS_PER_PAGE', '20').to_i which we could then access throughout the app.

I'm not sure we've done that in any of our apps, but doing that or setting a global constant in an initializer might be worth exploring in the future.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, interesting. I haven't used custom config before, but it looks useful!

calculate_pagination_values
end
end

def primo_hits(response)
return 0 unless response.is_a?(Hash)

response.dig('info', 'total') || 0
end

def timdex_hits(response)
return 0 unless response.is_a?(Hash) && response.key?(:data) && response[:data].key?('search')
return 0 unless response[:data]['search'].is_a?(Hash) && response[:data]['search'].key?('hits')

response[:data]['search']['hits']
# Calculates and sets pagination navigation values (start, end, prev, next).
# Uses the already-set @pagination[:hits] and @pagination[:per_page] values.
def calculate_pagination_values
page = @enhanced_query[:page] || 1
@pagination[:start] = ((page - 1) * @pagination[:per_page]) + 1
@pagination[:end] = [page * @pagination[:per_page], @pagination[:hits]].min
@pagination[:prev] = page - 1 if page > 1
@pagination[:next] = next_page(page, @pagination[:hits]) if next_page(page, @pagination[:hits])
end

# Calculates the next page number if more results are available.
#
# @param page [Integer] Current page number
# @param hits [Integer] Total number of results available
# @return [Integer, nil] Next page number or nil if no more pages
def next_page(page, hits)
page + 1 if page * RESULTS_PER_PAGE < hits
page + 1 if page * @pagination[:per_page] < hits
end
end
8 changes: 4 additions & 4 deletions app/models/query_builder.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
class QueryBuilder
attr_reader :query

RESULTS_PER_PAGE = 20
QUERY_PARAMS = %w[q citation contributors fundingInformation identifiers locations subjects title booleanType].freeze
FILTER_PARAMS = %i[accessToFilesFilter contentTypeFilter contributorsFilter formatFilter languagesFilter
literaryFormFilter placesFilter sourceFilter subjectsFilter].freeze
Expand All @@ -10,7 +9,8 @@ class QueryBuilder

def initialize(enhanced_query)
@query = {}
@query['from'] = calculate_from(enhanced_query[:page])
@per_page = ENV.fetch('RESULTS_PER_PAGE', '20').to_i
@query['from'] = calculate_from(enhanced_query[:page], @per_page)

if Feature.enabled?(:geodata)
@query['geobox'] = 'true' if enhanced_query[:geobox] == 'true'
Expand All @@ -27,10 +27,10 @@ def initialize(enhanced_query)

private

def calculate_from(page = 1)
def calculate_from(page = 1, per_page = ENV.fetch('RESULTS_PER_PAGE', '20').to_i)
# This needs to return a string because Timdex needs $from to be a String
page = 1 if page.to_i.zero?
((page - 1) * RESULTS_PER_PAGE).to_s
((page - 1) * per_page).to_s
end

def extract_query(enhanced_query)
Expand Down
Loading