Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 105 additions & 43 deletions app/controllers/search_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,48 +88,117 @@ def load_timdex_results
end

# Load merged "all" tab results from both Primo and Timdex.
#
# Page 1 can be served from a single full-page fetch per backend; deeper
# pages need hit-count probes first so the paginator can compute per-backend
# offsets. Normalizes the page number to an Integer up front — the raw
# params value may be a String, and downstream helpers do arithmetic on it.
#
# Sets @results, @errors, @pagination, and @show_primo_continuation.
def load_all_results
  current_page = (@enhanced_query[:page] || 1).to_i
  per_page = ENV.fetch('RESULTS_PER_PAGE', '20').to_i
  data = if current_page == 1
           fetch_all_tab_first_page(current_page, per_page)
         else
           fetch_all_tab_deeper_pages(current_page, per_page)
         end

  @results = data[:results]
  @errors = data[:errors]
  @pagination = data[:pagination]
  @show_primo_continuation = data[:show_primo_continuation]
end

# Zipper merge results from both APIs
@results = merge_results(primo_data[:results], timdex_data[:results])
# Page 1 of the "all" tab: request a full page from each backend at offset 0,
# build a paginator from the reported hit counts, and assemble the merged
# result hash. Over-fetching here is deliberate — the zipper merge discards
# whatever does not fit on the page.
def fetch_all_tab_first_page(current_page, per_page)
  fetch_opts = { offset: 0, per_page: per_page }
  primo_data, timdex_data = parallel_fetch(fetch_opts, fetch_opts)

  paginator = build_paginator_from_data(primo_data, timdex_data, current_page, per_page)

  assemble_all_tab_result(paginator, primo_data, timdex_data, current_page, per_page)
end

def fetch_all_data
# Parallel fetching from both APIs
primo_thread = Thread.new { fetch_primo_data }
timdex_thread = Thread.new { fetch_timdex_data }
# Deeper pages of the "all" tab: probe each backend for its total hit count
# (single-record requests), build the paginator from those totals, then fetch
# only the chunks this page actually needs from each backend.
def fetch_all_tab_deeper_pages(current_page, per_page)
  probe_opts = { offset: 0, per_page: 1 }
  primo_summary, timdex_summary = parallel_fetch(probe_opts, probe_opts)

  paginator = build_paginator_from_data(primo_summary, timdex_summary, current_page, per_page)
  primo_data, timdex_data = fetch_all_tab_page_chunks(paginator)

  assemble_all_tab_result(paginator, primo_data, timdex_data, current_page, per_page, deeper: true)
end

# Run the Primo and Timdex fetches concurrently and block until both finish.
# Thread#value joins each thread, returning its result and re-raising any
# exception the thread raised.
def parallel_fetch(primo_opts = {}, timdex_opts = {})
  [
    Thread.new { fetch_primo_data(**primo_opts) },
    Thread.new { fetch_timdex_data(**timdex_opts) }
  ].map(&:value)
end

# Wrap the hit counts reported by each API response in a MergedSearchPaginator.
# A missing hit count (e.g. a backend that errored) is treated as zero hits.
def build_paginator_from_data(primo_data, timdex_data, current_page, per_page)
  MergedSearchPaginator.new(
    primo_total: primo_data[:hits] || 0,
    timdex_total: timdex_data[:hits] || 0,
    current_page: current_page,
    per_page: per_page
  )
end

# For deeper pages: ask the paginator how many records each backend owes this
# page (merge plan) and where each backend's chunk starts (api_offsets), then
# fetch only the non-empty chunks, in parallel. A backend that contributes
# nothing to this page is not queried; a stub hash standing in for its
# response preserves the total hit count for downstream pagination.
def fetch_all_tab_page_chunks(paginator)
  plan = paginator.merge_plan
  primo_needed = plan.count(:primo)
  timdex_needed = plan.count(:timdex)
  primo_offset, timdex_offset = paginator.api_offsets

  primo_thread = nil
  if primo_needed.positive?
    primo_thread = Thread.new { fetch_primo_data(offset: primo_offset, per_page: primo_needed) }
  end

  timdex_thread = nil
  if timdex_needed.positive?
    timdex_thread = Thread.new { fetch_timdex_data(offset: timdex_offset, per_page: timdex_needed) }
  end

  primo_data = if primo_thread
                 primo_thread.value
               else
                 { results: [], errors: nil, hits: paginator.primo_total, show_continuation: false }
               end

  timdex_data = if timdex_thread
                  timdex_thread.value
                else
                  { results: [], errors: nil, hits: paginator.timdex_total }
                end

  [primo_data, timdex_data]
end

# Assemble the final result hash (results, errors, pagination, continuation
# flag) from the paginator plus the raw Primo/Timdex API payloads.
#
# On deeper pages the Primo continuation notice is also shown once the page
# offset passes Primo's maximum supported offset. The page number is coerced
# with #to_i before arithmetic because callers may pass the raw params value
# as a String.
def assemble_all_tab_result(paginator, primo_data, timdex_data, current_page, per_page, deeper: false)
  primo_total = primo_data[:hits] || 0
  timdex_total = timdex_data[:hits] || 0

  merged = paginator.merge_results(primo_data[:results] || [], timdex_data[:results] || [])
  errors = combine_errors(primo_data[:errors], timdex_data[:errors])
  pagination = Analyzer.new(@enhanced_query, timdex_total, :all, primo_total).pagination

  show_primo_continuation = if deeper
                              page_offset = (current_page.to_i - 1) * per_page
                              primo_data[:show_continuation] || (page_offset >= Analyzer::PRIMO_MAX_OFFSET)
                            else
                              primo_data[:show_continuation]
                            end

  { results: merged, errors: errors, pagination: pagination, show_primo_continuation: show_primo_continuation }
end

# Flatten error arrays from multiple backends into one list.
# Nil arguments are dropped first (so nil elements inside an array survive,
# matching the original compact-then-flatten order). Returns nil when no
# truthy errors remain, so callers can treat "no errors" as falsy.
def combine_errors(*error_arrays)
  collected = error_arrays.compact.flatten
  return unless collected.any?

  collected
end

# Zipper-merge two result lists: primo[0], timdex[0], primo[1], timdex[1], ...
# Note zip truncates to the left list's length, so extra Timdex results beyond
# the Primo count are dropped; nil pads from uneven lists are removed by compact.
def merge_results(primo_results, timdex_results)
  primo_list = primo_results || []
  timdex_list = timdex_results || []
  primo_list.zip(timdex_list).flatten.compact
end

def fetch_primo_data
def fetch_primo_data(offset: nil, per_page: nil)
# Default to current page if not provided
current_page = @enhanced_query[:page] || 1
per_page = if @active_tab == 'all'
ENV.fetch('RESULTS_PER_PAGE', '20').to_i / 2
else
ENV.fetch('RESULTS_PER_PAGE', '20').to_i
end
offset = (current_page - 1) * per_page
per_page ||= ENV.fetch('RESULTS_PER_PAGE', '20').to_i
offset ||= (current_page - 1) * per_page

# Check if we're beyond Primo API limits before making the request.
if offset >= Analyzer::PRIMO_MAX_OFFSET
Expand All @@ -139,7 +208,7 @@ def fetch_primo_data
primo_response = query_primo(per_page, offset)
hits = primo_response.dig('info', 'total') || 0
results = NormalizePrimoResults.new(primo_response, @enhanced_query[:q]).normalize
pagination = Analyzer.new(@enhanced_query, hits , :primo).pagination
pagination = Analyzer.new(@enhanced_query, hits, :primo).pagination

# Handle empty results from Primo API. Sometimes Primo will return no results at a given offset,
# despite claiming in the initial query that more are available. This happens randomly and
Expand All @@ -151,8 +220,9 @@ def fetch_primo_data
if results.empty?
docs = primo_response['docs'] if primo_response.is_a?(Hash)
if docs.nil? || docs.empty?
# Only show continuation for pagination scenarios (page > 1), not for searches with no results
show_continuation = true if current_page > 1
# Only show continuation for pagination scenarios (where offset is present), not for
# searches with no results
show_continuation = true if offset > 0
else
errors = [{ 'message' => 'No more results available at this page number.' }]
end
Expand All @@ -164,19 +234,10 @@ def fetch_primo_data
{ results: [], pagination: {}, errors: handle_primo_errors(e), show_continuation: false, hits: 0 }
end

def fetch_timdex_data
# For all tab, modify query to use half page size
if @active_tab == 'all'
per_page = ENV.fetch('RESULTS_PER_PAGE', '20').to_i / 2
page = @enhanced_query[:page] || 1
from_offset = ((page - 1) * per_page).to_s

query_builder = QueryBuilder.new(@enhanced_query)
query = query_builder.query
query['from'] = from_offset
else
query = QueryBuilder.new(@enhanced_query).query
end
def fetch_timdex_data(offset: nil, per_page: nil)
query = QueryBuilder.new(@enhanced_query).query
query['from'] = offset.to_s if offset
query['size'] = per_page.to_s if per_page

response = query_timdex(query)
errors = extract_errors(response)
Expand Down Expand Up @@ -223,7 +284,8 @@ def query_timdex(query)

def query_primo(per_page, offset)
# We generate unique cache keys to avoid naming collisions.
cache_key = generate_cache_key(@enhanced_query)
# Include per_page and offset in the cache key to ensure pagination works correctly.
cache_key = generate_cache_key(@enhanced_query.merge(per_page: per_page, offset: offset))

Rails.cache.fetch("#{cache_key}/primo", expires_in: 12.hours) do
primo_search = PrimoSearch.new
Expand Down
73 changes: 73 additions & 0 deletions app/models/merged_search_paginator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# frozen_string_literal: true

# MergedSearchPaginator encapsulates stateless merged pagination logic for
# combining two API result sets. It calculates the merge plan, per-backend API
# offsets, and merges the results for a given page.
#
# Merge order is a "zipper": Primo and Timdex records alternate, with Primo
# leading whenever both sources have contributed equally so far. Once one
# source is exhausted, the other fills the remainder of the page.
class MergedSearchPaginator
  attr_reader :primo_total, :timdex_total, :current_page, :per_page

  # @param primo_total [Integer] total hits reported by Primo
  # @param timdex_total [Integer] total hits reported by Timdex
  # @param current_page [Integer, String] 1-based page number; coerced with
  #   #to_i because controllers may pass the raw params value as a String
  # @param per_page [Integer, String] page size; coerced with #to_i
  def initialize(primo_total:, timdex_total:, current_page:, per_page:)
    @primo_total = primo_total.to_i
    @timdex_total = timdex_total.to_i
    @current_page = current_page.to_i
    @per_page = per_page.to_i
  end

  # Returns an array of :primo and :timdex symbols giving the merged result
  # order on the current page. The simulation walks every slot from the first
  # overall result but records only the slots that fall within this page.
  def merge_plan
    total_results = primo_total + timdex_total
    start_index = (current_page - 1) * per_page
    end_index = [start_index + per_page, total_results].min
    plan = []
    primo_used = 0
    timdex_used = 0
    index = 0
    while index < end_index
      # Primo leads on ties; a source is eligible only while it has hits left.
      # While index < end_index at least one source has hits remaining, so the
      # else branch can only mean Timdex still has records to contribute.
      if primo_used < primo_total && (timdex_used >= timdex_total || primo_used <= timdex_used)
        source = :primo
        primo_used += 1
      else
        source = :timdex
        timdex_used += 1
      end
      plan << source if index >= start_index
      index += 1
    end
    plan
  end

  # Returns [primo_offset, timdex_offset]: how many records each backend
  # contributes to all pages BEFORE this one, i.e. where each backend's chunk
  # for this page starts. Replays the same zipper walk used by #merge_plan.
  def api_offsets
    start_index = (current_page - 1) * per_page
    primo_offset = 0
    timdex_offset = 0
    index = 0
    while index < start_index
      if primo_offset < primo_total && (timdex_offset >= timdex_total || primo_offset <= timdex_offset)
        primo_offset += 1
      elsif timdex_offset < timdex_total
        timdex_offset += 1
      else
        # Both sources exhausted before reaching this page: nothing left to skip.
        break
      end
      index += 1
    end
    [primo_offset, timdex_offset]
  end

  # Merges two result arrays (each indexed from the start of this page's
  # chunk) according to the merge plan. Slots whose backend chunk came up
  # short are silently skipped rather than padded with nil.
  def merge_results(primo_results, timdex_results)
    merged = []
    primo_index = 0
    timdex_index = 0
    merge_plan.each do |source|
      if source == :primo
        merged << primo_results[primo_index] if primo_index < primo_results.length
        primo_index += 1
      else
        merged << timdex_results[timdex_index] if timdex_index < timdex_results.length
        timdex_index += 1
      end
    end
    merged
  end
end
Loading