Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 105 additions & 43 deletions app/controllers/search_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,48 +88,117 @@ def load_timdex_results
end

# Load merged "all" tab results from both Primo and Timdex.
#
# Page 1 can be served from a single full-page fetch per backend; deeper
# pages need hit-count probes first so the paginator can compute per-backend
# offsets. Normalizes the page number to an Integer up front — the raw
# params value may be a String, and downstream helpers do arithmetic on it.
#
# Sets @results, @errors, @pagination, and @show_primo_continuation.
def load_all_results
  current_page = (@enhanced_query[:page] || 1).to_i
  per_page = ENV.fetch('RESULTS_PER_PAGE', '20').to_i
  data = if current_page == 1
           fetch_all_tab_first_page(current_page, per_page)
         else
           fetch_all_tab_deeper_pages(current_page, per_page)
         end

  @results = data[:results]
  @errors = data[:errors]
  @pagination = data[:pagination]
  @show_primo_continuation = data[:show_primo_continuation]
end

# Zipper merge results from both APIs
@results = merge_results(primo_data[:results], timdex_data[:results])
# Page 1 of the "all" tab: request a full page from each backend at offset 0,
# build a paginator from the reported hit counts, and assemble the merged
# result hash. Over-fetching here is deliberate — the zipper merge discards
# whatever does not fit on the page.
def fetch_all_tab_first_page(current_page, per_page)
  fetch_opts = { offset: 0, per_page: per_page }
  primo_data, timdex_data = parallel_fetch(fetch_opts, fetch_opts)

  paginator = build_paginator_from_data(primo_data, timdex_data, current_page, per_page)

  assemble_all_tab_result(paginator, primo_data, timdex_data, current_page, per_page)
end

def fetch_all_data
# Parallel fetching from both APIs
primo_thread = Thread.new { fetch_primo_data }
timdex_thread = Thread.new { fetch_timdex_data }
# Deeper pages of the "all" tab: probe each backend for its total hit count
# (single-record requests), build the paginator from those totals, then fetch
# only the chunks this page actually needs from each backend.
def fetch_all_tab_deeper_pages(current_page, per_page)
  probe_opts = { offset: 0, per_page: 1 }
  primo_summary, timdex_summary = parallel_fetch(probe_opts, probe_opts)

  paginator = build_paginator_from_data(primo_summary, timdex_summary, current_page, per_page)
  primo_data, timdex_data = fetch_all_tab_page_chunks(paginator)

  assemble_all_tab_result(paginator, primo_data, timdex_data, current_page, per_page, deeper: true)
end

# Run the Primo and Timdex fetches concurrently and block until both finish.
# Thread#value joins each thread, returning its result and re-raising any
# exception the thread raised.
def parallel_fetch(primo_opts = {}, timdex_opts = {})
  [
    Thread.new { fetch_primo_data(**primo_opts) },
    Thread.new { fetch_timdex_data(**timdex_opts) }
  ].map(&:value)
end

# Wrap the hit counts reported by each API response in a MergedSearchPaginator.
# A missing hit count (e.g. a backend that errored) is treated as zero hits.
def build_paginator_from_data(primo_data, timdex_data, current_page, per_page)
  MergedSearchPaginator.new(
    primo_total: primo_data[:hits] || 0,
    timdex_total: timdex_data[:hits] || 0,
    current_page: current_page,
    per_page: per_page
  )
end

# For deeper pages: ask the paginator how many records each backend owes this
# page (merge plan) and where each backend's chunk starts (api_offsets), then
# fetch only the non-empty chunks, in parallel. A backend that contributes
# nothing to this page is not queried; a stub hash standing in for its
# response preserves the total hit count for downstream pagination.
def fetch_all_tab_page_chunks(paginator)
  plan = paginator.merge_plan
  primo_needed = plan.count(:primo)
  timdex_needed = plan.count(:timdex)
  primo_offset, timdex_offset = paginator.api_offsets

  primo_thread = nil
  if primo_needed.positive?
    primo_thread = Thread.new { fetch_primo_data(offset: primo_offset, per_page: primo_needed) }
  end

  timdex_thread = nil
  if timdex_needed.positive?
    timdex_thread = Thread.new { fetch_timdex_data(offset: timdex_offset, per_page: timdex_needed) }
  end

  primo_data = if primo_thread
                 primo_thread.value
               else
                 { results: [], errors: nil, hits: paginator.primo_total, show_continuation: false }
               end

  timdex_data = if timdex_thread
                  timdex_thread.value
                else
                  { results: [], errors: nil, hits: paginator.timdex_total }
                end

  [primo_data, timdex_data]
end

# Assemble the final result hash (results, errors, pagination, continuation
# flag) from the paginator plus the raw Primo/Timdex API payloads.
#
# On deeper pages the Primo continuation notice is also shown once the page
# offset passes Primo's maximum supported offset. The page number is coerced
# with #to_i before arithmetic because callers may pass the raw params value
# as a String.
def assemble_all_tab_result(paginator, primo_data, timdex_data, current_page, per_page, deeper: false)
  primo_total = primo_data[:hits] || 0
  timdex_total = timdex_data[:hits] || 0

  merged = paginator.merge_results(primo_data[:results] || [], timdex_data[:results] || [])
  errors = combine_errors(primo_data[:errors], timdex_data[:errors])
  pagination = Analyzer.new(@enhanced_query, timdex_total, :all, primo_total).pagination

  show_primo_continuation = if deeper
                              page_offset = (current_page.to_i - 1) * per_page
                              primo_data[:show_continuation] || (page_offset >= Analyzer::PRIMO_MAX_OFFSET)
                            else
                              primo_data[:show_continuation]
                            end

  { results: merged, errors: errors, pagination: pagination, show_primo_continuation: show_primo_continuation }
end

# Flatten error arrays from multiple backends into one list.
# Nil arguments are dropped first (so nil elements inside an array survive,
# matching the original compact-then-flatten order). Returns nil when no
# truthy errors remain, so callers can treat "no errors" as falsy.
def combine_errors(*error_arrays)
  collected = error_arrays.compact.flatten
  return unless collected.any?

  collected
end

# Zipper-merge two result lists: primo[0], timdex[0], primo[1], timdex[1], ...
# Note zip truncates to the left list's length, so extra Timdex results beyond
# the Primo count are dropped; nil pads from uneven lists are removed by compact.
def merge_results(primo_results, timdex_results)
  primo_list = primo_results || []
  timdex_list = timdex_results || []
  primo_list.zip(timdex_list).flatten.compact
end

def fetch_primo_data
def fetch_primo_data(offset: nil, per_page: nil)
# Default to current page if not provided
current_page = @enhanced_query[:page] || 1
per_page = if @active_tab == 'all'
ENV.fetch('RESULTS_PER_PAGE', '20').to_i / 2
else
ENV.fetch('RESULTS_PER_PAGE', '20').to_i
end
offset = (current_page - 1) * per_page
per_page ||= ENV.fetch('RESULTS_PER_PAGE', '20').to_i
offset ||= (current_page - 1) * per_page

# Check if we're beyond Primo API limits before making the request.
if offset >= Analyzer::PRIMO_MAX_OFFSET
Expand All @@ -139,7 +208,7 @@ def fetch_primo_data
primo_response = query_primo(per_page, offset)
hits = primo_response.dig('info', 'total') || 0
results = NormalizePrimoResults.new(primo_response, @enhanced_query[:q]).normalize
pagination = Analyzer.new(@enhanced_query, hits , :primo).pagination
pagination = Analyzer.new(@enhanced_query, hits, :primo).pagination

# Handle empty results from Primo API. Sometimes Primo will return no results at a given offset,
# despite claiming in the initial query that more are available. This happens randomly and
Expand All @@ -151,8 +220,9 @@ def fetch_primo_data
if results.empty?
docs = primo_response['docs'] if primo_response.is_a?(Hash)
if docs.nil? || docs.empty?
# Only show continuation for pagination scenarios (page > 1), not for searches with no results
show_continuation = true if current_page > 1
# Only show continuation for pagination scenarios (where offset is present), not for
# searches with no results
show_continuation = true if offset > 0
else
errors = [{ 'message' => 'No more results available at this page number.' }]
end
Expand All @@ -164,19 +234,10 @@ def fetch_primo_data
{ results: [], pagination: {}, errors: handle_primo_errors(e), show_continuation: false, hits: 0 }
end

def fetch_timdex_data
# For all tab, modify query to use half page size
if @active_tab == 'all'
per_page = ENV.fetch('RESULTS_PER_PAGE', '20').to_i / 2
page = @enhanced_query[:page] || 1
from_offset = ((page - 1) * per_page).to_s

query_builder = QueryBuilder.new(@enhanced_query)
query = query_builder.query
query['from'] = from_offset
else
query = QueryBuilder.new(@enhanced_query).query
end
def fetch_timdex_data(offset: nil, per_page: nil)
query = QueryBuilder.new(@enhanced_query).query
query['from'] = offset.to_s if offset
query['size'] = per_page.to_s if per_page

response = query_timdex(query)
errors = extract_errors(response)
Expand Down Expand Up @@ -223,7 +284,8 @@ def query_timdex(query)

def query_primo(per_page, offset)
# We generate unique cache keys to avoid naming collisions.
cache_key = generate_cache_key(@enhanced_query)
# Include per_page and offset in the cache key to ensure pagination works correctly.
cache_key = generate_cache_key(@enhanced_query.merge(per_page: per_page, offset: offset))

Rails.cache.fetch("#{cache_key}/primo", expires_in: 12.hours) do
primo_search = PrimoSearch.new
Expand Down
73 changes: 73 additions & 0 deletions app/models/merged_search_paginator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# frozen_string_literal: true

# MergedSearchPaginator encapsulates stateless merged pagination logic for
# combining two API result sets. It calculates the merge plan, per-backend API
# offsets, and merges the results for a given page.
#
# Merge order is a "zipper": Primo and Timdex records alternate, with Primo
# leading whenever both sources have contributed equally so far. Once one
# source is exhausted, the other fills the remainder of the page.
class MergedSearchPaginator
  attr_reader :primo_total, :timdex_total, :current_page, :per_page

  # @param primo_total [Integer] total hits reported by Primo
  # @param timdex_total [Integer] total hits reported by Timdex
  # @param current_page [Integer, String] 1-based page number; coerced with
  #   #to_i because controllers may pass the raw params value as a String
  # @param per_page [Integer, String] page size; coerced with #to_i
  def initialize(primo_total:, timdex_total:, current_page:, per_page:)
    @primo_total = primo_total.to_i
    @timdex_total = timdex_total.to_i
    @current_page = current_page.to_i
    @per_page = per_page.to_i
  end

  # Returns an array of :primo and :timdex symbols giving the merged result
  # order on the current page. The simulation walks every slot from the first
  # overall result but records only the slots that fall within this page.
  def merge_plan
    total_results = primo_total + timdex_total
    start_index = (current_page - 1) * per_page
    end_index = [start_index + per_page, total_results].min
    plan = []
    primo_used = 0
    timdex_used = 0
    index = 0
    while index < end_index
      # Primo leads on ties; a source is eligible only while it has hits left.
      # While index < end_index at least one source has hits remaining, so the
      # else branch can only mean Timdex still has records to contribute.
      if primo_used < primo_total && (timdex_used >= timdex_total || primo_used <= timdex_used)
        source = :primo
        primo_used += 1
      else
        source = :timdex
        timdex_used += 1
      end
      plan << source if index >= start_index
      index += 1
    end
    plan
  end

  # Returns [primo_offset, timdex_offset]: how many records each backend
  # contributes to all pages BEFORE this one, i.e. where each backend's chunk
  # for this page starts. Replays the same zipper walk used by #merge_plan.
  def api_offsets
    start_index = (current_page - 1) * per_page
    primo_offset = 0
    timdex_offset = 0
    index = 0
    while index < start_index
      if primo_offset < primo_total && (timdex_offset >= timdex_total || primo_offset <= timdex_offset)
        primo_offset += 1
      elsif timdex_offset < timdex_total
        timdex_offset += 1
      else
        # Both sources exhausted before reaching this page: nothing left to skip.
        break
      end
      index += 1
    end
    [primo_offset, timdex_offset]
  end

  # Merges two result arrays (each indexed from the start of this page's
  # chunk) according to the merge plan. Slots whose backend chunk came up
  # short are silently skipped rather than padded with nil.
  def merge_results(primo_results, timdex_results)
    merged = []
    primo_index = 0
    timdex_index = 0
    merge_plan.each do |source|
      if source == :primo
        merged << primo_results[primo_index] if primo_index < primo_results.length
        primo_index += 1
      else
        merged << timdex_results[timdex_index] if timdex_index < timdex_results.length
        timdex_index += 1
      end
    end
    merged
  end
end
Loading