Commit a516eec

Add pagination support to 'all' tab
Why these changes are being introduced:

We intentionally decided to implement pagination for the 'all' tab separately to avoid excessively large tickets.

Relevant ticket(s):
- [USE-178](https://mitlibraries.atlassian.net/browse/USE-178)
- [USE-179](https://mitlibraries.atlassian.net/browse/USE-179)

How this addresses that need:

This updates the Search Controller and Analyzer model to handle pagination from multiple sources (the 'all' tab) in addition to single sources.

Side effects of this change:

I found it very difficult to follow where and how the 'per-page' value is set. I tried to improve the situation by moving this value to env. I'm not sure it fully makes sense yet, but I doubt it will without a full refactor of the data flow architecture.
1 parent 77970ca commit a516eec

6 files changed: +367 -126 lines changed

README.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -116,6 +116,7 @@ may have unexpected consequences if applied to other TIMDEX UI apps.
 - `REQUEST_PERIOD` - time in minutes used along with `REQUESTS_PER_PERIOD`
 - `REDIRECT_REQUESTS_PER_PERIOD` - number of requests whose query string starts with our legacy redirect parameter that can be made per `REQUEST_PERIOD`
 - `REDIRECT_REQUEST_PERIOD` - time in minutes used along with `REDIRECT_REQUESTS_PER_PERIOD`
+- `RESULTS_PER_PAGE`: The number of results to display per page. Use an even number to avoid peculiarities. Defaults to 20 if unset.
 - `SENTRY_DSN`: Client key for Sentry exception logging.
 - `SENTRY_ENV`: Sentry environment for the application. Defaults to 'unknown' if unset.
 - `TACOS_SOURCE`: If set, this value is sent to TACOS (as the `sourceSystem` value) to distinguish which application
```
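Since the 'all' tab splits each page evenly between Primo and TIMDEX, the per-source page size is `RESULTS_PER_PAGE` halved with integer division (as in the controller changes below). A minimal sketch of why an even value matters; the variable names here are illustrative, not from the app:

```ruby
# Each source gets half of RESULTS_PER_PAGE via integer division.
even_per_page = 20
even_per_source = even_per_page / 2   # 10 + 10 = 20: the full page is used

odd_per_page = 21
odd_per_source = odd_per_page / 2     # 10 + 10 = 20: one result slot is silently lost
```

With an odd value, `2 * (n / 2)` is `n - 1`, so the 'all' tab shows one fewer result per page than configured.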

app/controllers/search_controller.rb

Lines changed: 61 additions & 37 deletions
```diff
@@ -23,7 +23,7 @@ def results
       render 'results_geo'
       return
     end
-
+
     # Route to appropriate search based on active tab
     case @active_tab
     when 'primo'
@@ -65,7 +65,8 @@ def load_geodata_results
     @errors = extract_errors(response)
     return unless @errors.nil?

-    @pagination = Analyzer.new(@enhanced_query, response, :timdex).pagination
+    hits = response.dig(:data, 'search', 'hits') || 0
+    @pagination = Analyzer.new(@enhanced_query, { hits: hits }, :timdex).pagination
     raw_results = extract_results(response)
     @results = NormalizeTimdexResults.new(raw_results, @enhanced_query[:q]).normalize
     @filters = extract_filters(response)
@@ -87,53 +88,66 @@ def load_timdex_results
   end

   def load_all_results
-    # Parallel fetching from both APIs
-    primo_thread = Thread.new { fetch_primo_data }
-    timdex_thread = Thread.new { fetch_timdex_data }
-
-    # Wait for both threads to complete
-    primo_data = primo_thread.value
-    timdex_data = timdex_thread.value
+    # Fetch results from both APIs in parallel
+    primo_data, timdex_data = fetch_all_data

-    # Collect any errors from either API
-    all_errors = []
-    all_errors.concat(primo_data[:errors]) if primo_data[:errors]
-    all_errors.concat(timdex_data[:errors]) if timdex_data[:errors]
-    @errors = all_errors.any? ? all_errors : nil
+    # Combine errors from both APIs
+    @errors = combine_errors(primo_data[:errors], timdex_data[:errors])

     # Zipper merge results from both APIs
-    primo_results = primo_data[:results] || []
-    timdex_results = timdex_data[:results] || []
-    @results = primo_results.zip(timdex_results).flatten.compact
+    @results = merge_results(primo_data[:results], timdex_data[:results])
+
+    # Use Analyzer for combined pagination calculation
+    @pagination = Analyzer.new(@enhanced_query, { hits: timdex_data[:hits] }, :all, { hits: primo_data[:hits] }).pagination

-    # For now, just use primo pagination as a placeholder
-    @pagination = primo_data[:pagination] || {}
-
     # Handle primo continuation for high page numbers
     @show_primo_continuation = primo_data[:show_continuation] || false
   end

+  def fetch_all_data
+    # Parallel fetching from both APIs
+    primo_thread = Thread.new { fetch_primo_data }
+    timdex_thread = Thread.new { fetch_timdex_data }
+
+    [primo_thread.value, timdex_thread.value]
+  end
+
+  def combine_errors(*error_arrays)
+    all_errors = error_arrays.compact.flatten
+    all_errors.any? ? all_errors : nil
+  end
+
+  def merge_results(primo_results, timdex_results)
+    (primo_results || []).zip(timdex_results || []).flatten.compact
+  end
+
   def fetch_primo_data
     current_page = @enhanced_query[:page] || 1
-    per_page = @enhanced_query[:per_page] || 20
+    per_page = if @active_tab == 'all'
+                 ENV.fetch('RESULTS_PER_PAGE', '20').to_i / 2
+               else
+                 ENV.fetch('RESULTS_PER_PAGE', '20').to_i
+               end
     offset = (current_page - 1) * per_page

     # Check if we're beyond Primo API limits before making the request.
     if offset >= Analyzer::PRIMO_MAX_OFFSET
-      return { results: [], pagination: {}, errors: nil, show_continuation: true }
+      return { results: [], pagination: {}, errors: nil, show_continuation: true, hits: 0 }
     end

-    primo_response = query_primo
+    primo_response = query_primo(per_page, offset)
+    hits = primo_response.dig('info', 'total') || 0
     results = NormalizePrimoResults.new(primo_response, @enhanced_query[:q]).normalize
-    pagination = Analyzer.new(@enhanced_query, primo_response, :primo).pagination
+    pagination = Analyzer.new(@enhanced_query, { hits: hits }, :primo).pagination

     # Handle empty results from Primo API. Sometimes Primo will return no results at a given offset,
     # despite claiming in the initial query that more are available. This happens randomly and
     # seemingly for no reason (well below the recommended offset of 2,000). While the bug also
     # exists in Primo UI, sending users there seems like the best we can do.
     show_continuation = false
     errors = nil
-
+
     if results.empty?
       docs = primo_response['docs'] if primo_response.is_a?(Hash)
       if docs.nil? || docs.empty?
@@ -144,23 +158,37 @@ def fetch_primo_data
       end
     end

-    { results: results, pagination: pagination, errors: errors, show_continuation: show_continuation }
+    { results: results, pagination: pagination, errors: errors, show_continuation: show_continuation,
+      hits: hits }
   rescue StandardError => e
-    { results: [], pagination: {}, errors: handle_primo_errors(e), show_continuation: false }
+    { results: [], pagination: {}, errors: handle_primo_errors(e), show_continuation: false, hits: 0 }
   end

   def fetch_timdex_data
-    query = QueryBuilder.new(@enhanced_query).query
+    # For all tab, modify query to use half page size
+    if @active_tab == 'all'
+      per_page = ENV.fetch('RESULTS_PER_PAGE', '20').to_i / 2
+      page = @enhanced_query[:page] || 1
+      from_offset = ((page - 1) * per_page).to_s
+
+      query_builder = QueryBuilder.new(@enhanced_query)
+      query = query_builder.query
+      query['from'] = from_offset
+    else
+      query = QueryBuilder.new(@enhanced_query).query
+    end
+
     response = query_timdex(query)
     errors = extract_errors(response)
-
+
     if errors.nil?
-      pagination = Analyzer.new(@enhanced_query, response, :timdex).pagination
+      hits = response.dig(:data, 'search', 'hits') || 0
+      pagination = Analyzer.new(@enhanced_query, { hits: hits }, :timdex).pagination
       raw_results = extract_results(response)
       results = NormalizeTimdexResults.new(raw_results, @enhanced_query[:q]).normalize
-      { results: results, pagination: pagination, errors: nil }
+      { results: results, pagination: pagination, errors: nil, hits: hits }
     else
-      { results: [], pagination: {}, errors: errors }
+      { results: [], pagination: {}, errors: errors, hits: 0 }
     end
   end

@@ -189,16 +217,12 @@ def query_timdex(query)
     end
   end

-  def query_primo
+  def query_primo(per_page, offset)
    # We generate unique cache keys to avoid naming collisions.
     cache_key = generate_cache_key(@enhanced_query)

     Rails.cache.fetch("#{cache_key}/primo", expires_in: 12.hours) do
       primo_search = PrimoSearch.new
-      per_page = @enhanced_query[:per_page] || 20
-      current_page = @enhanced_query[:page] || 1
-      offset = (current_page - 1) * per_page
-
       primo_search.search(@enhanced_query[:q], per_page, offset)
     end
   end
```
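The zipper merge introduced in `merge_results` interleaves the two result lists with `Array#zip`, then strips the `nil` padding that `zip` adds when the lists are uneven. A standalone sketch of the same one-liner (the sample values are illustrative):

```ruby
# Interleave Primo and TIMDEX results; flatten the pairs and drop nil padding.
def merge_results(primo_results, timdex_results)
  (primo_results || []).zip(timdex_results || []).flatten.compact
end

merge_results(%w[p1 p2 p3], %w[t1 t2])
# => ["p1", "t1", "p2", "t2", "p3"]
```

Note that `zip` is driven by its receiver: if Primo returns fewer rows than TIMDEX, any TIMDEX rows beyond Primo's length are dropped rather than appended.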

app/models/analyzer.rb

Lines changed: 24 additions & 29 deletions
```diff
@@ -1,54 +1,49 @@
 class Analyzer
   attr_accessor :pagination

-  RESULTS_PER_PAGE = 20
-
   # Primo API theoretical maximum recommended offset is 2000 records (per Ex Libris documentation)
   # but in practice, the API often can't deliver results beyond ~960 records for large result sets,
   # likely due to performance constraints.
   PRIMO_MAX_OFFSET = 960

-  def initialize(enhanced_query, response, source)
+  def initialize(enhanced_query, response, source, secondary_response = nil)
     @source = source
+    @enhanced_query = enhanced_query
     @pagination = {}
-    @pagination[:hits] = hits(response)
-    @pagination[:start] = ((enhanced_query[:page] - 1) * RESULTS_PER_PAGE) + 1
-    @pagination[:end] = [enhanced_query[:page] * RESULTS_PER_PAGE, @pagination[:hits]].min
-    @pagination[:prev] = enhanced_query[:page] - 1 if enhanced_query[:page] > 1
-    @pagination[:next] = next_page(enhanced_query[:page], @pagination[:hits]) if next_page(
-      enhanced_query[:page], @pagination[:hits]
-    )
-    @pagination[:per_page] = RESULTS_PER_PAGE
+    calculate_pagination(response, secondary_response)
   end

   private

-  def hits(response)
-    return 0 if response.nil?
-
-    if @source == :primo
-      primo_hits(response)
-    elsif @source == :timdex
-      timdex_hits(response)
+  def calculate_pagination(response, secondary_response = nil)
+    if @source == :all
+      @pagination[:hits] = (secondary_response&.dig(:hits) || 0) + (response&.dig(:hits) || 0)
+      @pagination[:per_page] = ENV.fetch('RESULTS_PER_PAGE', '20').to_i
+      calculate_all_pagination
     else
-      0
+      @pagination[:hits] = response&.dig(:hits) || 0
+      @pagination[:per_page] = ENV.fetch('RESULTS_PER_PAGE', '20').to_i
+      calculate_standard_pagination
     end
   end

-  def primo_hits(response)
-    return 0 unless response.is_a?(Hash)
-
-    response.dig('info', 'total') || 0
+  def calculate_standard_pagination
+    page = @enhanced_query[:page] || 1
+    @pagination[:start] = ((page - 1) * @pagination[:per_page]) + 1
+    @pagination[:end] = [page * @pagination[:per_page], @pagination[:hits]].min
+    @pagination[:prev] = page - 1 if page > 1
+    @pagination[:next] = next_page(page, @pagination[:hits]) if next_page(page, @pagination[:hits])
   end

-  def timdex_hits(response)
-    return 0 unless response.is_a?(Hash) && response.key?(:data) && response[:data].key?('search')
-    return 0 unless response[:data]['search'].is_a?(Hash) && response[:data]['search'].key?('hits')
-
-    response[:data]['search']['hits']
+  def calculate_all_pagination
+    page = @enhanced_query[:page] || 1
+    @pagination[:start] = ((page - 1) * @pagination[:per_page]) + 1
+    @pagination[:end] = [page * @pagination[:per_page], @pagination[:hits]].min
+    @pagination[:prev] = page - 1 if page > 1
+    @pagination[:next] = page + 1 if page * @pagination[:per_page] < @pagination[:hits]
   end

   def next_page(page, hits)
-    page + 1 if page * RESULTS_PER_PAGE < hits
+    page + 1 if page * @pagination[:per_page] < hits
   end
 end
```
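For the 'all' tab, the Analyzer sums the hit counts from both sources and pages over that combined total. A simplified sketch of the window arithmetic in `calculate_all_pagination`, assuming the env default of 20 results per page; the hit counts are illustrative:

```ruby
# Combined pagination window for the 'all' tab.
primo_hits  = 45
timdex_hits = 30
per_page    = 20   # ENV.fetch('RESULTS_PER_PAGE', '20').to_i
page        = 2

hits      = primo_hits + timdex_hits              # 75 total across both sources
start     = ((page - 1) * per_page) + 1           # first result on this page: 21
finish    = [page * per_page, hits].min           # last result on this page: 40
next_page = page + 1 if page * per_page < hits    # 3, since 40 < 75
```

Each source then contributes `per_page / 2` results toward the window, which is why the controller halves the configured page size per API.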

app/models/query_builder.rb

Lines changed: 4 additions & 4 deletions
```diff
@@ -1,7 +1,6 @@
 class QueryBuilder
   attr_reader :query

-  RESULTS_PER_PAGE = 20
   QUERY_PARAMS = %w[q citation contributors fundingInformation identifiers locations subjects title booleanType].freeze
   FILTER_PARAMS = %i[accessToFilesFilter contentTypeFilter contributorsFilter formatFilter languagesFilter
                      literaryFormFilter placesFilter sourceFilter subjectsFilter].freeze
@@ -10,7 +9,8 @@ class QueryBuilder

   def initialize(enhanced_query)
     @query = {}
-    @query['from'] = calculate_from(enhanced_query[:page])
+    @per_page = ENV.fetch('RESULTS_PER_PAGE', '20').to_i
+    @query['from'] = calculate_from(enhanced_query[:page], @per_page)

     if Feature.enabled?(:geodata)
       @query['geobox'] = 'true' if enhanced_query[:geobox] == 'true'
@@ -27,10 +27,10 @@ def initialize(enhanced_query)

   private

-  def calculate_from(page = 1)
+  def calculate_from(page = 1, per_page = ENV.fetch('RESULTS_PER_PAGE', '20').to_i)
     # This needs to return a string because Timdex needs $from to be a String
     page = 1 if page.to_i.zero?
-    ((page - 1) * RESULTS_PER_PAGE).to_s
+    ((page - 1) * per_page).to_s
   end

   def extract_query(enhanced_query)
```
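`calculate_from` converts a 1-indexed page number into a zero-based offset, returned as a String because the TIMDEX API expects `$from` to be one. A standalone sketch of the same logic (with a literal default in place of the env lookup):

```ruby
# Zero-based offset as a String, guarding against a nil or zero page.
def calculate_from(page = 1, per_page = 20)
  page = 1 if page.to_i.zero?
  ((page - 1) * per_page).to_s
end

calculate_from(3, 20)  # => "40"
calculate_from(nil)    # => "0"
```

The `page.to_i.zero?` guard covers both `nil` (since `nil.to_i` is `0`) and an explicit page 0, so the offset never goes negative.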
