Skip to content

Commit a695553

Browse files
committed
Cache Primo queries
Why these changes are being introduced: The Primo API performs poorly. We should do what we can to mitigate this. Relevant ticket(s): * [USE-76](https://mitlibraries.atlassian.net/browse/USE-76) How this addresses that need: This adds caching to Primo queries. Side effects of this change: None, but it's worth noting that we experience many unique searches, and this will only help with latency if USE UI has seen the query before.
1 parent a4922a2 commit a695553

File tree

1 file changed

+40
-29
lines changed

1 file changed

+40
-29
lines changed

app/controllers/search_controller.rb

Lines changed: 40 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ def results
1717

1818
# Determine which tab to load - default to primo unless gdt is enabled
1919
@active_tab = if Flipflop.enabled?(:gdt)
20-
'gdt' # Keep existing GDT behavior unchanged
20+
'gdt' # Keep existing GDT behavior unchanged
2121
else
22-
params[:tab] || 'primo' # Default to primo for new tabbed interface
22+
params[:tab] || 'primo' # Default to primo for new tabbed interface
2323
end
2424
@enhanced_query = Enhancer.new(params).enhanced_query
2525

@@ -53,25 +53,23 @@ def load_gdt_results
5353
end
5454

5555
# Loads Primo search results via the cached query helper and builds
# pagination metadata for the view.
#
# Side effects: sets @results, @pagination, and (on failure) @errors.
# Assumes @enhanced_query was populated earlier in the request cycle
# (see Enhancer in the results action).
def load_primo_results
  primo_response = cache_primo_query
  @results = NormalizePrimoResults.new(primo_response, @enhanced_query[:q]).normalize

  if @results.present?
    # Prefer the total reported by Primo; fall back to the page's own count.
    total_hits = (primo_response.dig('info', 'total') || @results.count).to_i

    # Params-derived values may arrive as Strings; coerce before arithmetic
    # so "(current_page - 1) * per_page" cannot raise TypeError.
    per_page = (@enhanced_query[:per_page] || 20).to_i
    per_page = 20 unless per_page.positive?
    current_page = (@enhanced_query[:page] || 1).to_i
    current_page = 1 unless current_page.positive?

    @pagination = {
      hits: total_hits,
      start: ((current_page - 1) * per_page) + 1,
      end: [current_page * per_page, total_hits].min
    }
  end
rescue StandardError => e
  @errors = handle_primo_errors(e)
end
7674

7775
def load_timdex_results
@@ -88,16 +86,9 @@ def active_filters
8886
end
8987

9088
def cache_timdex_query(query)
91-
# Create cache key for this query
92-
# Sorting query hash to ensure consistent key generation regardless of the parameter order
93-
sorted_query = query.sort_by { |k, v| k.to_sym }.to_h
94-
cache_key = Digest::MD5.hexdigest(sorted_query.to_s)
89+
cache_key = generate_cache_key(query)
9590

9691
# builder hands off to wrapper which returns raw results here
97-
# We are using two different caches to allow for Geo and USE to be cached separately. This ensures we don't have
98-
# cache key collision for these two different query types. In practice, the likelihood of this happening is low,
99-
# as the query parameters are different for each type and they won't often be run with the same cache backend other
100-
# than locally, but this is a safeguard.
10192
# The response type is a GraphQL::Client::Response, which is not directly serializable, so we convert it to a hash.
10293
Rails.cache.fetch("#{cache_key}/#{@active_tab}", expires_in: 12.hours) do
10394
raw = if @active_tab == 'gdt'
@@ -112,6 +103,26 @@ def cache_timdex_query(query)
112103
end
113104
end
114105

106+
# Runs the Primo search for the current enhanced query, memoizing the raw
# response in the Rails cache for 12 hours. The "/primo" suffix keeps these
# entries distinct from TIMDEX cache entries built from the same digest.
def cache_primo_query
  key = "#{generate_cache_key(@enhanced_query)}/primo"
  Rails.cache.fetch(key, expires_in: 12.hours) do
    results_per_page = @enhanced_query[:per_page] || 20
    PrimoSearch.new.search(@enhanced_query[:q], results_per_page)
  end
end
115+
116+
# We use two different caches so Geo and USE results are cached separately:
# callers append a tab-specific suffix (e.g. "/gdt", "/primo") to the digest
# returned here, which guards against key collisions between the two query
# types. In practice collisions are unlikely (the parameter sets differ and
# the query types rarely share a cache backend outside local development),
# but this is a safeguard.
def generate_cache_key(query)
  # Order-insensitive: sort entries by symbolized key so parameter order
  # never changes the resulting digest.
  canonical = query.sort_by { |key, _value| key.to_sym }.to_h
  Digest::MD5.hexdigest(canonical.to_s)
end
125+
115126
def execute_geospatial_query(query)
116127
if query['geobox'] == 'true' && query[:geodistance] == 'true'
117128
TimdexBase::Client.query(TimdexSearch::AllQuery, variables: query)
@@ -261,7 +272,7 @@ def validate_geobox_values!
261272

262273
def handle_primo_errors(error)
263274
Rails.logger.error("Primo search error: #{error.message}")
264-
275+
265276
if error.is_a?(ArgumentError)
266277
[{ 'message' => 'Primo search is not properly configured.' }]
267278
elsif error.is_a?(HTTP::TimeoutError)

0 commit comments

Comments
 (0)