diff --git a/.env.test b/.env.test index 2f053a6e..3398f16e 100644 --- a/.env.test +++ b/.env.test @@ -7,6 +7,9 @@ PRIMO_SCOPE=cdi PRIMO_TAB=all PRIMO_VID=01MIT_INST:MIT SYNDETICS_PRIMO_URL=https://syndetics.com/index.php?client=primo +TACOS_HOST=FAKE_TACOS_HOST +TACOS_URL=http://FAKE_TACOS_HOST/graphql +TACOS_SOURCE=FAKE_TACOS_SOURCE TIMDEX_GRAPHQL=https://FAKE_TIMDEX_HOST/graphql TIMDEX_HOST=FAKE_TIMDEX_HOST TIMDEX_INDEX=FAKE_TIMDEX_INDEX \ No newline at end of file diff --git a/README.md b/README.md index 0ac7c522..3af8e50f 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,7 @@ mode (e.g., `GDT=false` will still enable GDT features). Note that this is curre may have unexpected consequences if applied to other TIMDEX UI apps. - `GLOBAL_ALERT`: The main functionality for this comes from our theme gem, but when set the value will be rendered as safe html above the main header of the site. +- `ORIGINS`: sets origins for CORS (currently used only for TACOS API calls). - `PLATFORM_NAME`: The value set is added to the header after the MIT Libraries logo. The logic and CSS for this comes from our theme gem. - `PRIMO_TIMEOUT`: The number of seconds before a Primo request times out (default 6). - `REQUESTS_PER_PERIOD` - number of requests that can be made for general throttles per `REQUEST_PERIOD` @@ -136,6 +137,10 @@ may have unexpected consequences if applied to other TIMDEX UI apps. - `REDIRECT_REQUEST_PERIOD`- time in minutes used along with `REDIRECT_REQUEST_PERIOD` - `SENTRY_DSN`: Client key for Sentry exception logging. - `SENTRY_ENV`: Sentry environment for the application. Defaults to 'unknown' if unset. +- `TACOS_SOURCE`: If set, this value is sent to TACOS (as the `sourceSystem` value) to distinguish which application +instance is sending what search traffic. Defaults to `timdexui_unset` if not defined. +- `TACOS_URL`: The GraphQL endpoint for the [TACOS API](https://github.com/mitlibraries/tacos/). 
When set, the + application will log search terms to TACOS (and eventually return suggested resources that TACOS detects). - `TIMDEX_INDEX`: Name of the index, or alias, to provide to the GraphQL endpoint. Defaults to `nil` which will let TIMDEX determine the best index to use. Wildcard values can be set, for example `rdi*` would search any indexes that begin with `rdi` in the underlying OpenSearch instance behind TIMDEX. - `TIMDEX_SOURCES`: Comma-separated list of sources to display in the advanced-search source selection element. This overrides the default which is set in ApplicationHelper. @@ -143,6 +148,10 @@ may have unexpected consequences if applied to other TIMDEX UI apps. #### Test Environment-only Variables - `SPEC_REPORTER`: Optional variable. If set, enables spec reporter style output from tests rather than minimal output. +- `TACOS_HOST`: Test Env only. Used to ensure the VCR cassettes can properly scrub specific host data to make sure we +get the same cassettes regardless of which host was used to generate the cassettes. This should be set to the hostname +that matches `TACOS_URL`. Ex: If `TACOS_URL` is `http://localhost:3001/graphql` then `TACOS_HOST` should be +`localhost:3001`. - `TIMDEX_HOST`: Test Env only. Used to ensure the VCR cassettes can properly scrub specific host data to make sure we get the same cassettes regardless of which host was used to generate the cassettes. This should be set to the host name that matches `TIMDEX_GRAPHQL`. Ex: If `TIMDEX_GRAPHQL` is `https://www.example.com/graphql` then `TIMDEX_HOST` should be `www.example.com`. ### Generating VCR Cassettes diff --git a/app/controllers/tacos_controller.rb b/app/controllers/tacos_controller.rb new file mode 100644 index 00000000..715e2247 --- /dev/null +++ b/app/controllers/tacos_controller.rb @@ -0,0 +1,9 @@ +class TacosController < ApplicationController + layout false + + def analyze + return unless ApplicationHelper.tacos_enabled? 
+
+    Tacos.analyze(params[:q])
+  end
+end
diff --git a/app/helpers/application_helper.rb b/app/helpers/application_helper.rb
index 710d0aa5..04cb03c6 100644
--- a/app/helpers/application_helper.rb
+++ b/app/helpers/application_helper.rb
@@ -1,4 +1,9 @@
 module ApplicationHelper
+  def tacos_enabled?
+    ENV.fetch('TACOS_URL', '').present?
+  end
+  module_function :tacos_enabled?
+
   def timdex_sources
     ENV.fetch('TIMDEX_SOURCES', timdex_source_defaults).split(',')
   end
diff --git a/app/models/tacos.rb b/app/models/tacos.rb
new file mode 100644
index 00000000..f9e1bc33
--- /dev/null
+++ b/app/models/tacos.rb
@@ -0,0 +1,74 @@
+# Thin client for the TACOS GraphQL API.
+class Tacos
+  # Selection set requested for every logged search event.
+  RESPONSE_FIELDS = '{ phrase source detectors { suggestedResources { title url } } }'.freeze
+
+  # Logs a search term to TACOS and returns the parsed JSON response.
+  #
+  # The tacos_client argument here is unused in production - it is provided for
+  # our test suite so that we can mock various error conditions to ensure that
+  # error handling happens as we intend.
+  #
+  # @param term [String, nil] search term to log; a blank term is not sent
+  # @param tacos_client [#timeout, nil] optional replacement for the HTTP client
+  # @return [Hash] parsed response body, or a hash with an "error" key on failure
+  def self.analyze(term, tacos_client = nil)
+    return { 'error' => 'No search term was provided' } if clean_term(term).strip.empty?
+
+    tacos_http = setup(tacos_client)
+    raw_response = tacos_http.timeout(http_timeout).post(tacos_url, body: request_body(term))
+    JSON.parse(raw_response.to_s)
+  rescue HTTP::Error
+    { 'error' => 'A connection error has occurred' }
+  rescue JSON::ParserError
+    { 'error' => 'A parsing error has occurred' }
+  end
+
+  # Serializes the GraphQL request. Every layer is escaped with JSON.generate
+  # rather than concatenated by hand, so a backslash, control character or
+  # \uXXXX sequence in the term cannot terminate the string early and alter
+  # the query.
+  def self.request_body(term)
+    arguments = "searchTerm: #{JSON.generate(clean_term(term))}, " \
+                "sourceSystem: #{JSON.generate(tacos_source)}"
+    JSON.generate(query: "{ logSearchEvent(#{arguments}) #{RESPONSE_FIELDS} }")
+  end
+
+  # Coerces nil to an empty string, replaces invalid bytes, and (as before)
+  # swaps double quotes for single quotes.
+  def self.clean_term(term)
+    term.to_s.scrub.tr('"', "'")
+  end
+
+  # Seconds to wait on TACOS.
+  # NOTE(review): this reads TIMDEX_TIMEOUT, not a TACOS-specific setting - confirm that is intended.
+  def self.http_timeout
+    ENV.fetch('TIMDEX_TIMEOUT', 6).to_f
+  end
+
+  def self.origins
+    ENV.fetch('ORIGINS', nil)
+  end
+
+  # We define the HTTP connection this way so that it can be overridden during
+  # testing, to make sure that the .analyze method can handle specific error
+  # conditions. The client is not persistent: only one request is made per
+  # call, and a persistent connection would stay open because nothing closes it.
+  def self.setup(tacos_client)
+    tacos_client || HTTP.headers(accept: 'application/json',
+                                 'Content-Type': 'application/json',
+                                 origin: origins)
+  end
+
+  def self.tacos_source
+    ENV.fetch('TACOS_SOURCE', 'timdexui_unset')
+  end
+
+  def self.tacos_url
+    ENV.fetch('TACOS_URL', nil)
+  end
+
+  # `private` does not apply to `def self.` methods, so hide the helpers explicitly.
+  private_class_method :clean_term, :http_timeout, :origins, :request_body,
+                       :setup, :tacos_source, :tacos_url
+end
diff --git a/app/views/search/_trigger_tacos.html.erb b/app/views/search/_trigger_tacos.html.erb
new file mode 100644
index 00000000..967e34fc
--- /dev/null
+++ b/app/views/search/_trigger_tacos.html.erb
@@ -0,0 +1,8 @@
+<% return unless (tacos_enabled? and @enhanced_query[:q].present?) %>
+
+<% data_url = "/analyze?q=#{URI.encode_www_form_component(@enhanced_query[:q])}" %>
+
+
> +
diff --git a/app/views/search/results.html.erb b/app/views/search/results.html.erb index d9b97e02..aa89ac79 100644 --- a/app/views/search/results.html.erb +++ b/app/views/search/results.html.erb @@ -68,4 +68,6 @@ <% end %> +<%= render(partial: 'trigger_tacos') if tacos_enabled? %> + <%= javascript_include_tag "filters" %> diff --git a/app/views/tacos/analyze.html.erb b/app/views/tacos/analyze.html.erb new file mode 100644 index 00000000..ef05b8b3 --- /dev/null +++ b/app/views/tacos/analyze.html.erb @@ -0,0 +1 @@ + diff --git a/config/routes.rb b/config/routes.rb index ec77b2e3..ce24c7ef 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -8,6 +8,8 @@ get 'issn', to: 'fact#issn' get 'pmid', to: 'fact#pmid' + get 'analyze', to: 'tacos#analyze' + get 'record/(:id)', to: 'record#view', as: 'record', diff --git a/test/controllers/search_controller_test.rb b/test/controllers/search_controller_test.rb index b3be172c..c33385ed 100644 --- a/test/controllers/search_controller_test.rb +++ b/test/controllers/search_controller_test.rb @@ -275,7 +275,7 @@ def setup end assert_response :success - assert_select('div[data-content-loader-url-value]', 0) + assert_select('div[data-content-loader-url-value].fact-container', 0) end end @@ -305,7 +305,7 @@ def setup assert_response :success - assert_select('div[data-content-loader-url-value]', 0) + assert_select('div[data-content-loader-url-value].fact-container', 0) end end @@ -333,7 +333,7 @@ def setup end assert_response :success - assert_select('div[data-content-loader-url-value]', 0) + assert_select('div[data-content-loader-url-value].fact-container', 0) end end @@ -361,7 +361,31 @@ def setup end assert_response :success - assert_select('div[data-content-loader-url-value]', 0) + assert_select('div[data-content-loader-url-value].fact-container', 0) + end + end + + test 'TACOS intervention is inserted when TACOS enabled' do + VCR.use_cassette('tacos', + allow_playback_repeats: true) do + get '/results?q=tacos' + + assert_response 
:success + + tacos_div = assert_select('div[data-content-loader-url-value].tacos-container') + assert_equal '/analyze?q=tacos', tacos_div.attribute('data-content-loader-url-value').value + end + end + + test 'TACOS intervention not inserted when TACOS not enabled' do + VCR.use_cassette('tacos', + allow_playback_repeats: true) do + ClimateControl.modify(TACOS_URL: '') do + get '/results?q=tacos' + end + assert_response :success + + assert_select('div[data-content-loader-url-value].tacos-container', 0) end end diff --git a/test/controllers/tacos_controller_test.rb b/test/controllers/tacos_controller_test.rb new file mode 100644 index 00000000..9432182f --- /dev/null +++ b/test/controllers/tacos_controller_test.rb @@ -0,0 +1,12 @@ +require 'test_helper' + +class TacosControllerTest < ActionDispatch::IntegrationTest + test 'analyze route exists but returns an HTML comment for now' do + VCR.use_cassette('tacos direct') do + get '/analyze?q=direct' + + assert_response :success + assert_equal "\n", response.body + end + end +end diff --git a/test/helpers/application_helper_test.rb b/test/helpers/application_helper_test.rb index 01df7651..cf3fdbba 100644 --- a/test/helpers/application_helper_test.rb +++ b/test/helpers/application_helper_test.rb @@ -3,6 +3,16 @@ class ApplicationHelperTest < ActionView::TestCase include ApplicationHelper + test 'tacos_enabled? requires TACOS_URL env variable to have non-zero length' do + ClimateControl.modify TACOS_URL: 'foo' do + assert_equal true, tacos_enabled? + end + + ClimateControl.modify TACOS_URL: '' do + assert_equal false, tacos_enabled? 
+ end + end + test 'index_page_title renders as expected' do assert_equal 'Search | MIT Libraries', index_page_title end diff --git a/test/models/tacos_test.rb b/test/models/tacos_test.rb new file mode 100644 index 00000000..8d545b5a --- /dev/null +++ b/test/models/tacos_test.rb @@ -0,0 +1,62 @@ +require 'test_helper' + +class TacosConnectionError + def timeout(_) + self + end + + def post(_url, body:) + raise HTTP::ConnectionError, "forced connection failure" + end +end + +class TacosParsingError + def timeout(_) + self + end + + def post(_url, body:) + 'This is not valid json' + end +end + +class TacosTest < ActiveSupport::TestCase + test 'TACOS model has a call method that reflects a search term back' do + VCR.use_cassette('tacos popcorn') do + searchterm = 'popcorn' + + result = Tacos.analyze(searchterm) + + assert_instance_of Hash, result + assert_equal searchterm, result['data']['logSearchEvent']['phrase'] + end + end + + test 'TACOS model will use ENV to populate the sourceSystem value' do + VCR.use_cassette('tacos fake system') do + ClimateControl.modify(TACOS_SOURCE: 'faked') do + result = Tacos.analyze('popcorn') + + assert_equal 'faked', result['data']['logSearchEvent']['source'] + end + end + end + + test 'TACOS model catches connection errors' do + tacos_client = TacosConnectionError.new + + result = Tacos.analyze('popcorn', tacos_client) + + assert_instance_of Hash, result + assert_equal 'A connection error has occurred', result['error'] + end + + test 'TACOS model catches parsing errors' do + tacos_client = TacosParsingError.new + + result = Tacos.analyze('popcorn', tacos_client) + + assert_instance_of Hash, result + assert_equal 'A parsing error has occurred', result['error'] + end +end diff --git a/test/test_helper.rb b/test/test_helper.rb index 60b82092..3f113331 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -18,7 +18,7 @@ end VCR.configure do |config| - config.ignore_localhost = true + config.ignore_localhost = false 
config.cassette_library_dir = 'test/vcr_cassettes' config.hook_into :webmock config.allow_http_connections_when_no_cassette = false @@ -26,6 +26,8 @@ config.filter_sensitive_data('http://FAKE_TIMDEX_HOST/graphql/') { ENV.fetch('TIMDEX_GRAPHQL').to_s } config.filter_sensitive_data('FAKE_TIMDEX_INDEX') { ENV.fetch('TIMDEX_INDEX').to_s } config.filter_sensitive_data('FAKE_PRIMO_API_KEY') { ENV.fetch('PRIMO_API_KEY').to_s } + config.filter_sensitive_data('FAKE_TACOS_HOST') { ENV.fetch('TACOS_HOST').to_s } + config.filter_sensitive_data('http://FAKE_TACOS_HOST/graphql/') { ENV.fetch('TACOS_URL').to_s } end module ActiveSupport diff --git a/test/vcr_cassettes/tacos.yml b/test/vcr_cassettes/tacos.yml new file mode 100644 index 00000000..58875a52 --- /dev/null +++ b/test/vcr_cassettes/tacos.yml @@ -0,0 +1,88 @@ +--- +http_interactions: +- request: + method: post + uri: https://FAKE_TIMDEX_HOST/graphql + body: + encoding: UTF-8 + string: '{"query":"query TimdexSearch__BaseQuery($q: String, $citation: String, + $contributors: String, $fundingInformation: String, $identifiers: String, + $locations: String, $subjects: String, $title: String, $index: String, $from: + String, $booleanType: String, $accessToFilesFilter: [String!], $contentTypeFilter: + [String!], $contributorsFilter: [String!], $formatFilter: [String!], $languagesFilter: + [String!], $literaryFormFilter: String, $placesFilter: [String!], $sourceFilter: + [String!], $subjectsFilter: [String!]) {\n search(searchterm: $q, citation: + $citation, contributors: $contributors, fundingInformation: $fundingInformation, + identifiers: $identifiers, locations: $locations, subjects: $subjects, title: + $title, index: $index, from: $from, booleanType: $booleanType, accessToFilesFilter: + $accessToFilesFilter, contentTypeFilter: $contentTypeFilter, contributorsFilter: + $contributorsFilter, formatFilter: $formatFilter, languagesFilter: $languagesFilter, + literaryFormFilter: $literaryFormFilter, placesFilter: $placesFilter, 
sourceFilter: + $sourceFilter, subjectsFilter: $subjectsFilter) {\n hits\n records {\n timdexRecordId\n title\n contentType\n contributors + {\n kind\n value\n }\n publicationInformation\n dates + {\n kind\n value\n }\n links {\n kind\n restrictions\n text\n url\n }\n notes + {\n kind\n value\n }\n highlight {\n matchedField\n matchedPhrases\n }\n provider\n rights + {\n kind\n description\n uri\n }\n sourceLink\n summary\n }\n aggregations + {\n accessToFiles {\n key\n docCount\n }\n contentType + {\n key\n docCount\n }\n contributors {\n key\n docCount\n }\n format + {\n key\n docCount\n }\n languages {\n key\n docCount\n }\n literaryForm + {\n key\n docCount\n }\n places {\n key\n docCount\n }\n source + {\n key\n docCount\n }\n subjects {\n key\n docCount\n }\n }\n }\n}","variables":{"from":"0","q":"tacos","booleanType":"AND","index":"FAKE_TIMDEX_INDEX"},"operationName":"TimdexSearch__BaseQuery"}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - MIT Libraries Client + Content-Type: + - application/json + response: + status: + code: 200 + message: OK + headers: + Cache-Control: + - max-age=0, private, must-revalidate + Content-Length: + - '39172' + Content-Type: + - application/json; charset=utf-8 + Etag: + - W/"46d0fef2dc0398207ff0f67ac58ceb8f" + Nel: + - '{"report_to":"heroku-nel","response_headers":["Via"],"max_age":3600,"success_fraction":0.01,"failure_fraction":0.1}' + Referrer-Policy: + - strict-origin-when-cross-origin + Report-To: + - '{"group":"heroku-nel","endpoints":[{"url":"https://nel.heroku.com/reports?s=9lAxEBscLCBgQX%2Bp%2FLIPNnsiIz971izVEylC%2FOD3ERE%3D\u0026sid=67ff5de4-ad2b-4112-9289-cf96be89efed\u0026ts=1759503031"}],"max_age":3600}' + Reporting-Endpoints: + - heroku-nel="https://nel.heroku.com/reports?s=9lAxEBscLCBgQX%2Bp%2FLIPNnsiIz971izVEylC%2FOD3ERE%3D&sid=67ff5de4-ad2b-4112-9289-cf96be89efed&ts=1759503031" + Server: + - Heroku + Strict-Transport-Security: + - 
max-age=63072000; includeSubDomains + Vary: + - Accept, Origin + Via: + - 1.1 heroku-router + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Permitted-Cross-Domain-Policies: + - none + X-Request-Id: + - 356d6b91-414b-0ba9-98e4-597599888538 + X-Runtime: + - '0.487842' + X-Xss-Protection: + - '0' + Date: + - Fri, 03 Oct 2025 14:50:31 GMT + body: + encoding: ASCII-8BIT + string: !binary |- +  + recorded_at: Fri, 03 Oct 2025 14:50:31 GMT +recorded_with: VCR 6.3.1 diff --git a/test/vcr_cassettes/tacos_direct.yml b/test/vcr_cassettes/tacos_direct.yml new file mode 100644 index 00000000..d2a93580 --- /dev/null +++ b/test/vcr_cassettes/tacos_direct.yml @@ -0,0 +1,59 @@ +--- +http_interactions: +- request: + method: post + uri: http://FAKE_TACOS_HOST/graphql + body: + encoding: UTF-8 + string: '{ "query": "{ logSearchEvent(searchTerm: \"direct\", sourceSystem: + \"use\" ) { phrase source detectors { suggestedResources { title url } } } + }" }' + headers: + Accept: + - application/json + Content-Type: + - application/json + Connection: + - Keep-Alive + Host: + - FAKE_TACOS_HOST + User-Agent: + - http.rb/5.3.1 + response: + status: + code: 200 + message: OK + headers: + X-Frame-Options: + - SAMEORIGIN + X-Xss-Protection: + - '0' + X-Content-Type-Options: + - nosniff + X-Permitted-Cross-Domain-Policies: + - none + Referrer-Policy: + - strict-origin-when-cross-origin + Content-Type: + - application/json; charset=utf-8 + Vary: + - Accept, Origin + Etag: + - W/"a66b90423fc1658bd873263c1f6ab9f6" + Cache-Control: + - max-age=0, private, must-revalidate + X-Request-Id: + - 4b46e2b5-fde0-4de2-b0bb-5363548b47c6 + X-Runtime: + - '0.079767' + Server-Timing: + - start_processing.action_controller;dur=0.01, sql.active_record;dur=4.70, start_transaction.active_record;dur=0.00, + transaction.active_record;dur=12.18, instantiation.active_record;dur=0.11, + process_action.action_controller;dur=42.34 + Content-Length: + - '100' + body: + encoding: UTF-8 + string: 
'{"data":{"logSearchEvent":{"phrase":"direct","source":"use","detectors":{"suggestedResources":[]}}}}' + recorded_at: Fri, 03 Oct 2025 15:16:35 GMT +recorded_with: VCR 6.3.1 diff --git a/test/vcr_cassettes/tacos_fake_system.yml b/test/vcr_cassettes/tacos_fake_system.yml new file mode 100644 index 00000000..ba5213bc --- /dev/null +++ b/test/vcr_cassettes/tacos_fake_system.yml @@ -0,0 +1,59 @@ +--- +http_interactions: +- request: + method: post + uri: http://FAKE_TACOS_HOST/graphql + body: + encoding: UTF-8 + string: '{ "query": "{ logSearchEvent(searchTerm: \"popcorn\", sourceSystem: + \"faked\" ) { phrase source detectors { suggestedResources { title url } } + } }" }' + headers: + Accept: + - application/json + Content-Type: + - application/json + Connection: + - Keep-Alive + Host: + - FAKE_TACOS_HOST + User-Agent: + - http.rb/5.3.1 + response: + status: + code: 200 + message: OK + headers: + X-Frame-Options: + - SAMEORIGIN + X-Xss-Protection: + - '0' + X-Content-Type-Options: + - nosniff + X-Permitted-Cross-Domain-Policies: + - none + Referrer-Policy: + - strict-origin-when-cross-origin + Content-Type: + - application/json; charset=utf-8 + Vary: + - Accept, Origin + Etag: + - W/"035bbed4446f9d4569481bbb5d2f0b6a" + Cache-Control: + - max-age=0, private, must-revalidate + X-Request-Id: + - 9f427731-b972-4ca5-be53-a80a5c8ee2de + X-Runtime: + - '0.023578' + Server-Timing: + - start_processing.action_controller;dur=0.01, sql.active_record;dur=0.94, start_transaction.active_record;dur=0.00, + instantiation.active_record;dur=0.07, transaction.active_record;dur=0.98, + process_action.action_controller;dur=4.86 + Content-Length: + - '103' + body: + encoding: UTF-8 + string: '{"data":{"logSearchEvent":{"phrase":"popcorn","source":"faked","detectors":{"suggestedResources":[]}}}}' + recorded_at: Fri, 03 Oct 2025 13:55:59 GMT +recorded_with: VCR 6.3.1 diff --git a/test/vcr_cassettes/tacos_popcorn.yml b/test/vcr_cassettes/tacos_popcorn.yml new file mode 100644 index 
00000000..0002783a --- /dev/null +++ b/test/vcr_cassettes/tacos_popcorn.yml @@ -0,0 +1,59 @@ +--- +http_interactions: +- request: + method: post + uri: http://FAKE_TACOS_HOST/graphql + body: + encoding: UTF-8 + string: '{ "query": "{ logSearchEvent(searchTerm: \"popcorn\", sourceSystem: + \"use\" ) { phrase source detectors { suggestedResources { title url } } } + }" }' + headers: + Accept: + - application/json + Content-Type: + - application/json + Connection: + - Keep-Alive + Host: + - FAKE_TACOS_HOST + User-Agent: + - http.rb/5.3.1 + response: + status: + code: 200 + message: OK + headers: + X-Frame-Options: + - SAMEORIGIN + X-Xss-Protection: + - '0' + X-Content-Type-Options: + - nosniff + X-Permitted-Cross-Domain-Policies: + - none + Referrer-Policy: + - strict-origin-when-cross-origin + Content-Type: + - application/json; charset=utf-8 + Vary: + - Accept, Origin + Etag: + - W/"84adabecd4fb001e4a3306daa2b2321e" + Cache-Control: + - max-age=0, private, must-revalidate + X-Request-Id: + - c72d3c64-ee74-4de3-b095-1416ef3f433b + X-Runtime: + - '0.023035' + Server-Timing: + - start_processing.action_controller;dur=0.01, sql.active_record;dur=0.85, start_transaction.active_record;dur=0.00, + instantiation.active_record;dur=0.07, transaction.active_record;dur=0.85, + process_action.action_controller;dur=4.63 + Content-Length: + - '101' + body: + encoding: UTF-8 + string: '{"data":{"logSearchEvent":{"phrase":"popcorn","source":"use","detectors":{"suggestedResources":[]}}}}' + recorded_at: Fri, 03 Oct 2025 13:55:59 GMT +recorded_with: VCR 6.3.1