diff --git a/usaspending_api/api_contracts/contracts/v2/search/spending_by_transaction.md b/usaspending_api/api_contracts/contracts/v2/search/spending_by_transaction.md index 5c29f420e6..21cd1d6e13 100644 --- a/usaspending_api/api_contracts/contracts/v2/search/spending_by_transaction.md +++ b/usaspending_api/api_contracts/contracts/v2/search/spending_by_transaction.md @@ -396,6 +396,7 @@ List of column names to request - `NAICS` - `PSC` - `Primary Place of Performance` +- `recipient_id` - `Recipient Location` - `Recipient Name` - `Recipient UEI` @@ -415,6 +416,7 @@ List of column names to request + `internal_id`: `68856340` (required, string, nullable) + `generated_internal_id`: `CONT_AWD_00013U_7090_KJ88_4735` (required, string, nullable) + `Mod`: `P00206` (required, string, nullable) ++ `recipient_id`: `1e5032cf-11df-a3bf-4240-6dda5f6d45ff-C` (optional, string, nullable) + `Recipient Name`: `LEIDOS INNOVATIONS CORPORATION` (required, string, nullable) + `Transaction Amount`: `40000000.00` (required, string, nullable) @@ -433,6 +435,7 @@ List of column names to request + `Last Date to Order` (required, string, nullable) + `Loan Value` (required, string, nullable) + `Mod` (required, string, nullable) ++ `recipient_id` (optional, string, nullable) + `Recipient Name` (required, string, nullable) + `Subsidy Cost` (required, string, nullable) -+ `Transaction Amount` (required, string, nullable) \ No newline at end of file ++ `Transaction Amount` (required, string, nullable) diff --git a/usaspending_api/awards/v2/lookups/elasticsearch_lookups.py b/usaspending_api/awards/v2/lookups/elasticsearch_lookups.py index 861d23bd03..6d084b9f8b 100644 --- a/usaspending_api/awards/v2/lookups/elasticsearch_lookups.py +++ b/usaspending_api/awards/v2/lookups/elasticsearch_lookups.py @@ -1,51 +1,90 @@ """ -Look ups for elasticsearch fields to be displayed for the front end +Lookups for elasticsearch fields to be displayed for the front end """ from copy import deepcopy +from dataclasses import dataclass +from enum import Enum from usaspending_api.awards.v2.lookups.lookups import all_award_types_mappings -TRANSACTIONS_LOOKUP = { - "Recipient Name": "recipient_name.keyword", - "Action Date": "action_date", - "Transaction Amount": "federal_action_obligation", - "Award Type": "type_description.keyword", - "Awarding Agency": "awarding_toptier_agency_name.keyword", - "Awarding Sub Agency": "awarding_subtier_agency_name.keyword", - "Funding Agency": "funding_toptier_agency_name", - "Funding Sub Agency": "funding_subtier_agency_name", - "Issued Date": "period_of_performance_start_date", - "Loan Value": "face_value_loan_guarantee", - "Subsidy Cost": "original_loan_subsidy_cost", - "Mod": "modification_number.keyword", - "Award ID": "display_award_id", - "awarding_agency_id": "awarding_agency_id", - "internal_id": "award_id", - "generated_internal_id": "generated_unique_award_id", - "Last Date to Order": "ordering_period_end_date", - "def_codes": "disaster_emergency_fund_codes", - "Transaction Description": "transaction_description.keyword", - "Action Type": "action_type", - "Recipient UEI": "recipient_uei.keyword", - "awarding_agency_slug": "awarding_toptier_agency_name.keyword", - "funding_agency_slug": "funding_toptier_agency_name.keyword", - "recipient_location_city_name": "recipient_location_city_name.keyword", - "recipient_location_state_code": "recipient_location_state_code", - "recipient_location_country_name": "recipient_location_country_name.keyword", - "recipient_location_address_line1": "recipient_location_address_line1.keyword", - "recipient_location_address_line2": "recipient_location_address_line2.keyword", - "recipient_location_address_line3": "recipient_location_address_line3.keyword", - "pop_city_name": "pop_city_name.keyword", - "pop_state_code": "pop_state_code", - "pop_country_name": "pop_country_name.keyword", - "naics_code": "naics_code.keyword", - "naics_description": "naics_description.keyword", - "product_or_service_code": "product_or_service_code.keyword", - "product_or_service_description": "product_or_service_description.keyword", - "cfda_number": "cfda_number.keyword", - "cfda_title": "cfda_title.keyword", -} + +@dataclass +class ElasticsearchField: + """ + Represents a field that is searchable by an API endpoint and pairs it with the corresponding elasticsearch field. + + Args: + field_name: The name of the field provided by the user when selecting fields and returned by the API + full_path: A complete path that may include additional field types such as ".keyword" + short_path: The full_path with any additional field types removed; may be 1:1 with full_path + """ + + field_name: str + full_path: str + short_path: str + + +class TransactionField(str, Enum): + ACTION_DATE = ("Action Date", "action_date") + ACTION_TYPE = ("Action Type", "action_type") + AWARD_ID = ("Award ID", "display_award_id") + AWARD_TYPE = ("Award Type", "type_description.keyword") + AWARDING_AGENCY = ("Awarding Agency", "awarding_toptier_agency_name.keyword") + AWARDING_AGENCY_ID = ("awarding_agency_id", "awarding_agency_id") + AWARDING_AGENCY_SLUG = ("awarding_agency_slug", "awarding_toptier_agency_name.keyword") + AWARDING_SUB_AGENCY = ("Awarding Sub Agency", "awarding_subtier_agency_name.keyword") + CFDA_NUMBER = ("cfda_number", "cfda_number.keyword") + CFDA_TITLE = ("cfda_title", "cfda_title.keyword") + DEF_CODES = ("def_codes", "disaster_emergency_fund_codes") + FUNDING_AGENCY = ("Funding Agency", "funding_toptier_agency_name.keyword") + FUNDING_AGENCY_SLUG = ("funding_agency_slug", "funding_toptier_agency_name.keyword") + FUNDING_SUB_AGENCY = ("Funding Sub Agency", "funding_subtier_agency_name.keyword") + GENERATED_INTERNAL_ID = ("generated_internal_id", "generated_unique_award_id") + INTERNAL_ID = ("internal_id", "award_id") + ISSUED_DATE = ("Issued Date", "period_of_performance_start_date") + LAST_DATE_TO_ORDER = ("Last Date to Order", "ordering_period_end_date") + LOAN_VALUE = ("Loan Value", "face_value_loan_guarantee") + MOD = ("Mod", "modification_number.keyword") + NAICS_CODE = ("naics_code", "naics_code.keyword") + NAICS_DESCRIPTION = ("naics_description", "naics_description.keyword") + POP_CITY_NAME = ("pop_city_name", "pop_city_name.keyword") + POP_COUNTRY_NAME = ("pop_country_name", "pop_country_name.keyword") + POP_STATE_CODE = ("pop_state_code", "pop_state_code") + PSC_CODE = ("product_or_service_code", "product_or_service_code.keyword") + PSC_DESCRIPTION = ("product_or_service_description", "product_or_service_description.keyword") + RECIPIENT_ID = ("recipient_id", "recipient_agg_key") + RECIPIENT_LOCATION_ADDRESS_LINE_1 = ("recipient_location_address_line1", "recipient_location_address_line1.keyword") + RECIPIENT_LOCATION_ADDRESS_LINE_2 = ("recipient_location_address_line2", "recipient_location_address_line2.keyword") + RECIPIENT_LOCATION_ADDRESS_LINE_3 = ("recipient_location_address_line3", "recipient_location_address_line3.keyword") + RECIPIENT_LOCATION_CITY_NAME = ("recipient_location_city_name", "recipient_location_city_name.keyword") + RECIPIENT_LOCATION_COUNTRY_NAME = ("recipient_location_country_name", "recipient_location_country_name.keyword") + RECIPIENT_LOCATION_STATE_CODE = ("recipient_location_state_code", "recipient_location_state_code") + RECIPIENT_NAME = ("Recipient Name", "recipient_name.keyword") + RECIPIENT_UEI = ("Recipient UEI", "recipient_uei.keyword") + SUBSIDY_COST = ("Subsidy Cost", "original_loan_subsidy_cost") + TRANSACTION_AMOUNT = ("Transaction Amount", "federal_action_obligation") + TRANSACTION_DESCRIPTION = ("Transaction Description", "transaction_description.keyword") + + def __new__(cls, field_name: str, full_path: str) -> "str": + obj = str.__new__(cls, field_name) + obj._value_ = field_name + short_path = full_path.split(".")[0] + obj._es_field = ElasticsearchField(field_name, full_path, short_path) + return obj + + @property + def field_name(self) -> str: + return self._es_field.field_name + + @property + def full_path(self) -> str: + return self._es_field.full_path + + @property + def short_path(self) -> str: + return self._es_field.short_path + base_mapping = { "Award ID": "display_award_id", @@ -145,8 +184,6 @@ }, } -TRANSACTIONS_SOURCE_LOOKUP = {key: value.replace(".keyword", "") for key, value in TRANSACTIONS_LOOKUP.items()} - CONTRACT_SOURCE_LOOKUP = {key: value.replace(".keyword", "") for key, value in contracts_mapping.items()} IDV_SOURCE_LOOKUP = {key: value.replace(".keyword", "") for key, value in idv_mapping.items()} NON_LOAN_ASST_SOURCE_LOOKUP = {key: value.replace(".keyword", "") for key, value in non_loan_assist_mapping.items()} diff --git a/usaspending_api/search/tests/integration/test_spending_by_transaction.py b/usaspending_api/search/tests/integration/test_spending_by_transaction.py index 6c9b482fb1..847b6160ee 100644 --- a/usaspending_api/search/tests/integration/test_spending_by_transaction.py +++ b/usaspending_api/search/tests/integration/test_spending_by_transaction.py @@ -23,6 +23,8 @@ def transaction_data(): recipient_location_zip5="abcde", piid="IND12PB00323", recipient_uei="testuei", + recipient_hash="1e5032cf-11df-a3bf-4240-6dda5f6d45ff", + recipient_levels=["C", "P", "R"], parent_uei="test_parent_uei", action_type="A", legal_entity_address_line1="test address line", @@ -270,6 +272,7 @@ def test_all_fields_returned(client, monkeypatch, transaction_data, elasticsearc "NAICS", "PSC", "Assistance Listing", + "recipient_id", ] request = { @@ -284,13 +287,64 @@ def test_all_fields_returned(client, monkeypatch, transaction_data, elasticsearc resp = client.post(ENDPOINT, content_type="application/json", data=json.dumps(request)) assert resp.status_code == status.HTTP_200_OK - assert len(resp.data["results"]) > 0 - for result in resp.data["results"]: - for field in fields: - assert field in result, f"Response item is missing field {field}" - - assert "Sausage" not in result - assert "A" not in result + assert len(resp.data["results"]) == 1 + assert resp.data["results"] == [ + { + "Action Date": "2010-10-01", + "Action Type": "A", + "Assistance Listing": {"cfda_number": "1234", "cfda_title": "cfda title 1"}, + "Award ID": "IND12PB00323", + "Award Type": None, + "Awarding Agency": None, + "Awarding Sub Agency": None, + "Funding Agency": None, + "Funding Sub Agency": None, + "Issued Date": None, + "Last Date to Order": None, + "Loan Value": None, + "Mod": None, + "NAICS": {"code": "naics code 1", "description": "naics description 1"}, + "PSC": {"code": "psc code 1", "description": "psc description 1"}, + "Primary Place of Performance": { + "city_name": "ARLINGTON", + "congressional_code": "popcongressionalcode", + "country_name": "UNITED STATES", + "county_code": "popcountycode", + "county_name": "popcountyname", + "location_country_code": "popcountrycode", + "state_code": "TX", + "state_name": "Texas", + "zip4": "popziplast4", + "zip5": "popzip5", + }, + "Recipient Location": { + "address_line1": "test address line", + "address_line2": "address2", + "address_line3": "address3", + "city_name": "ARLINGTON", + "congressional_code": "congressionalcode", + "country_name": "UNITED STATES", + "county_code": "001", + "county_name": "testcountyname", + "foreign_postal_code": "foreignpostalcode", + "foreign_province": "foreignprovince", + "location_country_code": "USA", + "state_code": "TX", + "state_name": "Texas", + "zip4": "6789", + "zip5": "abcde", + }, + "Recipient Name": None, + "Recipient UEI": "testuei", + "Subsidy Cost": None, + "Transaction Amount": None, + "Transaction Description": "test", + "awarding_agency_id": None, + "generated_internal_id": None, + "internal_id": 1, + "recipient_id": "1e5032cf-11df-a3bf-4240-6dda5f6d45ff-C", + } + ] @pytest.mark.django_db diff --git a/usaspending_api/search/tests/unit/test_elasticsearch_helpers.py b/usaspending_api/search/tests/unit/test_elasticsearch_helpers.py index abbb502fd4..68d41c426d 100644 --- a/usaspending_api/search/tests/unit/test_elasticsearch_helpers.py +++ b/usaspending_api/search/tests/unit/test_elasticsearch_helpers.py @@ -1,7 +1,4 @@ -from usaspending_api.search.v2.elasticsearch_helper import ( - es_minimal_sanitize, - swap_keys, -) +from usaspending_api.search.v2.elasticsearch_helper import es_minimal_sanitize from usaspending_api.search.v2.es_sanitization import es_sanitize @@ -21,19 +18,3 @@ def test_es_minimal_sanitize(): test_string = "!-^~/" processed_string = es_minimal_sanitize(test_string) assert processed_string == r"\!\-\^\~\/" - - -def test_swap_keys(): - test = { - "Recipient Name": "recipient_name", - "Action Date": "action_date", - "Transaction Amount": "federal_action_obligation", - } - - results = swap_keys(test) - - assert results == { - "recipient_name": "recipient_name", - "action_date": "action_date", - "federal_action_obligation": "federal_action_obligation", - } diff --git a/usaspending_api/search/v2/elasticsearch_helper.py b/usaspending_api/search/v2/elasticsearch_helper.py index 8b8f74c68f..6e0b7baee7 100644 --- a/usaspending_api/search/v2/elasticsearch_helper.py +++ b/usaspending_api/search/v2/elasticsearch_helper.py @@ -6,10 +6,7 @@ from elasticsearch_dsl import A from elasticsearch_dsl import Q as ES_Q -from usaspending_api.awards.v2.lookups.elasticsearch_lookups import ( - INDEX_ALIASES_TO_AWARD_TYPES, - TRANSACTIONS_SOURCE_LOOKUP, -) +from usaspending_api.awards.v2.lookups.elasticsearch_lookups import INDEX_ALIASES_TO_AWARD_TYPES from usaspending_api.common.data_classes import Pagination from usaspending_api.common.elasticsearch.search_wrappers import AwardSearch, Search, TransactionSearch from usaspending_api.common.query_with_filters import QueryWithFilters @@ -19,19 +16,6 @@ logger = logging.getLogger("console") DOWNLOAD_QUERY_SIZE = settings.MAX_DOWNLOAD_LIMIT -TRANSACTIONS_SOURCE_LOOKUP.update({v: k for k, v in TRANSACTIONS_SOURCE_LOOKUP.items()}) - - -def swap_keys(dictionary_): - return dict( - (TRANSACTIONS_SOURCE_LOOKUP.get(old_key, old_key), new_key) for (old_key, new_key) in dictionary_.items() - ) - - -def format_for_frontend(response): - """calls reverse key from TRANSACTIONS_LOOKUP""" - response = [result["_source"] for result in response] - return [swap_keys(result) for result in response] def get_total_results(keyword): diff --git a/usaspending_api/search/v2/urls.py b/usaspending_api/search/v2/urls.py index b3fe0b29f4..ceecbb5681 100644 --- a/usaspending_api/search/v2/urls.py +++ b/usaspending_api/search/v2/urls.py @@ -1,16 +1,18 @@ from django.urls import include, re_path -from usaspending_api.search.v2.views import search_elasticsearch as es from usaspending_api.search.v2.views.new_awards_over_time import NewAwardsOverTimeVisualizationViewSet from usaspending_api.search.v2.views.spending_by_award import SpendingByAwardVisualizationViewSet from usaspending_api.search.v2.views.spending_by_award_count import SpendingByAwardCountVisualizationViewSet from usaspending_api.search.v2.views.spending_by_category import SpendingByCategoryVisualizationViewSet from usaspending_api.search.v2.views.spending_by_geography import SpendingByGeographyVisualizationViewSet +from usaspending_api.search.v2.views.spending_by_transaction import SpendingByTransactionVisualizationViewSet +from usaspending_api.search.v2.views.spending_by_transaction_count import SpendingByTransactionCountVisualizationViewSet from usaspending_api.search.v2.views.spending_by_transaction_grouped import ( SpendingByTransactionGroupedVisualizationViewSet, ) from usaspending_api.search.v2.views.spending_over_time import SpendingOverTimeVisualizationViewSet from usaspending_api.search.v2.views.spending_by_subaward_grouped import SpendingBySubawardGroupedVisualizationViewSet +from usaspending_api.search.v2.views.transaction_spending_summary import TransactionSummaryVisualizationViewSet urlpatterns = [ re_path(r"^new_awards_over_time", NewAwardsOverTimeVisualizationViewSet.as_view()), @@ -20,9 +22,9 @@ re_path(r"^spending_by_category$", SpendingByCategoryVisualizationViewSet.as_view()), re_path(r"^spending_by_geography", SpendingByGeographyVisualizationViewSet.as_view()), re_path(r"^spending_by_subaward_grouped", SpendingBySubawardGroupedVisualizationViewSet.as_view()), - re_path(r"^spending_by_transaction_count", es.SpendingByTransactionCountVisualizationViewSet.as_view()), + re_path(r"^spending_by_transaction_count", SpendingByTransactionCountVisualizationViewSet.as_view()), re_path(r"^spending_by_transaction_grouped", SpendingByTransactionGroupedVisualizationViewSet.as_view()), - re_path(r"^spending_by_transaction", es.SpendingByTransactionVisualizationViewSet.as_view()), + re_path(r"^spending_by_transaction", SpendingByTransactionVisualizationViewSet.as_view()), re_path(r"^spending_over_time", SpendingOverTimeVisualizationViewSet.as_view()), - re_path(r"^transaction_spending_summary", es.TransactionSummaryVisualizationViewSet.as_view()), + re_path(r"^transaction_spending_summary", TransactionSummaryVisualizationViewSet.as_view()), ] diff --git a/usaspending_api/search/v2/views/search_elasticsearch.py b/usaspending_api/search/v2/views/search_elasticsearch.py deleted file mode 100644 index 65f67e29b1..0000000000 --- a/usaspending_api/search/v2/views/search_elasticsearch.py +++ /dev/null @@ -1,315 +0,0 @@ -import copy -import logging - -from django.conf import settings -from django.utils.text import slugify -from rest_framework.response import Response -from rest_framework.views import APIView - -from usaspending_api.common.api_versioning import api_transformations, API_TRANSFORM_FUNCTIONS -from usaspending_api.common.cache_decorator import cache_response -from usaspending_api.common.elasticsearch.search_wrappers import TransactionSearch -from usaspending_api.common.exceptions import ( - InvalidParameterException, - UnprocessableEntityException, -) -from usaspending_api.common.helpers.data_constants import state_name_from_code -from usaspending_api.common.helpers.generic_helper import get_simple_pagination_metadata, get_generic_filters_message -from usaspending_api.common.query_with_filters import QueryWithFilters -from usaspending_api.search.filters.elasticsearch.filter import QueryType -from usaspending_api.common.validator.award_filter import ( - AWARD_FILTER, - AWARD_FILTER_W_FILTERS, -) -from usaspending_api.common.validator.pagination import PAGINATION -from usaspending_api.common.validator.tinyshield import TinyShield -from usaspending_api.references.models import ToptierAgencyPublishedDABSView -from usaspending_api.search.v2.elasticsearch_helper import spending_by_transaction_count -from usaspending_api.search.v2.es_sanitization import es_minimal_sanitize -from usaspending_api.search.v2.elasticsearch_helper import spending_by_transaction_sum_and_count -from usaspending_api.awards.v2.lookups.elasticsearch_lookups import TRANSACTIONS_SOURCE_LOOKUP, TRANSACTIONS_LOOKUP - -logger = logging.getLogger(__name__) - -API_VERSION = settings.API_VERSION - - -@api_transformations(api_version=API_VERSION, function_list=API_TRANSFORM_FUNCTIONS) -class SpendingByTransactionVisualizationViewSet(APIView): - """ - This route takes keyword search fields, and returns the fields of the searched term. - """ - - endpoint_doc = "usaspending_api/api_contracts/contracts/v2/search/spending_by_transaction.md" - - @cache_response() - def post(self, request): - program_activities_rule = [ - { - "name": "program_activities", - "type": "array", - "key": "filters|program_activities", - "array_type": "object", - "object_keys_min": 1, - "object_keys": { - "name": {"type": "text", "text_type": "search"}, - "code": { - "type": "integer", - }, - }, - } - ] - models = [ - { - "name": "fields", - "key": "fields", - "type": "array", - "array_type": "text", - "text_type": "search", - "optional": False, - } - ] - models.extend(copy.deepcopy(AWARD_FILTER_W_FILTERS)) - models.extend(copy.deepcopy(PAGINATION)) - models.extend(copy.deepcopy(program_activities_rule)) - self.models = models - for m in models: - if m["name"] in ("award_type_codes", "sort"): - m["optional"] = False - tiny_shield = TinyShield(models) - validated_payload = tiny_shield.block(request.data) - if "filters" in validated_payload and "program_activities" in validated_payload["filters"]: - tiny_shield.enforce_object_keys_min(validated_payload, program_activities_rule[0]) - - record_num = (validated_payload["page"] - 1) * validated_payload["limit"] - if record_num >= settings.ES_TRANSACTIONS_MAX_RESULT_WINDOW: - raise UnprocessableEntityException( - "Page #{page} of size {limit} is over the maximum result limit ({es_limit}). Consider using custom data downloads to obtain large data sets.".format( - page=validated_payload["page"], - limit=validated_payload["limit"], - es_limit=settings.ES_TRANSACTIONS_MAX_RESULT_WINDOW, - ) - ) - - payload_sort_key = validated_payload["sort"] - if payload_sort_key not in validated_payload["fields"]: - raise InvalidParameterException("Sort value not found in fields: {}".format(payload_sort_key)) - - permitted_sort_values = TRANSACTIONS_LOOKUP - if payload_sort_key not in TRANSACTIONS_LOOKUP and payload_sort_key not in [ - "Recipient Location", - "Primary Place of Performance", - "NAICS", - "PSC", - "Assistance Listing", - ]: - raise InvalidParameterException( - f"Sort value is not currently supported: {payload_sort_key}. Allowed values are: [{', '.join(permitted_sort_values.keys())}]" - ) - - if "filters" in validated_payload and "no intersection" in validated_payload["filters"]["award_type_codes"]: - # "Special case": there will never be results when the website provides this value - return Response( - { - "limit": validated_payload["limit"], - "results": [], - "page_metadata": { - "page": validated_payload["page"], - "next": None, - "previous": None, - "hasNext": False, - "hasPrevious": False, - }, - } - ) - match payload_sort_key: - case "Recipient Location": - sort_by_fields = [ - TRANSACTIONS_LOOKUP["recipient_location_city_name"], - TRANSACTIONS_LOOKUP["recipient_location_state_code"], - TRANSACTIONS_LOOKUP["recipient_location_country_name"], - TRANSACTIONS_LOOKUP["recipient_location_address_line1"], - TRANSACTIONS_LOOKUP["recipient_location_address_line2"], - TRANSACTIONS_LOOKUP["recipient_location_address_line3"], - ] - case "Primary Place of Performance": - sort_by_fields = [ - TRANSACTIONS_LOOKUP["pop_city_name"], - TRANSACTIONS_LOOKUP["pop_state_code"], - TRANSACTIONS_LOOKUP["pop_country_name"], - ] - case "NAICS": - sort_by_fields = [TRANSACTIONS_LOOKUP["naics_code"], TRANSACTIONS_LOOKUP["naics_description"]] - case "PSC": - sort_by_fields = [ - TRANSACTIONS_LOOKUP["product_or_service_code"], - TRANSACTIONS_LOOKUP["product_or_service_description"], - ] - case "Assistance Listing": - sort_by_fields = [TRANSACTIONS_LOOKUP["cfda_number"], TRANSACTIONS_LOOKUP["cfda_title"]] - case _: - sort_by_fields = [TRANSACTIONS_LOOKUP[payload_sort_key]] - sorts = [{field: validated_payload["order"] for field in sort_by_fields}] - - lower_limit = (validated_payload["page"] - 1) * validated_payload["limit"] - upper_limit = (validated_payload["page"]) * validated_payload["limit"] + 1 - if "keywords" in validated_payload["filters"]: - validated_payload["filters"]["keyword_search"] = [ - es_minimal_sanitize(x) for x in validated_payload["filters"]["keywords"] - ] - validated_payload["filters"].pop("keywords") - query_with_filters = QueryWithFilters(QueryType.TRANSACTIONS) - filter_query = query_with_filters.generate_elasticsearch_query(validated_payload["filters"]) - search = TransactionSearch().filter(filter_query).sort(*sorts)[lower_limit:upper_limit] - response = search.handle_execute() - return Response(self.build_elasticsearch_result(validated_payload, response)) - - def build_elasticsearch_result(self, request, response) -> dict: - results = [] - for res in response: - hit = res.to_dict() - # Parsing API response values from ES query result JSON - # We parse the `hit` (result from elasticsearch) to get the award type, use the type to determine - # which lookup dict to use, and then use that lookup to retrieve the correct value requested from `fields` - row = {} - for field in request["fields"]: - if field == "Assistance Listing": - row["Assistance Listing"] = { - "cfda_number": hit.get("cfda_number"), - "cfda_title": hit.get("cfda_title"), - } - - elif field == "Recipient Location": - row["Recipient Location"] = { - "location_country_code": hit.get("recipient_location_country_code"), - "country_name": hit.get("recipient_location_country_name"), - "state_code": hit.get("recipient_location_state_code"), - "state_name": state_name_from_code(hit.get("recipient_location_state_code")), - "city_name": hit.get("recipient_location_city_name"), - "county_code": hit.get("recipient_location_county_code"), - "county_name": hit.get("recipient_location_county_name"), - "address_line1": hit.get("legal_entity_address_line1"), - "address_line2": hit.get("legal_entity_address_line2"), - "address_line3": hit.get("legal_entity_address_line3"), - "congressional_code": hit.get("recipient_location_congressional_code"), - "zip4": hit.get("legal_entity_zip_last4"), - "zip5": hit.get("recipient_location_zip5"), - "foreign_postal_code": hit.get("legal_entity_foreign_posta"), - "foreign_province": hit.get("legal_entity_foreign_provi"), - } - - elif field == "Primary Place of Performance": - row["Primary Place of Performance"] = { - "location_country_code": hit.get("pop_country_code"), - "country_name": hit.get("pop_country_name"), - "state_code": hit.get("pop_state_code"), - "state_name": state_name_from_code(hit.get("pop_state_code")), - "city_name": hit.get("pop_city_name"), - "county_code": hit.get("pop_county_code"), - "county_name": hit.get("pop_county_name"), - "congressional_code": hit.get("pop_congressional_code"), - "zip4": hit.get("place_of_perform_zip_last4"), - "zip5": hit.get("pop_zip5"), - } - - elif field == "NAICS": - row["NAICS"] = { - "code": hit.get("naics_code"), - "description": hit.get("naics_description"), - } - - elif field == "PSC": - row["PSC"] = { - "code": hit.get("product_or_service_code"), - "description": hit.get("product_or_service_description"), - } - elif field == "awarding_agency_slug" or field == "funding_agency_slug": - row[field] = slugify(hit.get(TRANSACTIONS_SOURCE_LOOKUP[field])) - else: - row[field] = hit.get(TRANSACTIONS_SOURCE_LOOKUP[field]) - row["generated_internal_id"] = hit["generated_unique_award_id"] - row["internal_id"] = hit["award_id"] - - results.append(row) - - metadata = get_simple_pagination_metadata(len(response), request["limit"], request["page"]) - - return { - "limit": request["limit"], - "results": results[: request["limit"]], - "page_metadata": metadata, - "messages": get_generic_filters_message(request["filters"].keys(), [elem["name"] for elem in self.models]), - } - - def get_agency_slug(self, code): - code = str(code).zfill(3) - submission = ToptierAgencyPublishedDABSView.objects.filter(toptier_code=code).first() - if submission is None: - return None - return slugify(submission.name) - - -@api_transformations(api_version=API_VERSION, function_list=API_TRANSFORM_FUNCTIONS) -class TransactionSummaryVisualizationViewSet(APIView): - """ - This route takes award filters, and returns the number of transactions and summation of federal action obligations. - """ - - endpoint_doc = "usaspending_api/api_contracts/contracts/v2/search/transaction_spending_summary.md" - - @cache_response() - def post(self, request): - """ - Returns a summary of transactions which match the award search filter - Desired values: - total number of transactions `award_count` - The federal_action_obligation sum of all those transactions `award_spending` - - *Note* Only deals with prime awards, future plans to include sub-awards. - """ - - models = [ - { - "name": "keywords", - "key": "filters|keywords", - "type": "array", - "array_type": "text", - "text_type": "search", - "optional": False, - "text_min": 3, - } - ] - validated_payload = TinyShield(models).block(request.data) - - results = spending_by_transaction_sum_and_count(validated_payload) - return Response({"results": results}) - - -@api_transformations(api_version=API_VERSION, function_list=API_TRANSFORM_FUNCTIONS) -class SpendingByTransactionCountVisualizationViewSet(APIView): - """ - This route takes transaction search fields, and returns the transaction counts of the searched term. - """ - - endpoint_doc = "usaspending_api/api_contracts/contracts/v2/search/spending_by_transaction_count.md" - - @cache_response() - def post(self, request): - models = [] - models.extend(copy.deepcopy(AWARD_FILTER)) - for m in models: - if m["name"] == "keywords": - m["optional"] = True - elif m["name"] == "keyword": - m["optional"] = True - validated_payload = TinyShield(models).block(request.data) - if "keywords" in validated_payload["filters"]: - validated_payload["filters"]["keyword_search"] = [ - es_minimal_sanitize(x) for x in validated_payload["filters"]["keywords"] - ] - validated_payload["filters"].pop("keywords") - query_with_filters = QueryWithFilters(QueryType.TRANSACTIONS) - filter_query = query_with_filters.generate_elasticsearch_query(validated_payload["filters"]) - search = TransactionSearch().filter(filter_query) - results = spending_by_transaction_count(search) - return Response({"results": results}) diff --git a/usaspending_api/search/v2/views/spending_by_transaction.py b/usaspending_api/search/v2/views/spending_by_transaction.py new file mode 100644 index 0000000000..aa8906892c --- /dev/null +++ b/usaspending_api/search/v2/views/spending_by_transaction.py @@ -0,0 +1,256 @@ +import copy +import logging +import re +from enum import Enum + +from django.conf import settings +from django.utils.text import slugify +from rest_framework.response import Response +from rest_framework.views import APIView + +from usaspending_api.common.api_versioning import api_transformations, API_TRANSFORM_FUNCTIONS +from usaspending_api.common.cache_decorator import cache_response +from usaspending_api.common.elasticsearch.search_wrappers import TransactionSearch +from usaspending_api.common.exceptions import ( + InvalidParameterException, + UnprocessableEntityException, +) +from usaspending_api.common.helpers.data_constants import state_name_from_code +from usaspending_api.common.helpers.generic_helper import get_simple_pagination_metadata, get_generic_filters_message +from usaspending_api.common.query_with_filters import QueryWithFilters +from usaspending_api.search.filters.elasticsearch.filter import QueryType +from usaspending_api.common.validator.award_filter import AWARD_FILTER_W_FILTERS +from usaspending_api.common.validator.pagination import customize_pagination_with_sort_columns +from usaspending_api.common.validator.tinyshield import TinyShield +from usaspending_api.references.models import ToptierAgencyPublishedDABSView +from usaspending_api.search.v2.es_sanitization import es_minimal_sanitize +from usaspending_api.awards.v2.lookups.elasticsearch_lookups import TransactionField + +logger = logging.getLogger(__name__) + + +class DerivedField(str, Enum): + ASSISTANCE_LISTING = "Assistance Listing" + NAICS = "NAICS" + PRIMARY_PLACE_OF_PERFORMANCE = "Primary Place of Performance" + PSC = "PSC" + RECIPIENT_LOCATION = "Recipient Location" + + +@api_transformations(api_version=settings.API_VERSION, function_list=API_TRANSFORM_FUNCTIONS) +class SpendingByTransactionVisualizationViewSet(APIView): + """ + This route takes keyword search fields, and returns the fields of the searched term. + """ + + endpoint_doc = "usaspending_api/api_contracts/contracts/v2/search/spending_by_transaction.md" + + @cache_response() + def post(self, request): + all_fields = [ + *[enum_val.value for enum_val in TransactionField], + *[enum_val.value for enum_val in DerivedField], + ] + program_activities_rule = [ + { + "name": "program_activities", + "type": "array", + "key": "filters|program_activities", + "array_type": "object", + "object_keys_min": 1, + "object_keys": { + "name": {"type": "text", "text_type": "search"}, + "code": { + "type": "integer", + }, + }, + } + ] + models = [ + { + "name": "fields", + "key": "fields", + "type": "array", + "array_type": "enum", + "enum_values": all_fields, + "optional": False, + } + ] + models.extend(copy.deepcopy(AWARD_FILTER_W_FILTERS)) + models.extend( + customize_pagination_with_sort_columns( + all_fields, default_sort_column=TransactionField.TRANSACTION_AMOUNT.value + ) + ) + models.extend(copy.deepcopy(program_activities_rule)) + self.models = models + for m in models: + if m["name"] in ("award_type_codes", "sort"): + m["optional"] = False + tiny_shield = TinyShield(models) + validated_payload = tiny_shield.block(request.data) + if "filters" in validated_payload and "program_activities" in validated_payload["filters"]: + tiny_shield.enforce_object_keys_min(validated_payload, program_activities_rule[0]) + + record_num = (validated_payload["page"] - 1) * validated_payload["limit"] + if record_num >= settings.ES_TRANSACTIONS_MAX_RESULT_WINDOW: + raise UnprocessableEntityException( + "Page #{page} of size {limit} is over the maximum result limit ({es_limit}). Consider using custom data downloads to obtain large data sets.".format( + page=validated_payload["page"], + limit=validated_payload["limit"], + es_limit=settings.ES_TRANSACTIONS_MAX_RESULT_WINDOW, + ) + ) + + payload_sort_key = validated_payload["sort"] + if payload_sort_key not in validated_payload["fields"]: + raise InvalidParameterException(f"Sort value not found in fields: {payload_sort_key}") + + if "filters" in validated_payload and "no intersection" in validated_payload["filters"]["award_type_codes"]: + # "Special case": there will never be results when the website provides this value + return Response( + { + "limit": validated_payload["limit"], + "results": [], + "page_metadata": { + "page": validated_payload["page"], + "next": None, + "previous": None, + "hasNext": False, + "hasPrevious": False, + }, + } + ) + match payload_sort_key: + case DerivedField.RECIPIENT_LOCATION: + sort_by_fields = [ + TransactionField.RECIPIENT_LOCATION_CITY_NAME.full_path, + TransactionField.RECIPIENT_LOCATION_STATE_CODE.full_path, + TransactionField.RECIPIENT_LOCATION_COUNTRY_NAME.full_path, + TransactionField.RECIPIENT_LOCATION_ADDRESS_LINE_1.full_path, + TransactionField.RECIPIENT_LOCATION_ADDRESS_LINE_2.full_path, + TransactionField.RECIPIENT_LOCATION_ADDRESS_LINE_3.full_path, + ] + case DerivedField.PRIMARY_PLACE_OF_PERFORMANCE: + sort_by_fields = [ + TransactionField.POP_CITY_NAME.full_path, + TransactionField.POP_STATE_CODE.full_path, + TransactionField.POP_COUNTRY_NAME.full_path, + ] + case DerivedField.NAICS: + sort_by_fields = [ + TransactionField.NAICS_CODE.full_path, + TransactionField.NAICS_DESCRIPTION.full_path, + ] + case DerivedField.PSC: + sort_by_fields = [ + TransactionField.PSC_CODE.full_path, + TransactionField.PSC_DESCRIPTION.full_path, + ] + case DerivedField.ASSISTANCE_LISTING: + sort_by_fields = [ + TransactionField.CFDA_NUMBER.full_path, + TransactionField.CFDA_TITLE.full_path, + ] + case _: + sort_by_fields = [TransactionField(payload_sort_key).full_path] + sorts = [{field: validated_payload["order"] for field in sort_by_fields}] + + lower_limit = (validated_payload["page"] - 1) * validated_payload["limit"] + upper_limit = (validated_payload["page"]) * validated_payload["limit"] + 1 + if "keywords" in validated_payload["filters"]: + validated_payload["filters"]["keyword_search"] = [ + es_minimal_sanitize(x) for x in validated_payload["filters"]["keywords"] + ] + validated_payload["filters"].pop("keywords") + query_with_filters = QueryWithFilters(QueryType.TRANSACTIONS) + filter_query = query_with_filters.generate_elasticsearch_query(validated_payload["filters"]) + search = TransactionSearch().filter(filter_query).sort(*sorts)[lower_limit:upper_limit] + response = search.handle_execute() + return Response(self.build_elasticsearch_result(validated_payload, response)) + + def build_elasticsearch_result(self, request, response) -> dict: + results = [] + for res in response: + hit = res.to_dict() + # Parsing API response values from ES query result JSON + # We parse the `hit` (result from elasticsearch) to get the award type, use the type to determine + # which lookup dict to use, and then use that lookup to retrieve the correct value requested from `fields` + row = {} + for field in request["fields"]: + match field: + case DerivedField.ASSISTANCE_LISTING: + row[DerivedField.ASSISTANCE_LISTING.value] = { + "cfda_number": hit.get("cfda_number"), + "cfda_title": hit.get("cfda_title"), + } + case DerivedField.RECIPIENT_LOCATION: + row[DerivedField.RECIPIENT_LOCATION.value] = { + "location_country_code": hit.get("recipient_location_country_code"), + "country_name": hit.get("recipient_location_country_name"), + "state_code": hit.get("recipient_location_state_code"), + "state_name": state_name_from_code(hit.get("recipient_location_state_code")), + "city_name": hit.get("recipient_location_city_name"), + "county_code": hit.get("recipient_location_county_code"), + "county_name": hit.get("recipient_location_county_name"), + "address_line1": hit.get("legal_entity_address_line1"), + "address_line2": hit.get("legal_entity_address_line2"), + "address_line3": hit.get("legal_entity_address_line3"), + "congressional_code": hit.get("recipient_location_congressional_code"), + "zip4": hit.get("legal_entity_zip_last4"), + "zip5": hit.get("recipient_location_zip5"), + "foreign_postal_code": hit.get("legal_entity_foreign_posta"), + "foreign_province": hit.get("legal_entity_foreign_provi"), + } + case DerivedField.PRIMARY_PLACE_OF_PERFORMANCE: + row[DerivedField.PRIMARY_PLACE_OF_PERFORMANCE.value] = { + "location_country_code": hit.get("pop_country_code"), + "country_name": hit.get("pop_country_name"), + "state_code": hit.get("pop_state_code"), + "state_name": state_name_from_code(hit.get("pop_state_code")), + "city_name": hit.get("pop_city_name"), + "county_code": hit.get("pop_county_code"), + "county_name": hit.get("pop_county_name"), + "congressional_code": hit.get("pop_congressional_code"), + "zip4": hit.get("place_of_perform_zip_last4"), + "zip5": hit.get("pop_zip5"), + } + case DerivedField.NAICS: + row[DerivedField.NAICS.value] = { + "code": hit.get("naics_code"), + "description": hit.get("naics_description"), + } + case DerivedField.PSC: + row[DerivedField.PSC.value] = { + "code": hit.get("product_or_service_code"), + "description": hit.get("product_or_service_description"), + } + case TransactionField.AWARDING_AGENCY_SLUG | TransactionField.FUNDING_AGENCY_SLUG: + row[field] = slugify(hit.get(TransactionField(field).short_path)) + case TransactionField.RECIPIENT_ID: + raw_value = hit.get(TransactionField.RECIPIENT_ID.short_path) + match_value = re.fullmatch(r"^(.*)/([CPR]{1})$", raw_value) + row[field] = f"{match_value[1]}-{match_value[2]}" if match_value else None + case _: + row[field] = hit.get(TransactionField(field).short_path) + + row["generated_internal_id"] = hit["generated_unique_award_id"] + row["internal_id"] = hit["award_id"] + + results.append(row) + + metadata = get_simple_pagination_metadata(len(response), request["limit"], request["page"]) + + return { + "limit": request["limit"], + "results": results[: request["limit"]], + "page_metadata": metadata, + "messages": get_generic_filters_message(request["filters"].keys(), [elem["name"] for elem in self.models]), + } + + def get_agency_slug(self, code): + code = str(code).zfill(3) + submission = ToptierAgencyPublishedDABSView.objects.filter(toptier_code=code).first() + if submission is None: + return None + return slugify(submission.name) diff --git a/usaspending_api/search/v2/views/spending_by_transaction_count.py b/usaspending_api/search/v2/views/spending_by_transaction_count.py new file mode 100644 index 0000000000..d46ca67639 --- /dev/null +++ b/usaspending_api/search/v2/views/spending_by_transaction_count.py @@ -0,0 +1,48 @@ +import copy +import logging + +from django.conf import settings +from rest_framework.response import Response +from rest_framework.views import APIView + +from usaspending_api.common.api_versioning import api_transformations, API_TRANSFORM_FUNCTIONS +from usaspending_api.common.cache_decorator import cache_response +from usaspending_api.common.elasticsearch.search_wrappers import TransactionSearch +from usaspending_api.common.query_with_filters import QueryWithFilters +from usaspending_api.search.filters.elasticsearch.filter import QueryType +from usaspending_api.common.validator.award_filter import AWARD_FILTER +from usaspending_api.common.validator.tinyshield import TinyShield +from usaspending_api.search.v2.elasticsearch_helper import spending_by_transaction_count +from usaspending_api.search.v2.es_sanitization import es_minimal_sanitize + +logger = logging.getLogger(__name__) + + +@api_transformations(api_version=settings.API_VERSION, function_list=API_TRANSFORM_FUNCTIONS) +class SpendingByTransactionCountVisualizationViewSet(APIView): + """ + This route takes transaction search fields, and returns the transaction counts of the searched term. + """ + + endpoint_doc = "usaspending_api/api_contracts/contracts/v2/search/spending_by_transaction_count.md" + + @cache_response() + def post(self, request): + models = [] + models.extend(copy.deepcopy(AWARD_FILTER)) + for m in models: + if m["name"] == "keywords": + m["optional"] = True + elif m["name"] == "keyword": + m["optional"] = True + validated_payload = TinyShield(models).block(request.data) + if "keywords" in validated_payload["filters"]: + validated_payload["filters"]["keyword_search"] = [ + es_minimal_sanitize(x) for x in validated_payload["filters"]["keywords"] + ] + validated_payload["filters"].pop("keywords") + query_with_filters = QueryWithFilters(QueryType.TRANSACTIONS) + filter_query = query_with_filters.generate_elasticsearch_query(validated_payload["filters"]) + search = TransactionSearch().filter(filter_query) + results = spending_by_transaction_count(search) + return Response({"results": results}) diff --git a/usaspending_api/search/v2/views/transaction_spending_summary.py b/usaspending_api/search/v2/views/transaction_spending_summary.py new file mode 100644 index 0000000000..7f38311af5 --- /dev/null +++ b/usaspending_api/search/v2/views/transaction_spending_summary.py @@ -0,0 +1,48 @@ +import logging + +from django.conf import settings +from rest_framework.response import Response +from rest_framework.views import APIView + +from usaspending_api.common.api_versioning import api_transformations, API_TRANSFORM_FUNCTIONS +from usaspending_api.common.cache_decorator import cache_response +from usaspending_api.common.validator.tinyshield import TinyShield +from usaspending_api.search.v2.elasticsearch_helper import spending_by_transaction_sum_and_count + +logger = logging.getLogger(__name__) + + +@api_transformations(api_version=settings.API_VERSION, function_list=API_TRANSFORM_FUNCTIONS) +class TransactionSummaryVisualizationViewSet(APIView): + """ + This route takes award filters, and returns the number of transactions and summation of federal action obligations. + """ + + endpoint_doc = "usaspending_api/api_contracts/contracts/v2/search/transaction_spending_summary.md" + + @cache_response() + def post(self, request): + """ + Returns a summary of transactions which match the award search filter + Desired values: + total number of transactions `award_count` + The federal_action_obligation sum of all those transactions `award_spending` + + *Note* Only deals with prime awards, future plans to include sub-awards. + """ + + models = [ + { + "name": "keywords", + "key": "filters|keywords", + "type": "array", + "array_type": "text", + "text_type": "search", + "optional": False, + "text_min": 3, + } + ] + validated_payload = TinyShield(models).block(request.data) + + results = spending_by_transaction_sum_and_count(validated_payload) + return Response({"results": results})