diff --git a/caching/private_api/crawler/private_api_crawler.py b/caching/private_api/crawler/private_api_crawler.py index 4f945d3e3..6ba3c2346 100644 --- a/caching/private_api/crawler/private_api_crawler.py +++ b/caching/private_api/crawler/private_api_crawler.py @@ -113,8 +113,7 @@ def process_pages(self, *, pages: list[TopicPage, CommonPage]) -> None: self.process_all_sections_in_page(page=page) except AttributeError: logger.info( - "`%s` page has no dynamic content blocks. " - "So only the headless CMS API detail has been processed", + "`%s` page has no dynamic content blocks. So only the headless CMS API detail has been processed", page.title, ) logger.info("Completed %s / %s pages", index, pages_count) diff --git a/cms/dashboard/management/commands/build_cms_site.py b/cms/dashboard/management/commands/build_cms_site.py index e7311509c..db5dd8a40 100644 --- a/cms/dashboard/management/commands/build_cms_site.py +++ b/cms/dashboard/management/commands/build_cms_site.py @@ -70,12 +70,8 @@ def handle(self, *args, **options): build_cms_site_helpers.create_landing_page(parent_page=root_page) - build_cms_site_helpers.create_acknowledgement_page( - name="acknowledgement", parent_page=root_page - ) - build_cms_site_helpers.create_feedback_page( - name="feedback", parent_page=root_page - ) + build_cms_site_helpers.create_acknowledgement_page(name="acknowledgement", parent_page=root_page) + build_cms_site_helpers.create_feedback_page(name="feedback", parent_page=root_page) build_cms_site_helpers.create_menu_snippet() @classmethod @@ -93,12 +89,8 @@ def _build_weather_health_alerts_section(cls, root_page: UKHSARootPage) -> None: weather_health_alerts_page = build_cms_site_helpers.create_composite_page( name="weather_health_alerts", parent_page=root_page ) - build_cms_site_helpers.create_composite_page( - name="heat_health_alerts", parent_page=weather_health_alerts_page - ) - build_cms_site_helpers.create_composite_page( - name="cold_health_alerts", 
parent_page=weather_health_alerts_page - ) + build_cms_site_helpers.create_composite_page(name="heat_health_alerts", parent_page=weather_health_alerts_page) + build_cms_site_helpers.create_composite_page(name="cold_health_alerts", parent_page=weather_health_alerts_page) @classmethod def _build_access_our_data_section(cls, root_page: UKHSARootPage) -> None: @@ -113,33 +105,23 @@ def _build_access_our_data_section(cls, root_page: UKHSARootPage) -> None: name="access_our_data_data_structure", parent_page=access_our_data_parent_page, ) - build_cms_site_helpers.create_bulk_downloads_page( - name="bulk_downloads", parent_page=root_page - ) + build_cms_site_helpers.create_bulk_downloads_page(name="bulk_downloads", parent_page=root_page) @classmethod def _build_respiratory_viruses_section(cls, root_page: UKHSARootPage) -> None: - covid_19_page = build_cms_site_helpers.create_topic_page( - name="covid_19", parent_page=root_page - ) - influenza_page = build_cms_site_helpers.create_topic_page( - name="influenza", parent_page=root_page - ) + covid_19_page = build_cms_site_helpers.create_topic_page(name="covid_19", parent_page=root_page) + influenza_page = build_cms_site_helpers.create_topic_page(name="influenza", parent_page=root_page) other_respiratory_viruses_page = build_cms_site_helpers.create_topic_page( name="other_respiratory_viruses", parent_page=root_page ) # Because the index page links to these pages # they need to be created first, referenced and then moved under the index page - respiratory_viruses_index_page = ( - build_cms_site_helpers.create_respiratory_viruses_index_page( - name="respiratory-viruses", parent_page=root_page - ) + respiratory_viruses_index_page = build_cms_site_helpers.create_respiratory_viruses_index_page( + name="respiratory-viruses", parent_page=root_page ) - other_respiratory_viruses_page.move( - target=respiratory_viruses_index_page, pos="last-child" - ) + other_respiratory_viruses_page.move(target=respiratory_viruses_index_page, 
pos="last-child") influenza_page.move(target=respiratory_viruses_index_page, pos="last-child") covid_19_page.move(target=respiratory_viruses_index_page, pos="last-child") @@ -161,19 +143,11 @@ def _build_cover_section(cls, root_page: UKHSARootPage) -> None: def _build_common_pages(cls, root_page: UKHSARootPage) -> None: build_cms_site_helpers.create_common_page(name="start", parent_page=root_page) build_cms_site_helpers.create_common_page(name="about", parent_page=root_page) - build_cms_site_helpers.create_common_page( - name="location_based_data", parent_page=root_page - ) - build_cms_site_helpers.create_common_page( - name="whats_coming", parent_page=root_page - ) + build_cms_site_helpers.create_common_page(name="location_based_data", parent_page=root_page) + build_cms_site_helpers.create_common_page(name="whats_coming", parent_page=root_page) build_cms_site_helpers.create_common_page(name="cookies", parent_page=root_page) - build_cms_site_helpers.create_common_page( - name="accessibility_statement", parent_page=root_page - ) - build_cms_site_helpers.create_common_page( - name="compliance", parent_page=root_page - ) + build_cms_site_helpers.create_common_page(name="accessibility_statement", parent_page=root_page) + build_cms_site_helpers.create_common_page(name="compliance", parent_page=root_page) @staticmethod def _clear_cms() -> None: diff --git a/cms/dashboard/management/commands/build_cms_site_helpers/index_pages.py b/cms/dashboard/management/commands/build_cms_site_helpers/index_pages.py index 0550ae1ec..f41d815da 100644 --- a/cms/dashboard/management/commands/build_cms_site_helpers/index_pages.py +++ b/cms/dashboard/management/commands/build_cms_site_helpers/index_pages.py @@ -4,9 +4,7 @@ def create_respiratory_viruses_index_page_body() -> list[dict]: covid_page = TopicPage.objects.get(slug="covid-19") influenza_page = TopicPage.objects.get(slug="influenza") - other_respiratory_viruses_page = TopicPage.objects.get( - slug="other-respiratory-viruses" - ) + 
other_respiratory_viruses_page = TopicPage.objects.get(slug="other-respiratory-viruses") return [ { @@ -45,9 +43,7 @@ def create_respiratory_viruses_index_page_body() -> list[dict]: def create_cover_index_page_body() -> list[dict]: - childhood_vaccinations_topic_page = TopicPage.objects.get( - slug="childhood-vaccinations" - ) + childhood_vaccinations_topic_page = TopicPage.objects.get(slug="childhood-vaccinations") return [ { "type": "text", diff --git a/cms/dashboard/management/commands/build_cms_site_helpers/landing_page.py b/cms/dashboard/management/commands/build_cms_site_helpers/landing_page.py index ca310a706..f02058f78 100644 --- a/cms/dashboard/management/commands/build_cms_site_helpers/landing_page.py +++ b/cms/dashboard/management/commands/build_cms_site_helpers/landing_page.py @@ -6,9 +6,7 @@ def create_landing_page_body_wih_page_links() -> list[dict]: respiratory_viruses_index = CompositePage.objects.get(slug="respiratory-viruses") covid_page = TopicPage.objects.get(slug="covid-19") influenza_page = TopicPage.objects.get(slug="influenza") - other_respiratory_viruses_page = TopicPage.objects.get( - slug="other-respiratory-viruses" - ) + other_respiratory_viruses_page = TopicPage.objects.get(slug="other-respiratory-viruses") weather_health_alerts_page = CompositePage.objects.get(slug="weather-health-alerts") return [ diff --git a/cms/dashboard/management/commands/build_cms_site_helpers/menu.py b/cms/dashboard/management/commands/build_cms_site_helpers/menu.py index 351c3c30f..a341f80d6 100644 --- a/cms/dashboard/management/commands/build_cms_site_helpers/menu.py +++ b/cms/dashboard/management/commands/build_cms_site_helpers/menu.py @@ -19,9 +19,7 @@ def _create_menu_data() -> list[dict]: landing_page = LandingPage.objects.first() covid_page = TopicPage.objects.get(slug="covid-19") flu_page = TopicPage.objects.get(slug="influenza") - other_respiratory_viruses_page = TopicPage.objects.get( - slug="other-respiratory-viruses" - ) + 
other_respiratory_viruses_page = TopicPage.objects.get(slug="other-respiratory-viruses") childhood_vaccinations_index_page = CompositePage.objects.get(slug="cover") weather_health_alerts_page = CompositePage.objects.get(slug="weather-health-alerts") diff --git a/cms/dashboard/management/commands/build_cms_site_helpers/pages.py b/cms/dashboard/management/commands/build_cms_site_helpers/pages.py index 1888ffa55..b4036c02d 100644 --- a/cms/dashboard/management/commands/build_cms_site_helpers/pages.py +++ b/cms/dashboard/management/commands/build_cms_site_helpers/pages.py @@ -77,9 +77,7 @@ def create_landing_page(*, parent_page: Page) -> LandingPage: return page -def _create_index_page( - *, page_data: dict, parent_page: Page, create_index_page_body_func: Callable -) -> CompositePage: +def _create_index_page(*, page_data: dict, parent_page: Page, create_index_page_body_func: Callable) -> CompositePage: index_page_body: list[dict] = create_index_page_body_func() page = CompositePage( @@ -134,7 +132,7 @@ def create_topic_page(*, name: str, parent_page: Page) -> TopicPage: seo_title=data["meta"]["seo_title"], search_description=data["meta"]["search_description"], is_public=data["is_public"], - page_classification=data["page_classification"] + page_classification=data["page_classification"], ) _add_page_to_parent(page=page, parent_page=parent_page) @@ -198,9 +196,7 @@ def _get_or_create_button_id() -> int: return internal_button_snippet.id -def _add_download_button_to_composite_body( - *, body: dict[list[dict]] -) -> dict[list[dict]]: +def _add_download_button_to_composite_body(*, body: dict[list[dict]]) -> dict[list[dict]]: body.append( { "type": "internal_button", diff --git a/ingestion/aws_client.py b/ingestion/aws_client.py index b5febae62..7fad3ffc8 100644 --- a/ingestion/aws_client.py +++ b/ingestion/aws_client.py @@ -1,4 +1,5 @@ import datetime +import json import logging import boto3 @@ -100,6 +101,15 @@ def move_file_to_failed_folder(self, *, key: str) -> None: 
self._copy_file_to_failed(key=key) self._delete_file_from_inbound(key=key) + def upload_json_to_inbound(self, *, key: str, payload: dict) -> None: + """Uploads a JSON payload to the inbound folder in the ingest bucket.""" + self._client.put_object( + Bucket=self._bucket_name, + Key=key, + Body=json.dumps(payload).encode("utf-8"), + ContentType="application/json", + ) + def _copy_file_to_processed(self, *, key: str) -> None: """Copies the file matching the given `key` into the processed folder within the s3 bucket diff --git a/metrics/api/permissions/fluent_permissions.py b/metrics/api/permissions/fluent_permissions.py index cd528254c..215bac06a 100644 --- a/metrics/api/permissions/fluent_permissions.py +++ b/metrics/api/permissions/fluent_permissions.py @@ -12,7 +12,7 @@ def validate_permissions_for_non_public( metric: str, geography: str, geography_type: str, - rbac_permissions: Iterable[RBACPermission] + rbac_permissions: Iterable[RBACPermission], ) -> bool: """Compares the given data parameters to see if the `rbac_permissions` allow access to the non-public data diff --git a/metrics/api/serializers/charts/single_category_charts.py b/metrics/api/serializers/charts/single_category_charts.py index 46b9d958f..76a4896ee 100644 --- a/metrics/api/serializers/charts/single_category_charts.py +++ b/metrics/api/serializers/charts/single_category_charts.py @@ -72,7 +72,6 @@ def __init__(self, *args, **kwargs): class ChartsSerializer(BaseChartsSerializer): - plots = ChartPlotsListSerializer() def to_models(self, request: Request) -> ChartRequestParams: diff --git a/metrics/api/serializers/charts/subplot_charts.py b/metrics/api/serializers/charts/subplot_charts.py index 99475d04c..c1775c871 100644 --- a/metrics/api/serializers/charts/subplot_charts.py +++ b/metrics/api/serializers/charts/subplot_charts.py @@ -47,16 +47,10 @@ def validate(self, data): subplot_theme = data.get("theme") subplot_sub_theme = data.get("sub_theme") if not subplot_theme and not chart_theme: - msg = 
( - "'theme' must be specified at either " - "subplot_parameters or chart_parameters level" - ) + msg = "'theme' must be specified at either subplot_parameters or chart_parameters level" raise serializers.ValidationError(msg) if not subplot_sub_theme and not chart_sub_theme: - msg = ( - "'sub_theme' must be specified at either " - "subplot_parameters or chart_parameters level" - ) + msg = "'sub_theme' must be specified at either subplot_parameters or chart_parameters level" raise serializers.ValidationError(msg) return data diff --git a/metrics/api/views/downloads/subplot_downloads/api_view.py b/metrics/api/views/downloads/subplot_downloads/api_view.py index 0e16e0980..6281530c5 100644 --- a/metrics/api/views/downloads/subplot_downloads/api_view.py +++ b/metrics/api/views/downloads/subplot_downloads/api_view.py @@ -106,7 +106,7 @@ def _write_headline_to_csv( metric_group: str, queryset: CoreTimeSeriesQuerySet | CoreHeadlineQuerySet, response: HttpResponse, - headers: list[str] | None + headers: list[str] | None, ) -> None: if DataSourceFileType[metric_group].is_headline: serializer = self._get_serializer_class( diff --git a/metrics/data/in_memory_models/geography_relationships/utla_to_region.py b/metrics/data/in_memory_models/geography_relationships/utla_to_region.py index 054aa8cf1..4696022cf 100644 --- a/metrics/data/in_memory_models/geography_relationships/utla_to_region.py +++ b/metrics/data/in_memory_models/geography_relationships/utla_to_region.py @@ -1,156 +1,156 @@ UTLA_TO_REGION_LOOKUP: dict[str, str] = { - "E06000001": "North East", # Hartlepool - "E06000002": "North East", # Middlesborough - "E06000003": "North East", # Redcar and Cleveland - "E06000004": "North East", # Stockton-on-Tees - "E06000005": "North East", # Darlington - "E06000006": "North West", # Halton - "E06000007": "North West", # Warrington - "E06000008": "North West", # Blackburn with Darwen - "E06000009": "North West", # Blackpool - "E06000010": "Yorkshire and The Humber", # Kingston 
upon Hull, City of - "E06000011": "Yorkshire and The Humber", # East Riding of Yorkshire - "E06000012": "Yorkshire and The Humber", # North East Lincolnshire - "E06000013": "Yorkshire and The Humber", # North Lincolnshire - "E06000014": "Yorkshire and The Humber", # York - "E06000015": "East Midlands", # Derby - "E06000016": "East Midlands", # Leicester - "E06000017": "East Midlands", # Rutland - "E06000018": "East Midlands", # Nottingham - "E06000019": "West Midlands", # Herefordshire, County of - "E06000020": "West Midlands", # Telford and Wrekin - "E06000021": "West Midlands", # Stoke-on-Trent - "E06000022": "South West", # Bath and North Somerset - "E06000023": "South West", # Bristol, City of - "E06000024": "South West", # North Somerset - "E06000025": "South West", # South Gloucestershire - "E06000026": "South West", # Plymouth - "E06000027": "South West", # Torbay - "E06000030": "South West", # Swindon - "E06000031": "East of England", # Peterborough - "E06000032": "East of England", # Luton - "E06000033": "East of England", # Southend-on-Sea - "E06000034": "East of England", # Thurrock - "E06000035": "South East", # Medway - "E06000036": "South East", # Bracknell Forest - "E06000037": "South East", # West Berkshire - "E06000038": "South East", # Reading - "E06000039": "South East", # Slough - "E06000040": "South East", # Windsor and Maidenhead - "E06000041": "South East", # Wokingham - "E06000042": "South East", # Milton Keynes - "E06000043": "South East", # Brighton and Hove - "E06000044": "South East", # Portsmouth - "E06000045": "South East", # Southampton - "E06000046": "South East", # Isle of Wight - "E06000047": "North East", # County Durham - "E06000049": "North West", # Cheshire East - "E06000050": "North West", # Cheshire West and Chester - "E06000051": "West Midlands", # Shropshire - "E06000052": "South West", # Cornwall - "E06000053": "South West", # Isles of Scilly - "E06000054": "South West", # Wiltshire - "E06000055": "East of England", # 
Bedford - "E06000056": "East of England", # Central Bedforshire - "E06000057": "North East", # Northumberland - "E06000058": "South West", # Bournemouth, Christchurch and Poole - "E06000059": "South West", # Dorset - "E06000060": "South East", # Buckinghamshire - "E06000061": "East Midlands", # North Northamptonshire - "E06000062": "East Midlands", # West Northamptonshire - "E06000063": "North West", # Cumberland - "E06000064": "North West", # Westmorland and Furness - "E10000023": "Yorkshire and The Humber", # North Yorkshire - "E10000027": "South West", # Somerset - "E08000001": "North West", # Bolton - "E08000002": "North West", # Bury - "E08000003": "North West", # Manchester - "E08000004": "North West", # Oldham - "E08000005": "North West", # Rochdale - "E08000006": "North West", # Salford - "E08000007": "North West", # Stockport - "E08000008": "North West", # Tameside - "E08000009": "North West", # Trafford - "E08000010": "North West", # Wigan - "E08000011": "North West", # Knowsley - "E08000012": "North West", # Liverpool - "E08000013": "North West", # St. 
Helens - "E08000014": "North West", # Sefton - "E08000015": "North West", # Wirral - "E08000016": "Yorkshire and The Humber", # Barnsley - "E08000017": "Yorkshire and The Humber", # Doncaster - "E08000018": "Yorkshire and The Humber", # Rotherham - "E08000019": "Yorkshire and The Humber", # Sheffield - "E08000021": "North East", # Newcastle upon Tyne - "E08000022": "North East", # North Tyneside - "E08000023": "North East", # South Tyneside - "E08000024": "North East", # Sunderland - "E08000025": "West Midlands", # Birmingham - "E08000026": "West Midlands", # Coventry - "E08000027": "West Midlands", # Dudley - "E08000028": "West Midlands", # Sandwell - "E08000029": "West Midlands", # Solihull - "E08000030": "West Midlands", # Walsall - "E08000031": "West Midlands", # Wolverhampton - "E08000032": "Yorkshire and The Humber", # Bradford - "E08000033": "Yorkshire and The Humber", # Calderdale - "E08000034": "Yorkshire and The Humber", # Kirklees - "E08000035": "Yorkshire and The Humber", # Leeds - "E08000036": "Yorkshire and The Humber", # Wakefield - "E08000037": "North East", # Gateshead - "E09000001": "London", # City of London - "E09000002": "London", # Barking and Dagenham - "E09000003": "London", # Barnet - "E09000004": "London", # Bexley - "E09000005": "London", # Brent - "E09000006": "London", # Bromley - "E09000007": "London", # Camden - "E09000008": "London", # Croydon - "E09000009": "London", # Ealing - "E09000010": "London", # Enfield - "E09000011": "London", # Greenwich - "E09000012": "London", # Hackney - "E09000013": "London", # Hammersmith and Fulham - "E09000014": "London", # Haringey - "E09000015": "London", # Harrow - "E09000016": "London", # Havering - "E09000017": "London", # Hillingdon - "E09000018": "London", # Hounslow - "E09000019": "London", # Islington - "E09000020": "London", # Kensington and Chelsea - "E09000021": "London", # Kingston upon Thames - "E09000022": "London", # Lambeth - "E09000023": "London", # Lewisham - "E09000024": "London", 
# Merton - "E09000025": "London", # Newham - "E09000026": "London", # Redbridge - "E09000027": "London", # Richmond upon Thames - "E09000028": "London", # Southwark - "E09000029": "London", # Sutton - "E09000030": "London", # Tower Hamlets - "E09000031": "London", # Waltham Forest - "E09000032": "London", # Wandsworth - "E09000033": "London", # Westminster - "E10000003": "East of England", # Cambridgeshire - "E10000006": "North West", # Cumbria - "E10000007": "East Midlands", # Derbyshire - "E10000008": "South West", # Devon - "E10000011": "South East", # East Sussex - "E10000012": "East of England", # Essex - "E10000013": "South West", # Gloucestershire - "E10000014": "South East", # Hampshire - "E10000015": "East of England", # Hertfordshire - "E10000016": "South East", # Kent - "E10000017": "North West", # Lancashire - "E10000018": "East Midlands", # Leicestershire - "E10000019": "East Midlands", # Lincolnshire - "E10000020": "East of England", # Norfolk - "E10000024": "East Midlands", # Nottinghamshire - "E10000025": "South East", # Oxfordshire - "E10000028": "West Midlands", # Staffordshire - "E10000029": "East of England", # Suffolk - "E10000030": "South East", # Surrey - "E10000031": "West Midlands", # Warwickshire - "E10000032": "South East", # West Sussex - "E10000034": "West Midlands", # Worcestershire + "E06000001": "North East", # Hartlepool + "E06000002": "North East", # Middlesbrough + "E06000003": "North East", # Redcar and Cleveland + "E06000004": "North East", # Stockton-on-Tees + "E06000005": "North East", # Darlington + "E06000006": "North West", # Halton + "E06000007": "North West", # Warrington + "E06000008": "North West", # Blackburn with Darwen + "E06000009": "North West", # Blackpool + "E06000010": "Yorkshire and The Humber", # Kingston upon Hull, City of + "E06000011": "Yorkshire and The Humber", # East Riding of Yorkshire + "E06000012": "Yorkshire and The Humber", # North East Lincolnshire + "E06000013": "Yorkshire and The Humber", # 
North Lincolnshire + "E06000014": "Yorkshire and The Humber", # York + "E06000015": "East Midlands", # Derby + "E06000016": "East Midlands", # Leicester + "E06000017": "East Midlands", # Rutland + "E06000018": "East Midlands", # Nottingham + "E06000019": "West Midlands", # Herefordshire, County of + "E06000020": "West Midlands", # Telford and Wrekin + "E06000021": "West Midlands", # Stoke-on-Trent + "E06000022": "South West", # Bath and North East Somerset + "E06000023": "South West", # Bristol, City of + "E06000024": "South West", # North Somerset + "E06000025": "South West", # South Gloucestershire + "E06000026": "South West", # Plymouth + "E06000027": "South West", # Torbay + "E06000030": "South West", # Swindon + "E06000031": "East of England", # Peterborough + "E06000032": "East of England", # Luton + "E06000033": "East of England", # Southend-on-Sea + "E06000034": "East of England", # Thurrock + "E06000035": "South East", # Medway + "E06000036": "South East", # Bracknell Forest + "E06000037": "South East", # West Berkshire + "E06000038": "South East", # Reading + "E06000039": "South East", # Slough + "E06000040": "South East", # Windsor and Maidenhead + "E06000041": "South East", # Wokingham + "E06000042": "South East", # Milton Keynes + "E06000043": "South East", # Brighton and Hove + "E06000044": "South East", # Portsmouth + "E06000045": "South East", # Southampton + "E06000046": "South East", # Isle of Wight + "E06000047": "North East", # County Durham + "E06000049": "North West", # Cheshire East + "E06000050": "North West", # Cheshire West and Chester + "E06000051": "West Midlands", # Shropshire + "E06000052": "South West", # Cornwall + "E06000053": "South West", # Isles of Scilly + "E06000054": "South West", # Wiltshire + "E06000055": "East of England", # Bedford + "E06000056": "East of England", # Central Bedfordshire + "E06000057": "North East", # Northumberland + "E06000058": "South West", # Bournemouth, Christchurch and Poole + "E06000059": "South West", # 
Dorset + "E06000060": "South East", # Buckinghamshire + "E06000061": "East Midlands", # North Northamptonshire + "E06000062": "East Midlands", # West Northamptonshire + "E06000063": "North West", # Cumberland + "E06000064": "North West", # Westmorland and Furness + "E10000023": "Yorkshire and The Humber", # North Yorkshire + "E10000027": "South West", # Somerset + "E08000001": "North West", # Bolton + "E08000002": "North West", # Bury + "E08000003": "North West", # Manchester + "E08000004": "North West", # Oldham + "E08000005": "North West", # Rochdale + "E08000006": "North West", # Salford + "E08000007": "North West", # Stockport + "E08000008": "North West", # Tameside + "E08000009": "North West", # Trafford + "E08000010": "North West", # Wigan + "E08000011": "North West", # Knowsley + "E08000012": "North West", # Liverpool + "E08000013": "North West", # St. Helens + "E08000014": "North West", # Sefton + "E08000015": "North West", # Wirral + "E08000016": "Yorkshire and The Humber", # Barnsley + "E08000017": "Yorkshire and The Humber", # Doncaster + "E08000018": "Yorkshire and The Humber", # Rotherham + "E08000019": "Yorkshire and The Humber", # Sheffield + "E08000021": "North East", # Newcastle upon Tyne + "E08000022": "North East", # North Tyneside + "E08000023": "North East", # South Tyneside + "E08000024": "North East", # Sunderland + "E08000025": "West Midlands", # Birmingham + "E08000026": "West Midlands", # Coventry + "E08000027": "West Midlands", # Dudley + "E08000028": "West Midlands", # Sandwell + "E08000029": "West Midlands", # Solihull + "E08000030": "West Midlands", # Walsall + "E08000031": "West Midlands", # Wolverhampton + "E08000032": "Yorkshire and The Humber", # Bradford + "E08000033": "Yorkshire and The Humber", # Calderdale + "E08000034": "Yorkshire and The Humber", # Kirklees + "E08000035": "Yorkshire and The Humber", # Leeds + "E08000036": "Yorkshire and The Humber", # Wakefield + "E08000037": "North East", # Gateshead + "E09000001": "London", 
# City of London + "E09000002": "London", # Barking and Dagenham + "E09000003": "London", # Barnet + "E09000004": "London", # Bexley + "E09000005": "London", # Brent + "E09000006": "London", # Bromley + "E09000007": "London", # Camden + "E09000008": "London", # Croydon + "E09000009": "London", # Ealing + "E09000010": "London", # Enfield + "E09000011": "London", # Greenwich + "E09000012": "London", # Hackney + "E09000013": "London", # Hammersmith and Fulham + "E09000014": "London", # Haringey + "E09000015": "London", # Harrow + "E09000016": "London", # Havering + "E09000017": "London", # Hillingdon + "E09000018": "London", # Hounslow + "E09000019": "London", # Islington + "E09000020": "London", # Kensington and Chelsea + "E09000021": "London", # Kingston upon Thames + "E09000022": "London", # Lambeth + "E09000023": "London", # Lewisham + "E09000024": "London", # Merton + "E09000025": "London", # Newham + "E09000026": "London", # Redbridge + "E09000027": "London", # Richmond upon Thames + "E09000028": "London", # Southwark + "E09000029": "London", # Sutton + "E09000030": "London", # Tower Hamlets + "E09000031": "London", # Waltham Forest + "E09000032": "London", # Wandsworth + "E09000033": "London", # Westminster + "E10000003": "East of England", # Cambridgeshire + "E10000006": "North West", # Cumbria + "E10000007": "East Midlands", # Derbyshire + "E10000008": "South West", # Devon + "E10000011": "South East", # East Sussex + "E10000012": "East of England", # Essex + "E10000013": "South West", # Gloucestershire + "E10000014": "South East", # Hampshire + "E10000015": "East of England", # Hertfordshire + "E10000016": "South East", # Kent + "E10000017": "North West", # Lancashire + "E10000018": "East Midlands", # Leicestershire + "E10000019": "East Midlands", # Lincolnshire + "E10000020": "East of England", # Norfolk + "E10000024": "East Midlands", # Nottinghamshire + "E10000025": "South East", # Oxfordshire + "E10000028": "West Midlands", # Staffordshire + "E10000029": 
"East of England", # Suffolk + "E10000030": "South East", # Surrey + "E10000031": "West Midlands", # Warwickshire + "E10000032": "South East", # West Sussex + "E10000034": "West Midlands", # Worcestershire } diff --git a/metrics/data/models/rbac_models/rbac_group_permissions.py b/metrics/data/models/rbac_models/rbac_group_permissions.py index e1008fe5d..bf7e63594 100644 --- a/metrics/data/models/rbac_models/rbac_group_permissions.py +++ b/metrics/data/models/rbac_models/rbac_group_permissions.py @@ -7,7 +7,6 @@ class RBACGroupPermission(models.Model): - class Meta: db_table = "rbac_group_permissions" diff --git a/metrics/domain/charts/chart_settings/subplot_chart_settings.py b/metrics/domain/charts/chart_settings/subplot_chart_settings.py index f24441792..c32610fa5 100644 --- a/metrics/domain/charts/chart_settings/subplot_chart_settings.py +++ b/metrics/domain/charts/chart_settings/subplot_chart_settings.py @@ -36,7 +36,6 @@ class SubplotChartSettings(ChartSettings): - def __init__(self, *, chart_generation_payload: SubplotChartGenerationPayload): super().__init__(chart_generation_payload=chart_generation_payload) self.subplot_data: SubplotGenerationData = chart_generation_payload.subplot_data diff --git a/metrics/domain/charts/utils.py b/metrics/domain/charts/utils.py index c246076f8..cdbea3bef 100644 --- a/metrics/domain/charts/utils.py +++ b/metrics/domain/charts/utils.py @@ -22,7 +22,6 @@ def convert_large_numbers_to_short_text(number: int) -> str: Eg: 1000 = 1k, 2500 = 2k, 2690 = 3k, 100,000,000 = 1m """ if number >= E_NOTATION[1]: - for index in range(len(E_NOTATION)): try: if E_NOTATION[index] <= number < E_NOTATION[index + 1]: diff --git a/metrics/domain/models/plots_text.py b/metrics/domain/models/plots_text.py index 94b9f8fb1..ddf01e88d 100644 --- a/metrics/domain/models/plots_text.py +++ b/metrics/domain/models/plots_text.py @@ -323,10 +323,7 @@ def _build_description_for_section_of_data( if end_value > start_value: part_description = f"It rose from 
{start_value} on {start_date} to {end_value} on {end_date}. " elif end_value == start_value: - part_description = ( - f"The date fluctuates between {start_value} on {start_date}, " - f"ending with the same value on {end_date}. " - ) + part_description = f"The date fluctuates between {start_value} on {start_date}, ending with the same value on {end_date}. " else: part_description = f"It fell from {start_value} on {start_date} to {end_value} on {end_date}. " diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py new file mode 100644 index 000000000..fe537d84d --- /dev/null +++ b/metrics/interfaces/management/commands/seed_random.py @@ -0,0 +1,901 @@ +import random +import re +import time +from collections.abc import Callable, Iterable +from datetime import date, datetime, timedelta +from decimal import Decimal +from operator import itemgetter +from typing import TypeVar, cast, override + +from django.core.management import CommandParser, call_command +from django.core.management.base import BaseCommand +from django.db import transaction +from django.db.models import Model + +from ingestion.aws_client import AWSClient +from metrics.data.enums import TimePeriod +from metrics.data.models.api_models import APITimeSeries +from metrics.data.models.core_models.supporting import ( + Age, + Geography, + GeographyType, + Metric, + Stratum, + SubTheme, + Theme, + Topic, +) +from metrics.data.models.core_models.timeseries import CoreTimeSeries +from validation import enums as validation_enums +from validation.geography_code import ( + NATION_GEOGRAPHY_CODES, + UNITED_KINGDOM_GEOGRAPHY_CODE, +) + +SCALE_CONFIGS = { + # Approximate time-series row counts generated by scale: + # small ~1,500, medium ~180,000, large ~7,300,000. 
+ "small": {"geographies": 5, "metrics": 10, "days": 30}, + "medium": {"geographies": 20, "metrics": 50, "days": 180}, + "large": {"geographies": 100, "metrics": 200, "days": 365}, +} +SEED_RANDOM_SEX_OPTIONS = ("all", "f", "m") +TModel = TypeVar("TModel", bound=Model) + + +class Command(BaseCommand): + @override + def add_arguments(self, parser: CommandParser) -> None: + parser.add_argument( + "--dataset", + choices=["cms", "metrics", "both"], + default="both", + help="Which dataset to seed: CMS, metrics, or both.", + ) + parser.add_argument( + "--scale", + choices=["small", "medium", "large"], + default="small", + help="Size of the random metrics dataset to generate.", + ) + parser.add_argument( + "--seed", + type=int, + required=False, + default=None, + help="Optional random seed for reproducible metric values.", + ) + parser.add_argument( + "--truncate-first", + action="store_true", + default=False, + help="Clear existing metrics tables before seeding to avoid duplicates.", + ) + parser.add_argument( + "--delivery", + choices=["db", "s3"], + default="db", + help="Delivery mode for metrics dataset: database insert or s3 ingestion files.", + ) + parser.add_argument( + "--non-public", + action="store_true", + default=False, + help="Mark generated metric points as non-public (`is_public=False`).", + ) + + def handle(self, *args, **options) -> None: + started_at = time.perf_counter() + dataset: str = options["dataset"] + scale: str = options["scale"] + truncate_first: bool = options["truncate_first"] + delivery: str = options["delivery"] + is_public: bool = not options["non_public"] + + selected_seed = ( + options["seed"] if options["seed"] is not None else int(time.time()) + ) + random.seed(selected_seed) # noqa: S311 # nosec B311 + self.stdout.write(f"Seed used: {selected_seed}") + + should_seed_cms = dataset in {"cms", "both"} + should_seed_metrics = dataset in {"metrics", "both"} + + counts: dict[str, int] = { + "Theme": 0, + "SubTheme": 0, + "Topic": 0, + 
"Metric": 0, + "Geography": 0, + "CoreTimeSeries": 0, + "APITimeSeries": 0, + } + + if should_seed_metrics: + scale_config = SCALE_CONFIGS[scale] + self.stderr.write("Seeding metrics dataset...") + if delivery == "s3": + counts = self._seed_metrics_data_to_s3( + scale_config=scale_config, + is_public=is_public, + progress_callback=self.stderr.write, + ) + else: + counts = self._seed_metrics_data( + scale_config=scale_config, + truncate_first=truncate_first, + is_public=is_public, + progress_callback=self.stderr.write, + ) + self.stderr.write("Metrics dataset seeding complete.") + + if should_seed_cms: + self.stderr.write("Building CMS site data...") + call_command("build_cms_site") + self.stderr.write("CMS site build complete.") + + runtime_seconds = time.perf_counter() - started_at + self._print_summary( + dataset=dataset, + scale=scale, + seed=selected_seed, + counts=counts, + runtime_seconds=runtime_seconds, + ) + + @classmethod + def _seed_metrics_data( + cls, + *, + scale_config: dict[str, int], + truncate_first: bool, + is_public: bool, + progress_callback: Callable[[str], None] | None = None, + ) -> dict[str, int]: + """Seed supporting metric models and time series rows for the selected scale. + + Args: + scale_config: Scale-specific object counts for generated records. + truncate_first: Whether to clear existing metrics-related tables before seeding. + is_public: Whether generated metric rows should be marked as public. + progress_callback: Optional callback used to report progress updates. + + Returns: + Count of created records keyed by model or dataset name. 
+ """ + if progress_callback is not None: + progress_callback("Preparing metric taxonomy and geography records...") + + with transaction.atomic(): + if truncate_first: + cls._truncate_metrics_data() + + themes, sub_themes, topics = cls._seed_theme_hierarchy() + metric_start_index = cls._get_next_random_metric_index() + + metrics = cls._bulk_create( + Metric, + [ + Metric( + name=f"Random Metric {metric_start_index + index}", + topic=topics[index % len(topics)], + ) + for index in range(scale_config["metrics"]) + ], + ) + + geographies = cls._seed_geographies(count=scale_config["geographies"]) + + stratum_record, _ = Stratum.objects.get_or_create(name="All") + age_record, _ = Age.objects.get_or_create(name="All ages") + stratum = cast(Stratum, stratum_record) + age = cast(Age, age_record) + + if progress_callback is not None: + progress_callback("Generating Core/API time series rows...") + core_count, api_count = cls._seed_time_series_rows( + metrics=metrics, + geographies=geographies, + stratum=stratum, + age=age, + days=scale_config["days"], + is_public=is_public, + progress_callback=progress_callback, + ) + + return { + "Theme": len(themes), + "SubTheme": len(sub_themes), + "Topic": len(topics), + "Metric": len(metrics), + "Geography": len(geographies), + "CoreTimeSeries": core_count, + "APITimeSeries": api_count, + } + + @classmethod + def _seed_metrics_data_to_s3( + cls, + *, + scale_config: dict[str, int], + is_public: bool, + progress_callback: Callable[[str], None] | None = None, + ) -> dict[str, int]: + if progress_callback is not None: + progress_callback("Generating ingestion payloads for S3 upload...") + + payloads = cls._build_timeseries_ingestion_payloads( + scale_config=scale_config, + is_public=is_public, + ) + client = AWSClient() + uploaded_files = 0 + for payload_index, payload in enumerate(payloads, start=1): + key = cls._build_s3_object_key(payload=payload, payload_index=payload_index) + client.upload_json_to_inbound(key=key, payload=payload) + 
uploaded_files += 1 + + if progress_callback is not None: + progress_callback( + f"Uploaded {uploaded_files:,} files to ingest bucket in/." + ) + + topic_rows = cls._build_theme_hierarchy_records()[2] + theme_count = len({theme_name for _, _, theme_name in topic_rows}) + sub_theme_count = len( + { + (sub_theme_name, theme_name) + for _, sub_theme_name, theme_name in topic_rows + } + ) + topic_count = len( + { + (topic_name, sub_theme_name, theme_name) + for topic_name, sub_theme_name, theme_name in topic_rows + } + ) + geography_count = len( + cls._build_geography_seed_values(count=scale_config["geographies"]) + ) + row_count = scale_config["metrics"] * geography_count * scale_config["days"] + + return { + "Theme": theme_count, + "SubTheme": sub_theme_count, + "Topic": topic_count, + "Metric": scale_config["metrics"], + "Geography": geography_count, + "CoreTimeSeries": row_count, + "APITimeSeries": row_count, + } + + @classmethod + def _truncate_metrics_data(cls) -> None: + """Delete all seeded metrics-related rows in dependency-safe order.""" + APITimeSeries.objects.all().delete() + CoreTimeSeries.objects.all().delete() + Metric.objects.all().delete() + Topic.objects.all().delete() + SubTheme.objects.all().delete() + Theme.objects.all().delete() + Geography.objects.all().delete() + GeographyType.objects.all().delete() + Age.objects.all().delete() + Stratum.objects.all().delete() + + @classmethod + def _seed_time_series_rows( + cls, + *, + metrics: list[Metric], + geographies: list[Geography], + stratum: Stratum, + age: Age, + days: int, + is_public: bool, + progress_callback: Callable[[str], None] | None = None, + ) -> tuple[int, int]: + frequency = TimePeriod.Weekly.value + start_date = date.today() - timedelta(days=days - 1) + batch_size = 5000 + core_rows: list[CoreTimeSeries] = [] + api_rows: list[APITimeSeries] = [] + core_count = 0 + api_count = 0 + total_metrics = len(metrics) + total_row_count = total_metrics * len(geographies) * days + log_interval = 
max(1, total_metrics // 10) if total_metrics else 1 + + for metric_index, metric in enumerate(metrics, start=1): + for core_row, api_row in cls._build_time_series_rows_for_metric( + metric=metric, + geographies=geographies, + stratum=stratum, + age=age, + days=days, + is_public=is_public, + start_date=start_date, + frequency=frequency, + ): + core_rows.append(core_row) + core_rows, core_count = cls._flush_batch( + model=CoreTimeSeries, + rows=core_rows, + batch_size=batch_size, + current_count=core_count, + ) + + api_rows.append(api_row) + api_rows, api_count = cls._flush_batch( + model=APITimeSeries, + rows=api_rows, + batch_size=batch_size, + current_count=api_count, + ) + + if progress_callback is not None and ( + metric_index == total_metrics or metric_index % log_interval == 0 + ): + processed_row_count = metric_index * len(geographies) * days + progress_callback( + f"Processed {metric_index}/{total_metrics} metrics " + f"({processed_row_count:,}/{total_row_count:,} row groups)." + ) + + core_count = cls._flush_remaining( + model=CoreTimeSeries, + rows=core_rows, + batch_size=batch_size, + current_count=core_count, + ) + api_count = cls._flush_remaining( + model=APITimeSeries, + rows=api_rows, + batch_size=batch_size, + current_count=api_count, + ) + + if progress_callback is not None: + progress_callback( + f"Inserted {core_count:,} CoreTimeSeries rows and {api_count:,} APITimeSeries rows." 
+ ) + + return core_count, api_count + + @staticmethod + def _flush_batch( + *, + model: type[TModel], + rows: list[TModel], + batch_size: int, + current_count: int, + ) -> tuple[list[TModel], int]: + if len(rows) < batch_size: + return rows, current_count + + model.objects.bulk_create(rows, batch_size=batch_size) + return [], current_count + len(rows) + + @staticmethod + def _flush_remaining( + *, + model: type[TModel], + rows: list[TModel], + batch_size: int, + current_count: int, + ) -> int: + if not rows: + return current_count + + model.objects.bulk_create(rows, batch_size=batch_size) + return current_count + len(rows) + + @classmethod + def _seed_theme_hierarchy(cls) -> tuple[list[Theme], list[SubTheme], list[Topic]]: + theme_names, sub_theme_rows, topic_rows = cls._build_theme_hierarchy_records() + themes, themes_by_name = cls._upsert_themes(theme_names=theme_names) + sub_themes, sub_themes_by_key = cls._upsert_sub_themes( + theme_names=theme_names, + sub_theme_rows=sub_theme_rows, + themes_by_name=themes_by_name, + ) + topics = cls._upsert_topics( + topic_rows=topic_rows, + sub_themes_by_key=sub_themes_by_key, + ) + return themes, sub_themes, topics + + @classmethod + def _upsert_themes( + cls, + *, + theme_names: list[str], + ) -> tuple[list[Theme], dict[str, Theme]]: + themes_by_name = { + theme.name: theme for theme in Theme.objects.filter(name__in=theme_names) + } + missing_theme_names = [ + name for name in theme_names if name not in themes_by_name + ] + if missing_theme_names: + cls._bulk_create(Theme, [Theme(name=name) for name in missing_theme_names]) + themes_by_name.update( + { + theme.name: theme + for theme in Theme.objects.filter(name__in=missing_theme_names) + } + ) + return [themes_by_name[name] for name in theme_names], themes_by_name + + @classmethod + def _upsert_sub_themes( + cls, + *, + theme_names: list[str], + sub_theme_rows: list[tuple[str, str]], + themes_by_name: dict[str, Theme], + ) -> tuple[list[SubTheme], dict[tuple[str, str], 
SubTheme]]: + sub_theme_keys = list(dict.fromkeys(sub_theme_rows)) + existing_sub_themes = SubTheme.objects.select_related("theme").filter( + theme__name__in=theme_names, + name__in={name for name, _ in sub_theme_keys}, + ) + sub_themes_by_key = { + (sub_theme.name, sub_theme.theme.name): sub_theme + for sub_theme in existing_sub_themes + } + missing_sub_theme_keys = [ + (sub_theme_name, theme_name) + for sub_theme_name, theme_name in sub_theme_keys + if (sub_theme_name, theme_name) not in sub_themes_by_key + ] + if missing_sub_theme_keys: + cls._bulk_create( + SubTheme, + [ + SubTheme(name=sub_theme_name, theme=themes_by_name[theme_name]) + for sub_theme_name, theme_name in missing_sub_theme_keys + ], + ) + sub_themes_by_key.update( + { + (sub_theme.name, sub_theme.theme.name): sub_theme + for sub_theme in SubTheme.objects.select_related("theme").filter( + theme__name__in={ + theme_name for _, theme_name in missing_sub_theme_keys + }, + name__in={ + sub_theme_name + for sub_theme_name, _ in missing_sub_theme_keys + }, + ) + } + ) + return [sub_themes_by_key[key] for key in sub_theme_keys], sub_themes_by_key + + @classmethod + def _upsert_topics( + cls, + *, + topic_rows: list[tuple[str, str, str]], + sub_themes_by_key: dict[tuple[str, str], SubTheme], + ) -> list[Topic]: + topic_keys = list(dict.fromkeys(topic_rows)) + sub_themes_by_id_key = { + (sub_theme_name, theme_name): sub_themes_by_key[ + (sub_theme_name, theme_name) + ] + for _, sub_theme_name, theme_name in topic_keys + } + candidate_sub_theme_ids = [ + sub_theme.id for sub_theme in sub_themes_by_id_key.values() + ] + existing_topics = Topic.objects.filter( + sub_theme_id__in=candidate_sub_theme_ids, + name__in={topic_name for topic_name, _, _ in topic_keys}, + ) + topics_by_key = { + (topic.name, topic.sub_theme_id): topic for topic in existing_topics + } + missing_topic_keys = [ + topic_key + for topic_key in topic_keys + if ( + topic_key[0], + sub_themes_by_id_key[(topic_key[1], topic_key[2])].id, + ) 
+ not in topics_by_key + ] + if missing_topic_keys: + cls._bulk_create( + Topic, + [ + Topic( + name=topic_name, + sub_theme=sub_themes_by_id_key[(sub_theme_name, theme_name)], + ) + for topic_name, sub_theme_name, theme_name in missing_topic_keys + ], + ) + topics_by_key.update( + { + (topic.name, topic.sub_theme_id): topic + for topic in Topic.objects.filter( + sub_theme_id__in=[ + sub_themes_by_id_key[(sub_theme_name, theme_name)].id + for _, sub_theme_name, theme_name in missing_topic_keys + ], + name__in={ + topic_name for topic_name, _, _ in missing_topic_keys + }, + ) + } + ) + return [ + topics_by_key[ + ( + topic_name, + sub_themes_by_id_key[(sub_theme_name, theme_name)].id, + ) + ] + for topic_name, sub_theme_name, theme_name in topic_keys + ] + + @classmethod + def _seed_geographies(cls, *, count: int) -> list[Geography]: + geography_seed_values = cls._build_geography_seed_values(count=count) + geography_type_names = { + record["geography_type"] for record in geography_seed_values + } + geography_type_names = sorted(geography_type_names) + geography_types_by_name = { + geography_type.name: geography_type + for geography_type in GeographyType.objects.filter( + name__in=geography_type_names + ) + } + missing_geography_type_names = [ + name for name in geography_type_names if name not in geography_types_by_name + ] + if missing_geography_type_names: + cls._bulk_create( + GeographyType, + [GeographyType(name=name) for name in missing_geography_type_names], + ) + geography_types_by_name.update( + { + geography_type.name: geography_type + for geography_type in GeographyType.objects.filter( + name__in=missing_geography_type_names + ) + } + ) + geography_types_by_name = { + name: geography_types_by_name[name] for name in geography_type_names + } + + geography_keys = list( + dict.fromkeys( + (record["name"], record["geography_type"], record["geography_code"]) + for record in geography_seed_values + ) + ) + existing_geographies = Geography.objects.select_related( 
+ "geography_type" + ).filter( + name__in={name for name, _, _ in geography_keys}, + geography_type__name__in={ + geography_type for _, geography_type, _ in geography_keys + }, + ) + geographies_by_key = { + (geography.name, geography.geography_type.name): geography + for geography in existing_geographies + } + missing_geography_keys = [ + (name, geography_type, geography_code) + for name, geography_type, geography_code in geography_keys + if (name, geography_type) not in geographies_by_key + ] + if missing_geography_keys: + cls._bulk_create( + Geography, + [ + Geography( + name=name, + geography_code=geography_code, + geography_type=geography_types_by_name[geography_type], + ) + for name, geography_type, geography_code in missing_geography_keys + ], + ) + geographies_by_key.update( + { + (geography.name, geography.geography_type.name): geography + for geography in Geography.objects.select_related( + "geography_type" + ).filter( + name__in={name for name, _, _ in missing_geography_keys}, + geography_type__name__in={ + geography_type + for _, geography_type, _ in missing_geography_keys + }, + ) + } + ) + + return [ + geographies_by_key[(name, geography_type)] + for name, geography_type, _ in geography_keys + ] + + @classmethod + def _build_time_series_rows_for_metric( + cls, + *, + metric: Metric, + geographies: list[Geography], + stratum: Stratum, + age: Age, + days: int, + is_public: bool, + start_date: date, + frequency: str, + ) -> Iterable[tuple[CoreTimeSeries, APITimeSeries]]: + topic = metric.topic + sub_theme = topic.sub_theme + theme = sub_theme.theme + + for geography in geographies: + for day_offset in range(days): + current_date = start_date + timedelta(days=day_offset) + base_value = random.uniform(5.0, 250.0) # noqa: S311 # nosec B311 + metric_value = round( + base_value + + random.uniform(-10.0, 10.0), # noqa: S311 # nosec B311 + 2, + ) + sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec B311 + epidemiological_week = 
current_date.isocalendar().week + + yield ( + CoreTimeSeries( + metric=metric, + metric_frequency=frequency, + geography=geography, + stratum=stratum, + age=age, + sex=sex, + year=current_date.year, + month=current_date.month, + epiweek=epidemiological_week, + date=current_date, + metric_value=Decimal(str(metric_value)), + is_public=is_public, + ), + APITimeSeries( + metric_frequency=frequency, + age=age.name, + month=current_date.month, + geography_code=geography.geography_code, + metric_group=None, + theme=theme.name, + sub_theme=sub_theme.name, + topic=topic.name, + geography_type=geography.geography_type.name, + geography=geography.name, + metric=metric.name, + stratum=stratum.name, + sex=sex, + year=current_date.year, + epiweek=epidemiological_week, + date=current_date, + metric_value=float(metric_value), + is_public=is_public, + ), + ) + + @classmethod + def _build_timeseries_ingestion_payloads( + cls, + *, + scale_config: dict[str, int], + is_public: bool, + ) -> list[dict[str, object]]: + _, _, topic_rows = cls._build_theme_hierarchy_records() + geographies = cls._build_geography_seed_values( + count=scale_config["geographies"] + ) + refresh_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + start_date = date.today() - timedelta(days=scale_config["days"] - 1) + payloads: list[dict[str, object]] = [] + + for metric_index in range(scale_config["metrics"]): + topic_name, sub_theme_name, theme_name = topic_rows[ + metric_index % len(topic_rows) + ] + metric_name = f"{topic_name}_cases_randomByDay_{metric_index + 1}" + for geography in geographies: + time_series_rows: list[dict[str, object]] = [] + for day_offset in range(scale_config["days"]): + current_date = start_date + timedelta(days=day_offset) + metric_value = round( + random.uniform(5.0, 250.0), # noqa: S311 # nosec B311 + 2, + ) + time_series_rows.append( + { + "epiweek": current_date.isocalendar().week, + "date": current_date.isoformat(), + "metric_value": metric_value, + "embargo": None, + 
"is_public": is_public, + } + ) + + sex_value = random.choice( # noqa: S311 # nosec B311 + SEED_RANDOM_SEX_OPTIONS + ) + payloads.append( + { + "parent_theme": theme_name, + "child_theme": sub_theme_name, + "topic": topic_name, + "metric_group": "cases", + "metric": metric_name, + "metric_frequency": TimePeriod.Weekly.value, + "geography_type": geography["geography_type"], + "geography": geography["name"], + "geography_code": geography["geography_code"], + "age": "all", + "sex": sex_value, + "stratum": "default", + "refresh_date": refresh_date, + "time_series": time_series_rows, + } + ) + + return payloads + + @classmethod + def _build_s3_object_key( + cls, + *, + payload: dict[str, object], + payload_index: int, + ) -> str: + topic_name = str(payload["topic"]) + metric_name = str(payload["metric"]) + geography_code = str(payload["geography_code"]) + age = str(payload["age"]) + sex = str(payload["sex"]) + stratum = str(payload["stratum"]) + safe_topic = cls._normalise_key(topic_name) + safe_metric = cls._normalise_key(metric_name) + return f"in/{safe_topic}_cases_{safe_metric}_{geography_code}_{age}_{sex}_{stratum}_{payload_index}.json" + + @staticmethod + def _bulk_create(model: type[TModel], records: Iterable[TModel]) -> list[TModel]: + """Materialise and bulk insert a sequence of model instances.""" + return model.objects.bulk_create(list(records)) + + @staticmethod + def _get_next_random_metric_index() -> int: + max_metric_index = 0 + for metric_name in Metric.objects.filter( + name__startswith="Random Metric " + ).values_list( + "name", + flat=True, + ): + match = re.fullmatch(r"Random Metric (\d+)", metric_name) + if match is None: + continue + max_metric_index = max(max_metric_index, int(match.group(1))) + return max_metric_index + 1 + + @classmethod + def _build_theme_hierarchy_records( + cls, + ) -> tuple[list[str], list[tuple[str, str]], list[tuple[str, str, str]]]: + child_to_parent: dict[str, str] = {} + normalised_to_child: dict[str, str] = {} + 
parent_by_name = validation_enums.ParentTheme.__members__ + + for child_theme_group in validation_enums.ChildTheme: + resolved_parent = ( + parent_by_name[child_theme_group.name].value + if child_theme_group.name in parent_by_name + else validation_enums.ParentTheme.INFECTIOUS_DISEASE.value + ) + for sub_theme_name in child_theme_group.return_list(): + child_to_parent[sub_theme_name] = resolved_parent + normalised_to_child[cls._normalise_key(sub_theme_name)] = sub_theme_name + + topic_rows: list[tuple[str, str, str]] = [] + sub_theme_pairs: set[tuple[str, str]] = set() + for topic_group in validation_enums.Topic: + normalised_topic_group = cls._normalise_key(topic_group.name) + sub_theme_name = normalised_to_child.get(normalised_topic_group) + if sub_theme_name is None: + continue + + parent_theme_name = child_to_parent[sub_theme_name] + sub_theme_pairs.add((sub_theme_name, parent_theme_name)) + topic_rows.extend( + (topic_value, sub_theme_name, parent_theme_name) + for topic_value in topic_group.return_list() + ) + + theme_names = sorted({parent_name for _, parent_name in sub_theme_pairs}) + sub_theme_rows = sorted( + sub_theme_pairs, + key=itemgetter(1, 0), + ) + return theme_names, sub_theme_rows, topic_rows + + @classmethod + def _build_geography_seed_values(cls, *, count: int) -> list[dict[str, str]]: + geographies: list[dict[str, str]] = [ + { + "name": "United Kingdom", + "geography_code": UNITED_KINGDOM_GEOGRAPHY_CODE, + "geography_type": (validation_enums.GeographyType.UNITED_KINGDOM.value), + } + ] + + geographies.extend( + { + "name": name, + "geography_code": code, + "geography_type": validation_enums.GeographyType.NATION.value, + } + for name, code in NATION_GEOGRAPHY_CODES.items() + ) + + if len(geographies) >= count: + return geographies[:count] + + extra_required = count - len(geographies) + geographies.extend( + { + "name": cls._format_enum_name(ltla.name), + "geography_code": ltla.value, + "geography_type": ( + 
validation_enums.GeographyType.LOWER_TIER_LOCAL_AUTHORITY.value + ), + } + for ltla in list(validation_enums.LTLAs)[:extra_required] + ) + return geographies[:count] + + @staticmethod + def _normalise_key(value: str) -> str: + return value.lower().replace("-", "_") + + @staticmethod + def _format_enum_name(value: str) -> str: + return value.replace("_", " ").title() + + def _print_summary( + self, + *, + dataset: str, + scale: str, + seed: int, + counts: dict[str, int], + runtime_seconds: float, + ) -> None: + self.stdout.write("") + self.stdout.write("Seed random summary:") + self.stdout.write(f" dataset: {dataset}") + self.stdout.write(f" scale: {scale}") + self.stdout.write(f" seed used: {seed}") + self.stdout.write(f" Theme: {counts['Theme']}") + self.stdout.write(f" SubTheme: {counts['SubTheme']}") + self.stdout.write(f" Topic: {counts['Topic']}") + self.stdout.write(f" Metric: {counts['Metric']}") + self.stdout.write(f" Geography: {counts['Geography']}") + self.stdout.write(f" CoreTimeSeries: {counts['CoreTimeSeries']}") + self.stdout.write(f" APITimeSeries: {counts['APITimeSeries']}") + self.stdout.write(f" runtime seconds: {runtime_seconds:.2f}") diff --git a/metrics/interfaces/plots/access.py b/metrics/interfaces/plots/access.py index 6e4eac34d..e024efdae 100644 --- a/metrics/interfaces/plots/access.py +++ b/metrics/interfaces/plots/access.py @@ -37,10 +37,7 @@ def __init__(self): class DataNotFoundForAnyPlotError(Exception): def __init__(self): - message = ( - "No data was found for the plot(s) requested, " - "please review the request parameters of each plot provided." - ) + message = "No data was found for the plot(s) requested, please review the request parameters of each plot provided." 
super().__init__(message) diff --git a/metrics/interfaces/weather_health_alerts/access.py b/metrics/interfaces/weather_health_alerts/access.py index 132468e3f..d229c050e 100644 --- a/metrics/interfaces/weather_health_alerts/access.py +++ b/metrics/interfaces/weather_health_alerts/access.py @@ -50,7 +50,7 @@ def build_summary_data_for_alerts( geography_code: self._parse_core_headline_as_alarm_state( topic=topic, core_headline=core_headline ) - for geography_code, core_headline, in headlines_mapping.items() + for geography_code, core_headline in headlines_mapping.items() } alarm_states = { diff --git a/tests/system/test_seed_random.py b/tests/system/test_seed_random.py new file mode 100644 index 000000000..1bac11bcd --- /dev/null +++ b/tests/system/test_seed_random.py @@ -0,0 +1,69 @@ +from urllib.parse import quote + +import pytest +from django.core.management import call_command +from rest_framework.test import APIClient + +from metrics.data.models.api_models import APITimeSeries +from metrics.data.models.core_models.supporting import Geography, Metric +from metrics.data.models.core_models.timeseries import CoreTimeSeries + +EXPECTED_METRIC_COUNT = 10 +EXPECTED_GEOGRAPHY_COUNT = 5 +EXPECTED_TIME_SERIES_COUNT = 1_500 +HTTP_OK = 200 + + +class TestSeedRandomCommand: + @pytest.mark.django_db + def test_command_seeds_metrics_dataset_and_data_is_queryable_via_api(self): + """ + Given an empty metrics dataset + When the `seed_random` management command is run for small metrics scale + Then the expected amount of data is inserted + And it can be queried from the public tables API endpoint + """ + # Given + assert Metric.objects.count() == 0 + assert Geography.objects.count() == 0 + assert CoreTimeSeries.objects.count() == 0 + assert APITimeSeries.objects.count() == 0 + + # When + call_command( + "seed_random", + dataset="metrics", + scale="small", + seed=12345, + truncate_first=True, + ) + + # Then + assert Metric.objects.count() == EXPECTED_METRIC_COUNT + assert 
Geography.objects.count() == EXPECTED_GEOGRAPHY_COUNT + assert CoreTimeSeries.objects.count() == EXPECTED_TIME_SERIES_COUNT + assert APITimeSeries.objects.count() == EXPECTED_TIME_SERIES_COUNT + + sample_row = APITimeSeries.objects.order_by("id").first() + assert sample_row is not None + + api_client = APIClient() + path = ( + "/api/public/timeseries/" + f"themes/{quote(sample_row.theme, safe='')}/" + f"sub_themes/{quote(sample_row.sub_theme, safe='')}/" + f"topics/{quote(sample_row.topic, safe='')}/" + f"geography_types/{quote(sample_row.geography_type, safe='')}/" + f"geographies/{quote(sample_row.geography, safe='')}/" + "metrics" + ) + response = api_client.get( + path=path, + format="json", + HTTP_ACCEPT="application/json", + ) + + assert response.status_code == HTTP_OK + assert isinstance(response.data, list) + metric_names = [item["name"] for item in response.data] + assert sample_row.metric in metric_names diff --git a/tests/unit/ingestion/test_aws_client.py b/tests/unit/ingestion/test_aws_client.py index 578feb7bc..be4eb86a2 100644 --- a/tests/unit/ingestion/test_aws_client.py +++ b/tests/unit/ingestion/test_aws_client.py @@ -224,12 +224,7 @@ def test_move_file_to_processed_folder_records_correct_log( expected_processed_folder: str = ( aws_client_with_mocked_boto_client._processed_folder ) - expected_log = ( - f"Moving `{expected_filename}` " - f"from `{expected_inbound_folder}` " - f"to `{expected_processed_folder}` " - f"in s3" - ) + expected_log = f"Moving `{expected_filename}` from `{expected_inbound_folder}` to `{expected_processed_folder}` in s3" assert expected_log in caplog.text # Tests for the `move_file_to_failed_folder()` method @@ -299,12 +294,7 @@ def test_move_file_to_failed_folder_records_correct_log( aws_client_with_mocked_boto_client._inbound_folder ) expected_failed_folder: str = aws_client_with_mocked_boto_client._failed_folder - expected_log = ( - f"Moving `{expected_filename}` " - f"from `{expected_inbound_folder}` " - f"to 
`{expected_failed_folder}` " - f"in s3" - ) + expected_log = f"Moving `{expected_filename}` from `{expected_inbound_folder}` to `{expected_failed_folder}` in s3" assert expected_log in caplog.text # Tests for the _copy_file_to methods @@ -616,3 +606,19 @@ def test_build_processed_archive_key( # Then expected_key = f"processed/2025-01-01/COVID-19/{FAKE_FILE_NAME}" assert processed_archive_key == expected_key + + def test_upload_json_to_inbound_delegates_to_put_object( + self, aws_client_with_mocked_boto_client: AWSClient + ): + payload = {"key": "value"} + + aws_client_with_mocked_boto_client.upload_json_to_inbound( + key="in/sample.json", + payload=payload, + ) + + aws_client_with_mocked_boto_client._client.put_object.assert_called_once() + kwargs = aws_client_with_mocked_boto_client._client.put_object.call_args.kwargs + assert kwargs["Bucket"] == aws_client_with_mocked_boto_client._bucket_name + assert kwargs["Key"] == "in/sample.json" + assert kwargs["ContentType"] == "application/json" diff --git a/tests/unit/metrics/interfaces/management/test_seed_random.py b/tests/unit/metrics/interfaces/management/test_seed_random.py new file mode 100644 index 000000000..e97d1d8a2 --- /dev/null +++ b/tests/unit/metrics/interfaces/management/test_seed_random.py @@ -0,0 +1,887 @@ +from collections.abc import Iterator +from contextlib import ExitStack, nullcontext +from types import SimpleNamespace +from typing import cast +from unittest import mock + +import pytest +from django.core.management import CommandParser +from django.core.management.base import CommandError + +from metrics.data.models.core_models.supporting import Age, Stratum +from metrics.interfaces.management.commands.seed_random import SCALE_CONFIGS, Command + +MODULE_PATH = "metrics.interfaces.management.commands.seed_random" +FULL_BATCH_DAYS = 5000 +SMALL_GEO_COUNT = 3 +LARGE_GEO_COUNT = 7 +EXPECTED_BULK_CREATE_CALLS = 2 +EXPECTED_NEXT_METRIC_INDEX = 11 +EXPECTED_TIME_SERIES_POINTS = 2 +EXPECTED_METRIC_VALUE = 
123.45


def _fake_metric_hierarchy() -> SimpleNamespace:
    """Build a stand-in metric nested under a topic, sub-theme and theme."""
    return SimpleNamespace(
        name="Metric 1",
        topic=SimpleNamespace(
            name="Topic 1",
            sub_theme=SimpleNamespace(
                name="SubTheme 1",
                theme=SimpleNamespace(name="Theme 1"),
            ),
        ),
    )


def _fake_geography() -> SimpleNamespace:
    """Build a stand-in geography whose geography type is named ``Nation``."""
    return SimpleNamespace(
        name="Area 1",
        geography_code="RND0001",
        geography_type=SimpleNamespace(name="Nation"),
    )


def _fake_stratum() -> Stratum:
    """Build a stand-in stratum, cast to satisfy the ``Stratum`` annotation."""
    stand_in = SimpleNamespace(name="All")
    return cast(Stratum, stand_in)


def _fake_age() -> Age:
    """Build a stand-in age, cast to satisfy the ``Age`` annotation."""
    stand_in = SimpleNamespace(name="All ages")
    return cast(Age, stand_in)


def _assert_progress_messages(progress_messages: list[str]) -> None:
    """Assert that each expected progress prefix was reported at least once."""
    for expected_prefix in ("Processed 1/1 metrics", "Inserted "):
        assert any(
            message.startswith(expected_prefix) for message in progress_messages
        )


class TestSeedRandomCommand:
    """Tests for the `seed_random` command's argument parsing and seeding helpers."""

    def test_add_arguments_parses_defaults(self):
        """Parsing an empty argument list yields the default option values."""
        parser = CommandParser(prog="manage.py seed_random")
        Command().add_arguments(parser)

        parsed = parser.parse_args([])

        assert (parsed.dataset, parsed.scale, parsed.delivery) == (
            "both",
            "small",
            "db",
        )
        assert parsed.seed is None
        assert parsed.truncate_first is False
        assert parsed.non_public is False

    @mock.patch(f"{MODULE_PATH}.random.seed")
    @mock.patch(f"{MODULE_PATH}.call_command")
    @mock.patch.object(Command, "_seed_metrics_data")
    @mock.patch.object(Command, "_print_summary")
    @mock.patch(f"{MODULE_PATH}.time.perf_counter")
    def test_handle_metrics_dataset(
        self,
        spy_perf_counter: mock.MagicMock,
        spy_print_summary: mock.MagicMock,
        spy_seed_metrics_data: mock.MagicMock,
        spy_call_command: mock.MagicMock,
        spy_random_seed: mock.MagicMock,
    ):
        """`handle` with dataset="metrics" seeds metrics only and prints a summary."""
        seeded_counts = {
            "Theme": 3,
            "SubTheme": 6,
            "Topic": 12,
            "Metric": 10,
            "Geography": 5,
            "CoreTimeSeries": 1,
            "APITimeSeries": 1,
        }
        spy_seed_metrics_data.return_value = seeded_counts
        # Two consecutive clock readings, 4.5 apart.
        spy_perf_counter.side_effect = [10.0, 14.5]
        handle_options = {
            "dataset": "metrics",
            "scale": "small",
            "truncate_first": True,
            "seed": 42,
            "delivery": "db",
            "non_public": False,
        }

        Command().handle(**handle_options)

        spy_random_seed.assert_called_once_with(42)
        spy_seed_metrics_data.assert_called_once_with(
            scale_config=SCALE_CONFIGS["small"],
            truncate_first=True,
            is_public=True,
            progress_callback=mock.ANY,
        )
        spy_call_command.assert_not_called()
        spy_print_summary.assert_called_once_with(
            dataset="metrics",
            scale="small",
            seed=42,
            counts=seeded_counts,
            runtime_seconds=4.5,
        )

    @mock.patch(f"{MODULE_PATH}.random.seed")
    @mock.patch(f"{MODULE_PATH}.call_command")
    @mock.patch.object(Command, "_seed_metrics_data")
    @mock.patch.object(Command, "_print_summary")
    @mock.patch(f"{MODULE_PATH}.time.time")
    @mock.patch(f"{MODULE_PATH}.time.perf_counter")
    def test_handle_cms_dataset_uses_time_seed_and_builds_cms(
        self,
        spy_perf_counter: mock.MagicMock,
        spy_time: mock.MagicMock,
        spy_print_summary: mock.MagicMock,
        spy_seed_metrics_data: mock.MagicMock,
        spy_call_command: mock.MagicMock,
        spy_random_seed: mock.MagicMock,
    ):
        """`handle` with dataset="cms" and no seed uses the clock and builds the CMS."""
        spy_time.return_value = 1234
        spy_perf_counter.side_effect = [20.0, 22.0]
        count_keys = (
            "Theme",
            "SubTheme",
            "Topic",
            "Metric",
            "Geography",
            "CoreTimeSeries",
            "APITimeSeries",
        )
        handle_options = {
            "dataset": "cms",
            "scale": "large",
            "truncate_first": False,
            "seed": None,
            "delivery": "db",
            "non_public": False,
        }

        Command().handle(**handle_options)

        spy_random_seed.assert_called_once_with(1234)
        spy_seed_metrics_data.assert_not_called()
        spy_call_command.assert_called_once_with("build_cms_site")
        spy_print_summary.assert_called_once_with(
            dataset="cms",
            scale="large",
            seed=1234,
            counts=dict.fromkeys(count_keys, 0),
            runtime_seconds=2.0,
        )

    @mock.patch(f"{MODULE_PATH}.random.seed")
    @mock.patch.object(Command, "_seed_metrics_data")
    @mock.patch.object(Command, "_seed_metrics_data_to_s3")
    @mock.patch.object(Command, "_print_summary")
    @mock.patch(f"{MODULE_PATH}.time.perf_counter")
    def test_handle_metrics_dataset_s3_delivery(
        self,
        spy_perf_counter: mock.MagicMock,
        spy_print_summary: mock.MagicMock,
        spy_seed_metrics_data_to_s3: mock.MagicMock,
        spy_seed_metrics_data: mock.MagicMock,
        spy_random_seed: mock.MagicMock,
    ):
        """`handle` with delivery="s3" uses the S3 seeding helper, not the DB one."""
        uploaded_counts = {
            "Theme": 1,
            "SubTheme": 1,
            "Topic": 1,
            "Metric": 1,
            "Geography": 1,
            "CoreTimeSeries": 10,
            "APITimeSeries": 10,
        }
        spy_seed_metrics_data_to_s3.return_value = uploaded_counts
        spy_perf_counter.side_effect = [11.0, 13.0]
        handle_options = {
            "dataset": "metrics",
            "scale": "small",
            "truncate_first": False,
            "seed": 99,
            "delivery": "s3",
            "non_public": True,
        }

        Command().handle(**handle_options)

        spy_random_seed.assert_called_once_with(99)
        spy_seed_metrics_data.assert_not_called()
        spy_seed_metrics_data_to_s3.assert_called_once_with(
            scale_config=SCALE_CONFIGS["small"],
            is_public=False,
            progress_callback=mock.ANY,
        )
        spy_print_summary.assert_called_once_with(
            dataset="metrics",
            scale="small",
            seed=99,
            counts=uploaded_counts,
            runtime_seconds=2.0,
        )

    @mock.patch.object(Command, "_truncate_metrics_data")
    @mock.patch.object(Command, "_seed_time_series_rows")
    @mock.patch.object(Command, "_seed_geographies")
    @mock.patch.object(Command, "_seed_theme_hierarchy")
    @mock.patch.object(Command, "_get_next_random_metric_index")
    @mock.patch.object(Command, "_bulk_create")
    @mock.patch(f"{MODULE_PATH}.Metric")
    @mock.patch(f"{MODULE_PATH}.transaction.atomic")
    @mock.patch(f"{MODULE_PATH}.Stratum.objects.get_or_create")
    @mock.patch(f"{MODULE_PATH}.Age.objects.get_or_create")
    def test_seed_metrics_data_builds_expected_counts_and_calls(
        self,
        spy_age_get_or_create: mock.MagicMock,
        spy_stratum_get_or_create: mock.MagicMock,
        spy_atomic: mock.MagicMock,
        spy_metric: mock.MagicMock,
        spy_bulk_create: mock.MagicMock,
        spy_get_next_random_metric_index: mock.MagicMock,
        spy_seed_theme_hierarchy: mock.MagicMock,
        spy_seed_geographies: mock.MagicMock,
        spy_seed_time_series_rows: mock.MagicMock,
        spy_truncate: mock.MagicMock,
    ):
        """`_seed_metrics_data` reports counts from the mocked helpers' results."""
        spy_progress_callback = mock.MagicMock()
        stratum = SimpleNamespace(name="All")
        age = SimpleNamespace(name="All ages")

        infectious_disease = SimpleNamespace(name="infectious_disease")
        climate_and_environment = SimpleNamespace(name="climate_and_environment")
        respiratory = SimpleNamespace(name="respiratory", theme=infectious_disease)
        vectors = SimpleNamespace(name="vectors", theme=climate_and_environment)
        themes = [infectious_disease, climate_and_environment]
        sub_themes = [respiratory, vectors]
        topics = [
            SimpleNamespace(name="COVID-19", sub_theme=respiratory),
            SimpleNamespace(name="ticks", sub_theme=vectors),
        ]
        # Four metrics, alternating between the two topics.
        metrics = [
            SimpleNamespace(
                name=f"Metric {number}",
                topic=topics[(number - 1) % len(topics)],
            )
            for number in range(1, 5)
        ]
        nation_type = SimpleNamespace(name="Nation")
        ltla_type = SimpleNamespace(name="Lower Tier Local Authority")
        geographies = [
            SimpleNamespace(
                name="England",
                geography_code="E92000001",
                geography_type=nation_type,
            ),
            SimpleNamespace(
                name="Area 2",
                geography_code="E09000002",
                geography_type=ltla_type,
            ),
        ]

        spy_atomic.return_value = nullcontext()
        spy_metric.side_effect = SimpleNamespace
        spy_get_next_random_metric_index.return_value = 1
        spy_stratum_get_or_create.return_value = (stratum, False)
        spy_age_get_or_create.return_value = (age, False)
        spy_seed_time_series_rows.return_value = (77, 88)
        spy_seed_theme_hierarchy.return_value = (themes, sub_themes, topics)
        spy_seed_geographies.return_value = geographies
        spy_bulk_create.return_value = metrics

        result = Command._seed_metrics_data(
            scale_config={"geographies": 2, "metrics": 4, "days": 9},
            truncate_first=True,
            is_public=False,
            progress_callback=spy_progress_callback,
        )

        expected_counts = {
            "Theme": 2,
            "SubTheme": 2,
            "Topic": 2,
            "Metric": 4,
            "Geography": 2,
            "CoreTimeSeries": 77,
            "APITimeSeries": 88,
        }
        assert result == expected_counts
        spy_truncate.assert_called_once_with()
        spy_seed_time_series_rows.assert_called_once_with(
            metrics=metrics,
            geographies=geographies,
            stratum=stratum,
            age=age,
            days=9,
            is_public=False,
            progress_callback=spy_progress_callback,
        )
        for expected_message in (
            "Preparing metric taxonomy and geography records...",
            "Generating Core/API time series rows...",
        ):
            spy_progress_callback.assert_any_call(expected_message)

    def test_truncate_metrics_data_deletes_from_all_models(self):
        """`_truncate_metrics_data` calls `.all().delete()` once per listed model."""
        model_names = [
            "APITimeSeries",
            "CoreTimeSeries",
            "Metric",
            "Topic",
            "SubTheme",
            "Theme",
            "Geography",
            "GeographyType",
            "Age",
            "Stratum",
        ]
        managers: dict[str, mock.MagicMock] = {
            model_name: mock.MagicMock() for model_name in model_names
        }

        with ExitStack() as stack:
            # Swap every model's manager for its own mock while the command runs.
            for model_name, manager in managers.items():
                stack.enter_context(
                    mock.patch(f"{MODULE_PATH}.{model_name}.objects", manager)
                )

            Command._truncate_metrics_data()

        for manager in managers.values():
            manager.all.assert_called_once_with()
            manager.all.return_value.delete.assert_called_once_with()

    @mock.patch(f"{MODULE_PATH}.APITimeSeries")
    @mock.patch(f"{MODULE_PATH}.CoreTimeSeries")
    @mock.patch(f"{MODULE_PATH}.random.choice")
    def test_seed_time_series_rows_flushes_remainder(
        self,
        spy_random_choice: mock.MagicMock,
        spy_core_time_series: mock.MagicMock,
        spy_api_time_series: mock.MagicMock,
    ):
        """A single day of data is still written with one bulk create per model."""

        def echo_kwargs(**kwargs):
            # Lets the test read back the keyword arguments each row was built with.
            return kwargs

        spy_random_choice.return_value = "f"
        spy_core_time_series.side_effect = echo_kwargs
        spy_api_time_series.side_effect = echo_kwargs
        spy_progress_callback = mock.MagicMock()

        core_count, api_count = Command._seed_time_series_rows(
            metrics=[_fake_metric_hierarchy()],
            geographies=[_fake_geography()],
            stratum=_fake_stratum(),
            age=_fake_age(),
            days=1,
            is_public=False,
            progress_callback=spy_progress_callback,
        )

        assert (core_count, api_count) == (1, 1)
        for spy_model in (spy_core_time_series, spy_api_time_series):
            spy_model.objects.bulk_create.assert_called_once()
            row_kwargs = spy_model.call_args.kwargs
            assert row_kwargs["sex"] == "f"
            assert row_kwargs["is_public"] is False
        reported_messages = [
            recorded_call.args[0]
            for recorded_call in spy_progress_callback.call_args_list
        ]
        _assert_progress_messages(reported_messages)

    @mock.patch(f"{MODULE_PATH}.APITimeSeries")
    @mock.patch(f"{MODULE_PATH}.CoreTimeSeries")
    def test_seed_time_series_rows_flushes_at_batch_size(
        self,
        spy_core_time_series: mock.MagicMock,
        spy_api_time_series: mock.MagicMock,
    ):
        """`FULL_BATCH_DAYS` days of data are written with one bulk create per model."""

        def echo_kwargs(**kwargs):
            return kwargs

        spy_core_time_series.side_effect = echo_kwargs
        spy_api_time_series.side_effect = echo_kwargs

        core_count, api_count = Command._seed_time_series_rows(
            metrics=[_fake_metric_hierarchy()],
            geographies=[_fake_geography()],
            stratum=_fake_stratum(),
            age=_fake_age(),
            days=FULL_BATCH_DAYS,
            is_public=True,
        )

        assert (core_count, api_count) == (FULL_BATCH_DAYS, FULL_BATCH_DAYS)
        for spy_model in (spy_core_time_series, spy_api_time_series):
            spy_model.objects.bulk_create.assert_called_once()

    def test_bulk_create_materialises_iterable_and_delegates(self):
        """`_bulk_create` hands the manager a list and returns the manager's result."""

        class FakeModel:
            objects = mock.MagicMock()

        def records_generator() -> Iterator[int]:
            yield from (1, 2)

        FakeModel.objects.bulk_create.return_value = ["created-records"]

        created = Command._bulk_create(FakeModel, records_generator())

        assert created == ["created-records"]
        FakeModel.objects.bulk_create.assert_called_once_with([1, 2])

    def test_print_summary_writes_expected_output(self):
        """`_print_summary` writes one stdout line per summary field, in order."""
        command = Command()
        command.stdout = mock.MagicMock()
        summary_counts = {
            "Theme": 3,
            "SubTheme": 6,
            "Topic": 12,
            "Metric": 10,
            "Geography": 5,
            "CoreTimeSeries": 1500,
            "APITimeSeries": 1500,
        }

        command._print_summary(
            dataset="both",
            scale="small",
            seed=123,
            counts=summary_counts,
            runtime_seconds=3.456,
        )

        # NOTE(review): the leading whitespace inside these literals is copied
        # as it appears in the reviewed text - confirm it matches the command.
        expected_lines = [
            "",
            "Seed random summary:",
            " dataset: both",
            " scale: small",
            " seed used: 123",
            " Theme: 3",
            " SubTheme: 6",
            " Topic: 12",
            " Metric: 10",
            " Geography: 5",
            " CoreTimeSeries: 1500",
            " APITimeSeries: 1500",
            " runtime seconds: 3.46",
        ]
        written_lines = [
            recorded_call.args[0]
            for recorded_call in command.stdout.write.call_args_list
        ]

        assert written_lines == expected_lines


def test_add_arguments_rejects_invalid_dataset_value():
    """An unknown ``--dataset`` value makes the parser raise `CommandError`."""
    parser = CommandParser(prog="manage.py seed_random")
    Command().add_arguments(parser)
    invalid_arguments = ["--dataset", "invalid"]

    with pytest.raises(CommandError):
        parser.parse_args(invalid_arguments)


def test_build_theme_hierarchy_records_contains_expected_real_values():
    """The built hierarchy includes the COVID-19 / respiratory / infectious entries."""
    theme_names, sub_theme_rows, topic_rows = Command._build_theme_hierarchy_records()

    sub_theme_names = [sub_theme for sub_theme, _ in sub_theme_rows]
    topic_and_sub_theme_pairs = [
        (topic, sub_theme) for topic, sub_theme, _ in topic_rows
    ]

    assert "infectious_disease" in theme_names
    assert "respiratory" in sub_theme_names
    assert ("COVID-19", "respiratory") in topic_and_sub_theme_pairs


def test_build_theme_hierarchy_records_skips_unmatched_topic_group():
    """A topic group whose name matches no child theme produces no topic rows."""
    fake_topic_group = mock.Mock()
    # `name` is set after construction because `Mock(name=...)` names the mock itself.
    fake_topic_group.name = "DOES_NOT_MATCH_CHILD_THEME"
    fake_topic_group.return_list.return_value = ["dummy-topic"]
    patched_topic_enum = mock.patch(
        f"{MODULE_PATH}.validation_enums.Topic", [fake_topic_group]
    )

    with patched_topic_enum:
        _, _, topic_rows = Command._build_theme_hierarchy_records()

    assert topic_rows == []


def test_build_geography_seed_values_returns_required_count():
    """The number of seed values returned equals the requested count."""
    small_geographies = Command._build_geography_seed_values(count=SMALL_GEO_COUNT)
    larger_geographies = Command._build_geography_seed_values(count=LARGE_GEO_COUNT)
    allowed_last_types = {"Nation", "Lower Tier Local Authority"}

    assert len(small_geographies) == SMALL_GEO_COUNT
    assert len(larger_geographies) == LARGE_GEO_COUNT
    assert small_geographies[0]["name"] == "United Kingdom"
    assert larger_geographies[-1]["geography_type"] in allowed_last_types


def test_format_enum_name_replaces_underscores_and_title_cases():
    """An upper-snake-case enum name is rendered as space-separated title case."""
    formatted = Command._format_enum_name("LOWER_TIER_LOCAL_AUTHORITY")

    assert formatted == "Lower Tier Local Authority"


@mock.patch.object(Command, "_upsert_topics")
@mock.patch.object(Command, "_upsert_sub_themes")
@mock.patch.object(Command, "_upsert_themes")
@mock.patch.object(Command, "_build_theme_hierarchy_records")
def test_seed_theme_hierarchy_delegates_to_upsert_helpers(
    spy_build_theme_hierarchy_records: mock.MagicMock,
    spy_upsert_themes: mock.MagicMock,
    spy_upsert_sub_themes: mock.MagicMock,
    spy_upsert_topics: mock.MagicMock,
):
    """`_seed_theme_hierarchy` feeds the built records through each upsert helper."""
    theme = SimpleNamespace(name="theme_1")
    sub_theme = SimpleNamespace(name="sub_1", theme=theme)
    topic = SimpleNamespace(name="topic_1", sub_theme=sub_theme)

    theme_names = ["theme_1"]
    sub_theme_rows = [("sub_1", "theme_1")]
    topic_rows = [("topic_1", "sub_1", "theme_1")]
    themes, sub_themes, topics = [theme], [sub_theme], [topic]
    themes_by_name = {"theme_1": theme}
    sub_theme_map = {("sub_1", "theme_1"): sub_theme}

    spy_build_theme_hierarchy_records.return_value = (
        theme_names,
        sub_theme_rows,
        topic_rows,
    )
    spy_upsert_themes.return_value = (themes, themes_by_name)
    spy_upsert_sub_themes.return_value = (sub_themes, sub_theme_map)
    spy_upsert_topics.return_value = topics

    result = Command._seed_theme_hierarchy()

    assert result == (themes, sub_themes, topics)
    spy_upsert_themes.assert_called_once_with(theme_names=theme_names)
    spy_upsert_sub_themes.assert_called_once_with(
        theme_names=theme_names,
        sub_theme_rows=sub_theme_rows,
        themes_by_name=themes_by_name,
    )
    spy_upsert_topics.assert_called_once_with(
        topic_rows=topic_rows,
        sub_themes_by_key=sub_theme_map,
    )


@mock.patch.object(Command, "_bulk_create")
@mock.patch(f"{MODULE_PATH}.Theme")
def test_upsert_themes_creates_missing_and_returns_requested_order(
    spy_theme: mock.MagicMock,
    spy_bulk_create: mock.MagicMock,
):
    """`_upsert_themes` bulk-creates once and returns themes in requested order."""
    already_stored = SimpleNamespace(name="theme_1")
    newly_created = SimpleNamespace(name="theme_2")
    spy_theme.side_effect = SimpleNamespace
    # First lookup returns only the stored theme; the second returns the new one.
    spy_theme.objects.filter.side_effect = [[already_stored], [newly_created]]

    themes, themes_by_name = Command._upsert_themes(theme_names=["theme_1", "theme_2"])

    returned_names = [theme.name for theme in themes]
    assert returned_names == ["theme_1", "theme_2"]
    assert themes_by_name == {"theme_1": already_stored, "theme_2": newly_created}
    spy_bulk_create.assert_called_once()


@mock.patch.object(Command, "_bulk_create")
@mock.patch(f"{MODULE_PATH}.SubTheme")
def test_upsert_sub_themes_creates_missing_and_returns_requested_order(
    spy_sub_theme: mock.MagicMock,
    spy_bulk_create: mock.MagicMock,
):
    """`_upsert_sub_themes` bulk-creates once and keeps the requested row order."""
    theme_1 = SimpleNamespace(name="theme_1")
    theme_2 = SimpleNamespace(name="theme_2")
    already_stored = SimpleNamespace(name="sub_1", theme=theme_1)
    newly_created = SimpleNamespace(name="sub_2", theme=theme_2)

    spy_sub_theme.side_effect = SimpleNamespace
    filtered_lookup = spy_sub_theme.objects.select_related.return_value.filter
    filtered_lookup.side_effect = [[already_stored], [newly_created]]

    sub_themes, sub_theme_map = Command._upsert_sub_themes(
        theme_names=["theme_1", "theme_2"],
        sub_theme_rows=[("sub_1", "theme_1"), ("sub_2", "theme_2")],
        themes_by_name={"theme_1": theme_1, "theme_2": theme_2},
    )

    returned_keys = [(sub_theme.name, sub_theme.theme.name) for sub_theme in sub_themes]
    assert returned_keys == [("sub_1", "theme_1"), ("sub_2", "theme_2")]
    assert sub_theme_map == {
        ("sub_1", "theme_1"): already_stored,
        ("sub_2", "theme_2"): newly_created,
    }
    spy_bulk_create.assert_called_once()


@mock.patch.object(Command, "_bulk_create")
@mock.patch(f"{MODULE_PATH}.Topic")
def test_upsert_topics_creates_missing_and_returns_requested_order(
    spy_topic: mock.MagicMock,
    spy_bulk_create: mock.MagicMock,
):
    """`_upsert_topics` bulk-creates once and returns topics in requested order."""

    def build_topic(**kwargs):
        # Mirror the attributes the test reads back from a constructed topic.
        return SimpleNamespace(
            name=kwargs["name"],
            sub_theme=kwargs["sub_theme"],
            sub_theme_id=kwargs["sub_theme"].id,
        )

    sub_theme_1 = SimpleNamespace(id=1, name="sub_1")
    sub_theme_2 = SimpleNamespace(id=2, name="sub_2")
    already_stored = SimpleNamespace(name="topic_1", sub_theme_id=1)
    newly_created = SimpleNamespace(name="topic_2", sub_theme_id=2)

    spy_topic.side_effect = build_topic
    spy_topic.objects.filter.side_effect = [[already_stored], [newly_created]]

    topics = Command._upsert_topics(
        topic_rows=[("topic_1", "sub_1", "theme_1"), ("topic_2", "sub_2", "theme_2")],
        sub_themes_by_key={
            ("sub_1", "theme_1"): sub_theme_1,
            ("sub_2", "theme_2"): sub_theme_2,
        },
    )

    returned_keys = [(topic.name, topic.sub_theme_id) for topic in topics]
    assert returned_keys == [("topic_1", 1), ("topic_2", 2)]
    spy_bulk_create.assert_called_once()


@mock.patch.object(Command, "_build_geography_seed_values")
@mock.patch.object(Command, "_bulk_create")
@mock.patch(f"{MODULE_PATH}.Geography")
@mock.patch(f"{MODULE_PATH}.GeographyType")
def test_seed_geographies_creates_missing_types_and_geographies(
    spy_geography_type: mock.MagicMock,
    spy_geography: mock.MagicMock,
    spy_bulk_create: mock.MagicMock,
    spy_build_geography_seed_values: mock.MagicMock,
):
    """Missing geography types and geographies are bulk-created before returning."""
    nation_type = SimpleNamespace(name="Nation")
    ltla_type = SimpleNamespace(name="Lower Tier Local Authority")
    already_stored = SimpleNamespace(
        name="England",
        geography_type=nation_type,
        geography_code="E92000001",
    )
    newly_created = SimpleNamespace(
        name="Area 2",
        geography_type=ltla_type,
        geography_code="E09000002",
    )
    spy_build_geography_seed_values.return_value = [
        {
            "name": geography.name,
            "geography_code": geography.geography_code,
            "geography_type": geography.geography_type.name,
        }
        for geography in (already_stored, newly_created)
    ]
    spy_geography_type.side_effect = SimpleNamespace
    spy_geography.side_effect = SimpleNamespace
    # Each lookup is answered twice: first with one record, then with the other.
    spy_geography_type.objects.filter.side_effect = [[nation_type], [ltla_type]]
    filtered_lookup = spy_geography.objects.select_related.return_value.filter
    filtered_lookup.side_effect = [[already_stored], [newly_created]]

    result = Command._seed_geographies(count=2)

    returned_keys = [
        (geography.name, geography.geography_type.name) for geography in result
    ]
    assert returned_keys == [
        ("England", "Nation"),
        ("Area 2", "Lower Tier Local Authority"),
    ]
    assert spy_bulk_create.call_count == EXPECTED_BULK_CREATE_CALLS


@mock.patch.object(Command, "_build_geography_seed_values")
@mock.patch.object(Command, "_bulk_create")
@mock.patch(f"{MODULE_PATH}.Geography")
@mock.patch(f"{MODULE_PATH}.GeographyType")
def test_seed_geographies_reuses_existing_without_creating(
    spy_geography_type: mock.MagicMock,
    spy_geography: mock.MagicMock,
    spy_bulk_create: mock.MagicMock,
    spy_build_geography_seed_values: mock.MagicMock,
):
    """When every type and geography is already found, nothing is bulk-created."""
    nation_type = SimpleNamespace(name="Nation")
    ltla_type = SimpleNamespace(name="Lower Tier Local Authority")
    england = SimpleNamespace(
        name="England",
        geography_type=nation_type,
        geography_code="E92000001",
    )
    area_2 = SimpleNamespace(
        name="Area 2",
        geography_type=ltla_type,
        geography_code="E09000002",
    )
    spy_build_geography_seed_values.return_value = [
        {
            "name": geography.name,
            "geography_code": geography.geography_code,
            "geography_type": geography.geography_type.name,
        }
        for geography in (england, area_2)
    ]
    spy_geography_type.objects.filter.return_value = [nation_type, ltla_type]
    filtered_lookup = spy_geography.objects.select_related.return_value.filter
    filtered_lookup.return_value = [england, area_2]

    result = Command._seed_geographies(count=2)

    assert result == [england, area_2]
    spy_bulk_create.assert_not_called()


@mock.patch(f"{MODULE_PATH}.Metric.objects.filter")
def test_get_next_random_metric_index_ignores_non_matching_names(
    spy_metric_filter: mock.MagicMock,
):
    """Names without a numeric ``Random Metric`` suffix do not affect the result."""
    stored_metric_names = [
        "Random Metric 2",
        "Random Metric x",
        "Some Other Metric",
        "Random Metric 10",
    ]
    spy_metric_filter.return_value.values_list.return_value = stored_metric_names

    next_index = Command._get_next_random_metric_index()

    assert next_index == EXPECTED_NEXT_METRIC_INDEX


@mock.patch(f"{MODULE_PATH}.Metric.objects.filter")
def test_get_next_random_metric_index_defaults_to_one_when_no_matches(
    spy_metric_filter: mock.MagicMock,
):
    """With no matching metric names stored, the next index is 1."""
    spy_metric_filter.return_value.values_list.return_value = ["Some Other Metric"]

    next_index = Command._get_next_random_metric_index()

    assert next_index == 1


@mock.patch(f"{MODULE_PATH}.AWSClient")
@mock.patch.object(Command, "_build_s3_object_key")
@mock.patch.object(Command, "_build_geography_seed_values")
@mock.patch.object(Command, "_build_theme_hierarchy_records")
@mock.patch.object(Command, "_build_timeseries_ingestion_payloads")
def test_seed_metrics_data_to_s3_uploads_payloads_and_returns_counts(
    spy_build_payloads: mock.MagicMock,
    spy_build_theme_hierarchy_records: mock.MagicMock,
    spy_build_geography_seed_values: mock.MagicMock,
    spy_build_s3_object_key: mock.MagicMock,
    spy_aws_client: mock.MagicMock,
):
    """Each built payload is uploaded under its object key and counts are returned."""
    spy_progress_callback = mock.MagicMock()
    ingestion_payload = {
        "topic": "COVID-19",
        "metric": "COVID-19_cases_randomByDay_1",
        "geography_code": "E92000001",
        "age": "all",
        "sex": "all",
        "stratum": "default",
    }
    england_seed_value = {
        "name": "England",
        "geography_code": "E92000001",
        "geography_type": "Nation",
    }
    spy_build_payloads.return_value = [ingestion_payload]
    spy_build_s3_object_key.return_value = "in/key.json"
    spy_build_theme_hierarchy_records.return_value = (
        [],
        [],
        [("COVID-19", "respiratory", "infectious_disease")],
    )
    spy_build_geography_seed_values.return_value = [england_seed_value]

    result = Command._seed_metrics_data_to_s3(
        scale_config={"geographies": 1, "metrics": 2, "days": 3},
        is_public=False,
        progress_callback=spy_progress_callback,
    )

    expected_counts = {
        "Theme": 1,
        "SubTheme": 1,
        "Topic": 1,
        "Metric": 2,
        "Geography": 1,
        "CoreTimeSeries": 6,
        "APITimeSeries": 6,
    }
    assert result == expected_counts
    spy_aws_client.return_value.upload_json_to_inbound.assert_called_once_with(
        key="in/key.json",
        payload=ingestion_payload,
    )
    for expected_message in (
        "Generating ingestion payloads for S3 upload...",
        "Uploaded 1 files to ingest bucket in/.",
    ):
        spy_progress_callback.assert_any_call(expected_message)


@mock.patch(f"{MODULE_PATH}.random.choice")
@mock.patch(f"{MODULE_PATH}.random.uniform")
@mock.patch.object(Command, "_build_geography_seed_values")
@mock.patch.object(Command, "_build_theme_hierarchy_records")
def test_build_timeseries_ingestion_payloads_builds_expected_shape(
    spy_build_theme_hierarchy_records: mock.MagicMock,
    spy_build_geography_seed_values: mock.MagicMock,
    spy_random_uniform: mock.MagicMock,
    spy_random_choice: mock.MagicMock,
):
    """One metric for one geography yields one payload with the expected fields."""
    spy_build_theme_hierarchy_records.return_value = (
        [],
        [],
        [("COVID-19", "respiratory", "infectious_disease")],
    )
    spy_build_geography_seed_values.return_value = [
        {
            "name": "England",
            "geography_code": "E92000001",
            "geography_type": "Nation",
        }
    ]
    spy_random_uniform.return_value = EXPECTED_METRIC_VALUE
    spy_random_choice.return_value = "all"

    payloads = Command._build_timeseries_ingestion_payloads(
        scale_config={"geographies": 1, "metrics": 1, "days": 2},
        is_public=True,
    )

    assert len(payloads) == 1
    payload = payloads[0]
    expected_fields = {
        "parent_theme": "infectious_disease",
        "child_theme": "respiratory",
        "topic": "COVID-19",
        "metric_group": "cases",
        "geography": "England",
        "geography_code": "E92000001",
        "age": "all",
        "sex": "all",
        "stratum": "default",
    }
    for field_name, expected_value in expected_fields.items():
        assert payload[field_name] == expected_value
    assert len(payload["time_series"]) == EXPECTED_TIME_SERIES_POINTS
    first_point = payload["time_series"][0]
    assert first_point["metric_value"] == EXPECTED_METRIC_VALUE
    assert first_point["is_public"] is True


def test_build_s3_object_key_builds_expected_file_name():
    """The object key combines the payload fields and the payload index."""
    payload = {
        "topic": "COVID-19",
        "metric": "COVID-19_cases_randomByDay_1",
        "geography_code": "E92000001",
        "age": "all",
        "sex": "f",
        "stratum": "default",
    }
    expected_key = (
        "in/covid_19_cases_covid_19_cases_randombyday_1_E92000001_all_f_default_7.json"
    )

    object_key = Command._build_s3_object_key(payload=payload, payload_index=7)

    assert object_key == expected_key