From e1b9e05f50ad16044c770e70dac3a12ad9610ed3 Mon Sep 17 00:00:00 2001 From: Quan Pham Date: Sat, 21 Mar 2026 15:26:04 -0400 Subject: [PATCH] Allow supplemental API data to be passed to ColdfrontFetchProcessor This allows passing data for billable projects not currently in Coldfront, namely bare metal projects. --- process_report/loader.py | 38 ++++++++++++---- .../processors/coldfront_fetch_processor.py | 22 ++++++++++ process_report/settings.py | 1 + .../test_coldfront_fetch_processor.py | 44 +++++++++++++++++++ process_report/tests/util.py | 10 ++++- 5 files changed, 105 insertions(+), 10 deletions(-) diff --git a/process_report/loader.py b/process_report/loader.py index 67d2cdea..967b1436 100644 --- a/process_report/loader.py +++ b/process_report/loader.py @@ -22,11 +22,23 @@ ] +SUPPLEMENTAL_START_DATE = "Start Date" +SUPPLEMENTAL_END_DATE = "End Date" + + @functools.lru_cache def get_rates_info(): return load_from_url() +def _is_in_time_range(start, end) -> bool: + # Leveraging inherent lexicographical order of YYYY-MM strings + return ( + start <= invoice_settings.invoice_month + and invoice_settings.invoice_month <= end + ) + + class Loader: @functools.lru_cache def get_csv_invoice_filepath_list(self) -> list[str]: @@ -126,13 +138,6 @@ def get_nonbillable_projects(self) -> pandas.DataFrame: 3. 
Is Timed: Boolean indicating if the nonbillable status is time-bound """ - def _is_in_time_range(timed_object) -> bool: - # Leveraging inherent lexicographical order of YYYY-MM strings - return ( - timed_object["start"] <= invoice_settings.invoice_month - and invoice_settings.invoice_month <= timed_object["end"] - ) - project_list = [] with open(invoice_settings.nonbillable_projects_filepath) as file: projects_dict = yaml.safe_load(file) @@ -142,7 +147,7 @@ def _is_in_time_range(timed_object) -> bool: cluster_list = project.get("clusters") if project.get("start"): - if not _is_in_time_range(project): + if not _is_in_time_range(project["start"], project["end"]): continue if cluster_list: @@ -154,7 +159,7 @@ def _is_in_time_range(timed_object) -> bool: for cluster in cluster_list: cluster_start_time = cluster.get("start") if cluster_start_time: - if _is_in_time_range(cluster): + if _is_in_time_range(cluster["start"], cluster["end"]): project_list.append((project_name, cluster["name"], True)) elif not cluster_start_time: project_list.append((project_name, cluster["name"], False)) @@ -179,5 +184,20 @@ def get_nonbillable_timed_projects(self) -> list[tuple[str, str]]: ].itertuples(index=False, name=None) ) + def get_supplement_api_data(self) -> pandas.DataFrame: + supplemental_df = pandas.DataFrame() + if invoice_settings.supplement_api_data_filepath: + supplemental_df = pandas.read_csv( + invoice_settings.supplement_api_data_filepath + ) + in_time_range_mask = supplemental_df.apply( + lambda row: _is_in_time_range( + row[SUPPLEMENTAL_START_DATE], row[SUPPLEMENTAL_END_DATE] + ), + axis=1, + ) + supplemental_df = supplemental_df[in_time_range_mask] + return supplemental_df + loader = Loader() diff --git a/process_report/processors/coldfront_fetch_processor.py b/process_report/processors/coldfront_fetch_processor.py index e581b8c9..7b0bbeee 100644 --- a/process_report/processors/coldfront_fetch_processor.py +++ b/process_report/processors/coldfront_fetch_processor.py @@ 
-26,6 +26,11 @@ CF_ATTR_INSTITUTION_SPECIFIC_CODE = "Institution-Specific Code" CF_ATTR_IS_COURSE = "Is Course?" +SUPPLEMENTAL_PROJECT_ID = "Project - Allocation Name" +SUPPLEMENTAL_PROJECT_NAME = "Project - Title" +SUPPLEMENTAL_PI = "Manager (PI)" +SUPPLEMENTAL_CLUSTER_NAME = "Cluster Name" + @dataclass class ColdfrontFetchProcessor(processor.Processor): @@ -33,6 +38,9 @@ class ColdfrontFetchProcessor(processor.Processor): default_factory=loader.get_nonbillable_projects ) coldfront_data_filepath: str = invoice_settings.coldfront_api_filepath + supplement_api_data: pandas.DataFrame = field( + default_factory=lambda: loader.get_supplement_api_data() + ) @functools.cached_property def coldfront_client(self): @@ -125,6 +133,20 @@ def _get_allocation_data(self, coldfront_api_data): except KeyError: continue + for _, row in self.supplement_api_data.iterrows(): + project_id = row[SUPPLEMENTAL_PROJECT_ID] + project_name = row[SUPPLEMENTAL_PROJECT_NAME] + pi_name = row[SUPPLEMENTAL_PI] + cluster_name = row[SUPPLEMENTAL_CLUSTER_NAME] + + allocation_data[(project_id, cluster_name)] = { + invoice.PROJECT_FIELD: project_name, + invoice.PI_FIELD: pi_name, + invoice.INSTITUTION_ID_FIELD: "N/A", + invoice.CLUSTER_NAME_FIELD: cluster_name, + invoice.IS_COURSE_FIELD: False, # TODO(Quan): assumes supplemental data does not contain course info; confirm 
+ } + return allocation_data def _validate_allocation_data(self, allocation_data): diff --git a/process_report/settings.py b/process_report/settings.py index be9f5ffa..fc0786fa 100644 --- a/process_report/settings.py +++ b/process_report/settings.py @@ -8,6 +8,7 @@ class Settings(BaseSettings): # Coldfront info coldfront_api_filepath: str | None = None + supplement_api_data_filepath: str | None = None keycloak_client_id: str | None = None keycloak_client_secret: str | None = None diff --git a/process_report/tests/unit/processors/test_coldfront_fetch_processor.py b/process_report/tests/unit/processors/test_coldfront_fetch_processor.py index 7d9f84c3..d5775449 100644 --- a/process_report/tests/unit/processors/test_coldfront_fetch_processor.py +++ b/process_report/tests/unit/processors/test_coldfront_fetch_processor.py @@ -257,3 +257,47 @@ def test_missing_project_cluster_tuples(self, mock_get_allocation_data): assert str(cm.value) == ( f"Projects {expected_missing} not found in Coldfront and are billable! 
Please check the project names" ) + + @mock.patch( + "process_report.processors.coldfront_fetch_processor.ColdfrontFetchProcessor._fetch_coldfront_allocation_api", + ) + def test_supplement_api_data_used_when_coldfront_missing( + self, mock_get_allocation_data + ): + """Supplement API rows are applied to invoice in _get_allocation_data().""" + mock_get_allocation_data.return_value = self._get_mock_allocation_data( + ["P1"], + ["PI1"], + ["IC1"], + ["stack"], + ) + + supplemental_df = pandas.DataFrame( + { + "Project - Allocation Name": ["P2"], + "Project - Title": ["P2-supplement-name"], + "Manager (PI)": ["PI2"], + "Cluster Name": ["stack"], + } + ) + + test_invoice = self._get_test_invoice( + ["P1", "P2"], cluster_name=["stack", "stack"] + ) + + expected_invoice = self._get_test_invoice( + ["P1", "P2"], + ["P1-name", "P2-supplement-name"], + ["PI1", "PI2"], + ["IC1", "N/A"], + ["stack", "stack"], + [False, False], + ) + + test_coldfront_fetch_proc = test_utils.new_coldfront_fetch_processor( + data=test_invoice, supplement_api_data=supplemental_df + ) + test_coldfront_fetch_proc.process() + output_invoice = test_coldfront_fetch_proc.data + + assert output_invoice.equals(expected_invoice) diff --git a/process_report/tests/util.py b/process_report/tests/util.py index 8bec3ed0..29452684 100644 --- a/process_report/tests/util.py +++ b/process_report/tests/util.py @@ -63,6 +63,7 @@ def new_coldfront_fetch_processor( data=None, nonbillable_projects=None, coldfront_data_filepath=None, + supplement_api_data=None, ): if data is None: data = pandas.DataFrame() @@ -70,8 +71,15 @@ def new_coldfront_fetch_processor( nonbillable_projects = pandas.DataFrame( columns=["Project Name", "Cluster", "Is Timed"] ) + if supplement_api_data is None: + supplement_api_data = pandas.DataFrame() return coldfront_fetch_processor.ColdfrontFetchProcessor( - invoice_month, data, name, nonbillable_projects, coldfront_data_filepath + invoice_month, + data, + name, + nonbillable_projects, + 
coldfront_data_filepath, + supplement_api_data, )