CCI-MOC · QuanMPhm · Mar 21, 2026 · naved001 · Mar 23, 2026 · naved001
diff --git a/process_report/loader.py b/process_report/loader.py
@@ -22,11 +22,23 @@
 ]
 
 
+SUPPLEMENTAL_START_DATE = "Start Date"  # CSV column: start of a row's active range
+SUPPLEMENTAL_END_DATE = "End Date"  # CSV column: end of a row's active range
+
+
 @functools.lru_cache
 def get_rates_info():
     return load_from_url()
 
 
+def _is_in_time_range(start, end) -> bool:
+    """Return True when start <= invoice month <= end (inclusive bounds)."""
+    # Plain string comparison is enough here: YYYY-MM strings sort
+    # lexicographically in chronological order.
+    invoice_month = invoice_settings.invoice_month
+    return start <= invoice_month <= end
+
+
 class Loader:
     @functools.lru_cache
     def get_csv_invoice_filepath_list(self) -> list[str]:
@@ -126,13 +138,6 @@ def get_nonbillable_projects(self) -> pandas.DataFrame:
         3. Is Timed: Boolean indicating if the nonbillable status is time-bound
         """
 
-        def _is_in_time_range(timed_object) -> bool:
-            # Leveraging inherent lexicographical order of YYYY-MM strings
-            return (
-                timed_object["start"] <= invoice_settings.invoice_month
-                and invoice_settings.invoice_month <= timed_object["end"]
-            )
-
         project_list = []
         with open(invoice_settings.nonbillable_projects_filepath) as file:
             projects_dict = yaml.safe_load(file)
@@ -142,7 +147,7 @@ def _is_in_time_range(timed_object) -> bool:
             cluster_list = project.get("clusters")
 
             if project.get("start"):
-                if not _is_in_time_range(project):
+                if not _is_in_time_range(project["start"], project["end"]):
                     continue
 
                 if cluster_list:
@@ -154,7 +159,7 @@ def _is_in_time_range(timed_object) -> bool:
                 for cluster in cluster_list:
                     cluster_start_time = cluster.get("start")
                     if cluster_start_time:
-                        if _is_in_time_range(cluster):
+                        if _is_in_time_range(cluster["start"], cluster["end"]):
                             project_list.append((project_name, cluster["name"], True))
                     elif not cluster_start_time:
                         project_list.append((project_name, cluster["name"], False))
@@ -179,5 +184,20 @@ def get_nonbillable_timed_projects(self) -> list[tuple[str, str]]:
             ].itertuples(index=False, name=None)
         )
 
+    def get_supplement_api_data(self) -> pandas.DataFrame:
+        """Return supplemental CSV rows whose date range covers the invoice month."""
+        csv_path = invoice_settings.supplement_api_data_filepath
+        if not csv_path:
+            # No supplemental file configured: nothing to add.
+            return pandas.DataFrame()
+
+        def _covers_invoice_month(row) -> bool:
+            start, end = row[SUPPLEMENTAL_START_DATE], row[SUPPLEMENTAL_END_DATE]
+            return _is_in_time_range(start, end)
+
+        all_rows = pandas.read_csv(csv_path)
+        # Row-wise boolean mask; keep only rows active in the invoice month.
+        return all_rows[all_rows.apply(_covers_invoice_month, axis=1)]
+
 
 loader = Loader()
diff --git a/process_report/processors/coldfront_fetch_processor.py b/process_report/processors/coldfront_fetch_processor.py
@@ -26,13 +26,21 @@
 CF_ATTR_INSTITUTION_SPECIFIC_CODE = "Institution-Specific Code"
 CF_ATTR_IS_COURSE = "Is Course?"
 
+SUPPLEMENTAL_PROJECT_ID = "Project - Allocation Name"  # allocation key (with cluster)
+SUPPLEMENTAL_PROJECT_NAME = "Project - Title"  # copied to invoice.PROJECT_FIELD
+SUPPLEMENTAL_PI = "Manager (PI)"  # copied to invoice.PI_FIELD
+SUPPLEMENTAL_CLUSTER_NAME = "Cluster Name"  # copied to invoice.CLUSTER_NAME_FIELD
+
 
 @dataclass
 class ColdfrontFetchProcessor(processor.Processor):
     nonbillable_projects: pandas.DataFrame = field(
         default_factory=loader.get_nonbillable_projects
     )
     coldfront_data_filepath: str = invoice_settings.coldfront_api_filepath
+    supplement_api_data: pandas.DataFrame = field(
+        default_factory=lambda: loader.get_supplement_api_data()
+    )
 
     @functools.cached_property
     def coldfront_client(self):
@@ -125,6 +133,20 @@ def _get_allocation_data(self, coldfront_api_data):
             except KeyError:
                 continue
 
+        for _, row in self.supplement_api_data.iterrows():
+            project_id = row[SUPPLEMENTAL_PROJECT_ID]
+            project_name = row[SUPPLEMENTAL_PROJECT_NAME]
+            pi_name = row[SUPPLEMENTAL_PI]
+            cluster_name = row[SUPPLEMENTAL_CLUSTER_NAME]
+
+            allocation_data[(project_id, cluster_name)] = {
+                invoice.PROJECT_FIELD: project_name,
+                invoice.PI_FIELD: pi_name,
+                invoice.INSTITUTION_ID_FIELD: "N/A",
+                invoice.CLUSTER_NAME_FIELD: cluster_name,
+                invoice.IS_COURSE_FIELD: False,  # TODO(Quan): assumes supplemental data carries no course info; confirm
+            }
+
         return allocation_data
 
     def _validate_allocation_data(self, allocation_data):

diff --git a/process_report/settings.py b/process_report/settings.py
@@ -8,6 +8,7 @@
 class Settings(BaseSettings):
     # Coldfront info
     coldfront_api_filepath: str | None = None
+    supplement_api_data_filepath: str | None = None
     keycloak_client_id: str | None = None
     keycloak_client_secret: str | None = None
 

diff --git a/process_report/tests/unit/processors/test_coldfront_fetch_processor.py b/process_report/tests/unit/processors/test_coldfront_fetch_processor.py
@@ -257,3 +257,47 @@ def test_missing_project_cluster_tuples(self, mock_get_allocation_data):
         assert str(cm.value) == (
             f"Projects {expected_missing} not found in Coldfront and are billable! Please check the project names"
         )
+
+    @mock.patch(
+        "process_report.processors.coldfront_fetch_processor.ColdfrontFetchProcessor._fetch_coldfront_allocation_api",
+    )
+    def test_supplement_api_data_used_when_coldfront_missing(
+        self, mock_get_allocation_data
+    ):
+        """Supplement rows fill in P2, which the mocked Coldfront data lacks."""
+        # Mocked Coldfront API response only knows about P1.
+        coldfront_only_p1 = self._get_mock_allocation_data(
+            ["P1"], ["PI1"], ["IC1"], ["stack"]
+        )
+        mock_get_allocation_data.return_value = coldfront_only_p1
+
+        # Supplemental row describing P2 on the same cluster.
+        supplement_rows = pandas.DataFrame(
+            {
+                "Project - Allocation Name": ["P2"],
+                "Project - Title": ["P2-supplement-name"],
+                "Manager (PI)": ["PI2"],
+                "Cluster Name": ["stack"],
+            }
+        )
+
+        input_invoice = self._get_test_invoice(
+            ["P1", "P2"], cluster_name=["stack", "stack"]
+        )
+
+        # Expected output: P2 carries the supplemental title/PI and an "N/A" entry.
+        expected_invoice = self._get_test_invoice(
+            ["P1", "P2"],
+            ["P1-name", "P2-supplement-name"],
+            ["PI1", "PI2"],
+            ["IC1", "N/A"],
+            ["stack", "stack"],
+            [False, False],
+        )
+
+        processor_under_test = test_utils.new_coldfront_fetch_processor(
+            data=input_invoice, supplement_api_data=supplement_rows
+        )
+        processor_under_test.process()
+
+        assert processor_under_test.data.equals(expected_invoice)
diff --git a/process_report/tests/util.py b/process_report/tests/util.py
@@ -63,15 +63,23 @@ def new_coldfront_fetch_processor(
     data=None,
     nonbillable_projects=None,
     coldfront_data_filepath=None,
+    supplement_api_data=None,
 ):
     if data is None:
         data = pandas.DataFrame()
     if nonbillable_projects is None:
         nonbillable_projects = pandas.DataFrame(
             columns=["Project Name", "Cluster", "Is Timed"]
         )
+    if supplement_api_data is None:
+        supplement_api_data = pandas.DataFrame()
     return coldfront_fetch_processor.ColdfrontFetchProcessor(
-        invoice_month, data, name, nonbillable_projects, coldfront_data_filepath
+        invoice_month,
+        data,
+        name,
+        nonbillable_projects,
+        coldfront_data_filepath,
+        supplement_api_data,
     )