correct types and move large const into utils

johnseekins · johnseekins · commit 00855ca6ee08 · 2025-09-07T18:07:53.000-06:00
Signed-off-by: John Seekins <john@robot-house.us>
diff --git a/enricher.py b/enricher.py
@@ -8,6 +8,7 @@
 import time
 from urllib.parse import quote
 from utils import (
+    facility_sheet_header,
     logger,
     session,
 )
@@ -19,38 +20,6 @@
 WIKIPEDIA_DELAY = 0.5  # Be respectful to Wikipedia
 WIKIDATA_DELAY = 0.5  # Be respectful to Wikidata
 
-# extracted ADP sheet header list 2025-09-07
-facility_sheet_header = [
-    "Name",
-    "Address",
-    "City",
-    "State",
-    "Zip",
-    "AOR",
-    "Type Detailed",
-    "Male/Female",
-    "FY25 ALOS",
-    "Level A",
-    "Level B",
-    "Level C",
-    "Level D",
-    "Male Crim",
-    "Male Non-Crim",
-    "Female Crim",
-    "Female Non-Crim",
-    "ICE Threat Level 1",
-    "ICE Threat Level 2",
-    "ICE Threat Level 3",
-    "No ICE Threat Level",
-    "Mandatory",
-    "Guaranteed Minimum",
-    "Last Inspection Type",
-    "Last Inspection End Date",
-    "Pending FY25 Inspection",
-    "Last Inspection Standard",
-    "Last Final Rating",
-]
-
 
 class ExternalDataEnricher(object):
     def __init__(self):
@@ -67,7 +36,7 @@ def _download_sheet(self) -> None:
                     if chunk:
                         f.write(chunk)
 
-    def _load_sheet(self) -> polars.DataFrame:
+    def _load_sheet(self) -> dict:
         """Convert the detentionstats sheet data into something we can update our facilities with"""
         self._download_sheet()
         df = polars.read_excel(
diff --git a/schemas.py b/schemas.py
@@ -8,7 +8,7 @@
 }
 
 # default keys to "false"-y values so we can merge easier
-facility_schema = {
+facility_schema: dict = {
     "address": {
         "administrative_area": "",
         "country": "",
diff --git a/utils.py b/utils.py
@@ -21,6 +21,38 @@
 default_timestamp = "1970-01-01T00:00:00-+0000"
 timestamp_format = "%Y-%m-%dT%H:%M:%S-%z"
 
+# extracted ADP sheet header list 2025-09-07
+facility_sheet_header = [
+    "Name",
+    "Address",
+    "City",
+    "State",
+    "Zip",
+    "AOR",
+    "Type Detailed",
+    "Male/Female",
+    "FY25 ALOS",
+    "Level A",
+    "Level B",
+    "Level C",
+    "Level D",
+    "Male Crim",
+    "Male Non-Crim",
+    "Female Crim",
+    "Female Non-Crim",
+    "ICE Threat Level 1",
+    "ICE Threat Level 2",
+    "ICE Threat Level 3",
+    "No ICE Threat Level",
+    "Mandatory",
+    "Guaranteed Minimum",
+    "Last Inspection Type",
+    "Last Inspection End Date",
+    "Pending FY25 Inspection",
+    "Last Inspection Standard",
+    "Last Final Rating",
+]
+
 
 def _flatdict(d: dict, parent_key: str = "", sep: str = ".") -> dict:
     """flatten a nested dictionary for nicer printing in CSV"""

Original file line number	Diff line number	Diff line change
`@@ -8,7 +8,7 @@`
`8`	`8`	`}`
`9`	`9`
`10`	`10`	`# default keys to "false"-y values so we can merge easier`
`11`		`-facility_schema = {`
	`11`	`+facility_schema: dict = {`
`12`	`12`	`"address": {`
`13`	`13`	`"administrative_area": "",`
`14`	`14`	`"country": "",`