Merged
20 changes: 15 additions & 5 deletions enrichers/general.py
@@ -43,13 +43,23 @@ def _enrich_facility(facility_data: tuple) -> tuple:
wd_res = wikidata.Wikidata(facility_name=facility_name).search()
osm = openstreetmap.OpenStreetMap(facility_name=facility_name, address=facility.get("address", {}))
osm_res = osm.search()
enriched_facility["wikipedia"]["page_url"] = wiki_res.get("url", "")
url = wiki_res.get("url", None)
if url:
enriched_facility["wikipedia"]["page_url"] = url
enriched_facility["wikipedia"]["search_query"] = wiki_res.get("search_query_steps", "")
enriched_facility["wikidata"]["page_url"] = wd_res.get("url", "")
url = wd_res.get("url", None)
if url:
enriched_facility["wikidata"]["page_url"] = url
enriched_facility["wikidata"]["search_query"] = wd_res.get("search_query_steps", "")
enriched_facility["osm"]["latitude"] = osm_res.get("details", {}).get("latitude", osm.default_coords["latitude"])
enriched_facility["osm"]["longitude"] = osm_res.get("details", {}).get("longitude", osm.default_coords["longitude"])
enriched_facility["osm"]["url"] = osm_res.get("url", "")
lat = osm_res.get("details", {}).get("latitude", None)
long = osm_res.get("details", {}).get("longitude", None)
if lat:
enriched_facility["osm"]["latitude"] = lat
if long:
enriched_facility["osm"]["longitude"] = lat
url = osm_res.get("url", None)
if url:
enriched_facility["osm"]["url"] = url
enriched_facility["osm"]["search_query"] = osm_res.get("search_query_steps", "")

logger.debug(enriched_facility)
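
The same fetch-then-guard pattern now repeats for the Wikipedia, Wikidata, and OSM fields. A minimal sketch of a shared helper that could collapse it, using only the dicts shown above (the _set_if_present name is ours, not the PR's); note it tests `is not None` rather than truthiness, since a plain `if value:` would also skip valid falsy values such as a 0.0 latitude:

def _set_if_present(target: dict, key: str, value) -> None:
    # Only overwrite the schema default when the lookup returned something.
    # `is not None` (rather than truthiness) keeps falsy-but-valid values
    # like a 0.0 coordinate.
    if value is not None:
        target[key] = value

# e.g. _set_if_present(enriched_facility["osm"], "latitude",
#                      osm_res.get("details", {}).get("latitude"))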
6 changes: 4 additions & 2 deletions file_utils.py
@@ -23,12 +23,14 @@ def export_to_file(
match file_type:
case "xlsx":
with xlsxwriter.Workbook(full_name, {"remove_timezone": True}) as wb:
writer.write_excel(workbook=wb, include_header=True, autofit=True)
_ = writer.write_excel(workbook=wb, include_header=True, autofit=True)
case "csv":
with open(full_name, "w", newline="", encoding="utf-8") as f_out:
writer.write_csv(file=f_out, include_header=True)
case "parquet":
writer.write_parquet(full_name, use_pyarrow=True)
case _:
logger.warning("Invalid dataframe output type %s", file_type)
elif file_type == "json":
with open(full_name, "w", encoding="utf-8") as f_out:
json.dump(facilities_data, f_out, indent=2, sort_keys=True, default=str)
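
The match/case dispatch above logs a warning for unrecognized dataframe output types instead of raising. A self-contained sketch of that pattern, with illustrative names rather than the repo's (the real writer calls are shown in the diff):

import logging

logger = logging.getLogger(__name__)

def dispatch_export(file_type: str) -> str:
    # Known types map to a writer; anything else is logged, not raised.
    match file_type:
        case "xlsx" | "csv" | "parquet":
            return f"writing {file_type}"
        case _:
            logger.warning("Invalid dataframe output type %s", file_type)
            return "skipped"

dispatch_export("parquet")  # "writing parquet"
dispatch_export("toml")     # logs the warning, returns "skipped"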
@@ -103,7 +105,7 @@ def print_summary(facilities_data: dict) -> None:
false_positives = 0
errors = 0
for facility in facilities_data["facilities"].values():
query = facility.get("wikipedia", {}).get("search_query", "")
query: str = facility.get("wikipedia", {}).get("search_query", "")
if "REJECTED" in query:
false_positives += 1
elif "ERROR" in query:
79 changes: 42 additions & 37 deletions ice_scrapers/__init__.py
@@ -4,67 +4,75 @@
may call them
"""

# extracted ADP sheet header list 2025-09-07
facility_sheet_header = [
"Name",
"Address",
"City",
"State",
"Zip",
"AOR",
"Type Detailed",
"Male/Female",
"FY25 ALOS",
"Level A",
"Level B",
"Level C",
"Level D",
"Male Crim",
"Male Non-Crim",
"Female Crim",
"Female Non-Crim",
"ICE Threat Level 1",
"ICE Threat Level 2",
"ICE Threat Level 3",
"No ICE Threat Level",
"Mandatory",
"Guaranteed Minimum",
"Last Inspection Type",
"Last Inspection End Date",
"Pending FY25 Inspection",
"Last Inspection Standard",
"Last Final Rating",
]
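
A hedged sketch of how this header list could validate a downloaded ADP sheet; the use of polars and the local filename are assumptions (the PR's dataframe writer and the xlsx URL cited below suggest both):

import polars as pl

# Hypothetical validation pass; the repo's actual loader is load_sheet().
df = pl.read_excel("FY25_detentionStats08292025.xlsx")  # assumed local copy
missing = [col for col in facility_sheet_header if col not in df.columns]
if missing:
    raise ValueError(f"ADP sheet missing expected columns: {missing}")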

ice_inspection_types = {
# found in https://www.ice.gov/foia/odo-facility-inspections
"ODO": "Office of Detention Oversight",
# found in https://ia803100.us.archive.org/16/items/6213032-ORSA-MOU-ICE/6213032-ORSA-MOU-ICE_text.pdf
"ORSA": "Operational Review Self-Assessment",
}

# extracted from https://vera-institute.files.svdcdn.com/production/downloads/dashboard_appendix.pdf 2025-09-23
ice_facility_group_mapping = {
"Non-Dedicated": ["IGSA"],
"Dedicated": ["DIGSA", "CDF", "SPC"],
"Federal": ["BOF", "USMSIGA", "USMS IGA", "USMS CDF", "DOD", "MOC"],
"Hold/Staging": ["Hold", "Staging"],
"Family/Youth": ["Family", "Juvenile", "FAMILY"],
"Medical": ["Hospital"],
"Hotel": ["Hotel"],
"Other/Unknown": ["Other", "Unknown", "Pending"],
}
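
Since each raw spreadsheet type appears in exactly one group, the mapping inverts cleanly into a lookup table; a small sketch using only the dict above (the ice_type_to_group name is ours):

# Reverse index: raw ICE type -> Vera group label.
ice_type_to_group = {
    raw_type: group
    for group, raw_types in ice_facility_group_mapping.items()
    for raw_type in raw_types
}

assert ice_type_to_group["SPC"] == "Dedicated"
assert ice_type_to_group["Hold"] == "Hold/Staging"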

# extracted from https://www.ice.gov/doclib/detention/FY25_detentionStats08292025.xlsx 2025-09-07
ice_facility_types = {
"BOP": {
"expanded_name": "Federal Bureau of Prisons",
"description": "A facility operated by the Federal Bureau of Prisons",
},
"CDF": {
"expanded_name": "Contract Detention Facility",
"description": "Name derived from listing at https://www.vera.org/ice-detention-trends",
},
"DIGSA": {
"expanded_name": "Dedicated Intergovernmental Service Agreement",
"description": "A publicly-owned facility operated by state/local government(s), or private contractors, in which ICE contracts to use all bed space via a Dedicated Intergovernmental Service Agreement; or facilities used by ICE pursuant to Intergovernmental Service Agreements, which house only ICE detainees – typically these are operated by private contractors pursuant to their agreements with local governments.",
},
"DOD": {
"expanded_name": "Department of Defense",
"description": "Military facility",
"description": "Department of Defence facilities - Often Army bases",
},
"FAMILY": {
"expanded_name": "Family",
"description": "A facility in which families are able to remain together while awaiting their proceedings",
},
"Family": {
"expanded_name": "Family",
"description": "A facility in which families are able to remain together while awaiting their proceedings",
},
"Hospital": {
"expanded_name": "Hospital",
"description": "A medical facility",
},
"IGSA": {
"expanded_name": "Intergovernmental Service Agreement",
"description": "A publicly-owned facility operated by state/local government(s), or private contractors, in which ICE contracts for bed space via an Intergovernmental Service Agreement; or local jails used by ICE pursuant to Intergovernmental Service Agreements, which house both ICE and non-ICE detainees, typically county prisoners awaiting trial or serving short sentences, but sometimes also USMS prisoners.",
},
"Juvenile": {
"expanded_name": "Juvenile",
"description": "An IGSA facility capable of housing juveniles (separate from adults) for a temporary period of time",
},
"Other": {
"expanded_name": "Other",
"description": "Facilities including but not limited to transportation-related facilities, hotels, and/or other facilities",
},
"SPC": {
"expanded_name": "Service Processing Center",
"description": "A facility owned by the government and staffed by a combination of federal and contract employees.",
},
"Unknown": {
"expanded_name": "Unknown",
"description": "A facility whose type could not be identified",
},
"USMS": {
"expanded_name": "United States Marshals Service",
"description": "A facility primarily contracted with the USMS for housing of USMS detainees, in which ICE contracts with the USMS for bed space.",
@@ -82,10 +90,6 @@
"expanded_name": "United States Marshals Service Contract Detention Facility",
"description": "Name derived from listing at https://www.vera.org/ice-detention-trends",
},
"CDF": {
"expanded_name": "Contract Detention Facility",
"description": "Name derived from listing at https://www.vera.org/ice-detention-trends",
},
"Staging": {
"description": "Some facilities in the ICE spreadsheet are marked 'Staging'. Hard to determine why.",
"expanded_name": "Staging",
@@ -131,6 +135,7 @@
repair_locality, # noqa: F401
repair_street, # noqa: F401
repair_zip, # noqa: F401
repair_name, # noqa: F401
special_facilities, # noqa: F401
update_facility, # noqa: F401
)
2 changes: 1 addition & 1 deletion ice_scrapers/custom_facilities.py
@@ -3,7 +3,7 @@
"""
Handle manually discovered/managed facilities
"""
custom_facilities = {
custom_facilities: dict = {
"2309 North Highway 83,McCook,NE,69001": {
"_repaired_record": False,
"address": {
14 changes: 10 additions & 4 deletions ice_scrapers/facilities_scraper.py
@@ -7,6 +7,7 @@
repair_locality,
repair_street,
repair_zip,
repair_name,
special_facilities,
update_facility,
)
@@ -29,6 +30,7 @@ def scrape_facilities(facilities_data: dict) -> dict:
facilities_data["scraped_date"] = datetime.datetime.now(datetime.UTC)
urls = get_ice_scrape_pages(base_scrape_url)

scraped_count = 0
for page_num, url in enumerate(urls):
logger.info("Scraping page %s/%s...", page_num + 1, len(urls))
try:
@@ -37,6 +39,7 @@
logger.error("Error scraping page %s: %s", page_num + 1, e)
logger.debug("Found %s facilities on page %s", len(facilities), page_num + 1)
time.sleep(1) # Be respectful to the server
scraped_count += len(facilities)
for facility in facilities:
facility = special_facilities(facility)
addr = facility["address"]
@@ -52,6 +55,10 @@
if cleaned:
addr["locality"] = locality
facility["_repaired_record"] = True
name, cleaned = repair_name(facility["name"], addr["locality"])
if cleaned:
facility["name"] = name
facility["_repaired_record"] = True
full_address = ",".join([street, locality, addr["administrative_area"], zcode]).upper()
if not facility["address_str"]:
facility["address_str"] = full_address
@@ -73,12 +80,12 @@
facilities_data["facilities"][facility["name"]] = facility # type: ignore [index]

facilities_data["scrape_runtime"] = time.time() - start_time
logger.info("Total facilities scraped: %s", len(list(facilities_data["facilities"].keys()))) # type: ignore [attr-defined]
logger.info("Total facilities scraped: %s", scraped_count)
logger.info(" Completed in %s seconds", facilities_data["scrape_runtime"])
return facilities_data


def _scrape_updated(url: str):
def _scrape_updated(url: str) -> datetime.datetime:
"""
Scrape url to get "last updated" time
Is specifically oriented around ice.gov facility pages
@@ -92,7 +99,7 @@
response.raise_for_status()
except Exception as e:
logger.error(" Error parsing %s: %s", url, e)
return []
return datetime.datetime.strptime(default_timestamp, timestamp_format)
soup = BeautifulSoup(response.content, "html.parser")
times = soup.findAll("time")
if not times:
@@ -176,7 +183,6 @@ def _scrape_page(page_url: str) -> list:
facilities.append(facility_data)

logger.info(" Extracted %s facilities from page", len(facilities))

return facilities


4 changes: 2 additions & 2 deletions ice_scrapers/general.py
@@ -9,9 +9,9 @@
from schemas import facilities_schema


def facilities_scrape_wrapper() -> dict:
def facilities_scrape_wrapper(keep_sheet: bool = True, force_download: bool = True) -> dict:
facilities_data = copy.deepcopy(facilities_schema)
facilities = load_sheet()
facilities = load_sheet(keep_sheet, force_download)
facilities_data["facilities"] = copy.deepcopy(facilities)
facilities_data = scrape_facilities(facilities_data)
field_offices = scrape_field_offices()
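
The new keyword arguments make sheet handling configurable at the call site; their exact semantics are inferred from the names, so treat this usage sketch as an assumption:

facilities_data = facilities_scrape_wrapper()  # defaults: keep and re-download the sheet
facilities_data = facilities_scrape_wrapper(keep_sheet=False, force_download=False)  # presumably reuse a cached sheet and discard it after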