Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
dae7be8
add additional facility types and groupings from Vera
johnseekins Sep 23, 2025
e42f685
start playing with vera.org facility data
johnseekins Sep 23, 2025
6e0648e
fail load on empty sheet
johnseekins Sep 23, 2025
ce87a69
rough pass at adding vera data
johnseekins Sep 24, 2025
601cf57
clean up vera collection and start improving matching
johnseekins Sep 24, 2025
f7c00b4
more Vera matching and store vera facility ID
johnseekins Sep 24, 2025
739e6f7
fix city occasionally, too
johnseekins Sep 24, 2025
c6fe71f
one more match
johnseekins Sep 24, 2025
b48a3cd
fix typo
johnseekins Sep 24, 2025
8a13953
increase matching
johnseekins Sep 25, 2025
7045503
even more matching
johnseekins Sep 25, 2025
219d991
another facility type match
johnseekins Sep 25, 2025
e8c287d
even more matching
johnseekins Sep 25, 2025
f8c1473
possibly the last of the matches
johnseekins Sep 25, 2025
cded7f9
actually add vera url when we update records
johnseekins Sep 25, 2025
886b907
only one job needs facility_sheet_header, keep it with that job
johnseekins Sep 25, 2025
23886dc
update group mappings
johnseekins Sep 25, 2025
3f995ef
fix conflicts and matching for JTF again
johnseekins Sep 29, 2025
e5e8ad6
more vera matching, skip enrichment when it's only vera
johnseekins Sep 30, 2025
5c09f41
more matching
johnseekins Sep 30, 2025
fd6cd40
track repairs with vera data
johnseekins Sep 30, 2025
c135509
slightly more matching
johnseekins Sep 30, 2025
27c0213
slightly restrict zip generation
johnseekins Oct 1, 2025
0c25f4a
slightly nicer typing
johnseekins Oct 1, 2025
95642da
add skip-vera switch
johnseekins Oct 3, 2025
d158481
add tool to find facilities not getting a vera.org ID
johnseekins Oct 4, 2025
2466b23
don't overwrite existing lat/long/etc. unless we have new values
johnseekins Oct 5, 2025
88fbd69
only enrich if there's data to enrich
johnseekins Oct 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion default_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
"https://www.ice.gov/detention-facilities?page=0&exposed_form_display=1",
"https://www.ice.gov/detain/detention-facilities/baker-county-facility",
],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -131,6 +132,7 @@
"total": 1.916666666666666,
},
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -200,6 +202,7 @@
"https://www.ice.gov/detention-facilities?page=4&exposed_form_display=1",
"https://www.ice.gov/detain/detention-facilities/northwest-ice-processing-center-nwipc",
],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -261,6 +264,7 @@
"total": 2.4642857142857095,
},
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -326,6 +330,7 @@
"total": 5.038690476190489,
},
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -395,6 +400,7 @@
"https://www.ice.gov/detention-facilities?page=3&exposed_form_display=1",
"https://www.ice.gov/detain/detention-facilities/grayson-county-detention-center",
],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -460,6 +466,7 @@
"total": 16.732142857143007,
},
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -525,6 +532,7 @@
"total": 20.55952380952385,
},
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -594,6 +602,7 @@
"https://www.ice.gov/detention-facilities?page=5&exposed_form_display=1",
"https://www.ice.gov/detain/detention-facilities/san-luis-regional-detention-center",
],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand All @@ -608,7 +617,7 @@
},
"address_str": "409 FM 1144,KARNES CITY,TX,78118",
"facility_type": {
"description": "A publicly-owned facility operated by state/local government(s), or private contractors, in which ICE contracts to use all bed space via a Dedicated Intergovernmental Service Agreement; or facilities used by ICE pursuant to Intergovernmental Service Agreements, which house only ICE detainees \u2013 typically these are operated by private contractors pursuant to their agreements with local governments.",
"description": "A publicly-owned facility operated by state/local government(s), or private contractors, in which ICE contracts to use all bed space via a Dedicated Intergovernmental Service Agreement; or facilities used by ICE pursuant to Intergovernmental Service Agreements, which house only ICE detainees. Typically these are operated by private contractors pursuant to their agreements with local governments.",
"expanded_name": "Dedicated Intergovernmental Service Agreement",
"id": "DIGSA",
},
Expand Down Expand Up @@ -663,6 +672,7 @@
"https://www.ice.gov/detention-facilities?page=3&exposed_form_display=1",
"https://www.ice.gov/detain/detention-facilities/karnes-county-ipc",
],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -728,6 +738,7 @@
"https://www.ice.gov/detention-facilities?page=1&exposed_form_display=1",
"https://www.ice.gov/detain/detention-facilities/delaney-hall-detention-facility",
],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -793,6 +804,7 @@
"total": 28.62202380952395,
},
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -862,6 +874,7 @@
"https://www.ice.gov/detention-facilities?page=4&exposed_form_display=1",
"https://www.ice.gov/detain/detention-facilities/moshannon-valley-processing-center",
],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -919,6 +932,7 @@
"total": 13.041666666666726,
},
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -984,6 +998,7 @@
"https://www.ice.gov/detention-facilities?page=0&exposed_form_display=1",
"https://www.ice.gov/detain/detention-facilities/butler-county-sheriffs-office",
],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -1053,6 +1068,7 @@
"https://www.ice.gov/detention-facilities?page=4&exposed_form_display=1",
"https://www.ice.gov/detain/detention-facilities/phelps-county-jail",
],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -1122,6 +1138,7 @@
"https://www.ice.gov/detention-facilities?page=1&exposed_form_display=1",
"https://www.ice.gov/detain/detention-facilities/laredo-detention-center",
],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -1187,6 +1204,7 @@
"https://www.ice.gov/detention-facilities?page=1&exposed_form_display=1",
"https://www.ice.gov/detain/detention-facilities/fort-bliss-detention-facility",
],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -1235,6 +1253,7 @@
"https://www.ice.gov/detention-facilities?page=4&exposed_form_display=1",
"https://www.ice.gov/detain/detention-facilities/naval-station-guantanamo-bay",
],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down Expand Up @@ -1296,6 +1315,7 @@
"total": 1.5208333333333308,
},
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
"vera_id": "",
"wikidata": {"page_url": "", "search_query": ""},
"wikipedia": {"page_url": "", "search_query": ""},
},
Expand Down
42 changes: 38 additions & 4 deletions ice_scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,57 @@
"ORSA": "Operational Review Self-Assessment",
}

# extracted from https://vera-institute.files.svdcdn.com/production/downloads/dashboard_appendix.pdf 2025-09-23
# Maps each facility group name to the list of facility type ids that fall under it.
ice_facility_group_mapping = {
    "Non-Dedicated": ["IGSA"],
    "Dedicated": ["DIGSA", "CDF", "SPC"],
    # "BOP" is the id used for the Federal Bureau of Prisons in ice_facility_types.
    # The earlier "BOF" spelling is kept so existing lookups on it still resolve.
    "Federal": ["BOP", "BOF", "USMSIGA", "USMS IGA", "USMS CDF", "DOD", "MOC"],
    "Hold/Staging": ["Hold", "Staging"],
    "Family/Youth": ["Family", "Juvenile"],
    "Medical": ["Hospital"],
    "Hotel": ["Hotel"],
    "Other/Unknown": ["Other", "Unknown"],
}

# extracted from https://www.ice.gov/doclib/detention/FY25_detentionStats08292025.xlsx 2025-09-07
# and https://vera-institute.files.svdcdn.com/production/downloads/dashboard_appendix.pdf 2025-09-23
ice_facility_types = {
"BOP": {
"expanded_name": "Federal Bureau of Prisons",
"description": "A facility operated by the Federal Bureau of Prisons",
},
"CDF": {
"expanded_name": "Contract Detention Facility",
"description": "Name derived from listing at https://www.vera.org/ice-detention-trends",
},
"DIGSA": {
"expanded_name": "Dedicated Intergovernmental Service Agreement",
"description": "A publicly-owned facility operated by state/local government(s), or private contractors, in which ICE contracts to use all bed space via a Dedicated Intergovernmental Service Agreement; or facilities used by ICE pursuant to Intergovernmental Service Agreements, which house only ICE detainees – typically these are operated by private contractors pursuant to their agreements with local governments.",
},
"Family": {
"expanded_name": "Family",
"description": "A facility in which families are able to remain together while awaiting their proceedings",
},
"Hospital": {
"expanded_name": "Hospital",
"description": "A medical facility",
},
"IGSA": {
"expanded_name": "Intergovernmental Service Agreement",
"description": "A publicly-owned facility operated by state/local government(s), or private contractors, in which ICE contracts for bed space via an Intergovernmental Service Agreement; or local jails used by ICE pursuant to Intergovernmental Service Agreements, which house both ICE and non-ICE detainees, typically county prisoners awaiting trial or serving short sentences, but sometimes also USMS prisoners.",
},
"Juvenile": {
"expanded_name": "Juvenile",
"description": "An IGSA facility capable of housing juveniles (separate from adults) for a temporary period of time",
},
"Other": {
"expanded_name": "Other",
"description": "Facilities including but not limited to transportation-related facilities, hotels, and/or other facilities",
},
"Unknown": {
"expanded_name": "Unknown",
"description": "A facility who's type could not be identified",
},
"SPC": {
"expanded_name": "Service Processing Center",
"description": "A facility owned by the government and staffed by a combination of federal and contract employees.",
Expand All @@ -78,10 +115,6 @@
"expanded_name": "United States Marshals Service Contract Detention Facility",
"description": "Name derived from listing at https://www.vera.org/ice-detention-trends",
},
"CDF": {
"expanded_name": "Contract Detention Facility",
"description": "Name derived from listing at https://www.vera.org/ice-detention-trends",
},
"Staging": {
"description": "Some facilities in the ICE spreadsheet are marked 'Staging'. Hard to determine why.",
"expanded_name": "Staging",
Expand Down Expand Up @@ -135,5 +168,6 @@
merge_field_offices, # noqa: F401
scrape_field_offices, # noqa: F401
)
from .vera_data import collect_vera_facility_data # noqa: F401,E402
from .custom_facilities import insert_additional_facilities # noqa: F401,E402
from .general import facilities_scrape_wrapper # noqa: F401,E402
6 changes: 4 additions & 2 deletions ice_scrapers/general.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import copy
from ice_scrapers import (
collect_vera_facility_data,
insert_additional_facilities,
load_sheet,
merge_field_offices,
Expand All @@ -9,10 +10,11 @@
from schemas import facilities_schema


def facilities_scrape_wrapper() -> dict:
def facilities_scrape_wrapper(keep_sheet: bool = True, force_download: bool = True) -> dict:
facilities_data = copy.deepcopy(facilities_schema)
facilities = load_sheet()
facilities = load_sheet(keep_sheet, force_download)
facilities_data["facilities"] = copy.deepcopy(facilities)
facilities_data = collect_vera_facility_data(facilities_data, keep_sheet, force_download)
facilities_data = scrape_facilities(facilities_data)
field_offices = scrape_field_offices()
facilities_data = merge_field_offices(facilities_data, field_offices)
Expand Down
25 changes: 13 additions & 12 deletions ice_scrapers/spreadsheet_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
filename = f"{SCRIPT_DIR}{os.sep}detentionstats.xlsx"


def _download_sheet(keep_sheet: bool = True) -> Tuple[polars.DataFrame, str]:
def _download_sheet(keep_sheet: bool = True, force_download: bool = True) -> Tuple[polars.DataFrame, str]:
"""Download the detention stats sheet from ice.gov"""
resp = session.get(base_xlsx_url, timeout=120)
resp.raise_for_status()
Expand All @@ -49,19 +49,20 @@ def _download_sheet(keep_sheet: bool = True) -> Tuple[polars.DataFrame, str]:
actual_link = link["href"]
# this seems like tracking into the future...
cur_year = year

logger.debug("Found sheet at: %s", actual_link)
logger.info("Downloading detention stats sheet from %s", actual_link)
resp = session.get(actual_link, timeout=120, stream=True)
size = len(resp.content)
with open(filename, "wb") as f:
for chunk in resp.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
logger.debug("Wrote %s byte sheet to %s", size, filename)
if force_download or not os.path.exists(filename):
logger.info("Downloading detention stats sheet from %s", actual_link)
resp = session.get(actual_link, timeout=120, stream=True)
size = len(resp.content)
with open(filename, "wb") as f:
for chunk in resp.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
logger.debug("Wrote %s byte sheet to %s", size, filename)
df = polars.read_excel(
drop_empty_rows=True,
has_header=False,
raise_if_empty=True,
# because we're manually defining the header...
read_options={"skip_rows": 7, "column_names": facility_sheet_header},
sheet_name=f"Facilities FY{cur_year}",
Expand All @@ -72,8 +73,8 @@ def _download_sheet(keep_sheet: bool = True) -> Tuple[polars.DataFrame, str]:
return df, actual_link


def load_sheet(keep_sheet: bool = True) -> dict:
df, sheet_url = _download_sheet(keep_sheet)
def load_sheet(keep_sheet: bool = True, force_download: bool = True) -> dict:
df, sheet_url = _download_sheet(keep_sheet, force_download)
"""Convert the detentionstats sheet data into something we can update our facilities with"""
results: dict = {}
# occasionally a phone number shows up in weird places in the spreadsheet.
Expand Down
Loading