Skip to content

Commit a03d96b

Browse files
authored
Merge pull request #38 from johnseekins/scraper-breakout Scraper function break up code into multiple files.
Scraper function break up
2 parents 581b8b1 + 9c790fd commit a03d96b

File tree

12 files changed

+1028
-970
lines changed

12 files changed

+1028
-970
lines changed

default_data.py

Lines changed: 0 additions & 40 deletions
Large diffs are not rendered by default.

field_offices.py

Lines changed: 0 additions & 179 deletions
This file was deleted.

ice_scrapers/README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# ICE Facility scrapers
2+
3+
These files maintain the code to collect (and collate) ICE facility data from a number of sources.
4+
5+
## utils.py
6+
7+
Contains most of our collating functions and shared functions that scrapers may need.
8+
9+
## __init__.py
10+
11+
Contains some static objects and import declarations (so we can `from ice_scrapers import` successfully).
12+
13+
## spreadsheet_load.py
14+
15+
ICE is required by law to produce regular custody data. We can pull that data from here `https://www.ice.gov/detain/detention-management`. Because this spreadsheet is more "complete" than other sources we've found, we use it as our base scrape.
16+
17+
## facilities_scraper.py
18+
19+
Pulls information about ICE detention facilities from `https://www.ice.gov/detention-facilities`. This can add additional (or corrected) data about facility locations and contact information, and also provides facility images.
20+
21+
## field_offices.py
22+
23+
Collects additional data about ICE/DHS field offices from `https://www.ice.gov/contact/field-offices`. This is largely basic area-of-responsibility and contact info for each field office.
24+
25+
> The field-offices page shows information about a number of different offices. As we are largely focused on detention, ERO (Enforcement and Removal Operations) centers are the most interesting.

ice_scrapers/__init__.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
"""
2+
Import order here is a touch weird, but we need it so
3+
types exist before attempting to import functions that
4+
may call them
5+
"""
6+
7+
# extracted ADP sheet header list 2025-09-07
8+
facility_sheet_header = [
9+
"Name",
10+
"Address",
11+
"City",
12+
"State",
13+
"Zip",
14+
"AOR",
15+
"Type Detailed",
16+
"Male/Female",
17+
"FY25 ALOS",
18+
"Level A",
19+
"Level B",
20+
"Level C",
21+
"Level D",
22+
"Male Crim",
23+
"Male Non-Crim",
24+
"Female Crim",
25+
"Female Non-Crim",
26+
"ICE Threat Level 1",
27+
"ICE Threat Level 2",
28+
"ICE Threat Level 3",
29+
"No ICE Threat Level",
30+
"Mandatory",
31+
"Guaranteed Minimum",
32+
"Last Inspection Type",
33+
"Last Inspection End Date",
34+
"Pending FY25 Inspection",
35+
"Last Inspection Standard",
36+
"Last Final Rating",
37+
]
38+
39+
# extracted from https://www.ice.gov/doclib/detention/FY25_detentionStats08292025.xlsx 2025-09-07
40+
ice_facility_types = {
41+
"BOP": {
42+
"expanded_name": "Federal Bureau of Prisons",
43+
"description": "A facility operated by the Federal Bureau of Prisons",
44+
},
45+
"DIGSA": {
46+
"expanded_name": "Dedicated Intergovernmental Service Agreement",
47+
"description": "A publicly-owned facility operated by state/local government(s), or private contractors, in which ICE contracts to use all bed space via a Dedicated Intergovernmental Service Agreement; or facilities used by ICE pursuant to Inter-governmental Service Agreements, which house only ICE detainees – typically these are operated by private contractors pursuant to their agreements with local governments.",
48+
},
49+
"IGSA": {
50+
"expanded_name": "Intergovernmental Service Agreement",
51+
"description": "A publicly-owned facility operated by state/local government(s), or private contractors, in which ICE contracts for bed space via an Intergovernmental Service Agreement; or local jails used by ICE pursuant to Inter-governmental Service Agreements, which house both ICE and non-ICE detainees, typically county prisoners awaiting trial or serving short sentences, but sometimes also USMS prisoners.",
52+
},
53+
"SPC": {
54+
"expanded_name": "Service Processing Center",
55+
"description": "A facility owned by the government and staffed by a combination of federal and contract employees.",
56+
},
57+
"USMS": {
58+
"expanded_name": "United States Marshals Service",
59+
"description": "A facility primarily contracted with the USMS for housing of USMS detainees, in which ICE contracts with the USMS for bed space.",
60+
},
61+
# two keys for the same thing as it isn't consistently defined
62+
"USMSIGA": {
63+
"expanded_name": "United States Marshal Service Intergovernmental Agreement",
64+
"description": "A USMS Intergovernmental Agreement in which ICE agrees to utilize an already established US Marshal Service contract.",
65+
},
66+
"USMS IGA": {
67+
"expanded_name": "United States Marshal Service Intergovernmental Agreement",
68+
"description": "A USMS Intergovernmental Agreement in which ICE agrees to utilize an already established US Marshal Service contract.",
69+
},
70+
"USMS CDF": {
71+
"expanded_name": "United States Marshal Service Contract Detention Facility",
72+
"description": "Name derived from listing at https://www.vera.org/ice-detention-trends",
73+
},
74+
"CDF": {
75+
"expanded_name": "Contract Detention Facility",
76+
"description": "Name derived from listing at https://www.vera.org/ice-detention-trends",
77+
},
78+
}
79+
80+
# ICE AOR mappings
81+
area_of_responsibility = {
82+
"ATL": "Atlanta Field Office",
83+
"BAL": "Baltimore Field Office",
84+
"BOS": "Boston Field Office",
85+
"BUF": "Buffalo Field Office",
86+
"CHI": "Chicago Field Office",
87+
"DAL": "Dallas Field Office",
88+
"DEN": "Denver Field Office",
89+
"DET": "Detroit Field Office",
90+
"ELP": "El Paso Field Office",
91+
"HLG": "Harlingen Field Office",
92+
"HOU": "Houston Field Office",
93+
"LOS": "Los Angeles Field Office",
94+
"MIA": "Miami Field Office",
95+
"NEW": "Newark Field Office",
96+
"NOL": "New Orleans Field Office",
97+
"NYC": "New York City Field Office",
98+
"PHI": "Philadelphia Field Office",
99+
"PHO": "Phoenix Field Office",
100+
"SEA": "Seattle Field Office",
101+
"SFR": "San Francisco Field Office",
102+
"SLC": "Salt Lake City Field Office",
103+
"SNA": "San Antonio Field Office",
104+
"SND": "San Diego Field Office",
105+
"SPM": "St Paul Field Office",
106+
"WAS": "Washington Field Office",
107+
}
108+
field_office_to_aor = {v: k for k, v in area_of_responsibility.items()}
109+
110+
from .utils import ( # noqa: E402
111+
clean_street, # noqa: F401
112+
get_ice_scrape_pages, # noqa: F401
113+
repair_zip, # noqa: F401
114+
repair_locality, # noqa: F401
115+
update_facility, # noqa: F401
116+
)
117+
from .facilities_scraper import scrape_facilities # noqa: F401,E402
118+
from .spreadsheet_load import load_sheet # noqa: F401,E402
119+
from .field_offices import ( # noqa: E402
120+
merge_field_offices, # noqa: F401
121+
scrape_field_offices, # noqa: F401
122+
)

0 commit comments

Comments
 (0)