
Commit 99c385d

Merge pull request #41 from johnseekins/additional-stats

Some additional statistics in the data model
2 parents b1bf3a0 + 6d290af commit 99c385d

9 files changed: +796 −487 lines changed

.github/dependabot.yml

Lines changed: 9 additions & 0 deletions
@@ -5,3 +5,12 @@ updates:
     directory: "/"
     schedule:
       interval: "weekly"
+
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "monthly"
+    open-pull-requests-limit: 1
+    groups:
+      actions:
+        dependency-type: "production"
Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
+# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
+name: Ensure Pre-commit services are updated (if possible)
+
+on:
+  workflow_dispatch: {}
+  schedule:
+    - cron: '0 0 1 * *' # Run on midnight on the first of the month
+
+permissions:
+  pull-requests: write
+  contents: write
+
+concurrency:
+  group: precommit-updates
+  cancel-in-progress: true
+
+jobs:
+  auto-update:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+      - id: file-check
+        run: |
+          files=$(find . -type f -name ".pre-commit-config.yaml" -print0 | xargs)
+          if [[ -s "${files}" ]]; then
+            echo "files_exist=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "files_exist=false" >> "$GITHUB_OUTPUT"
+          fi
+      # selfhosted nodes have precommit installed by default
+      - name: Run pre-commit autoupdate
+        if: steps.file-check.outputs.files_exist == 'true'
+        run: |
+          sudo apt-get install -yqq python3-pip python3-wheel
+          pip3 install -q --disable-pip-version-check pre-commit
+          pre-commit autoupdate
+      - name: Create Pull Request
+        if: steps.file-check.outputs.files_exist == 'true'
+        uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e
+        with:
+          token: ${{ github.token }}
+          branch: update/pre-commit-autoupdate
+          title: Auto-update pre-commit hooks
+          commit-message: ND - Auto-update pre-commit hooks
+          body: |
+            Update versions of tools in pre-commit
+            configs to latest version
+          labels: dependencies
+          delete-branch: true
+          sign-commits: true
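
Not part of the commit, but for orientation: a rough Python sketch of what the file-check step amounts to (look for any .pre-commit-config.yaml in the checkout and write a files_exist flag), assuming it runs in an Actions job where the runner sets GITHUB_OUTPUT:

import os
from pathlib import Path

# Look for pre-commit configs anywhere in the checkout (mirrors the find | xargs step)
configs = list(Path(".").rglob(".pre-commit-config.yaml"))

# GITHUB_OUTPUT is provided by the Actions runner; skip quietly when run locally
output_file = os.environ.get("GITHUB_OUTPUT")
if output_file:
    with open(output_file, "a") as fh:
        fh.write(f"files_exist={'true' if configs else 'false'}\n")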

default_data.py

Lines changed: 699 additions & 483 deletions
Large diffs are not rendered by default.

ice_scrapers/__init__.py

Lines changed: 7 additions & 0 deletions
@@ -36,6 +36,13 @@
     "Last Final Rating",
 ]
 
+ice_inspection_types = {
+    # found in https://www.ice.gov/foia/odo-facility-inspections
+    "ODO": "Office of Detention Oversight",
+    # found in https://ia803100.us.archive.org/16/items/6213032-ORSA-MOU-ICE/6213032-ORSA-MOU-ICE_text.pdf
+    "ORSA": "Operational Review Self-Assessment",
+}
+
 # extracted from https://www.ice.gov/doclib/detention/FY25_detentionStats08292025.xlsx 2025-09-07
 ice_facility_types = {
     "BOP": {

ice_scrapers/field_offices.py

Lines changed: 1 addition & 1 deletion
@@ -152,7 +152,7 @@ def _extract_single_office(element: BeautifulSoup, page_url: str) -> dict:
         office["email"] = email[0]["href"].split(":", 1)[1]
     detail_txt = details.text  # type: ignore [union-attr]
     logger.debug("Detail text: %s", detail_txt)
-    aor_match = re.search(r"Area of Responsibility:(.+)Email", detail_txt)
+    aor_match = re.search(r"Area of Responsibility:(.+)\n?Email", detail_txt)
     if aor_match:
         office["aor"] = aor_match.group(1).strip().replace("\xa0", " ")
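
The `\n?` matters because `.` does not match newlines without re.DOTALL, so the old pattern failed whenever "Email" started a new line in the scraped detail text. A small sketch with made-up detail text and email address:

import re

detail_txt = "Area of Responsibility: Colorado, Utah, Wyoming\nEmail: field.office@example.gov"

# Old pattern: "." stops at the newline, so nothing matches
assert re.search(r"Area of Responsibility:(.+)Email", detail_txt) is None

# New pattern: the optional newline lets "Email" start the next line
aor_match = re.search(r"Area of Responsibility:(.+)\n?Email", detail_txt)
assert aor_match.group(1).strip() == "Colorado, Utah, Wyoming"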

ice_scrapers/spreadsheet_load.py

Lines changed: 17 additions & 0 deletions
@@ -12,6 +12,7 @@
     clean_street,
     facility_sheet_header,
     ice_facility_types,
+    ice_inspection_types,
     repair_zip,
     repair_locality,
 )
@@ -99,10 +100,15 @@ def load_sheet(keep_sheet: bool = True) -> dict:
         details["address"]["postal_code"] = zcode
         details["address"]["street"] = street
         details["name"] = row["Name"]
+
+        # population statistics
         details["population"]["male"]["criminal"] = row["Male Crim"]
         details["population"]["male"]["non_criminal"] = row["Male Non-Crim"]
         details["population"]["female"]["criminal"] = row["Female Crim"]
         details["population"]["female"]["non_criminal"] = row["Female Non-Crim"]
+        details["population"]["total"] = (
+            row["Male Crim"] + row["Male Non-Crim"] + row["Female Crim"] + row["Female Non-Crim"]
+        )
         if row["Male/Female"]:
             if "/" in row["Male/Female"]:
                 details["population"]["female"]["allowed"] = True
@@ -117,6 +123,15 @@ def load_sheet(keep_sheet: bool = True) -> dict:
             "level_3": row["ICE Threat Level 3"],
             "none": row["No ICE Threat Level"],
         }
+        """
+        # extracted from https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx 2025-09-22
+        Upon admission and periodically thereafter, detainees are categorized into a security level based on a variety of public safety factors, and are housed accordingly. Factors include prior convictions, threat risk, disciplinary record, special vulnerabilities, and special management concerns. Detainees are categorized into one of four classes of security risk: A/low, B/medium low, C/medium high, and D/high.
+        """
+        details["population"]["security_threat"]["low"] = row["Level A"]
+        details["population"]["security_threat"]["medium_low"] = row["Level B"]
+        details["population"]["security_threat"]["medium_high"] = row["Level C"]
+        details["population"]["security_threat"]["high"] = row["Level D"]
+
         details["facility_type"] = {
             "id": row["Type Detailed"],
             "housing": {
@@ -130,6 +145,8 @@ def load_sheet(keep_sheet: bool = True) -> dict:
         details["facility_type"]["expanded_name"] = ft_details["expanded_name"]
         details["avg_stay_length"] = row["FY25 ALOS"]
         details["inspection"] = {
+            # fall back to type code
+            "last_type": ice_inspection_types.get(row["Last Inspection Type"], row["Last Inspection Type"]),
            "last_date": row["Last Inspection End Date"],
            "last_rating": row["Last Final Rating"],
        }
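
A minimal sketch of the new population bookkeeping, using a hand-made stand-in for a spreadsheet row (column names from the hunk above, values invented):

row = {  # invented values; the real rows come from the FY25 detention stats workbook
    "Male Crim": 120, "Male Non-Crim": 310, "Female Crim": 15, "Female Non-Crim": 55,
    "Level A": 200, "Level B": 180, "Level C": 90, "Level D": 30,
}

population: dict = {"security_threat": {}}
population["total"] = (
    row["Male Crim"] + row["Male Non-Crim"] + row["Female Crim"] + row["Female Non-Crim"]
)  # 500
# Spreadsheet security levels A-D land in the low -> high buckets
for column, bucket in (("Level A", "low"), ("Level B", "medium_low"),
                       ("Level C", "medium_high"), ("Level D", "high")):
    population["security_threat"][bucket] = row[column]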

ice_scrapers/utils.py

Lines changed: 4 additions & 2 deletions
@@ -135,8 +135,10 @@ def repair_zip(zip_code: int, locality: str) -> Tuple[str, bool]:
     """
     zcode = str(zip_code)
     cleaned = False
-    if len(zcode) == 4:
-        zcode = f"0{zcode}"
+    if len(zcode) < 5:
+        # pad any prefix
+        zeros = "0" * (5 - len(zcode))
+        zcode = f"{zeros}{zcode}"
         return zcode, cleaned
     matches = [
         {"match": "89512", "replace": "89506", "locality": "Reno"},

schemas.py

Lines changed: 8 additions & 0 deletions
@@ -79,6 +79,13 @@
             "level_3": 0,
             "none": 0,
         },
+        "total": 0,
+        "security_threat": {
+            "low": 0,
+            "medium_low": 0,
+            "medium_high": 0,
+            "high": 0,
+        },
     },
     "facility_type": {
         "id": "",
@@ -90,6 +97,7 @@
         },
     },
     "inspection": {
+        "last_type": "",
         "last_date": None,
         "last_rating": "",
     },

utils.py

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ def _flatdict(d: dict, parent_key: str = "", sep: str = ".") -> dict:
     """flatten a nested dictionary for nicer printing to workbooks (excel/csv/etc.)"""
     items: list = []
     for k, v in d.items():
-        new_key = parent_key + sep + str(k) if parent_key else str(k)
+        new_key = f"{parent_key}{sep}{str(k)}" if parent_key else str(k)
         if isinstance(v, dict):
             items.extend(_flatdict(v, new_key, sep=sep).items())
         else:
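
For context, this is what the flattened keys look like; the helper below is a simplified re-implementation of _flatdict for illustration rather than the module's own code:

def flatdict(d: dict, parent_key: str = "", sep: str = ".") -> dict:
    # simplified illustration of the flattening behaviour
    items: list = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{str(k)}" if parent_key else str(k)
        if isinstance(v, dict):
            items.extend(flatdict(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

print(flatdict({"population": {"security_threat": {"low": 3}}}))
# {'population.security_threat.low': 3}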

0 commit comments
