Skip to content

Commit c6d18e3

Browse files
authored
Move to new complaints dataset and remove references to officers (#13)
1 parent 9374293 commit c6d18e3

File tree

7 files changed

+61
-98
lines changed

7 files changed

+61
-98
lines changed

app/app.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -29,8 +29,4 @@ def create_app(config_name="default"):
2929

3030
app.register_blueprint(views)
3131

32-
from app.lookup import find_name, find_serial
33-
34-
app.jinja_env.globals.update(find_name=find_name, find_serial=find_serial)
35-
3632
return app

app/lookup.py

Lines changed: 2 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
from typing import Dict, List
44

55
import requests
6-
from flask import current_app, g
6+
from flask import current_app
77

88
from app.utils import Regexps, validate
99

@@ -18,37 +18,21 @@ def csv_to_dict(name: str, url: str) -> Dict[str, str]:
1818
return {serial: name for serial, name, *extra in reader}
1919

2020

21-
def find_serial(uid: str) -> str:
22-
if "uid_map" not in g:
23-
g.uid_map = csv_to_dict("uid", current_app.config.get("UID_CSV_URL"))
24-
return g.uid_map.get(uid, "Unknown")
25-
26-
27-
def find_name(serial: str) -> str:
28-
if "roster_map" not in g:
29-
g.roster_map = csv_to_dict("roster", current_app.config.get("ROSTER_CSV_URL"))
30-
return g.roster_map.get(serial, "Unknown")
31-
32-
3321
@dataclass
3422
class CaseResult:
3523
case_num: str
36-
officers: List[str]
3724
allegations: List[str]
3825
disposition: str
3926

4027

4128
def find_case(case_num: str) -> CaseResult:
4229
rows = requests.get(
43-
f"https://data.seattle.gov/api/id/99yi-dthu.json?$query=select * where (upper(`file_number`) = upper('{case_num}'))"
30+
f"https://data.seattle.gov/api/id/hyay-5x7b.json?$query=select * where (upper(`file_number`) = upper('{case_num}'))"
4431
).json()
4532

4633
if not rows:
4734
return None
4835

49-
officers = {
50-
validate(row["named_employee_id"], Regexps.SERIAL, "Unknown") for row in rows
51-
}
5236
allegations = {
5337
validate(row["allegation"], Regexps.STRING, "Unknown") for row in rows
5438
}
@@ -58,7 +42,6 @@ def find_case(case_num: str) -> CaseResult:
5842

5943
return CaseResult(
6044
case_num,
61-
list(officers),
6245
list(allegations),
6346
"".join(disposition) if len(disposition) < 2 else "Partially Sustained",
6447
)

app/templates/case.html

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -24,25 +24,6 @@
2424
<th class="text-end" scope="row">Disposition</th>
2525
<td>{{case.disposition}}</td>
2626
</tr>
27-
<tr>
28-
<th class="text-end" scope="row">Officers</th>
29-
<td>
30-
<ul>
31-
{% for officer in case.officers %}
32-
{% set serial = find_serial(officer) %}
33-
{% if serial == "Unknown" %}
34-
<li>Unknown</li>
35-
{% else %}
36-
<li>
37-
<a class="text-secondary" href="https://openoversight.tech-bloc-sea.dev/department/1?badge={{serial}}">
38-
{{ find_name(serial) }} ({{serial}})
39-
</a>
40-
</li>
41-
{% endif %}
42-
{% endfor %}
43-
</ul>
44-
</td>
45-
</tr>
4627
</tbody>
4728
</table>
4829
{% endblock %}

app/templates/update.j2

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -17,25 +17,6 @@
1717
<th class="text-end" scope="row">Published</th>
1818
<td>{{update.event_date}}</td>
1919
</tr>
20-
<tr>
21-
<th class="text-end" scope="row">Involved officers</th>
22-
<td>
23-
<ul>
24-
{% for officer in update.officers %}
25-
{% set serial = find_serial(officer) %}
26-
<li>
27-
{% if serial == 'Unknown' %}
28-
Unknown
29-
{% else %}
30-
<a class="text-secondary" href="https://openoversight.tech-bloc-sea.dev/department/1?badge={{serial}}">
31-
{{ find_name(serial) }} ({{serial}})
32-
</a>
33-
{% endif %}
34-
</li>
35-
{% endfor %}
36-
</ul>
37-
</td>
38-
</tr>
3920
<tr>
4021
<th class="text-end" scope="row">Allegations</th>
4122
<td>

app/updater.py

Lines changed: 58 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ def get_update_type(self) -> UpdateType:
3434
return UpdateType.CCS_PUBLISHED
3535

3636
def get_update_url(self, last_update_dt) -> str:
37-
return f"https://data.seattle.gov/api/id/m33m-84uk.json?$query=select * where (`posted_date` > '{last_update_dt.isoformat()}') order by `posted_date` desc"
37+
return f"https://data.seattle.gov/api/id/m33m-84uk.json?$query=select * where (`posted_date` > '{last_update_dt.date().isoformat()}') order by `posted_date` desc"
3838

3939
def process_case(self, case, update_dt) -> Update:
4040
# Response:
@@ -66,12 +66,12 @@ def process_case(self, case, update_dt) -> Update:
6666

6767
result = find_case(update.case_num)
6868
if result:
69-
update.officers = result.officers
7069
update.allegations = result.allegations
7170
else:
72-
update.officers = []
7371
update.allegations = []
7472

73+
update.officers = []
74+
7575
return update
7676

7777
def process(self, data, update_dt) -> List[Update]:
@@ -83,60 +83,85 @@ def get_update_type(self) -> UpdateType:
8383
return UpdateType.COMPLAINT_FILED
8484

8585
def get_update_url(self, last_update_dt) -> str:
86-
return f"https://data.seattle.gov/api/id/pafy-bfmu.json?$query=select * where ((`task_creation_date` > '{last_update_dt.isoformat()}') and (upper(`status_description`) = upper('OPA Intake'))) order by `task_creation_date` desc"
86+
return f"https://data.seattle.gov/api/id/hyay-5x7b.json?$query=select * where (`received_date` > '{last_update_dt.date().isoformat()}') order by `received_date` desc"
8787

88-
def process_complaint(self, complaint, update_dt) -> Update:
88+
def process_complaint(self, case_num, rows, update_dt) -> Update:
8989
# Response:
9090
# [
9191
# {
92-
# "opa_case_number":"2022OPA-0134",
93-
# "status":"Done",
94-
# "status_description":"OPA Intake",
95-
# "due_date":"2022-05-20T00:00:00.000",
96-
# "completed_date":"2022-05-04T00:00:00.000",
97-
# "task_creation_date":"2022-05-03T00:00:00.000",
98-
# "due_date_2":"2022-05-20T00:00:00.000",
99-
# "completed_date_2":"2022-05-04T00:00:00.000",
100-
# "task_creation_date_2":"2022-05-03T00:00:00.000",
101-
# "currentstatus":"Done"
92+
# "unique_id":"65217-98589-69722-1595-26426",
93+
# "file_number":"2021OPA-0452",
94+
# "incident_number":"65217",
95+
# "occurred_date":"2021-10-03T00:00:00.000",
96+
# "received_date":"2021-10-04T00:00:00.000",
97+
# "incident_precinct":"-",
98+
# "incident_beat":"-",
99+
# "source":"SPD - Forwarded",
100+
# "incident_type":"OPA Investigation",
101+
# "allegation":"Professionalism",
102+
# "disposition":"-",
103+
# "discipline":"-",
104+
# "named_employee_id":"1595",
105+
# "named_employee_race":"Asian",
106+
# "named_employee_gender":"M",
107+
# "named_employee_age_at":"38",
108+
# "named_employee_title_at":"ACTING POLICE SERGEANT",
109+
# "named_employee_squad_at":"SOUTH PCT 3RD W - R/S RELIEF",
110+
# "complainant_number":"26426",
111+
# "complainant_gender":"Male",
112+
# "complainant_race":"White",
113+
# "complainant_age_complaint":"34",
114+
# "case_status":"Active",
115+
# "finding":"-",
116+
# "investigation_begin_date":"2021-10-03T00:00:00.000",
117+
# "investigation_end_date":"2022-05-01T00:00:00.000"
102118
# },
103119
# ...
120+
# ]
121+
officers = set()
122+
allegations = set()
123+
disposition = set()
124+
125+
for row in rows:
126+
allegations.add(validate(row["allegation"], Regexps.STRING, "Unknown"))
127+
disposition.add(validate(row["disposition"], Regexps.STRING, "Unknown"))
128+
104129
update = Update()
105-
update.case_num = validate(
106-
complaint["opa_case_number"], Regexps.CASE_NUM, "Invalid case number"
107-
)
130+
update.allegations = list(allegations)
131+
update.case_num = validate(case_num, Regexps.CASE_NUM, "Invalid case number")
108132
update.create_date = update_dt
109133
update.event_date = parser.parse(
110-
validate(
111-
complaint["task_creation_date"],
112-
Regexps.TIMESTAMP,
113-
"1970-01-01T00:00:00",
114-
)
134+
validate(rows[0]["received_date"], Regexps.TIMESTAMP, None)
115135
)
136+
update.officers = list(officers)
116137
update.type = self.get_update_type()
117138

118-
result = find_case(update.case_num)
119-
if result:
120-
update.allegations = result.allegations
121-
update.disposition = result.disposition
122-
update.officers = result.officers
139+
if len(disposition) == 1:
140+
update.disposition = "".join(disposition)
123141
else:
124-
update.allegations = []
125-
update.disposition = ""
126-
update.officers = []
142+
update.disposition = "Partially Sustained"
127143

128144
return update
129145

130146
def process(self, data, update_dt) -> List[Update]:
131-
return [self.process_complaint(complaint, update_dt) for complaint in data]
147+
# Since this dataset lists one allegation per row, we need to aggregate by case number
148+
def key_by_case(d):
149+
return d["file_number"]
150+
151+
data = sorted(data, key=key_by_case)
152+
cases = {k: list(v) for k, v in itertools.groupby(data, key=key_by_case)}
153+
return [
154+
self.process_complaint(case_num, rows, update_dt)
155+
for case_num, rows in cases.items()
156+
]
132157

133158

134159
class ClosedInvestigationUpdater(Updater):
135160
def get_update_type(self) -> UpdateType:
136161
return UpdateType.INVESTIGATION_CLOSED
137162

138163
def get_update_url(self, last_update_dt) -> str:
139-
return f"https://data.seattle.gov/api/id/99yi-dthu.json?$query=select * where (`investigation_end_date` > '{last_update_dt.isoformat()}') order by `investigation_end_date` desc"
164+
return f"https://data.seattle.gov/api/id/hyay-5x7b.json?$query=select * where (`investigation_end_date` > '{last_update_dt.date().isoformat()}') order by `investigation_end_date` desc"
140165

141166
def process_case(self, case_num, rows, update_dt) -> Update:
142167
# Response:
@@ -176,7 +201,6 @@ def process_case(self, case_num, rows, update_dt) -> Update:
176201
disposition = set()
177202

178203
for row in rows:
179-
officers.add(validate(row["named_employee_id"], Regexps.SERIAL, "Unknown"))
180204
allegations.add(validate(row["allegation"], Regexps.STRING, "Unknown"))
181205
disposition.add(validate(row["disposition"], Regexps.STRING, "Unknown"))
182206

app/utils.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@ class Regexps:
55
CASE_NUM = "\\d{4}OPA-\\d{4}"
66
CCS_DISPOSITION = "((No|All) Allegations Sustained|Partially Sustained|-)"
77
CCS_URL = "https://www.seattle.gov/Documents/Departments/OPA/ClosedCaseSummaries/\\d{4}OPA-\\d{4}ccs\\d{4,10}.pdf"
8-
SERIAL = "\\d{1,4}"
98
STRING = "[\\w \\-,]{1,255}"
109
TIMESTAMP = "\\d{4}(-\\d{2}){2}T\\d{2}(:\\d{2}){2}(.\\d{3})?"
1110

tests/test_updater.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,6 @@ def test_do_update_failed(flask, db):
112112
@pytest.mark.acceptance
113113
@pytest.mark.parametrize("updater", [updater for (updater, _) in app.updater.updaters])
114114
def test_updater(flask, db, updater):
115-
print(type(updater))
116115
refresh_date = datetime.now()
117-
updates = updater.update(refresh_date - timedelta(weeks=3), refresh_date)
116+
updates = updater.update(refresh_date - timedelta(weeks=10), refresh_date)
118117
assert len(updates) > 0

0 commit comments

Comments
 (0)