Open-Security-Mapping-Project · johnseekins · Nov 27, 2025 · Nov 27, 2025 · Nov 27, 2025 · Nov 30, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -7,54 +7,75 @@ on:
 
 jobs:
   python-lint:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-slim
     steps:
       - name: checkout
-        uses: actions/checkout@v5
+        uses: actions/checkout@v6
       - name: setup python
         run: |
-          curl -SsL https://mise.run | bash > /dev/null
-          mise trust --quiet .config/mise.toml
-          mise install --quiet
-          eval "$(mise activate bash)" > /dev/null
+          curl -fsSL https://mise.run | bash > /dev/null  # -f: abort on HTTP errors
+          ~/.local/bin/mise trust --quiet .config/mise.toml
+          ~/.local/bin/mise install --quiet python  # only what this job needs
+          eval "$(~/.local/bin/mise activate bash)" > /dev/null
           pip install -q --no-cache-dir --upgrade pip wheel uv
           uv sync
       - name: Ruff Format
         run: |
-          eval "$(mise activate bash)"
+          eval "$(~/.local/bin/mise activate bash)" > /dev/null
           uv run ruff format --check
       - name: Ruff Check
         run: |
-          eval "$(mise activate bash)"
+          eval "$(~/.local/bin/mise activate bash)" > /dev/null
           uv run ruff check
       - name: run mypy
         run: |
-          eval "$(mise activate bash)"
+          eval "$(~/.local/bin/mise activate bash)" > /dev/null
           uv run mypy .
 
   markdown:
     name: Markdown linting
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-slim
     steps:
       - name: Checkout
-        uses: actions/checkout@v5
+        uses: actions/checkout@v6
+      - name: setup markdownlint
+        run: |
+          curl -fsSL https://mise.run | bash > /dev/null  # -f: abort on HTTP errors
+          ~/.local/bin/mise trust --quiet .config/mise.toml
+          ~/.local/bin/mise install --quiet markdownlint-cli2
       - name: Markdown linting
-        uses: DavidAnson/markdownlint-cli2-action@992badcdf24e3b8eb7e87ff9287fe931bcb00c6e
-        with:
-          config: .markdownlint.json
+        run: |
+          eval "$(~/.local/bin/mise activate bash)" > /dev/null
+          markdownlint-cli2 .  # TODO confirm .markdownlint.json still applies
 
   yamllint:
     name: Yaml Linting
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-slim
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+      - name: setup yamllint
+        run: |
+          curl -fsSL https://mise.run | bash > /dev/null  # -f: abort on HTTP errors
+          ~/.local/bin/mise trust --quiet .config/mise.toml
+          ~/.local/bin/mise install --quiet yamllint
+      - name: YAML linting
+        run: |
+          eval "$(~/.local/bin/mise activate bash)" > /dev/null
+          yamllint -c .yamllint-config.yaml --no-warnings .
+
+  jsonlint:
+    name: JSON linting
+    runs-on: ubuntu-slim
     steps:
       - name: Checkout
-        uses: actions/checkout@v5
-      - name: yaml lint
-        uses: reviewdog/action-yamllint@f01d8a48fd8d89f89895499fca2cff09f9e9e8c0
-        with:
-          github_token: ${{ github.token }}
-          reporter: github-pr-review
-          fail_level: error
-          level: info
-          # we should consistently scan all files
-          yamllint_flags: "-c .yamllint-config.yaml --no-warnings ."
+        uses: actions/checkout@v6
+      - name: setup jq
+        run: |
+          curl -fsSL https://mise.run | bash > /dev/null  # -f: abort on HTTP errors
+          ~/.local/bin/mise trust --quiet .config/mise.toml
+          ~/.local/bin/mise install --quiet jq
+      - name: JSON linting
+        run: |
+          eval "$(~/.local/bin/mise activate bash)" > /dev/null
+          LOG_FILES=true tools/check-json.sh  # log each validated file
diff --git a/.github/workflows/pre-commit-updates.yml b/.github/workflows/pre-commit-updates.yml
diff --git a/.lefthook.yml b/.lefthook.yml
@@ -17,8 +17,9 @@ pre-commit:
       stage_fixed: true
 
     - name: yaml linting
-      run: yamllint -c .yamllint-config.yaml .
+      run: yamllint -c .yamllint-config.yaml {staged_files}
       glob: "*.y*ml"
+      stage_fixed: true
 
     - name: Github Action linting
       run: actionlint

diff --git a/README.md b/README.md
@@ -115,7 +115,7 @@ seems wrong.
 
 ## Contributing & Code Standards
 
-We have a [.lefthook.yml](.lefthook.yml) file which enforces some linting / formatting rules.
+We have a [.lefthook.yml](.lefthook.yml) file (run by [lefthook](https://lefthook.dev/)) which enforces some linting / formatting rules.
 
 We also rely on [ruff](https://docs.astral.sh/ruff/) and [mypy](https://www.mypy-lang.org/) for ensuring python coding standards.
 

diff --git a/enrichers/openstreetmap.py b/enrichers/openstreetmap.py
@@ -15,31 +15,23 @@ def search(self) -> dict:
         search_name = self._clean_facility_name(facility_name)
         search_url = "https://nominatim.openstreetmap.org/search"
         self.resp_info["enrichment_type"] = "openstreetmap"
+        self.resp_info["method"] = "nominatim"
         data = []
         if not address:
             logger.debug("No address for %s, simply searching for name", facility_name)
-            params = {
-                "q": search_name,
-                "format": "json",
-                "limit": 5,
-                "dedupe": 1,
+            search_params = {
+                "simple_name": {
+                    "q": search_name,
+                    "format": "json",
+                    "limit": 5,
+                    "dedupe": 1,
+                }
             }
-            logger.debug("Searching OSM for %s", search_name)
-            self.resp_info["search_query_steps"].append(search_name)  # type: ignore [attr-defined]
-            try:
-                response = self._req(search_url, params=params, timeout=15)
-                logger.debug("Response: %s", response.text)
-                data = response.json()
-            except Exception as e:
-                logger.debug(" OSM search error for '%s': %s", facility_name, e)
-                self.resp_info["search_query_steps"].append(f"(Failed -> {e})")  # type: ignore [attr-defined]
-                return self.resp_info
         else:
             full_address = (
                 f"{address['street']} {address['locality']}, {address['administrative_area']} {address['postal_code']}"
             )
             locality = f"{address['locality']}, {address['administrative_area']} {address['postal_code']}"
-            search_url = "https://nominatim.openstreetmap.org/search"
             search_params = {
                 "facility_name": {
                     "q": f"{search_name} {full_address}",
@@ -60,47 +52,34 @@ def search(self) -> dict:
                     "dedupe": 1,
                 },
             }
-            for search_name, params in search_params.items():
-                logger.debug("Searching OSM for %s", params["q"])
-                self.resp_info["search_query_steps"].append(params["q"])  # type: ignore [attr-defined]
-                try:
-                    response = self._req(search_url, params=params, timeout=15)
-                    data = response.json()
-                except Exception as e:
-                    logger.debug(" OSM search error for '%s': %s", facility_name, e)
-                    self.resp_info["search_query_steps"].append(f"(Failed -> {e})")  # type: ignore [attr-defined]
-                    continue
+        for search_name, params in search_params.items():
+            logger.debug("Searching OSM for %s", params["q"])
+            self.resp_info["search_query_steps"].append(params["q"])  # type: ignore [attr-defined]
+            try:
+                response = self._req(search_url, params=params, timeout=15)
+                data.extend(response.json())
+            except Exception as e:
+                logger.debug(" OSM search error for '%s': %s", facility_name, e)
+                self.resp_info["search_query_steps"].append(f"(Failed -> {e})")  # type: ignore [attr-defined]
+                continue
+            # if we find results, don't check for less accurate ones (speeds things up quite a bit)
+            if data:
+                break
         if not data:
             return self.resp_info
-        # when the URL result is a "way" this is usually correct.
-        # checks top five results.
-        match_terms = ["prison", "detention", "correctional", "jail"]
-        for result in data:
-            osm_type = result.get("type", "").lower()
-            lat = result.get("lat", self.default_coords["latitude"])
-            lon = result.get("lon", self.default_coords["longitude"])
-            display_name = result.get("display_name", "").lower()
-            if any(term in osm_type for term in match_terms) or any(term in display_name for term in match_terms):
-                # todo courthouse could be added, or other tags such as "prison:for=migrant" as a clear positive search result.
-                osm_id = result.get("osm_id", "")
-                osm_type_prefix = result.get("osm_type", "")
-                title = result.get("display_name", "")
-                if osm_id and osm_type_prefix:
-                    self.resp_info["url"] = f"https://www.openstreetmap.org/{osm_type_prefix}/{osm_id}"
-                    self.resp_info["details"]["latitude"] = lat  # type: ignore [index]
-                    self.resp_info["details"]["longitude"] = lon  # type: ignore [index]
-                    self.resp_info["title"] = title
-                    return self.resp_info
-        # fallback to first result
+        # The first result in the list is the most detailed, so use that
         first_result = data[0]
-        logger.debug("Address searches didn't directly find anything, just using the first result: %s", first_result)
-        title = first_result.get("display_name", "")
         lat = first_result.get("lat", self.default_coords["latitude"])
         lon = first_result.get("lon", self.default_coords["longitude"])
-        self.resp_info["search_query_steps"].append(f"{lat}&{lon}")  # type: ignore [attr-defined]
-        if lat and lon:
-            self.resp_info["url"] = f"https://www.openstreetmap.org/?mlat={lat}&mlon={lon}&zoom=15"
-            self.resp_info["details"]["latitude"] = lat  # type: ignore [index]
-            self.resp_info["details"]["longitude"] = lon  # type: ignore [index]
-            self.resp_info["title"] = title
+        osm_type = first_result.get("osm_type", "")
+        osm_id = first_result.get("osm_id", "")
+        self.resp_info["details"]["latitude"] = lat  # type: ignore [index]
+        self.resp_info["details"]["longitude"] = lon  # type: ignore [index]
+        self.resp_info["title"] = first_result.get("display_name", "")
+        self.resp_info["details"]["class"] = first_result.get("class", "")  # type: ignore [index]
+        if osm_type == "way":
+            self.resp_info["url"] = f"https://www.openstreetmap.org/way/{osm_id}"
+        else:
+            self.resp_info["search_query_steps"].append(f"{lat}&{lon}")  # type: ignore [attr-defined]
+            self.resp_info["url"] = f"https://www.openstreetmap.org/?mlat={lat}&mlon={lon}"
         return self.resp_info
diff --git a/main.py b/main.py
@@ -129,7 +129,7 @@ def main() -> None:
         logger.error("Can't scrape and load existing data!")
         exit(1)
 
-    facilities_data = {}
+    facilities_data: dict = {}
     if args.scrape:
         facilities_data, agencies = facilities_scrape_wrapper(
             keep_sheet=not args.delete_sheets,

diff --git a/tools/check-json.sh b/tools/check-json.sh
@@ -2,6 +2,7 @@
 
 set -eou pipefail
 
+LOG_FILES=${LOG_FILES:-"false"}
 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 pushd "${SCRIPT_DIR}" > /dev/null || exit 1
 pushd "$(git rev-parse --show-toplevel)" > /dev/null || exit 1
@@ -16,6 +17,9 @@ fi
 
 exit_code=0
 for fn in ${FILES}; do
+	if [[ "${LOG_FILES}" == "true" ]]; then
+		echo "Validating ${fn}..."
+	fi
 	set +e
 	error=$(jq '.' "${fn}" 2>&1 > /dev/null)
 	set -e