diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..7ec0b4e
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,5 @@
+# Changelog
+
+## 2025-04-22
+- [HH]: The data now includes the location information from the WFS, filling the `location` field in the database
+  for the city of Hamburg for the first time
diff --git a/jedeschule/spiders/hamburg.py b/jedeschule/spiders/hamburg.py
index 7be0d34..d187655 100644
--- a/jedeschule/spiders/hamburg.py
+++ b/jedeschule/spiders/hamburg.py
@@ -10,15 +10,35 @@ class HamburgSpider(SchoolSpider):
     name = "hamburg"
 
     start_urls = [
-        "https://geodienste.hamburg.de/HH_WFS_Schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&typename=de.hh.up:nicht_staatliche_schulen,de.hh.up:staatliche_schulen"
+        "https://geodienste.hamburg.de/HH_WFS_Schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&typename=de.hh.up:nicht_staatliche_schulen,de.hh.up:staatliche_schulen&srsname=EPSG:4326"
     ]
 
+
     def parse(self, response):
+        namespaces = {
+            "gml": "http://www.opengis.net/gml",
+        }
+
         elem = ET.fromstring(response.body)
 
         for member in elem:
             data_elem = {}
             for attr in member[0]:
+                if attr.tag == "{https://registry.gdi-de.org/id/de.hh.up}the_geom":
+                    # This nested entry contains the coordinates that we would like to expand
+                    pos = attr.findtext("gml:Point/gml:pos", namespaces=namespaces)
+                    # findtext() returns None when there is no gml:Point/gml:pos child;
+                    # such a school is still yielded, just without lat/lon.
+                    # split() without a separator tolerates repeated whitespace.
+                    coordinates = pos.split() if pos else []
+                    if len(coordinates) >= 2:
+                        # NOTE(review): assumes "lon lat" axis order for
+                        # srsname=EPSG:4326, as the original unpacking did - confirm
+                        lon, lat = coordinates[:2]
+                        data_elem["lat"] = lat
+                        data_elem["lon"] = lon
+                    continue
+
                 # strip the namespace before returning
                 data_elem[attr.tag.split("}", 1)[1]] = attr.text
             yield data_elem
@@ -39,4 +59,6 @@ def normalize(item: Item) -> School:
             fax=item.get("fax"),
             phone=item.get("schul_telefonnr"),
             director=item.get("name_schulleiter"),
+            latitude=item.get("lat"),
+            longitude=item.get("lon"),
         )