Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 36 additions & 33 deletions jedeschule/spiders/bayern.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import xml.etree.ElementTree as ET
import scrapy
import xmltodict
from scrapy import Item

from jedeschule.items import School
Expand All @@ -9,40 +8,44 @@
class BayernSpider(SchoolSpider):
name = "bayern"
start_urls = [
"https://gdiserv.bayern.de/srv112940/services/schulstandortebayern-wfs?SERVICE=WFS&VERSION=2.0.0&REQUEST=GetCapabilities"
"https://gdiserv.bayern.de/srv112940/services/schulstandortebayern-wfs?"
"SERVICE=WFS&VERSION=2.0.0&REQUEST=GetFeature&srsname=EPSG:4326&typename="
"schul:SchulstandorteGrundschulen,"
"schul:SchulstandorteMittelschulen,"
"schul:SchulstandorteRealschulen,"
"schul:SchulstandorteGymnasien,"
"schul:SchulstandorteBeruflicheSchulen,"
"schul:SchulstandorteFoerderzentren,"
"schul:SchulstandorteWeitererSchulen"
]

def parse(self, response, **kwargs):
tree = ET.fromstring(response.body)
base_url = "https://gdiserv.bayern.de/srv112940/services/schulstandortebayern-wfs?SERVICE=WFS&VERSION=2.0.0&REQUEST=GetFeature&srsname=EPSG:4326&typename="
for feature_type in tree.iter("{http://www.opengis.net/wfs/2.0}FeatureType"):
feature = feature_type.findtext("{http://www.opengis.net/wfs/2.0}Title")
yield scrapy.Request(
f"{base_url}{feature}",
callback=self.parse_resource,
cb_kwargs={"feature": feature},
)

def parse_resource(self, response, feature):
tree = ET.fromstring(response.body)
namespaces = {
"gml": "http://www.opengis.net/gml/3.2",
"schul": "http://gdi.bayern/brbschul",
}
key = "{http://gdi.bayern/brbschul}" + feature
for school in tree.iter(key):
data_elem = {"id": school.attrib["{http://www.opengis.net/gml/3.2}id"]}

for entry in school:
if entry.tag == "{http://gdi.bayern/brbschul}geometry":
lon, lat = entry.findtext(
"gml:Point/gml:pos", namespaces=namespaces
).split(" ")
data_elem["lat"] = lat
data_elem["lon"] = lon
continue
# strip the namespace before returning
data_elem[entry.tag.split("}", 1)[1]] = entry.text
data = xmltodict.parse(response.text)
members = data.get("wfs:FeatureCollection", {}).get("wfs:member", [])

if not isinstance(members, list):
members = [members]

for member in members:
# Each member is a dict with one key = school tag, value = school data dict
school = next(iter(member.values()), {})

data_elem = {
"id": school.get("@gml:id")
}

for key, value in school.items():
if key == "schul:geometry":
point = value.get("gml:Point", {})
pos = point.get("gml:pos", "")
if pos:
lon, lat = pos.split()
data_elem["lat"] = float(lat)
data_elem["lon"] = float(lon)
elif not key.startswith("@"):
clean_key = key.split(":", 1)[-1]
data_elem[clean_key] = value

yield data_elem

@staticmethod
Expand Down
52 changes: 52 additions & 0 deletions test/test_bayern.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import unittest

from scrapy.http import TextResponse

from jedeschule.spiders.bayern import BayernSpider


class TestBayernSpider(unittest.TestCase):
    """Exercise BayernSpider.parse against a canned WFS FeatureCollection."""

    def test_parse(self):
        """Parse a single-member FeatureCollection and check the extracted fields.

        The fixture holds exactly one ``wfs:member`` element, so ``parse`` is
        expected to yield exactly one school. Text fields are compared as
        strings; ``lon`` and ``lat`` are compared as floats.
        """
        # NOTE: the XML declaration must be the very first characters of the
        # document, so the literal starts directly after the opening quotes.
        xml_response = """<?xml version='1.0' encoding='UTF-8'?>
<wfs:FeatureCollection xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.opengis.net/wfs/2.0"
xmlns:wfs="http://www.opengis.net/wfs/2.0" timeStamp="2025-08-11T09:35:15Z"
xmlns:gml="http://www.opengis.net/gml/3.2" numberMatched="unknown" numberReturned="0">
<wfs:member>
<schul:SchulstandorteFoerderzentren xmlns:schul="http://gdi.bayern/brbschul"
gml:id="SCHUL_SCHULSTANDORTEFOERDERZENTREN_3721b800-751d-49a1-a6d2-19d237e7bcc8">
<schul:schulname>Bayerische Landesschule</schul:schulname>
<schul:strasse>Kurzstr. 2</schul:strasse>
<schul:postleitzahl>81547</schul:postleitzahl>
<schul:ort>München</schul:ort>
<schul:schulart>Förderzentren</schul:schulart>
<schul:geometry>
<gml:Point
gml:id="SCHUL_SCHULSTANDORTEFOERDERZENTREN_3721b800-751d-49a1-a6d2-19d237e7bcc8_SCHUL_GEOMETRY"
srsName="EPSG:4326">
<gml:pos>11.5686076923 48.1047906989</gml:pos>
</gml:Point>
</schul:geometry>
</schul:SchulstandorteFoerderzentren>
</wfs:member>
</wfs:FeatureCollection>
"""

        fake_response = TextResponse(
            url="https://test.com",
            body=xml_response,
            encoding="utf-8",
        )
        parsed = list(BayernSpider().parse(fake_response))
        self.assertEqual(len(parsed), 1)

        school = parsed[0]

        # Expected value per output key, checked in the listed order.
        expected_fields = {
            "schulname": "Bayerische Landesschule",
            "strasse": "Kurzstr. 2",
            "postleitzahl": "81547",
            "ort": "München",
            "schulart": "Förderzentren",
            "lon": 11.5686076923,
            "lat": 48.1047906989,
        }
        for field_name, expected_value in expected_fields.items():
            self.assertEqual(school[field_name], expected_value)


# Run the test suite only when this module is executed as a script,
# not when it is imported by a test runner.
if __name__ == "__main__":
    unittest.main()