|
1 | | -import xml.etree.ElementTree as ET |
2 | | -import scrapy |
| 1 | +import xmltodict |
3 | 2 | from scrapy import Item |
4 | 3 |
|
5 | 4 | from jedeschule.items import School |
|
9 | 8 | class BayernSpider(SchoolSpider): |
10 | 9 | name = "bayern" |
11 | 10 | start_urls = [ |
12 | | - "https://gdiserv.bayern.de/srv112940/services/schulstandortebayern-wfs?SERVICE=WFS&VERSION=2.0.0&REQUEST=GetCapabilities" |
| 11 | + "https://gdiserv.bayern.de/srv112940/services/schulstandortebayern-wfs?" |
| 12 | + "SERVICE=WFS&VERSION=2.0.0&REQUEST=GetFeature&srsname=EPSG:4326&typename=" |
| 13 | + "schul:SchulstandorteGrundschulen," |
| 14 | + "schul:SchulstandorteMittelschulen," |
| 15 | + "schul:SchulstandorteRealschulen," |
| 16 | + "schul:SchulstandorteGymnasien," |
| 17 | + "schul:SchulstandorteBeruflicheSchulen," |
| 18 | + "schul:SchulstandorteFoerderzentren," |
| 19 | + "schul:SchulstandorteWeitererSchulen" |
13 | 20 | ] |
14 | 21 |
|
def parse(self, response, **kwargs):
    """Yield one flat dict per school feature found in the WFS response.

    The response body is parsed with ``xmltodict``; every ``wfs:member``
    of the ``wfs:FeatureCollection`` is expected to wrap exactly one
    feature element.

    Args:
        response: Response object whose ``text`` attribute holds the XML
            document returned by the ``GetFeature`` request.
        **kwargs: Accepted for signature compatibility; not used.

    Yields:
        dict: ``"id"`` (the feature's ``gml:id`` attribute, or ``None``),
        ``"lat"``/``"lon"`` as floats when a position is present, and one
        entry per remaining child element keyed by its tag name with the
        namespace prefix removed.

    Raises:
        ValueError: If a coordinate in ``gml:pos`` is not a valid float.
    """
    data = xmltodict.parse(response.text)
    collection = data.get("wfs:FeatureCollection") or {}
    members = collection.get("wfs:member") or []

    # xmltodict returns a bare dict (not a list) when an element occurs once.
    if not isinstance(members, list):
        members = [members]

    for member in members:
        if not isinstance(member, dict):
            # Empty <wfs:member/> elements are parsed as None.
            continue

        # Pick the wrapped feature element. Keys starting with "@" are XML
        # attributes of the member itself (e.g. an xlink:href-only
        # reference) and must not be mistaken for the feature.
        school = next(
            (
                child
                for tag, child in member.items()
                if not tag.startswith("@") and isinstance(child, dict)
            ),
            None,
        )
        if school is None:
            continue

        data_elem = {"id": school.get("@gml:id")}

        for key, value in school.items():
            if key == "schul:geometry":
                # Empty elements come back as None, so guard every level
                # instead of chaining .get() calls.
                point = value.get("gml:Point") if isinstance(value, dict) else None
                pos = point.get("gml:pos") if isinstance(point, dict) else None
                if isinstance(pos, dict):
                    # An element with attributes is parsed as a dict whose
                    # character data lives under "#text".
                    pos = pos.get("#text")
                coords = pos.split() if isinstance(pos, str) else []
                if len(coords) >= 2:
                    # Only the first two components are used, so a third
                    # (e.g. elevation) no longer breaks unpacking.
                    # NOTE(review): assumes the server emits "lon lat"
                    # order for the requested SRS - confirm.
                    lon, lat = coords[:2]
                    data_elem["lat"] = float(lat)
                    data_elem["lon"] = float(lon)
            elif not key.startswith("@"):
                # Drop the namespace prefix, e.g. "schul:foo" -> "foo".
                clean_key = key.split(":", 1)[-1]
                data_elem[clean_key] = value

        yield data_elem
47 | 50 |
|
48 | 51 | @staticmethod |
|
0 commit comments