Skip to content

Commit 132be91

Browse files
authored
Merge pull request #192 from Datenschule/saarland-geojson
[SL] Use geojson from Geoportal
2 parents b887a0a + 6c890e9 commit 132be91

File tree

2 files changed

+114
-86
lines changed

2 files changed

+114
-86
lines changed

jedeschule/spiders/saarland.py

Lines changed: 14 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,36 @@
1-
import xmltodict
21
from scrapy import Item
32

43
from jedeschule.items import School
54
from jedeschule.spiders.school_spider import SchoolSpider
5+
from jedeschule.wfs_basic_parsers import parse_geojson_features
66

77

88
class SaarlandSpider(SchoolSpider):
99
name = "saarland"
1010
start_urls = [
11-
"https://geoportal.saarland.de/arcgis/services/Internet/Staatliche_Dienste/MapServer/WFSServer?"
12-
"SERVICE=WFS&REQUEST=GetFeature&typeName=Staatliche%5FDienste:Schulen%5FSL&srsname=EPSG:4326"
11+
"https://geoportal.saarland.de/spatial-objects/257/collections/Staatliche_Dienste:Schulen_SL/items?f=json&limit=2500"
1312
]
1413

1514
def parse(self, response, **kwargs):
16-
data = xmltodict.parse(response.text)
17-
members = data.get("wfs:FeatureCollection", {}).get("wfs:member", [])
18-
19-
if not isinstance(members, list):
20-
members = [members]
21-
22-
for member in members:
23-
school = member.get("Staatliche_Dienste:Schulen_SL", {})
24-
data_elem = {}
25-
26-
for key, value in school.items():
27-
if key == "Staatliche_Dienste:SHAPE":
28-
pos = (value.get("gml:Point", {})
29-
.get("gml:pos", "")
30-
.strip())
31-
if pos:
32-
lat, lon = pos.split()
33-
data_elem["lat"] = float(lat)
34-
data_elem["lon"] = float(lon)
35-
36-
continue
37-
38-
clean_key = key.split(":")[-1]
39-
if clean_key == "PLZ":
40-
value = value.split(".")[0]
41-
42-
data_elem[clean_key] = value
43-
44-
yield data_elem
15+
yield from parse_geojson_features(response)
4516

4617
@staticmethod
4718
def normalize(item: Item) -> School:
48-
# The data also contains a field called `SCHULKENNZ` which implies that it might be an id
49-
# that could be used, but some schools share ids (especially `0` or `000000`) which makes for collisions
19+
# The data also contains a field called `Schulkennz` which implies that it might be an id
20+
# that could be used, but some schools share ids (especially `0` or `000000`) or
21+
# do not have any set at all, which makes for collisions
5022
school_id = item.get("OBJECTID")
5123

5224
return School(
53-
name=item.get("Bezeichnun"),
5425
address=item.get("Straße", "").strip(),
5526
city=item.get("Ort"),
56-
zip=item.get("PLZ"),
57-
school_type=item.get("Schulform"),
27+
fax=item.get("Fax"),
5828
id=f"SL-{school_id}",
29+
latitude=item.get("lat"),
30+
longitude=item.get("lon"),
31+
name=item.get("Bezeichnung"),
32+
phone=item.get("Telefon"),
33+
school_type=item.get("Schulform"),
34+
website=item.get("Homepage"),
35+
zip=item.get("PLZ"),
5936
)

test/test_saarland.py

Lines changed: 100 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -7,63 +7,114 @@
77

88
class TestSaarlandSpider(unittest.TestCase):
99
def test_parse(self):
10-
xml_response = """<?xml version="1.0" encoding="utf-8" ?>
11-
<wfs:FeatureCollection xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:wfs="http://www.opengis.net/wfs/2.0" xmlns:gml="http://www.opengis.net/gml/3.2" xmlns:Staatliche_Dienste="https://geoportal.saarland.de/arcgis/services/Internet/Staatliche_Dienste/MapServer/WFSServer" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" timeStamp="2025-07-20T17:40:21Z" numberMatched="317" numberReturned="1" xsi:schemaLocation="http://www.opengis.net/wfs/2.0 http://schemas.opengis.net/wfs/2.0/wfs.xsd http://www.opengis.net/gml/3.2 http://schemas.opengis.net/gml/3.2.1/gml.xsd https://geoportal.saarland.de/arcgis/services/Internet/Staatliche_Dienste/MapServer/WFSServer https://geoportal.saarland.de/arcgis/services/Internet/Staatliche_Dienste/MapServer/WFSServer?service=wfs%26version=2.0.0%26request=DescribeFeatureType">
12-
<wfs:member>
13-
<Staatliche_Dienste:Schulen_SL gml:id="Schulen_SL.1">
14-
<Staatliche_Dienste:SHAPE>
15-
<gml:Point gml:id="Schulen_SL.1.pn.0" srsName="urn:ogc:def:crs:EPSG::4326">
16-
<gml:pos>49.24067452 7.02085050</gml:pos>
17-
</gml:Point>
18-
</Staatliche_Dienste:SHAPE>
19-
<Staatliche_Dienste:OBJECTID>1</Staatliche_Dienste:OBJECTID>
20-
<Staatliche_Dienste:fid>1.00000000</Staatliche_Dienste:fid>
21-
<Staatliche_Dienste:Gemeindenu>1100.00000000</Staatliche_Dienste:Gemeindenu>
22-
<Staatliche_Dienste:PLZ>66123.00000000</Staatliche_Dienste:PLZ>
23-
<Staatliche_Dienste:Ort>Saarbrücken</Staatliche_Dienste:Ort>
24-
<Staatliche_Dienste:Straße>Kohlweg 7</Staatliche_Dienste:Straße>
25-
<Staatliche_Dienste:Bezeichnun>Deutsch-Französiche Hochschule, Université franco-allemande</Staatliche_Dienste:Bezeichnun>
26-
<Staatliche_Dienste:Telefon>0681-93812100</Staatliche_Dienste:Telefon>
27-
<Staatliche_Dienste:Fax>0681-93812111</Staatliche_Dienste:Fax>
28-
<Staatliche_Dienste:Email>[email protected]</Staatliche_Dienste:Email>
29-
<Staatliche_Dienste:Schulform>Hochschule</Staatliche_Dienste:Schulform>
30-
<Staatliche_Dienste:Homepage>https://www.dfh-ufa.org/</Staatliche_Dienste:Homepage>
31-
<Staatliche_Dienste:Schulregio>Saarbrücken</Staatliche_Dienste:Schulregio>
32-
<Staatliche_Dienste:KARTENERST>Hochschule</Staatliche_Dienste:KARTENERST>
33-
<Staatliche_Dienste:Rechtswert>355942.97630000</Staatliche_Dienste:Rechtswert>
34-
<Staatliche_Dienste:Hochwert>5456095.93600000</Staatliche_Dienste:Hochwert>
35-
<Staatliche_Dienste:Aktualisie>20.05.2025</Staatliche_Dienste:Aktualisie>
36-
</Staatliche_Dienste:Schulen_SL>
37-
</wfs:member>
38-
</wfs:FeatureCollection>
10+
json_response = """
11+
{
12+
"serviceTitle": "Staatliche_Dienste",
13+
"collectionId": "1125",
14+
"collectionName": "Staatliche_Dienste:Schulen_SL",
15+
"collectionTitle": "Schulen_SL",
16+
"title": "Schulen_SL",
17+
"id": "Staatliche_Dienste:Schulen_SL",
18+
"description": "Schulen im Saarland",
19+
"extent": {
20+
"spatial": {
21+
"minx": "6.37990222",
22+
"miny": "49.10626268",
23+
"maxx": "7.37397862",
24+
"maxy": "49.61541418"
25+
},
26+
"temporal": []
27+
},
28+
"type": "FeatureCollection",
29+
"links": [
30+
{
31+
"rel": "self",
32+
"type": "application/geo+json",
33+
"title": "this document",
34+
"href": "https://geoportal.saarland.de/spatial-objects/257/collections/Staatliche_Dienste:Schulen_SL/items?f=json&limit=2500"
35+
},
36+
{
37+
"rel": "next",
38+
"type": "application/geo+json",
39+
"title": "next page",
40+
"href": "https://geoportal.saarland.de/spatial-objects/257/collections/Staatliche_Dienste:Schulen_SL/items?f=json&limit=2500&offset=2500"
41+
},
42+
{
43+
"rel": "last",
44+
"type": "application/geo+json",
45+
"title": "last page",
46+
"href": "https://geoportal.saarland.de/spatial-objects/257/collections/Staatliche_Dienste:Schulen_SL/items?f=json&limit=2500&offset=0"
47+
}
48+
],
49+
"numberMatched": 368,
50+
"numberReturned": 368,
51+
"timeStamp": "2025-08-20T12:21:36.7200Z",
52+
"genTime": 0.7143468856811523,
53+
"features": [
54+
{
55+
"type": "Feature",
56+
"properties": {
57+
"gml_id": "Schulen_SL.1",
58+
"OBJECTID": 1,
59+
"Gemeindenr": 1100,
60+
"PLZ": 66123,
61+
"Ort": "Saarbrücken",
62+
"Straße": "Kohlweg 7",
63+
"Bezeichnung": "Deutsch-Französiche Hochschule, Université franco-allemande",
64+
"Telefon": "0681-93812100",
65+
"Fax": "0681-93812111",
66+
"Email": "[email protected]",
67+
"Schulform": "Hochschule",
68+
"Homepage": "https://www.dfh-ufa.org/",
69+
"Schulregion": "Saarbrücken",
70+
"KARTENERST": "Hochschule",
71+
"Ost": 355942.9763,
72+
"Nord": 5456095.936,
73+
"ERFASSUNG": "20.05.2025"
74+
},
75+
"bbox": [
76+
7.0208505,
77+
49.24067452,
78+
7.0208505,
79+
49.24067452
80+
],
81+
"geometry": {
82+
"type": "Point",
83+
"coordinates": [
84+
7.0208505,
85+
49.24067452
86+
]
87+
},
88+
"$schema": null,
89+
"$context": null
90+
}
91+
]
92+
}
3993
"""
4094

4195
spider = SaarlandSpider()
42-
response = TextResponse(url="https://test.com", body=xml_response, encoding="utf-8")
96+
response = TextResponse(url="https://test.com", body=json_response, encoding="utf-8")
4397
schools = list(spider.parse(response))
4498
self.assertEqual(len(schools), 1)
4599

46100
school = schools[0]
101+
parsed_school = spider.normalize(school)
102+
103+
self.assertEqual(parsed_school["id"], "SL-1")
104+
self.assertEqual(parsed_school["name"], "Deutsch-Französiche Hochschule, Université franco-allemande")
105+
self.assertEqual(parsed_school["address"], "Kohlweg 7")
106+
self.assertEqual(parsed_school["city"], "Saarbrücken")
107+
self.assertEqual(parsed_school["fax"], "0681-93812111")
108+
self.assertEqual(parsed_school["phone"], "0681-93812100")
109+
self.assertEqual(parsed_school["school_type"], "Hochschule")
110+
self.assertEqual(parsed_school["website"], "https://www.dfh-ufa.org/")
111+
self.assertEqual(parsed_school["zip"], 66123)
112+
self.assertEqual(parsed_school["latitude"], 49.24067452)
113+
self.assertEqual(parsed_school["longitude"], 7.0208505)
114+
115+
116+
47117

48-
self.assertEqual(school["OBJECTID"], "1")
49-
self.assertEqual(school["fid"], "1.00000000")
50-
self.assertEqual(school["Gemeindenu"], "1100.00000000")
51-
self.assertEqual(school["PLZ"], "66123")
52-
self.assertEqual(school["Ort"], "Saarbrücken")
53-
self.assertEqual(school["Straße"], "Kohlweg 7")
54-
self.assertEqual(school["Bezeichnun"], "Deutsch-Französiche Hochschule, Université franco-allemande")
55-
self.assertEqual(school["Telefon"], "0681-93812100")
56-
self.assertEqual(school["Fax"], "0681-93812111")
57-
self.assertEqual(school["Email"], "[email protected]")
58-
self.assertEqual(school["Schulform"], "Hochschule")
59-
self.assertEqual(school["Homepage"], "https://www.dfh-ufa.org/")
60-
self.assertEqual(school["Schulregio"], "Saarbrücken")
61-
self.assertEqual(school["KARTENERST"], "Hochschule")
62-
self.assertEqual(school["Rechtswert"], "355942.97630000")
63-
self.assertEqual(school["Hochwert"], "5456095.93600000")
64-
self.assertEqual(school["Aktualisie"], "20.05.2025")
65-
self.assertAlmostEqual(school["lat"], 49.24067452)
66-
self.assertAlmostEqual(school["lon"], 7.02085050)
67118

68119

69120
if __name__ == "__main__":

0 commit comments

Comments
 (0)