Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 6 additions & 29 deletions jedeschule/spiders/berlin.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,19 @@
import xml.etree.ElementTree as ET

from jedeschule.items import School
from scrapy import Item

from jedeschule.items import School
from jedeschule.spiders.school_spider import SchoolSpider
from jedeschule.utils.wfs_basic_parsers import parse_geojson_features


class BerlinSpider(SchoolSpider):
name = "berlin"
start_urls = [
"https://gdi.berlin.de/services/wfs/schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&srsname=EPSG:4326&typename=fis:schulen"
"https://gdi.berlin.de/services/wfs/schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&srsname=EPSG:4326"
"&typename=fis:schulen&outputFormat=application/json"
]

def parse(self, response):
    """Yield one flat dict per school found in a WFS GML (XML) response.

    The response body is parsed as XML. Every ``{schulen}schulen`` element
    below ``gml:featureMembers`` becomes one dict, built from its children:

    * ``gml:boundedBy`` is ignored.
    * ``{schulen}geom`` is expanded into ``"lon"`` and ``"lat"`` (the first
      and second whitespace-separated value of ``gml:Point/gml:pos``, kept
      as strings). If no usable position is present both are ``None``
      instead of raising.
    * Every other child is stored under its tag name with the namespace
      stripped, mapped to the element text.

    Args:
        response: Object whose ``body`` attribute holds the XML document.

    Yields:
        dict: The flattened attributes of one school.
    """
    tree = ET.fromstring(response.body)

    namespaces = {
        "gml": "http://www.opengis.net/gml",
        "fis": "http://www.berlin.de/broker",
    }

    members = tree.find("gml:featureMembers", namespaces)
    if members is None:
        # No feature container in the document: nothing to yield.
        return

    for school in members.findall("{schulen}schulen", namespaces):
        data_elem = {}
        for entry in school:
            if entry.tag == "{http://www.opengis.net/gml}boundedBy":
                continue
            if entry.tag == "{schulen}geom":
                # This nested entry contains the coordinates that we would
                # like to expand. findtext() returns None when the point is
                # missing, so guard before splitting.
                position = entry.findtext(
                    "gml:Point/gml:pos", namespaces=namespaces
                )
                parts = position.split() if position else []
                if len(parts) >= 2:
                    data_elem["lat"] = parts[1]
                    data_elem["lon"] = parts[0]
                else:
                    data_elem["lat"] = None
                    data_elem["lon"] = None
                continue
            # Strip the namespace before returning; rpartition also copes
            # with tags that carry no namespace at all.
            data_elem[entry.tag.rpartition("}")[2]] = entry.text
        yield data_elem
def parse(self, response, **kwargs):
    """Yield the items produced by ``parse_geojson_features`` for *response*.

    Additional keyword arguments are accepted but not used.
    """
    features = parse_geojson_features(response)
    yield from features

@staticmethod
def normalize(item: Item) -> School:
Expand Down
29 changes: 5 additions & 24 deletions jedeschule/spiders/brandenburg.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,20 @@
import xml.etree.ElementTree as ET

from scrapy import Item

from jedeschule.items import School
from jedeschule.spiders.school_spider import SchoolSpider
from jedeschule.utils.wfs_basic_parsers import parse_geojson_features


class BrandenburgSpider(SchoolSpider):
name = "brandenburg"

start_urls = [
"https://schullandschaft.brandenburg.de/edugis/wfs/schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&typename=ms:Schul_Standorte&srsname=epsg:4326"
"https://schullandschaft.brandenburg.de/edugis/wfs/schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&typename=ms:Schul_Standorte"
"&srsname=epsg:4326&outputFormat=application/json"
]

def parse(self, response):
    """Yield one flat dict per school found in a WFS GML (XML) response.

    The response body is parsed as XML. For every ``gml:featureMember``
    directly below the root, the children of its first child element are
    flattened into one dict:

    * The MapServer ``msGeometry`` element is expanded into ``"lat"`` and
      ``"lon"`` (the first and second whitespace-separated value of
      ``gml:Point/gml:pos``, kept as strings). If no usable position is
      present both are ``None`` instead of raising.
    * Every other child is stored under its tag name with the namespace
      stripped, mapped to the element text.

    A ``gml:featureMember`` without any child element is skipped.

    Args:
        response: Object whose ``body`` attribute holds the XML document.

    Yields:
        dict: The flattened attributes of one school.
    """
    tree = ET.fromstring(response.body)

    namespaces = {
        "gml": "http://www.opengis.net/gml",
        "ms": "http://mapserver.gis.umn.edu/mapserver",
    }
    for school in tree.findall("gml:featureMember", namespaces):
        if len(school) == 0:
            # An empty member has no feature element to read from.
            continue
        data_elem = {}
        for entry in school[0]:
            if entry.tag == "{http://mapserver.gis.umn.edu/mapserver}msGeometry":
                # This nested entry contains the coordinates that we would
                # like to expand. findtext() returns None when the point is
                # missing, so guard before splitting.
                position = entry.findtext(
                    "gml:Point/gml:pos", namespaces=namespaces
                )
                parts = position.split() if position else []
                if len(parts) >= 2:
                    data_elem["lat"] = parts[0]
                    data_elem["lon"] = parts[1]
                else:
                    data_elem["lat"] = None
                    data_elem["lon"] = None
                continue
            # Strip the namespace before returning; rpartition also copes
            # with tags that carry no namespace at all.
            data_elem[entry.tag.rpartition("}")[2]] = entry.text
        yield data_elem
def parse(self, response, **kwargs):
    """Yield the items produced by ``parse_geojson_features`` for *response*.

    Additional keyword arguments are accepted but not used.
    """
    features = parse_geojson_features(response)
    yield from features

@staticmethod
def normalize(item: Item) -> School:
Expand Down
2 changes: 1 addition & 1 deletion jedeschule/spiders/nordrhein_westfalen_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import requests

from jedeschule.utils import singleton
from jedeschule.utils.utils import singleton


@singleton
Expand Down
2 changes: 1 addition & 1 deletion jedeschule/spiders/sachsen_helper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import requests

from jedeschule.utils import singleton
from jedeschule.utils.utils import singleton


@singleton
Expand Down
Empty file added jedeschule/utils/__init__.py
Empty file.
File renamed without changes.
17 changes: 17 additions & 0 deletions jedeschule/utils/wfs_basic_parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import json

from scrapy.http import Response


def parse_geojson_features(response: "Response"):
    """Yield one flat dict per feature of a GeoJSON document.

    ``response.text`` is decoded as JSON. For every entry of the top-level
    ``"features"`` list, the feature's ``"properties"`` object is yielded
    with two extra keys, ``"lon"`` and ``"lat"``, taken from the first and
    second entry of the geometry's ``"coordinates"``.

    GeoJSON allows ``"geometry": null`` and ``"properties": null``. Such
    features, as well as features with fewer than two coordinates, are still
    yielded; their ``"lon"`` and ``"lat"`` are set to ``None`` so that one
    incomplete record does not abort the whole crawl.

    NOTE(review): coordinates are read as a flat ``[lon, lat, ...]`` list,
    i.e. Point geometries are assumed - confirm if other geometry types can
    occur.

    Args:
        response: Object whose ``text`` attribute holds the GeoJSON document.

    Yields:
        dict: The feature's properties plus ``"lon"`` and ``"lat"``.
    """
    geojson = json.loads(response.text)

    for feature in geojson.get("features") or []:
        # "or" rather than a .get() default: the keys may be present but null.
        properties = feature.get("properties") or {}
        geometry = feature.get("geometry") or {}
        coords = geometry.get("coordinates") or []

        if len(coords) >= 2:
            properties["lon"] = coords[0]
            properties["lat"] = coords[1]
        else:
            properties["lon"] = None
            properties["lat"] = None

        yield properties
Empty file added test/__init__.py
Empty file.
89 changes: 89 additions & 0 deletions test/test_berlin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import unittest
from scrapy.http import TextResponse
from jedeschule.spiders.berlin import BerlinSpider


class TestBerlinSpider(unittest.TestCase):
    """Tests for ``BerlinSpider.parse`` against a canned GeoJSON payload."""

    def test_parse(self):
        """A single-feature collection yields one item with the expected fields."""
        payload = """
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"id": "schulen.01A04",
"geometry": {
"type": "Point",
"coordinates": [13.33391576, 52.52672359]
},
"geometry_name": "geom",
"properties": {
"bsn": "01A04",
"schulname": "Berlin-Kolleg",
"schulart": "Kolleg",
"traeger": "öffentlich",
"schultyp": "Andere Schule",
"bezirk": "Mitte",
"ortsteil": "Moabit",
"plz": "10551",
"strasse": "Turmstraße",
"hausnr": "75",
"telefon": "+49 30 901838210",
"fax": "+49 30 901838222",
"email": "sekretariat@berlin-kolleg.de",
"internet": "https://www.berlin-kolleg.de",
"schuljahr": "2024/25"
},
"bbox": [
13.33391576,
52.52672359,
13.33391576,
52.52672359
]
}
],
"totalFeatures": 925,
"numberMatched": 925,
"numberReturned": 1,
"timeStamp": "2025-06-13T14:59:35.045Z",
"crs": {
"type": "name",
"properties": {
"name": "urn:ogc:def:crs:EPSG::4326"
}
},
"bbox": [
13.33391576,
52.52672359,
13.33391576,
52.52672359
]
}
"""

        berlin = BerlinSpider()
        fake_response = TextResponse(
            url="http://test_webserver.com",
            body=payload.encode("utf-8"),
            encoding="utf-8",
        )

        parsed = list(berlin.parse(fake_response))
        self.assertEqual(len(parsed), 1)
        item = parsed[0]

        # Coordinates are floats, so compare them approximately.
        expected_coordinates = (
            ("lon", 13.33391576),
            ("lat", 52.52672359),
        )
        for key, value in expected_coordinates:
            self.assertAlmostEqual(item[key], value)

        # All remaining checked fields are plain strings.
        expected_fields = (
            ("bsn", "01A04"),
            ("schulname", "Berlin-Kolleg"),
            ("plz", "10551"),
            ("strasse", "Turmstraße"),
            ("hausnr", "75"),
            ("telefon", "+49 30 901838210"),
            ("fax", "+49 30 901838222"),
            ("email", "sekretariat@berlin-kolleg.de"),
            ("internet", "https://www.berlin-kolleg.de"),
        )
        for key, value in expected_fields:
            self.assertEqual(item[key], value)


if __name__ == "__main__":
unittest.main()
79 changes: 79 additions & 0 deletions test/test_brandenburg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import unittest
from scrapy.http import TextResponse
import json

from jedeschule.spiders.brandenburg import BrandenburgSpider


class TestBrandenburgSpider(unittest.TestCase):
    """Tests for ``BrandenburgSpider.parse`` against a canned GeoJSON payload."""

    def test_parse(self):
        """A single-feature collection yields one item with the expected fields."""
        school_properties = {
            "schul_nr": "100020",
            "schulname": "Grundschule Forst Mitte",
            "strasse_hausnr": "Max-Fritz-Hammer-Straße 15",
            "plz": "03149",
            "ort": "Forst (Lausitz)",
            "telefonnummer": "(03562) 7163",
            "faxnummer": "(03562) 691288",
            "dienst_email": "s100020@schulen.brandenburg.de",
            "homepage": "http://www.grundschule-forst-mitte.de",
            "schulamtname": "Staatliches Schulamt Cottbus",
            "kreis": "Spree-Neiße",
            "schulform_kurzbez": "G",
            "schulform": "Grundschule",
            "traeger": "Gemeinde",
            "schultraeger_grp": "o",
            "schueler": "288 (Stand: 2022)",
            "besonderheiten_sl": "(763),(561),(132),(201)",
            "besonderheiten": [
                "Einstiegsphase Startchancen",
                "Schule mit Nutzung Schul-Cloud Brandenburg",
                "verlässliche Halbtagsschule und Hort",
                "FLEX - Optimierung des Schulanfangs",
            ],
            "studienseminar": "2",
            "fremdsprachen": ["Englisch"],
            "fremdsprachen_sl": "(EN)",
            "fremdsprachen_timestmp": "(Schuljahr: 2020/2021)",
        }
        feature = {
            "type": "Feature",
            "properties": school_properties,
            "geometry": {
                "type": "Point",
                "coordinates": [14.651148207215728, 51.74023651973522],
            },
        }
        collection = {
            "type": "FeatureCollection",
            "name": "Schul_Standorte",
            "features": [feature],
        }
        json_text = json.dumps(collection)

        brandenburg = BrandenburgSpider()
        fake_response = TextResponse(
            url="http://test_webserver.com",
            body=json_text.encode("utf-8"),
            encoding="utf-8",
        )

        parsed = list(brandenburg.parse(fake_response))
        self.assertEqual(len(parsed), 1)
        item = parsed[0]

        # Coordinates are floats, so compare them approximately.
        expected_coordinates = (
            ("lat", 51.74023651973522),
            ("lon", 14.651148207215728),
        )
        for key, value in expected_coordinates:
            self.assertAlmostEqual(item[key], value)

        # All remaining checked fields are plain strings.
        expected_fields = (
            ("schul_nr", "100020"),
            ("schulname", "Grundschule Forst Mitte"),
            ("plz", "03149"),
            ("ort", "Forst (Lausitz)"),
            ("dienst_email", "s100020@schulen.brandenburg.de"),
            ("schulform", "Grundschule"),
            ("traeger", "Gemeinde"),
        )
        for key, value in expected_fields:
            self.assertEqual(item[key], value)


if __name__ == '__main__':
unittest.main()