Skip to content

Commit a73f4c3

Browse files
committed
fix swimming pool scraper
1 parent 4ab3e54 commit a73f4c3

File tree

3 files changed

+100
-49
lines changed

3 files changed

+100
-49
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
[{"title":"Grünauer Welle","address":{"leisure":"Grünauer Welle","house_number":"7","road":"Stuttgarter Allee","neighbourhood":"Wohnkomplex 4","suburb":"Grünau-Siedlung","city_district":"West","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04209","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/gruenauer-welle","type":"indoor_pool","lat":"51.31507295","lon":"12.291681015468672"},{"title":"Sachsen-Therme","address":{"road":"Schongauerstraße","suburb":"Paunsdorf","city_district":"Ost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04329","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/sachsen-therme","type":"indoor_pool","lat":"51.3497763","lon":"12.4739067"},{"title":"Schwimmhalle Mitte","address":{"leisure":"Schwimmhalle Mitte","house_number":"84","road":"Kirschbergstraße","suburb":"Gohlis-Süd","city_district":"Nord","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04155","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schwimmhalle-mitte","type":"indoor_pool","lat":"51.36225665","lon":"12.353394654258464"},{"title":"Schwimmhalle Nord","address":{"leisure":"Schwimmhalle Nord","house_number":"54","road":"Kleiststraße","suburb":"Eutritzsch","city_district":"Nord","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04157","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schwimmhalle-nord","type":"indoor_pool","lat":"51.37231965","lon":"12.38134070842299"},{"title":"Schwimmhalle Nordost","address":{"leisure":"Schwimmhalle Nordost","house_number":"26","road":"Schönefelder 
Allee","neighbourhood":"Schönefeld","suburb":"Schönefeld-Abtnaundorf","city_district":"Nordost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04347","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schwimmhalle-nordost","type":"indoor_pool","lat":"51.358358550000005","lon":"12.404955916520343"},{"title":"Schwimmhalle Süd","address":{"leisure":"Schwimmhalle Süd","house_number":"10","road":"Tarostraße","suburb":"Zentrum-Südost","city_district":"Mitte","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04103","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schwimmhalle-sued","type":"indoor_pool","lat":"51.3239909","lon":"12.385783649999993"},{"title":"Schwimmhalle Südost","address":{"leisure":"Schwimmhalle Südost","house_number":"35","road":"Kolmstraße","suburb":"Probstheida","city_district":"Südost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04299","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schwimmhalle-suedost","type":"indoor_pool","lat":"51.31632955","lon":"12.42836830011111"},{"title":"Schwimmhalle Universität Leipzig","address":{"leisure":"Universitätsschwimmhalle","house_number":"4","road":"Mainzer Straße","neighbourhood":"Bachviertel","suburb":"Zentrum-West","city_district":"Mitte","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04109","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schwimmhalle-universitaet-leipzig","type":"indoor_pool","lat":"51.336847199999994","lon":"12.353675175513555"},{"title":"Schwimmhalle West","address":{"leisure":"Schwimmhalle 
West","house_number":"52a","road":"Hans-Driesch-Straße","suburb":"Leutzsch","city_district":"Altwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04179","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schwimmhalle-west","type":"indoor_pool","lat":"51.34652235","lon":"12.311952829626495"},{"title":"Sportbad an der Elster","address":{"leisure":"Sportbad an der Elster","house_number":"8","road":"Antonienstraße","suburb":"Plagwitz","city_district":"Südwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04229","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/sportbad-an-der-elster","type":"indoor_pool","lat":"51.319954300000006","lon":"12.336005139793688"},{"title":"Sprunghalle der Universität Leipzig","address":{"leisure":"Universitätsschwimmhalle","house_number":"4","road":"Mainzer Straße","neighbourhood":"Bachviertel","suburb":"Zentrum-West","city_district":"Mitte","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04109","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/sprunghalle-der-universitaet-leipzig","type":"indoor_pool","lat":"51.336847199999994","lon":"12.353675175513555"},{"title":"Kinderfreibecken \"Robbe\"","address":{"leisure":"Schwimmhalle Nord","house_number":"54","road":"Kleiststraße","suburb":"Eutritzsch","city_district":"Nord","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04157","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/kinderfreibecken-robbe","type":"outdoor_pool","lat":"51.37231965","lon":"12.38134070842299"},{"title":"Ökobad Lindenthal","address":{"house_number":"3","road":"Am 
Freibad","suburb":"Lindenthal","city_district":"Nordwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04158","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/oekobad-lindenthal","type":"outdoor_pool","lat":"51.3935438","lon":"12.3269022"},{"title":"Schreberbad","address":{"leisure":"Schreberbad","house_number":"15","road":"Schreberstraße","neighbourhood":"Bachviertel","suburb":"Zentrum-West","city_district":"Mitte","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04109","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schreberbad","type":"outdoor_pool","lat":"51.338967100000005","lon":"12.358802401994044"},{"title":"Sommerbad Kleinzschocher","address":{"house_number":"75","road":"Küchenholzallee","suburb":"Kleinzschocher","city_district":"Südwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04229","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/sommerbad-kleinzschocher","type":"outdoor_pool","lat":"51.3163021","lon":"12.338389"},{"title":"Sommerbad Schönefeld","address":{"house_number":"39","road":"Volbedingstraße","neighbourhood":"Schönefeld","suburb":"Schönefeld-Abtnaundorf","city_district":"Nordost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04357","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/sommerbad-schoenefeld","type":"outdoor_pool","lat":"51.3621058","lon":"12.4110359"},{"title":"Sommerbad Südost","address":{"house_number":"173","road":"Oststraße","suburb":"Stötteritz","city_district":"Südost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04299","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/sommerbad-suedost","type":"outdoor_pool","lat":"51.3262935","lon":"12.4198936"}]
1+
[{"title":"Grünauer Welle","address":{"leisure":"Grünauer Welle","house_number":"7","road":"Stuttgarter Allee","neighbourhood":"Wohnkomplex 4","suburb":"Grünau-Siedlung","city_district":"West","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04209","country":"Deutschland","country_code":"de"},"lat":"51.31507295","lon":"12.291681015468672","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/schwimmhalle-gruenauer-welle"},{"title":"Schwimmhalle Mitte","address":{"leisure":"Schwimmhalle Mitte","house_number":"84","road":"Kirschbergstraße","suburb":"Gohlis-Süd","city_district":"Nord","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04155","country":"Deutschland","country_code":"de"},"lat":"51.36225665","lon":"12.353394654258464","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/schwimmhalle-mitte"},{"title":"Schwimmhalle Nord","address":{"leisure":"Schwimmhalle Nord","house_number":"54","road":"Kleiststraße","suburb":"Eutritzsch","city_district":"Nord","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04157","country":"Deutschland","country_code":"de"},"lat":"51.37231965","lon":"12.38134070842299","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/schwimmhalle-nord"},{"title":"Schwimmhalle Nordost","address":{"leisure":"Schwimmhalle Nordost","house_number":"26","road":"Schönefelder Allee","neighbourhood":"Schönefeld","suburb":"Schönefeld-Abtnaundorf","city_district":"Nordost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04347","country":"Deutschland","country_code":"de"},"lat":"51.358358550000005","lon":"12.404955916520343","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/schwimmhalle-nordost"},{"title":"Schwimmhalle 
Süd","address":{"leisure":"Schwimmhalle Süd","house_number":"10","road":"Tarostraße","suburb":"Zentrum-Südost","city_district":"Mitte","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04103","country":"Deutschland","country_code":"de"},"lat":"51.3239909","lon":"12.385783649999993","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/schwimmhalle-sued"},{"title":"Schwimmhalle Südost","address":{"leisure":"Schwimmhalle Südost","house_number":"35","road":"Kolmstraße","suburb":"Probstheida","city_district":"Südost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04299","country":"Deutschland","country_code":"de"},"lat":"51.31632955","lon":"12.42836830011111","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/schwimmhalle-suedost"},{"title":"Schwimmhalle West","address":{"leisure":"Schwimmhalle West","house_number":"52a","road":"Hans-Driesch-Straße","suburb":"Leutzsch","city_district":"Altwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04179","country":"Deutschland","country_code":"de"},"lat":"51.34652235","lon":"12.311952829626495","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/schwimmhalle-west"},{"title":"Sportbad an der Elster","address":{"leisure":"Sauna im Sportbad an der Elster","house_number":"8","road":"Antonienstraße","suburb":"Plagwitz","city_district":"Südwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04229","country":"Deutschland","country_code":"de"},"lat":"51.3196487","lon":"12.3356885","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/sportbad-an-der-elster"},{"title":"Sommerbad Kleinzschocher","address":{"shop":"Sommerbad 
Kleinzschocher","house_number":"75","road":"Küchenholzallee","suburb":"Kleinzschocher","city_district":"Südwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04229","country":"Deutschland","country_code":"de"},"lat":"51.3165758","lon":"12.3384644","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/freibad-kleinzschocher"},{"title":"Schreberbad","address":{"leisure":"Schreberbad","house_number":"15","road":"Schreberstraße","neighbourhood":"Bachviertel","suburb":"Zentrum-West","city_district":"Mitte","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04109","country":"Deutschland","country_code":"de"},"lat":"51.338967100000005","lon":"12.358802401994044","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/freibad-schreberbad"},{"title":"Sommerbad Schönefeld","address":{"leisure":"Sommerbad Schönefeld","house_number":"39","road":"Volbedingstraße","neighbourhood":"Schönefeld","suburb":"Schönefeld-Abtnaundorf","city_district":"Nordost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04357","country":"Deutschland","country_code":"de"},"lat":"51.3630321","lon":"12.411970819689838","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/freibad-schoenefeld"},{"title":"Sommerbad Südost","address":{"house_number":"173","road":"Oststraße","suburb":"Stötteritz","city_district":"Südost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04299","country":"Deutschland","country_code":"de"},"lat":"51.3262935","lon":"12.4198936","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/freibad-suedost"},{"title":"Ökobad Lindenthal","address":{"house_number":"3","road":"Am 
Freibad","suburb":"Lindenthal","city_district":"Nordwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04158","country":"Deutschland","country_code":"de"},"lat":"51.3930018","lon":"12.3270532","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/oekobad-lindenthal"}]

src/scrapers/leipzig-swimming-pools.js

Lines changed: 53 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -8,70 +8,75 @@ const url_outdoor_pool = `${domain}/freizeit-kultur-und-tourismus/sport/sportsta
88

99
const __dirname = new URL('.', import.meta.url).pathname;
1010

11-
Promise.all([scrapeIt(url_indoor_pool, {
12-
list: {
13-
listItem: '.address-list-item',
14-
data: {
15-
title: {
16-
selector: '.link_intern',
17-
attr: 'title'
18-
},
19-
address: {
20-
selector: '.list.left',
21-
convert: address => address.split('\n').map(d => d.trim()).filter(d => !!d)
22-
},
23-
link: {
24-
attr: "href",
25-
selector: '.link_intern',
26-
convert: href => `${domain}/${href}`
27-
}
28-
}
29-
}
30-
}), scrapeIt(url_outdoor_pool, {
11+
// scrape-it schema for the pool overview pages (new version of this hunk; the
// lines whose gutter marker ends in "-" below belong to the removed implementation).
// Every `.project-attributes` element becomes one entry in `list`, carrying
//   title      - read from the `h3 a` element (no `attr` given; presumably the link text - confirm)
//   detailsUrl - the `href` attribute of that same `h3 a` link
const scrapeDetailsUrl = {
3112
list: {
32-
listItem: '.address-list-item',
13+
listItem: '.project-attributes',
3314
data: {
3415
title: {
35-
selector: '.link_intern',
36-
attr: 'title'
16+
selector: 'h3 a'
3717
},
38-
address: {
39-
selector: '.list.left',
40-
convert: address => address.split('\n').map(d => d.trim()).filter(d => !!d)
41-
},
42-
link: {
43-
attr: "href",
44-
selector: '.link_intern',
45-
convert: href => `${domain}/${href}`
18+
detailsUrl: {
19+
selector: 'h3 a',
20+
attr: "href"
4621
}
4722
}
4823
}
49-
})]).then(async ([indoor_pool, outdoor_pool]) => {
50-
let data = [
51-
...indoor_pool.data.list.map(sh => ({ ...sh, type: 'indoor_pool' })),
52-
...outdoor_pool.data.list.map(sh => ({ ...sh, type: 'outdoor_pool' })),
53-
]
24+
}
5425

55-
let newList = [];
56-
for (let i = 0; i < data.length; i++) {
57-
let element = data[i];
58-
let q = `${element.address.join(' ').replace(/ \(.*\)/, '').replace('an der Schwimmhalle Nord', '').replace('Vollbedingstraße', 'Volbedingstraße').replace('Kirschbergstraße 84', 'Schwimmhalle Mitte')}`;
59-
console.log(q)
60-
let resp = await search(q)
61-
let search_results = resp.filter(r => ['water_park', 'sports_centre'].includes(r.type));
26+
/**
 * scrape-it schema for a single pool detail page.
 *
 * `address` is taken from the first (`eq: 0`) paragraph matched by
 * `.t3booking-t3booking-main-content p` and normalised into a one-line
 * string that is later used as the geocoder query:
 *   - the "Im Stadtplan anzeigen" link label is removed,
 *   - a comma is inserted in front of the postcode,
 *   - surrounding whitespace is trimmed.
 */
const scrapeDetailsData = {
  address: {
    selector: '.t3booking-t3booking-main-content p',
    eq: 0,
    convert: value => value
      .replace('Im Stadtplan anzeigen', '')
      // Anchor on a complete five-digit 04xxx postcode. A plain ' 04'
      // substring match would also hit a house number such as "04".
      .replace(/\s+(04\d{3})\b/, ', $1')
      .trim()
  }
}
33+
34+
/**
 * Looks up a scraped pool with `search` and merges the geocoder match into it.
 *
 * `search` is defined outside this block; from its use here it is expected to
 * resolve to an array of results that carry `type`, `address`, `lat` and `lon`.
 *
 * @param {{title?: string, address: string}} element - scraped pool; `address` is the query string.
 * @returns {Promise<object>} a copy of `element` whose `address`, `lat` and `lon` come from the
 *   first result of type `water_park` or `sports_centre`. When there is no such result, or the
 *   lookup fails, the unchanged `element` is returned (best effort, never rejects).
 */
const enrichWithCoords = async (element) => {
  console.log('search for ' + element.address)
  // Declared outside the try block so the "not found" branch can still log it.
  let resp;
  try {
    resp = await search(element.address)
    // Use the first result that passes the type filter. Taking resp[0] would
    // pick up unrelated hits (e.g. a sauna or a shop at the same address).
    const match = resp.find(r => ['water_park', 'sports_centre'].includes(r.type));
    if (match) {
      return {
        ...element,
        address: match.address,
        lat: match.lat,
        lon: match.lon
      }
    }
    console.log(element.title, element.address, 'could not be found')
    console.log(resp)
    return element;
  } catch (e) {
    console.log(element.title, element.address, 'could not be resolved')
    // Log the error itself: the response is not available when the lookup threw.
    console.log(e)
    return element;
  }
}
57+
58+
// Kick off the scrapes of the indoor and outdoor overview pages. Both calls
// use the same `scrapeDetailsUrl` schema; the returned values are combined
// with Promise.all further down.
const scrapeDetailsUrlIndoor = scrapeIt(url_indoor_pool, scrapeDetailsUrl);
59+
const scrapeDetailsUrlOutdoor = scrapeIt(url_outdoor_pool, scrapeDetailsUrl);
60+
61+
/**
 * Turns the two overview-page scrape results into the published JSON file.
 *
 * For every listed pool the detail page is scraped for its address, the
 * address is geocoded via `enrichWithCoords`, and the collected records are
 * written to `public/data/leipzig-swimming-pools.json`.
 *
 * @param {[object, object]} results - scrape-it results for the indoor and the outdoor
 *   overview page, each exposing `data.list` with `title` and `detailsUrl` entries.
 * @returns {Promise<void>} resolves once the file has been written.
 */
const handleScrapeResponse = async ([indoor_pool, outdoor_pool]) => {
  const pools = [
    ...indoor_pool.data.list.map(sh => ({ ...sh, type: 'indoor_pool' })),
    ...outdoor_pool.data.list.map(sh => ({ ...sh, type: 'outdoor_pool' })),
  ]
  const records = [];
  // Requests stay sequential, as before. NOTE(review): presumably this keeps
  // the load on the scraped site and the geocoder low - confirm before parallelising.
  for (const pool of pools) {
    // Resolve against the domain instead of concatenating strings, so leading
    // slashes cannot be doubled or lost.
    const detailsUrl = new URL(pool.detailsUrl, domain).href;
    const details = await scrapeIt(detailsUrl, scrapeDetailsData);
    // Pass the title along so the helper's log lines can name the pool.
    const enriched = await enrichWithCoords({ title: pool.title, ...details.data });
    records.push({
      title: pool.title,
      // Keep the indoor/outdoor tag in the output; it used to be computed and then dropped.
      type: pool.type,
      ...enriched,
      link: detailsUrl
    });
  }
  fs.writeFileSync(`${__dirname}../../public/data/leipzig-swimming-pools.json`, JSON.stringify(records), 'utf8')
};
7581

76-
fs.writeFileSync(`${__dirname}../../public/data/leipzig-swimming-pools.json`, JSON.stringify(newList, null, 2), 'utf8')
77-
})
82+
// Entry point: wait for both overview scrapes, then build and write the data file.
// A failure anywhere in the chain is reported and reflected in the exit code
// instead of surfacing as an unhandled promise rejection.
Promise.all([scrapeDetailsUrlIndoor, scrapeDetailsUrlOutdoor])
  .then(handleScrapeResponse)
  .catch((err) => {
    console.error('scraping Leipzig swimming pools failed', err);
    process.exitCode = 1;
  })
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import scrapeIt from "scrape-it";
2+
3+
const domain = `https://www.leipzig.de`;
4+
const url_indoor_pool = `${domain}/freizeit-kultur-und-tourismus/sport/sportstaetten/schwimmhallen`;
5+
6+
// scrape-it schema for the indoor-pool overview page.
// Every `.project-attributes` element becomes one entry in `list`, carrying
//   title      - read from the `h3 a` element (no `attr` given; presumably the link text - confirm)
//   detailsUrl - the `href` attribute of that same `h3 a` link
const scrapeDetailsUrl = {
7+
list: {
8+
listItem: '.project-attributes',
9+
data: {
10+
title: {
11+
selector: 'h3 a'
12+
},
13+
detailsUrl: {
14+
selector: 'h3 a',
15+
attr: "href"
16+
}
17+
}
18+
}
19+
}
20+
21+
/**
 * scrape-it schema for a single pool detail page.
 *
 * `address` is taken from the first (`eq: 0`) paragraph matched by
 * `.t3booking-t3booking-main-content p` and normalised into a one-line string:
 *   - the "Im Stadtplan anzeigen" link label is removed,
 *   - a comma is inserted in front of the postcode,
 *   - surrounding whitespace is trimmed.
 */
const scrapeDetailsData = {
  address: {
    selector: '.t3booking-t3booking-main-content p',
    eq: 0,
    convert: value => value
      .replace('Im Stadtplan anzeigen', '')
      // Anchor on a complete five-digit 04xxx postcode. A plain ' 04'
      // substring match would also hit a house number such as "04".
      .replace(/\s+(04\d{3})\b/, ', $1')
      .trim()
  }
}
28+
29+
// Kick off the scrape of the indoor-pool overview page with the list schema above it in this file.
const scrapeDetailsUrlIndoor = scrapeIt(url_indoor_pool, scrapeDetailsUrl);
30+
31+
/**
 * Scrapes the detail page of the first listed indoor pool and prints its
 * title and address.
 *
 * @param {[object]} results - one-element array holding the scrape-it result of the
 *   overview page (`data.list` entries with `title` and `detailsUrl`).
 * @returns {Promise<{title: string, address: string}|undefined>} the printed record, or
 *   `undefined` when the overview page listed no pools.
 */
const handleScrapeResponse = async ([indoor_pool]) => {
  const [element] = indoor_pool.data.list;
  if (!element) {
    // Nothing to look up. Without this guard the property access below would throw.
    console.log('no pools found on the overview page');
    return undefined;
  }
  // Await the detail scrape so failures reject this function's promise
  // rather than becoming an unhandled rejection of a detached chain.
  const details = await scrapeIt(`${domain}${element.detailsUrl}`, scrapeDetailsData);
  console.log(details.data);
  const result = {
    title: element.title,
    address: details.data.address,
  }
  console.log(JSON.stringify(result, null, 2));
  return result;
};
45+
46+
// Entry point: wait for the overview scrape, then print the first pool's details.
// Failures are reported and reflected in the exit code instead of surfacing
// as an unhandled promise rejection.
Promise.all([scrapeDetailsUrlIndoor])
  .then(handleScrapeResponse)
  .catch((err) => {
    console.error('scraping the indoor pool overview failed', err);
    process.exitCode = 1;
  })

0 commit comments

Comments
 (0)