-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
174 lines (144 loc) · 5.75 KB
/
app.py
File metadata and controls
174 lines (144 loc) · 5.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# Author: Karen Black
# Last Modified: May 30, 2021
# Description: Server-side processing for GeoNP webapp
from flask import Flask, render_template, jsonify, request
# from bs4 import BeautifulSoup
import requests
# import re
# import json
import urllib
# Flask application object; the @app.route decorators in this file register their views on it.
app = Flask(__name__)
@app.route("/")
def home():
    """Serve the site root by rendering the ``index.html`` template."""
    landing_page = render_template("index.html")
    return landing_page
# to render compare.html page
@app.route('/compare', methods = ['POST', 'GET'])
def compare():
    """Render the comparison page for the parks selected on the form.

    On POST, every ``parkToggle`` form value is split on commas; the first
    piece is used as the park title and the second as the state. For each
    park the helper functions in this module are called to collect the
    geology text, display name, infobox fields, coordinates, image URL and
    hiking slug. The results are passed to the template as parallel lists
    (index i in every list belongs to the i-th submitted park).

    On any other method no parks are processed and the template is rendered
    with empty lists.

    Returns:
        The rendered ``compareTwo.html`` template when exactly two values
        were submitted, otherwise the rendered ``compare.html`` template.
    """
    # Parallel lists handed to the template, one entry per park.
    image_urls = []  # image URLs
    geol = []        # geology paragraph text
    estab = []       # infobox text - Established
    visitors = []    # infobox text - Park visitors
    maps = []        # map coordinates
    headers = []     # headers for the comparison page
    nps_web = []     # NPS websites
    states = []      # state path segment for the hiking map
    hikes = []       # hiking titles

    # Default to "no parks" so that a GET request (which carries no form
    # submission) does not reach len(titles_all) with the name unbound.
    titles_all = []
    if request.method == 'POST':
        titles_all = request.form.getlist('parkToggle')

    for item in titles_all:
        title_list = item.split(",")
        title = title_list[0]
        # NOTE(review): a submitted value without a comma raises IndexError here.
        state = title_list[1]

        # Geology paragraphs (text scraper)
        geol.append(geology_text(title))

        # Display title (text scraper infobox query)
        headers.append(park_titles(title))

        # Infobox fields (infobox API): established date, visitors, NPS website
        estab_date, visit, nps_url = infobox(title)
        estab.append(estab_date)
        visitors.append(visit)
        nps_web.append(nps_url)

        # GPS coordinates (text scraper)
        maps.append(coords(title))

        # Image (image scraper)
        image_urls.append(images(title))

        # Hiking widget: title slug plus state path
        hikes.append(hiking(title))
        if state in ('american-samoa', 'us-virgin-islands'):
            # These two are used as-is, without the 'us/' prefix.
            states.append(state)
        else:
            states.append('us/' + state)

    # Both templates receive exactly the same context; only the file differs.
    template = "compareTwo.html" if len(titles_all) == 2 else "compare.html"
    return render_template(template, urls=image_urls, title=headers, web=nps_web, vis=visitors, est=estab, geo=geol, map=maps, hike=hikes, state=states)
def park_titles(title):
    """Return a shortened park name for the comparison page headers.

    Requests ``/wiki/<title>/infobox`` from the wiki-text-scraper service and
    takes the first element of the JSON response as the park name. If the
    request fails, times out, or the response cannot be used that way,
    ``title`` itself is used instead. The phrases "National Park of",
    "National Park and Preserve" and "National Park" are then removed.

    Args:
        title: Park title; it is appended to the service URL unchanged.

    Returns:
        The name with the phrases above removed. Whitespace left next to a
        removed phrase is not trimmed.
    """
    try:
        # The timeout bounds the wait; without one a stalled service would
        # block this request indefinitely.
        resp = requests.get('https://wiki-text-scraper.herokuapp.com/wiki/' + title + '/infobox', timeout=30)
        # Indexing stays inside the try so an empty or non-list payload
        # also falls back to the submitted title instead of crashing.
        parkName = resp.json()[0]
    except Exception:
        # Deliberate best-effort: any lookup failure falls back to the raw title.
        parkName = title
    if not isinstance(parkName, str):
        parkName = title

    # Order matters: the longer phrases must be removed before the bare
    # "National Park" so no partial remainder is left behind.
    for tag in ('National Park of', 'National Park and Preserve', 'National Park'):
        parkName = parkName.replace(tag, '')
    return parkName
def geology_text(title):
    """Return HTML text describing a park's geology, with fallbacks.

    Asks the wiki-text-scraper service for the "Geology" section of the
    park's page. If that cannot be obtained, the "Geography" section is
    tried, then the "Intro" section; a fallback result is prefixed with a
    notice that no Geology section is available and a bold heading
    ("Geography" or "About"). Newlines in the section text are replaced
    with ``<br> </br>``.

    If all three lookups fail, only the notice is returned, so one
    unavailable page does not abort the whole comparison.

    Args:
        title: Park title; it is appended to the service URL unchanged.

    Returns:
        An HTML string as described above.
    """
    notice = "<i> No distinct Geology Section available. Please refer to the National Park Service Website or the Park and Other Resources Menu.</i>"
    # (section to request, heading shown before fallback text); tried in order.
    attempts = (("Geology", None), ("Geography", "Geography"), ("Intro", "About"))

    for section, heading in attempts:
        try:
            # The timeout bounds the wait; without one a stalled service
            # would block this request indefinitely.
            resp = requests.get('https://wiki-text-scraper.herokuapp.com/wiki/' + title + '/' + section, timeout=30)
            section_html = resp.json()[section].replace('\n', '<br> </br>')
        except Exception:
            # Best-effort: any failure for this section moves on to the next one.
            continue
        if heading is None:
            return section_html
        return notice + "<br> </br> <b>" + heading + "</b><br>" + section_html

    return notice
def _infobox_field(title, fld):
    """Fetch one infobox field from the infobox API; 'Not Provided' on any failure."""
    try:
        # The timeout bounds the wait; without one a stalled service would
        # block this request indefinitely.
        resp = requests.get('https://wiki-image-scraper.herokuapp.com/api/infobox/?title=' + title + '&fld=' + fld, timeout=30)
        return resp.json()["infobox"]
    except Exception:
        # Deliberate best-effort: a missing field must not break the page.
        return 'Not Provided'


def infobox(title):
    """Look up the established date, visitor count and NPS website of a park.

    Calls the infobox API once per field (``fld`` values ``est``, ``vis`` and
    ``web``) and reads the ``"infobox"`` key of each JSON response. A field
    whose lookup fails for any reason is reported as ``'Not Provided'``.

    Args:
        title: Park title; it is placed in the query string unchanged.

    Returns:
        A three-element list: ``[established, visitors, nps_website]``.
    """
    return [_infobox_field(title, fld) for fld in ('est', 'vis', 'web')]
def coords(title):
    """Return the GPS coordinates of a park as ``[latitude, longitude]``.

    Requests ``/wiki/<title>/coords`` from the wiki-text-scraper service and
    converts the ``"lat"`` and ``"lon"`` values of the JSON response to
    floats.

    Args:
        title: Park title; it is appended to the service URL unchanged.

    Returns:
        A two-element list of floats, latitude first.

    Raises:
        Nothing is caught here: request errors (including the timeout), a
        non-JSON body, missing ``lat``/``lon`` keys, or values ``float()``
        cannot convert all propagate to the caller.
    """
    # The timeout bounds the wait; without one a stalled service would block
    # this request indefinitely.
    resp = requests.get('https://wiki-text-scraper.herokuapp.com/wiki/' + title + '/coords', timeout=30)
    resp_json = resp.json()
    # Local name chosen so it does not shadow this function's own name.
    lat_lon = [float(resp_json["lat"]), float(resp_json["lon"])]
    return lat_lon
def images(title):
    """Return an image-transformer URL for a park's main image.

    Requests the main image for ``title`` from the image scraping service,
    reads the ``"images"`` value of the JSON response, and URL-encodes it as
    the ``url`` parameter of the create-a-map picture endpoint, which is
    asked for a 400x400 cropped picture.

    Args:
        title: Park title; it is placed in the query string unchanged.

    Returns:
        The transformer URL as a string.

    Raises:
        Nothing is caught here: request errors (including the timeout), a
        non-JSON body, or a missing ``images`` key propagate to the caller.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not by
    # itself guarantee that ``urllib.parse`` has been loaded.
    from urllib.parse import urlencode

    # The timeout bounds the wait; without one a stalled service would block
    # this request indefinitely.
    resp = requests.get('https://wiki-image-scraper.herokuapp.com/api/images/?title=' + title + '&ct=main', timeout=30)
    image_url = {'url': resp.json()["images"]}
    # Hand the source image to the image transformer.
    return 'https://create-a-map.herokuapp.com/api/picture?ht=400&wid=400&fit=crop&' + urlencode(image_url)
def hiking(title):
    """Build the All Trails widget slug for a park title.

    The title is lower-cased and every underscore becomes a hyphen.
    """
    return title.lower().replace('_', '-')
# Start the Flask server (debug mode off) only when this file is run as a script.
if __name__ == "__main__":
    app.run(debug=False)