GeoNP/app.py at main · karenblack/GeoNP · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# Author: Karen Black
# Last Modified: May 30, 2021
# Description: Server-side processing for GeoNP webapp

from flask import Flask, render_template, jsonify, request
# from bs4 import BeautifulSoup
import requests
# import re
# import json
import urllib

# Flask application object; the @app.route decorators in this file register their views on it
app = Flask(__name__)

@app.route("/")
def home():
	"""Serve the site root by rendering the "index.html" template."""
	landing_page = render_template("index.html")
	return landing_page


# to render compare.html page
@app.route('/compare', methods = ['POST', 'GET'])
def compare():
	"""Render the comparison page for the parks submitted from the selection form.

	Every submitted 'parkToggle' value is expected to look like "<title>,<state>".
	For each park the helper functions of this module (geology_text, park_titles,
	infobox, coords, images, hiking) are called and their results are collected
	in parallel lists that are handed to the template.

	Returns:
		The rendered "compareTwo.html" template when exactly two parks were
		submitted, otherwise "compare.html". A request that is not a POST
		carries no selection, so "index.html" is rendered instead.

	Note:
		coords() and images() do not catch service failures, so an error in
		either of them propagates out of this view.
	"""
	# Only a POST carries the form selection. Previously a plain GET reached the
	# loop below with titles_all never assigned and raised UnboundLocalError.
	if request.method != 'POST':
		return render_template("index.html")

	# get the park names submitted for comparison
	titles_all = request.form.getlist('parkToggle')

	# parallel lists: index i of every list describes the i-th submitted park
	image_urls = []					# image urls
	geol = []						# geology paragraph text
	estab = []						# infobox text - Established
	visitors = []					# infobox text - Park visitors
	maps = []						# map coordinates
	headers = []					# headers for comparison page
	nps_web = []					# NPS websites
	states = []						# state for hiking map
	hikes = []						# hiking titles

	for item in titles_all:
		title_list = item.split(",")
		title = title_list[0]
		# tolerate a value without a ",<state>" part instead of raising IndexError
		state = title_list[1] if len(title_list) > 1 else ''

		# **** GEOLOGY PARAGRAPHS (text-scraper) *****
		geol.append(geology_text(title))

		# **** INFOBOX QUERY FOR TITLES (text-scraper) ****
		headers.append(park_titles(title))

		# *** INFOBOX SCRAPING (my API) ****
		infobox_data = infobox(title)
		estab.append(infobox_data[0])
		visitors.append(infobox_data[1])
		nps_web.append(infobox_data[2])

		# **** GPS COORDINATES (text scraper) ****
		maps.append(coords(title))

		# **** IMAGES (image-scraper) *****
		image_urls.append(images(title))

		# ***** HIKING (widget) ********
		hikes.append(hiking(title))
		# these two values are passed through unchanged; every other state gets the 'us/' prefix
		if state in ('american-samoa', 'us-virgin-islands'):
			states.append(state)
		else:
			states.append('us/' + state)

	# render the webpage: a dedicated template is used when exactly two parks were submitted
	template = "compareTwo.html" if len(titles_all) == 2 else "compare.html"
	return render_template(template, urls=image_urls, title=headers, web=nps_web, vis=visitors, est=estab, geo=geol, map=maps, hike=hikes, state=states)


def park_titles(title, timeout=30):
	"""Return the park name to display as a header on the 'comparison' page.

	Requests the park's infobox from the Wikipedia text-scraper microservice and
	takes the first element of the JSON response as the park name. When the
	request fails or times out, the body is not JSON, or the response has no
	string as its first element, the submitted title is used instead. The
	phrases 'National Park of', 'National Park and Preserve' and 'National Park'
	are then removed from the name.

	Args:
		title: park title; it is appended to the service URL as-is.
		timeout: seconds to wait for the service before falling back to title.

	Returns:
		The name with the phrases above removed. Whitespace left behind by the
		removal is not stripped.
	"""
	try:
		resp = requests.get('https://wiki-text-scraper.herokuapp.com/wiki/' + title + '/infobox', timeout=timeout)
		# ValueError: body is not JSON; LookupError/TypeError: no first element to read
		park_name = resp.json()[0]
	except (requests.RequestException, ValueError, LookupError, TypeError):
		park_name = title

	# a non-string first element cannot be cleaned up below, so fall back as well
	if not isinstance(park_name, str):
		park_name = title

	# order matters: the longer phrases must be removed before the bare 'National Park'
	for tag in ('National Park of', 'National Park and Preserve', 'National Park'):
		park_name = park_name.replace(tag, '')
	return park_name

def geology_text(title, timeout=30):
	"""Return HTML-formatted text about a park's geology from the Wikipedia text-scraper microservice.

	The 'Geology' section is requested first. If it cannot be obtained, the
	'Geography' section and then the 'Intro' section are tried; text from either
	of these is prefixed with a notice that no distinct Geology section is
	available and a bold heading ('Geography' or 'About'). If every request
	fails, only the notice is returned instead of raising.

	Args:
		title: park title; it is appended to the service URL as-is.
		timeout: seconds to wait for each request before trying the next section.

	Returns:
		A string in which every newline of the section text has been replaced
		by '<br> </br>'.
	"""
	# NOTE(review): the result contains HTML markup built around scraped text;
	# presumably the template renders it unescaped - confirm the source is trusted.
	notice = "<i> No distinct Geology Section available. Please refer to the National Park Service Website or the Park and Other Resources Menu.</i>"

	# (section requested from the service, heading shown above fallback text)
	sections = (("Geology", None), ("Geography", "Geography"), ("Intro", "About"))

	for section, heading in sections:
		try:
			resp = requests.get('https://wiki-text-scraper.herokuapp.com/wiki/' + title + '/' + section, timeout=timeout)
			if heading is None:
				# debug trace kept from the original: emitted once the Geology request returned
				print("****GEO*****", title)
			# ValueError: body is not JSON; LookupError/TypeError/AttributeError:
			# the section is missing or is not a string
			section_html = resp.json()[section].replace('\n', '<br> </br>')
		except (requests.RequestException, ValueError, LookupError, TypeError, AttributeError):
			continue

		if heading is None:
			return section_html
		return notice + "<br> </br> <b>" + heading + "</b><br>" + section_html

	# none of the sections could be retrieved
	return notice

def infobox(title, timeout=30):
	"""Fetch selected infobox fields for a park from the Wikipedia infobox scraper API.

	One request is made per field: 'est' (established date), 'vis' (park
	visitors) and 'web' (NPS website). The value stored under the "infobox" key
	of each JSON response is used. A field whose request fails or times out,
	whose body is not JSON, or whose response lacks the "infobox" key is
	reported as 'Not Provided'.

	Args:
		title: park title; it is placed in the query string as-is.
		timeout: seconds to wait for each request.

	Returns:
		A three-element list: [established date, visitors, NPS website].
	"""
	results = []
	for field in ('est', 'vis', 'web'):
		try:
			resp = requests.get('https://wiki-image-scraper.herokuapp.com/api/infobox/?title=' + title + '&fld=' + field, timeout=timeout)
			value = resp.json()["infobox"]
		except (requests.RequestException, ValueError, LookupError, TypeError):
			# best effort: one missing field must not prevent the others from loading
			value = 'Not Provided'
		results.append(value)
	return results

def coords(title, timeout=30):
	"""Retrieve a park's GPS coordinates from the Wikipedia text-scraper microservice.

	Args:
		title: park title; it is appended to the service URL as-is.
		timeout: seconds to wait for the service; without a timeout a stalled
			service would block the request indefinitely.

	Returns:
		[latitude, longitude] as floats, read from the "lat" and "lon" keys of
		the JSON response.

	Raises:
		requests.RequestException: the request failed or timed out.
		ValueError: the body is not JSON or a coordinate is not numeric.
		KeyError: the response has no "lat" or "lon" key.
	"""
	resp = requests.get('https://wiki-text-scraper.herokuapp.com/wiki/' + title + '/coords', timeout=timeout)
	payload = resp.json()
	# named lat_lon so the local no longer shadows this function's own name
	lat_lon = [float(payload["lat"]), float(payload["lon"])]
	return lat_lon

def images(title, timeout=30):
	"""Build the URL of a 400x400 cropped main image for a park.

	The Wikipedia image-scraping microservice is asked for the park's main
	image; the value under the "images" key of its JSON response is then
	URL-encoded as the 'url' parameter of the image-transformer service.

	Args:
		title: park title; it is placed in the query string as-is.
		timeout: seconds to wait for the image-scraping service.

	Returns:
		The image-transformer URL as a string.

	Raises:
		requests.RequestException: the request failed or timed out.
		ValueError: the body is not JSON.
		KeyError: the response has no "images" key.
	"""
	# Imported explicitly: a bare `import urllib` does not guarantee that the
	# `urllib.parse` submodule is loaded.
	from urllib.parse import urlencode

	resp = requests.get('https://wiki-image-scraper.herokuapp.com/api/images/?title=' + title + '&ct=main', timeout=timeout)
	source_image = resp.json()["images"]
	query = urlencode({'url': source_image})
	# image transformer
	return 'https://create-a-map.herokuapp.com/api/picture?ht=400&wid=400&fit=crop&' + query

def hiking(title):
	"""Turn a park title into the form used by the All Trails widget: lower-cased, with each underscore replaced by a hyphen."""
	lowered = title.lower()
	# splitting on '_' and re-joining with '-' swaps every underscore for a hyphen
	return '-'.join(lowered.split('_'))


# Start the Flask server with debug mode off when this file is executed directly
if __name__ == "__main__":
	app.run(debug=False)