Skip to content

Commit cb431f7

Browse files
seanmorley15, github-advanced-security[bot], dependabot[bot], Copilot
authored
Fix Wikipedia API with User-Agent (#822)
* refactor(serializers): remove unused gpxpy and geojson imports * fix(generate_description): improve error handling and response validation for Wikipedia API calls * Potential fix for code scanning alert no. 42: Information exposure through an exception Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> * fix(generate_description): improve error logging for Wikipedia API data fetch failures * chore(deps): bump devalue (#823) Bumps the npm_and_yarn group with 1 update in the /frontend directory: [devalue](https://github.com/sveltejs/devalue). Updates `devalue` from 5.1.1 to 5.3.2 - [Release notes](https://github.com/sveltejs/devalue/releases) - [Changelog](https://github.com/sveltejs/devalue/blob/main/CHANGELOG.md) - [Commits](sveltejs/devalue@v5.1.1...v5.3.2) --- updated-dependencies: - dependency-name: devalue dependency-version: 5.3.2 dependency-type: indirect dependency-group: npm_and_yarn ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Sean Morley <98704938+seanmorley15@users.noreply.github.com> * Refactor help documentation link in settings page - Updated the condition to display the help documentation link based on the `wandererEnabled` flag. - Removed the conditional rendering for staff users and Strava integration status. - Changed the documentation link to point to the Immich integration documentation. 
* fix(locations): update include_collections parameter handling for default behavior * Update backend/server/adventures/views/generate_description_view.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent a3f0eda commit cb431f7

File tree

7 files changed

+1175
-1069
lines changed

7 files changed

+1175
-1069
lines changed

backend/server/adventures/serializers.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
from geopy.distance import geodesic
88
from integrations.models import ImmichIntegration
99
from adventures.utils.geojson import gpx_to_geojson
10-
import gpxpy
11-
import geojson
1210
import logging
1311

1412
logger = logging.getLogger(__name__)

backend/server/adventures/views/generate_description_view.py

Lines changed: 124 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,42 +3,137 @@
33
from rest_framework.permissions import IsAuthenticated
44
from rest_framework.response import Response
55
import requests
6+
from django.conf import settings
7+
import urllib.parse
8+
import logging
9+
10+
logger = logging.getLogger(__name__)
611

712
class GenerateDescription(viewsets.ViewSet):
813
permission_classes = [IsAuthenticated]
14+
15+
# User-Agent header required by Wikipedia API
16+
HEADERS = {
17+
'User-Agent': f'AdventureLog/{getattr(settings, "ADVENTURELOG_RELEASE_VERSION", "unknown")}'
18+
}
919

10-
@action(detail=False, methods=['get'],)
20+
@action(detail=False, methods=['get'])
1121
def desc(self, request):
1222
name = self.request.query_params.get('name', '')
13-
# un url encode the name
14-
name = name.replace('%20', ' ')
15-
name = self.get_search_term(name)
16-
url = 'https://en.wikipedia.org/w/api.php?origin=*&action=query&prop=extracts&exintro&explaintext&format=json&titles=%s' % name
17-
response = requests.get(url)
18-
data = response.json()
19-
data = response.json()
20-
page_id = next(iter(data["query"]["pages"]))
21-
extract = data["query"]["pages"][page_id]
22-
if extract.get('extract') is None:
23-
return Response({"error": "No description found"}, status=400)
24-
return Response(extract)
25-
@action(detail=False, methods=['get'],)
23+
if not name:
24+
return Response({"error": "Name parameter is required"}, status=400)
25+
26+
# Properly URL decode the name
27+
name = urllib.parse.unquote(name)
28+
search_term = self.get_search_term(name)
29+
30+
if not search_term:
31+
return Response({"error": "No matching Wikipedia article found"}, status=404)
32+
33+
# Properly URL encode the search term for the API
34+
encoded_term = urllib.parse.quote(search_term)
35+
url = f'https://en.wikipedia.org/w/api.php?origin=*&action=query&prop=extracts&exintro&explaintext&format=json&titles={encoded_term}'
36+
37+
try:
38+
response = requests.get(url, headers=self.HEADERS, timeout=10)
39+
response.raise_for_status()
40+
data = response.json()
41+
42+
pages = data.get("query", {}).get("pages", {})
43+
if not pages:
44+
return Response({"error": "No page data found"}, status=404)
45+
46+
page_id = next(iter(pages))
47+
page_data = pages[page_id]
48+
49+
# Check if page exists (page_id of -1 means page doesn't exist)
50+
if page_id == "-1":
51+
return Response({"error": "Wikipedia page not found"}, status=404)
52+
53+
if not page_data.get('extract'):
54+
return Response({"error": "No description found"}, status=404)
55+
56+
return Response(page_data)
57+
58+
except requests.exceptions.RequestException as e:
59+
logger.exception("Failed to fetch data from Wikipedia")
60+
return Response({"error": "Failed to fetch data from Wikipedia."}, status=500)
61+
except ValueError as e: # JSON decode error
62+
return Response({"error": "Invalid response from Wikipedia API"}, status=500)
63+
64+
@action(detail=False, methods=['get'])
2665
def img(self, request):
2766
name = self.request.query_params.get('name', '')
28-
# un url encode the name
29-
name = name.replace('%20', ' ')
30-
name = self.get_search_term(name)
31-
url = 'https://en.wikipedia.org/w/api.php?origin=*&action=query&prop=pageimages&format=json&piprop=original&titles=%s' % name
32-
response = requests.get(url)
33-
data = response.json()
34-
page_id = next(iter(data["query"]["pages"]))
35-
extract = data["query"]["pages"][page_id]
36-
if extract.get('original') is None:
37-
return Response({"error": "No image found"}, status=400)
38-
return Response(extract["original"])
67+
if not name:
68+
return Response({"error": "Name parameter is required"}, status=400)
69+
70+
# Properly URL decode the name
71+
name = urllib.parse.unquote(name)
72+
search_term = self.get_search_term(name)
73+
74+
if not search_term:
75+
return Response({"error": "No matching Wikipedia article found"}, status=404)
76+
77+
# Properly URL encode the search term for the API
78+
encoded_term = urllib.parse.quote(search_term)
79+
url = f'https://en.wikipedia.org/w/api.php?origin=*&action=query&prop=pageimages&format=json&piprop=original&titles={encoded_term}'
80+
81+
try:
82+
response = requests.get(url, headers=self.HEADERS, timeout=10)
83+
response.raise_for_status()
84+
data = response.json()
85+
86+
pages = data.get("query", {}).get("pages", {})
87+
if not pages:
88+
return Response({"error": "No page data found"}, status=404)
89+
90+
page_id = next(iter(pages))
91+
page_data = pages[page_id]
92+
93+
# Check if page exists
94+
if page_id == "-1":
95+
return Response({"error": "Wikipedia page not found"}, status=404)
96+
97+
original_image = page_data.get('original')
98+
if not original_image:
99+
return Response({"error": "No image found"}, status=404)
100+
101+
return Response(original_image)
102+
103+
except requests.exceptions.RequestException as e:
104+
logger.exception("Failed to fetch data from Wikipedia")
105+
return Response({"error": "Failed to fetch data from Wikipedia."}, status=500)
106+
except ValueError as e: # JSON decode error
107+
return Response({"error": "Invalid response from Wikipedia API"}, status=500)
39108

40109
def get_search_term(self, term):
41-
response = requests.get(f'https://en.wikipedia.org/w/api.php?action=opensearch&search={term}&limit=10&namespace=0&format=json')
42-
data = response.json()
43-
if data[1] and len(data[1]) > 0:
44-
return data[1][0]
110+
if not term:
111+
return None
112+
113+
# Properly URL encode the search term
114+
encoded_term = urllib.parse.quote(term)
115+
url = f'https://en.wikipedia.org/w/api.php?action=opensearch&search={encoded_term}&limit=10&namespace=0&format=json'
116+
117+
try:
118+
response = requests.get(url, headers=self.HEADERS, timeout=10)
119+
response.raise_for_status()
120+
121+
# Check if response is empty
122+
if not response.text.strip():
123+
return None
124+
125+
data = response.json()
126+
127+
# OpenSearch API returns an array with 4 elements:
128+
# [search_term, [titles], [descriptions], [urls]]
129+
if len(data) >= 2 and data[1] and len(data[1]) > 0:
130+
return data[1][0] # Return the first title match
131+
132+
return None
133+
134+
except requests.exceptions.RequestException:
135+
# If search fails, return the original term as fallback
136+
return term
137+
except ValueError: # JSON decode error
138+
# If JSON parsing fails, return the original term as fallback
139+
return term

frontend/pnpm-lock.yaml

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)