Skip to content

Commit cf690f1

Browse files
authored
Merge pull request #47 from Xpirix/fetch_all_resources
Fetch all resources
2 parents 2a450b0 + a92df37 commit cf690f1

File tree

6 files changed

+176
-54
lines changed

6 files changed

+176
-54
lines changed

REQUIREMENTS.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
feedparser==6.0.11
22
requests==2.32.3
33
pillow==11.0.0
4-
python-dateutil==2.9.0.post0
4+
python-dateutil==2.9.0.post0
5+
beautifulsoup4==4.13.3

fetch_feeds.py

Lines changed: 86 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,14 @@
44
import os
55
import json
66
from urllib.parse import urlparse
7+
import string
8+
import random
79
import requests
810
import shutil
911
from datetime import datetime
10-
from scripts.resize_image import resize_image
12+
from scripts.resize_image import resize_image, convert_to_webp, is_valid_image, is_valid_svg
1113
from dateutil.parser import parse as date_parse
14+
from bs4 import BeautifulSoup
1215

1316
# Path to the subscribers.json file
1417
SUBSCRIBERS_JSON_PATH = os.path.join(os.path.dirname(__file__), 'data', 'subscribers.json')
@@ -59,15 +62,75 @@ def fetch_and_create_post(self):
5962
except Exception as e:
6063
print(f"Failed to process feed for {self.subscriber_name}: {e}")
6164

65+
def fetch_all_images(self, content, subscriber_shortname, post_name):
    """
    Download every image referenced by a post and point the HTML at the local copies.

    Each <img> is downloaded and processed through download_and_process_image;
    on failure its src is blanked so the post never references a broken or
    remote file. Each <video> is replaced by a plain "Watch Video" link.

    param content: The HTML content of the post
    param subscriber_shortname: Folder name used for the subscriber's images
    param post_name: Folder name used for this post's images
    return: The rewritten HTML as a string
    """
    img_folder = os.path.join("img", "subscribers", subscriber_shortname, post_name)
    unknown_img_folder = os.path.join("static", img_folder, "unknown")
    soup = BeautifulSoup(content, 'html.parser')

    # Files in the "unknown" folder get random names, so stale files from a
    # previous run would never be overwritten: start from an empty folder.
    if os.path.exists(unknown_img_folder):
        shutil.rmtree(unknown_img_folder)
    os.makedirs(unknown_img_folder, exist_ok=True)

    for img in soup.find_all('img'):
        img_url = img.get('src')
        if not img_url:
            # An <img> without a usable src has nothing to download; leave
            # the tag untouched instead of aborting the whole post.
            continue
        try:
            file_name = self.get_image_name(img_url.split('?')[0])
            downloaded_img = self.download_and_process_image(
                img_url, file_name, img_folder, unknown_img_folder
            )
            img['src'] = downloaded_img
        except Exception as e:
            img['src'] = ""
            print(f"Failed to process image: {e}")

    for video in soup.find_all('video'):
        # Prefer a nested <source>, then fall back to the src attribute of
        # the <video> tag itself.
        source = video.find('source')
        video_url = source.get('src') if source is not None else None
        if not video_url:
            video_url = video.get('src')
        if not video_url:
            # No URL to link to: keep the original tag.
            continue
        video.replace_with(
            soup.new_tag('a', href=video_url, target="_blank", string="Watch Video")
        )

    return str(soup)
89+
90+
def download_and_process_image(self, img_url, file_name, img_folder, unknown_img_folder):
    """
    Download one image and return the site-relative path to use in the post.

    The branch is chosen from the URL's extension (query string ignored):
    raster images are validated, resized and converted to webp; svg files are
    validated and kept as they are; anything else is delegated to
    handle_unknown_image_format.

    param img_url: The image URL as found in the post
    param file_name: The file name to store the image under
    param img_folder: The image folder, relative to the "static" folder
    param unknown_img_folder: The folder receiving images of unknown format
    return: The path of the stored image, rooted at "/"
    raises Exception: If the downloaded file is not a valid image
    """
    raster_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp')
    bare_url = img_url.split('?')[0]  # URL without its query parameters
    lowered_url = bare_url.lower()
    static_folder = os.path.join("static", img_folder)

    if lowered_url.endswith(raster_suffixes):
        local_path = self.download_image(bare_url, file_name, static_folder)
        if not is_valid_image(local_path):
            os.remove(local_path)
            raise Exception(f"Invalid image: {local_path}")
        resize_image(local_path, max_height=600)
        webp_path = convert_to_webp(local_path, replace=True)
        return os.path.join("/", img_folder, os.path.basename(webp_path))

    if lowered_url.endswith('.svg'):
        local_path = self.download_image(bare_url, file_name, static_folder)
        if not is_valid_svg(local_path):
            os.remove(local_path)
            raise Exception(f"Invalid image: {local_path}")
        return os.path.join("/", img_folder, file_name)

    # No recognised extension: the full URL (query included) is downloaded.
    converted_path = self.handle_unknown_image_format(img_url, unknown_img_folder)
    return os.path.join("/", img_folder, "unknown", os.path.basename(converted_path))
109+
110+
def handle_unknown_image_format(self, img_url, dest_folder):
    """
    Download an image whose URL has no recognised extension and convert it to webp.

    The file is stored under a random "image_<8 chars>.png" name, validated,
    resized to a maximum height of 600 pixels and then converted.

    param img_url: The image URL
    param dest_folder: The folder to download the image into
    return: The path returned by convert_to_webp
    raises Exception: If the downloaded file is not a valid image
    """
    alphabet = string.ascii_letters + string.digits
    random_tag = ''.join(random.choices(alphabet, k=8))
    local_path = self.download_image(
        img_url,
        f"image_{random_tag}.png",
        dest_folder,
        is_unknown=True
    )

    if is_valid_image(local_path):
        resize_image(local_path, max_height=600)
        return convert_to_webp(local_path, replace=True)

    # Do not keep a file that cannot be opened as an image.
    os.remove(local_path)
    raise Exception(f"Invalid image: {local_path}")
128+
129+
62130
def process_entry(self, entry):
63131
try:
64132
dest_folder = self.get_dest_folder()
65133
title = entry.title
66-
# I don't think we need to download images because the images are already in the feed
67-
# image_url = next((link.href for link in entry.links if 'image' in link.type), entry.links[-1].href)
68-
# if image_url.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp')):
69-
# file_name = self.get_image_name(image_url)
70-
# self.download_image(image_url, file_name, dest_folder)
71134

72135
post_url = entry.link
73136

@@ -81,6 +144,7 @@ def process_entry(self, entry):
81144

82145
are_tags_present = any(str(category).lower() in tags for category in self.filter_categories)
83146
if are_tags_present:
147+
content = self.fetch_all_images(content, self.shortname, file_name)
84148
content = self.generate_markdown_content(title, entry_date, post_url, content, tags)
85149

86150
# Copy the markdown file to the posts folder
@@ -169,12 +233,20 @@ def write_to_file(self, filename, content):
169233
with open(filename, "w", encoding="utf=8") as f:
170234
f.write(content)
171235

172-
def download_image(self, image_url, image_name, dest_folder):
173-
response = requests.get(image_url, stream=True)
236+
def download_image(self, image_url, image_name, dest_folder, is_unknown=False):
    """
    Download a file into dest_folder and return the path it was written to.

    param image_url: The URL to download
    param image_name: The file name to use inside dest_folder
    param dest_folder: The destination folder (created when missing)
    param is_unknown: Kept for backward compatibility; both kinds of image
        are now downloaded the same way
    return: The path of the downloaded file
    """
    os.makedirs(dest_folder, exist_ok=True)
    image_filename = os.path.join(dest_folder, image_name)

    # A timeout keeps one unresponsive host from blocking the whole run, and
    # the context manager releases the connection even when writing fails.
    with requests.get(image_url, stream=True, timeout=30) as response:
        with open(image_filename, "wb") as out_file:
            # iter_content decodes gzip/deflate transfer encodings, whereas
            # copying response.raw would store the still-compressed bytes.
            for chunk in response.iter_content(chunk_size=8192):
                out_file.write(chunk)
    return image_filename
178250

179251

180252
class FunderProcessor:
@@ -265,10 +337,11 @@ def process_funder(item):
265337
print(f"Failed to delete {file_path}. Reason: {e}")
266338

267339
# Iterate over the subscribers and fetch posts for active ones
340+
i = 1
268341
for subscriber in subscribers:
269342
if not subscriber.get('is_active'):
270343
continue
271-
344+
print(f"{i}/{len(subscribers)}: Processing feed for {subscriber['name']}")
272345
languages = subscriber.get('languages', {})
273346
available_lang = languages.get('available', DEFAULT_AVAILABLE_LANG)
274347
main_lang = languages.get('main', DEFAULT_MAIN_LANG)
@@ -283,5 +356,6 @@ def process_funder(item):
283356
filter_categories
284357
)
285358
processor.fetch_and_create_post()
359+
i += 1
286360

287361
# FunderProcessor.fetch_funders()

scripts/resize_image.py

Lines changed: 84 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from PIL import Image
22
import os
3+
import xml.etree.ElementTree as ET
34

45

56
def resize_image(image_filename, max_height=120):
@@ -8,38 +9,87 @@ def resize_image(image_filename, max_height=120):
89
The image is resized in place.
910
param image_filename: The image file to resize
1011
param max_height: The maximum height in pixels
12+
TODO: Add support for other image formats
1113
"""
12-
if (
13-
image_filename.lower().endswith('.png') or
14-
image_filename.lower().endswith('.jpg')
15-
):
16-
if os.path.exists(image_filename):
17-
print(f'Processing: {image_filename}')
18-
with Image.open(image_filename) as img:
19-
width, height = img.size
20-
if height > max_height:
21-
new_height = max_height
22-
new_width = int((new_height / height) * width)
23-
24-
img_resized = img.resize(
25-
(new_width, new_height), Image.LANCZOS
26-
)
27-
28-
# Determine the file format
29-
file_format = (
30-
'PNG' if image_filename.lower().endswith('.png')
31-
else 'JPEG'
32-
)
33-
34-
# Save the resized image with optimization
35-
img_resized.save(
36-
image_filename,
37-
format=file_format,
38-
optimize=True,
39-
quality=85
40-
)
41-
print(f'Resized and optimized: {image_filename}')
42-
else:
43-
print(f'No resizing needed for: {image_filename}')
44-
else:
45-
print(f'File not found: {image_filename}')
14+
if os.path.exists(image_filename):
15+
with Image.open(image_filename) as img:
16+
width, height = img.size
17+
if height > max_height:
18+
new_height = max_height
19+
new_width = int((new_height / height) * width)
20+
21+
img_resized = img.resize(
22+
(new_width, new_height), Image.LANCZOS
23+
)
24+
25+
# Determine the file format
26+
file_format = image_filename.split('.')[-1].upper()
27+
if file_format == 'JPG':
28+
file_format = 'JPEG'
29+
30+
# Save the resized image with optimization
31+
img_resized.save(
32+
image_filename,
33+
format=file_format,
34+
optimize=True,
35+
quality=85
36+
)
37+
else:
38+
print(f'File not found: {image_filename}')
39+
40+
# Transform an image into webp format
41+
def convert_to_webp(image_filename, replace=False):
    """
    Convert an image to webp format.
    The webp file is written next to the source image, with the same base name.
    param image_filename: The image file to convert
    param replace: When True, delete the source file after the conversion
    return: The path of the webp file, or image_filename unchanged when its
            extension is not one of the supported formats
    raises FileNotFoundError: If image_filename does not exist
    """
    supported_formats = ('.png', '.jpg', '.jpeg', '.tiff')
    base_name, image_ext = os.path.splitext(image_filename)
    if image_ext.lower() not in supported_formats:
        return image_filename

    if not os.path.exists(image_filename):
        print(f'File not found: {image_filename}')
        raise FileNotFoundError(image_filename)

    # Swap only the extension. A plain str.replace would also rewrite any
    # folder named like the extension, and would miss an upper-case extension
    # entirely, so the source and target paths would be the same file.
    webp_filename = f'{base_name}.webp'

    # Save the image in webp format with optimization
    with Image.open(image_filename) as img:
        img.save(
            webp_filename,
            format='WEBP',
            optimize=True,
            quality=85
        )

    # Remove the source only once it is closed; deleting an open file fails
    # on some platforms.
    if replace:
        os.remove(image_filename)
    return webp_filename
70+
71+
# Check if the image is valid
72+
def is_valid_image(image_filename):
    """
    Check if the image file is valid.
    param image_filename: The image file to check
    return: True if the image is valid, False otherwise
    """
    try:
        # The context manager closes the file handle, so callers can delete
        # the file right after a failed check.
        with Image.open(image_filename) as img:
            img.verify()
    except Exception:
        # Any failure to open or verify the file means it is not usable.
        print(f'Invalid image: {image_filename}')
        return False
    return True
84+
85+
def is_valid_svg(svg_filename):
    """
    Check if the svg file is valid.
    A file is valid when it is well-formed XML whose root element is <svg>,
    with or without a namespace.
    param svg_filename: The svg file to check
    return: True if the svg is valid, False otherwise
    """
    # NOTE(review): ElementTree is not hardened against maliciously crafted
    # XML, and the callers visible here pass downloaded files -- consider a
    # hardened parser if untrusted feeds are a concern.
    try:
        root = ET.parse(svg_filename).getroot()
    except ET.ParseError:
        return False  # If parsing fails, it's invalid

    # ElementTree reports namespaced tags as "{namespace}svg"; compare only
    # the local name so well-formed non-SVG XML (e.g. an XHTML error page)
    # is rejected.
    local_name = root.tag.rsplit('}', 1)[-1]
    return local_name == 'svg'

themes/hugo-bulma-blocks-theme/assets/sass/bulma.sass

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
{{ $truenoSBd := resources.Get "webfonts/TruenoSBd.otf" }}
3333
{{ $truenoBd := resources.Get "webfonts/TruenoBd.otf" }}
3434
{{ $truenoUltBlk := resources.Get "webfonts/TruenoUltBlk.otf" }}
35+
{{ $countryFlagsEmoji := resources.Get "webfonts/TwemojiCountryFlags.woff2" }}
3536

3637
@font-face
3738
font-family: 'Montserrat'
@@ -62,6 +63,9 @@
6263
src: url("{{ $truenoUltBlk.RelPermalink }}") format("opentype")
6364
font-weight: 700
6465

66+
// Country flag emoji fallback font. The resource is a .woff2 file, so the
// format hint must be "woff2" for browsers to accept this source.
@font-face
  font-family: "Twemoji Country Flags"
  src: url("{{ $countryFlagsEmoji.RelPermalink }}") format("woff2")
6569

6670
{{ $worksans := resources.Get "webfonts/worksans.woff2" }}
6771

Binary file not shown.

themes/hugo-bulma-blocks-theme/layouts/partials/header.html

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -175,13 +175,6 @@
175175
src="{{ .Site.Params.uniNavHeaderUrl }}"
176176
></script>
177177

178-
<!-- Countries Flag for windows -->
179-
<!-- Added by Lova -->
180-
<!-- See https://github.com/talkjs/country-flag-emoji-polyfill -->
181-
<script type="module" defer>
182-
import { polyfillCountryFlagEmojis } from "https://cdn.skypack.dev/country-flag-emoji-polyfill";
183-
polyfillCountryFlagEmojis();
184-
</script>
185178
</head>
186179

187180
<body></body>

0 commit comments

Comments
 (0)