Skip to content

Commit f094753

Browse files
authored
fix: add support for newer medium rss feed format (#150)
1 parent a71bd6d commit f094753

File tree

2 files changed

+42
-13
lines changed

2 files changed

+42
-13
lines changed

app/util/medium.ts

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,20 @@ export const getArticle = async (index: string, username: string) => {
1111

1212
// @ts-ignore
1313
res?.items.forEach(element => {
14-
const thumbnail = extractFirstImageFromHTML(element.content)
14+
const thumbnail = extractFirstImageFromHTML(element.content || element.description)
1515
if (thumbnail) {
1616
element.thumbnail = thumbnail
1717
fixItem.push(element)
1818
}
1919
});
2020

21-
const { title, published: pubDate, link: url, thumbnail, content: description } = fixItem[
21+
const { title, published: pubDate, link: url, thumbnail, content: content, description: desc } = fixItem[
2222
// @ts-ignore
2323
index || 0
2424
];
2525

26+
const description = content || desc;
27+
2628

2729
const responseThumbnail = await axios(thumbnail.src, { responseType: 'arraybuffer' });
2830
const base64Img = Buffer.from(responseThumbnail.data, 'binary').toString('base64');
@@ -32,19 +34,27 @@ export const getArticle = async (index: string, username: string) => {
3234
const imgType = imgTypeArr[imgTypeArr.length - 1];
3335

3436
const convertedThumbnail = `data:image/${imgType};base64,${base64Img}`;
37+
38+
39+
const cleanedDescription = stripHTML(description);
3540
return {
3641
title: title.length > 80 ? title.substring(0, 80) + ' ...' : title,
3742
thumbnail: convertedThumbnail,
3843
url,
3944
date: moment(pubDate).format('DD MMM YYYY, HH:mm'),
4045
description:
41-
description
42-
.replace(/<h3>.*<\/h3>|<figcaption>.*<\/figcaption>|<[^>]*>/gm, '')
46+
cleanedDescription
4347
.substring(0, 60) + '...',
4448
};
4549
};
4650

4751

52+
// Convert an HTML fragment to plain text.
// Parses `text` with JSDOM and returns the text content of the resulting
// document body with leading and trailing whitespace trimmed.
function stripHTML(text: string) {
53+
const dom = new JSDOM(text);
54+
const textContent = dom.window.document.body.textContent || ''; // fall back to an empty string when textContent is null/empty
55+
const cleanText = textContent.trim();
56+
return cleanText;
57+
}
4858

4959

5060
// Define a type for the image data
@@ -58,15 +68,21 @@ function extractFirstImageFromHTML(html: string): ImageData | null {
5868
const dom = new JSDOM(html);
5969
const document = dom.window.document;
6070

61-
// Select the first figure that contains an image
62-
const figure = document.querySelector('figure img');
63-
if (figure) {
64-
const img = figure as HTMLImageElement; // Ensure it's treated as an image element
65-
// const figcaption = figure.parentElement ? figure.parentElement.querySelector('figcaption') : null;
66-
return {
67-
src: img.src,
68-
alt: img.alt || '', // Use an empty string if alt is not present
69-
};
71+
// Try different strategies to find the first image
72+
const imageSelectors = [
73+
'figure img', // Case 1: Image inside figure
74+
'.medium-feed-image img', // Case 2: Medium feed specific
75+
'img' // Case 3: Any image as fallback
76+
];
77+
78+
for (const selector of imageSelectors) {
79+
const img = document.querySelector(selector) as HTMLImageElement;
80+
if (img) {
81+
return {
82+
src: img.src,
83+
alt: img.alt || '', // Use empty string if alt is not present
84+
};
85+
}
7086
}
7187

7288
return null; // Return null if no images are found

test.html

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<html>
2+
<head>
3+
<title>Test</title>
4+
</head>
5+
<body>
6+
<h1>Test</h1>
7+
<h2>Iman Example for Medium Feed Older Version</h2>
8+
<img src="http://localhost:3000/medium/@imantumorang/0">
9+
<h2>aarafat27 Example for Medium Newer Version</h2>
10+
<img src="http://localhost:3000/medium/@aarafat27/0">
11+
</body>
12+
</html>
13+

0 commit comments

Comments
 (0)