Skip to content

Commit 4047e34

Browse files
authored
fix: quanta magazine (#21031)
* exclude economist
* fix: use String.fromCodePoint instead of fromCharCode
* delete old version
* Update lib/routes/quantamagazine/archive.ts
  Co-authored-by: Tony <TonyRL@users.noreply.github.com>
* obtain the author information differently
---------
1 parent e6598a0 commit 4047e34

File tree

3 files changed

+146
-81
lines changed

3 files changed

+146
-81
lines changed

lib/routes-deprecated/quantamagazine/archive.js

Lines changed: 0 additions & 81 deletions
This file was deleted.
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
import { load } from 'cheerio';
2+
3+
import type { Route } from '@/types';
4+
import cache from '@/utils/cache';
5+
import ofetch from '@/utils/ofetch';
6+
import { parseDate } from '@/utils/parse-date';
7+
8+
// Site origin: base of the WordPress API endpoint, the feed link, and the fallback target of animation badges.
const rootUrl = 'https://www.quantamagazine.org';

/**
 * Converts the raw HTML of an article body into feed-friendly HTML.
 *
 * Three rewrites are applied, in this order:
 * 1. `$latex …$` spans become `<img>` tags pointing at latex.codecogs.com.
 * 2. `<div id=…>` blocks carrying JSON-style `"src"` / `"caption"` fields become
 *    `<figure>` elements with the caption's JSON escapes decoded.
 * 3. `<lottie-player>` elements become one badge link per animation; further
 *    variants of the same animation (Desktop/Mobile) are dropped.
 *
 * @param html - Inner HTML of the article body; `null` or an empty string yields `''`.
 * @param articleLink - URL the animation badge links to; falls back to `rootUrl` when omitted or empty.
 * @returns The rewritten HTML.
 */
const processArticleContent = (html: string | null, articleLink?: string): string => {
    if (!html) {
        return '';
    }

    // Handle LaTeX formulas
    let processed = html.replaceAll(/\$latex([\S\s]+?)\$/g, '<img align="center" src="https://latex.codecogs.com/png.latex?$1"/>');

    // Handle embedded images with captions
    processed = processed.replaceAll(/<div id=[\S\s]+?"src":"(https?:?[\S\s]+?)",[\S\s]+?"caption":"([\S\s]*?)",[\S\s]+?<\/div>?/g, (_match, src: string, cap: string) => {
        // Drop JSON backslash escapes such as `\/` and `\"`; `\n` and `\u` are handled separately below.
        const imgUrl = src.replaceAll(/\\([^nu])/g, '$1');

        const caption = cap
            .replaceAll(/\\([^nu])/g, '$1')
            // Escaped newlines carry no meaning inside the caption markup.
            .replaceAll(String.raw`\n`, '')
            // A JSON `\u` escape is always exactly four hex digits (either case); matching fewer
            // would split escapes such as `\u00fc` into a wrong character plus a stray letter.
            .replaceAll(/\\u([\dA-Fa-f]{4})/g, (_escape, hex: string) => String.fromCodePoint(Number.parseInt(hex, 16)));

        return `<figure><img src="${imgUrl}" /><figcaption>${caption}</figcaption></figure>`;
    });

    // Handle lottie-player animations.
    // Several players may exist for one animation (desktop/mobile versions); only the first is kept.
    const seenAnimations = new Set<string>();
    const badgeImg = 'https://img.shields.io/badge/🎬-View_Interactive_Animation-0066CC?style=for-the-badge';

    // A replacer callback is used so that `$` sequences in the link are emitted literally
    // instead of being interpreted as replacement patterns.
    processed = processed.replaceAll(/<lottie-player[^>]*src="([^"]+)"[^>]*><\/lottie-player>/g, (_match, src: string) => {
        // Animation name = file name without the Desktop/Mobile suffix; `||` also covers an empty file name.
        const animName =
            src
                .split('/')
                .pop()
                ?.replace(/-(Desktop|Mobile).*\.json$/, '') || 'animation';

        if (seenAnimations.has(animName)) {
            // Duplicate (mobile/desktop variant): just remove it
            return '';
        }

        // First occurrence: replace with a badge that links to the article
        seenAnimations.add(animName);
        const linkUrl = articleLink || rootUrl;
        return `<p style="text-align: center; margin: 20px 0;"><a href="${linkUrl}" target="_blank"><img src="${badgeImg}" alt="View Interactive Animation" /></a></p>`;
    });

    return processed;
};
60+
61+
/**
 * Builds the Quanta Magazine archive feed.
 *
 * Lists the latest posts through the site's WordPress REST endpoint, then, for each
 * post (cached by its link), downloads the article page, strips elements that should
 * not appear in the feed and runs the body through `processArticleContent`.
 *
 * @param ctx - Request context; only `ctx.req.query('limit')` is read.
 * @returns The feed title, link and items.
 */
export const handler = async (ctx) => {
    // The WordPress REST API only accepts `per_page` values from 1 to 100, so an
    // unparsable or non-positive `limit` falls back to the default and larger values are capped.
    const defaultLimit = 20;
    const maxLimit = 100;
    const requestedLimit = Number.parseInt(ctx.req.query('limit') ?? '', 10);
    const limit = Number.isNaN(requestedLimit) || requestedLimit < 1 ? defaultLimit : Math.min(requestedLimit, maxLimit);

    const apiUrl = `${rootUrl}/wp-json/wp/v2/posts`;
    const posts = await ofetch(apiUrl, {
        query: {
            per_page: limit,
            page: 1,
            _embed: 'author',
        },
    });

    const items = await Promise.all(
        posts.map((item) =>
            cache.tryGet(item.link, async () => {
                // Get author name from embedded data
                const authorName = item._embedded?.author?.[0]?.name || '';

                // Fetch full article content from the page, keeping the response as raw text
                const response = await ofetch(item.link, {
                    parseResponse: (txt) => txt,
                });
                const $ = load(response);

                // Remove unnecessary elements
                $('.header-spacer, .scale1.mha, .post__title__author-date, .post__aside--divider').remove();
                $('.hide-on-print, .post__aside__pullquote, aside.post__sidebar.hide, nav[data-glide-el]').remove();
                $('.post__footer').remove();
                // Remove video placeholder images (the poster is already in the video element)
                $('.iframe-placeholder').remove();

                const contents = processArticleContent($('#postBody').html(), item.link);

                return {
                    title: item.title.rendered,
                    author: authorName,
                    description: contents,
                    link: item.link,
                    guid: item.link,
                    pubDate: parseDate(item.date),
                };
            })
        )
    );

    return {
        title: 'Quanta Magazine',
        link: rootUrl,
        item: items,
    };
};
112+
113+
/**
 * Route registration for the Quanta Magazine archive feed.
 *
 * Serves `handler` at `/archive` and declares a radar rule that maps
 * `quantamagazine.org` to this route.
 */
export const route: Route = {
    path: '/archive',
    name: 'Archive',
    url: 'quantamagazine.org',
    maintainers: ['emdoe'],
    handler,
    example: '/quantamagazine/archive',
    // The route takes no path parameters.
    parameters: {},
    description: 'Get the latest articles from Quanta Magazine.',
    categories: ['new-media'],

    // Capability flags: only radar support is enabled.
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportRadar: true,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    radar: [
        {
            source: ['quantamagazine.org'],
            target: '/archive',
        },
    ],
};
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import type { Namespace } from '@/types';
2+
3+
/**
 * Namespace metadata for the Quanta Magazine routes: display name, site URL and content language.
 */
export const namespace: Namespace = {
    name: 'Quanta Magazine',
    url: 'quantamagazine.org',
    lang: 'en',
};

0 commit comments

Comments
 (0)