Skip to content

Commit 5634303

Browse files
authored
feat(route/bbc): improve bbc (cn) (#21066)
1 parent 982193c commit 5634303

File tree

2 files changed

+129
-35
lines changed

2 files changed

+129
-35
lines changed

lib/routes/bbc/utils.ts

Lines changed: 0 additions & 35 deletions
This file was deleted.

lib/routes/bbc/utils.tsx

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
import { renderToString } from 'hono/jsx/dom/server';
2+
3+
/**
 * Make sure an image selection carries a plain `src` and no responsive attributes.
 *
 * If `src` is missing, the URL of the last `srcset` candidate is promoted to `src`.
 * `srcset` (in either spelling) and `sizes` are then removed.
 *
 * @param $img - cheerio-style selection exposing `attr()` and a chainable `removeAttr()`.
 * @returns nothing; the selection is modified in place.
 */
const processImageAttributes = ($img) => {
    // Attribute lookup is case-sensitive and attribute names in parsed HTML are
    // lower-cased, so `srcset` is checked first. The camel-cased spelling is kept
    // as a fallback for markup where the original casing survived.
    const srcset = $img.attr('srcset') || $img.attr('srcSet');

    if (!$img.attr('src') && srcset) {
        // Candidates are separated by a comma followed by whitespace. Splitting on
        // a bare comma would break URLs that themselves contain commas.
        const candidates = srcset
            .trim()
            .split(/,\s+/)
            .filter(Boolean);
        const lastCandidate = candidates.at(-1);
        if (lastCandidate) {
            // A candidate is "<url> <descriptor>"; only the URL is wanted.
            $img.attr('src', lastCandidate.trim().split(/\s+/)[0]);
        }
    }

    $img.removeAttr('srcset').removeAttr('srcSet').removeAttr('sizes');
};
13+
14+
/**
 * Render a minimal `<figure>` as an HTML string.
 *
 * The figure holds one `<img>` (with `referrerpolicy="no-referrer"`) and, when
 * `figcaptionContent` is truthy, a `<figcaption>` wrapping that content.
 *
 * @param src - value for the image `src` attribute.
 * @param alt - value for the image `alt` attribute.
 * @param figcaptionContent - caption content; a falsy value yields no `<figcaption>` element.
 * @returns the string produced by `renderToString` for the figure.
 */
const buildCleanFigure = (src, alt, figcaptionContent) => {
    const caption = figcaptionContent && <figcaption>{figcaptionContent}</figcaption>;

    const figure = (
        <figure>
            <img src={src} alt={alt} referrerpolicy="no-referrer" />
            {caption}
        </figure>
    );

    return renderToString(figure);
};
21+
22+
/**
 * Replace a `<figure>` containing an image with a freshly built, minimal figure.
 *
 * The image attributes are normalised first. An optional image-source text and an
 * optional caption are then collected and joined with " / " to form the new
 * figcaption. Figures without an `<img>` are left untouched.
 *
 * @param $ - cheerio-style root function used to wrap `figure`.
 * @param figure - the figure node to clean.
 * @returns nothing; the figure is replaced in the document.
 */
const cleanFigureElement = ($, figure) => {
    const $figure = $(figure);
    const $img = $figure.find('img');
    if ($img.length === 0) {
        return;
    }

    processImageAttributes($img);

    // image source (simp chinese/trad chinese): second <span> of the first
    // paragraph whose class contains "css-"; an empty selection yields no spans
    const $creditSpans = $figure.find('p[class*="css-"]').first().find('span');
    const sourceText = $creditSpans.length >= 2 ? $creditSpans.eq(1).text().trim() : '';

    let $caption = $figure.find('figcaption');
    if ($caption.length === 0) {
        // english version: the figcaption lives inside the element that follows the figure
        const $sibling = $figure.next();
        $caption = $sibling.find('figcaption');
        if ($caption.length > 0) {
            // the sibling is dropped once its caption has been picked up
            $sibling.remove();
        }
    }

    // Reads the caption text, preferring the dedicated caption paragraph and
    // otherwise stripping visually-hidden labels (like "Image caption, ").
    const readCaption = ($node) => {
        if ($node.length === 0) {
            return '';
        }
        const $paragraph = $node.find('[data-testid="caption-paragraph"]');
        if ($paragraph.length > 0) {
            return $paragraph.text().trim();
        }
        const $copy = $node.clone();
        $copy.find('.visually-hidden, [class*="VisuallyHidden"]').remove();
        return $copy.text().trim();
    };
    const captionText = readCaption($caption);

    const figcaptionContent = [sourceText, captionText].filter((part) => part.length > 0).join(' / ');
    const replacement = buildCleanFigure($img.attr('src'), $img.attr('alt') || '', figcaptionContent);

    $figure.replaceWith(replacement);
};
77+
78+
/**
 * Extract and tidy the article body from a loaded BBC page.
 *
 * @param $ - cheerio-style root function for the loaded document.
 * @returns the inner HTML of the selected content container, as returned by `.html()`.
 */
const ProcessFeed = ($) => {
    // Tried in order; the first selector with a match wins:
    //   1. hybrid news with video and story-body__inner (default)
    //   2. video news with video and story-body
    //   3. chinese version, which has a different structure
    const containerSelectors = ['#main-content article', 'div.story-body', 'main[role="main"]'];

    let content;
    for (const selector of containerSelectors) {
        content = $(selector);
        if (content.length > 0) {
            break;
        }
    }

    // remove useless DOMs
    content.find('header, section, [data-testid="bbc-logo-wrapper"]').remove();

    // the article title is already carried by the RSS item title
    content.find('h1').remove();

    // replace the parent's content with the markup held inside each <noscript>
    for (const noscript of content.find('noscript').toArray()) {
        const $noscript = $(noscript);
        $noscript.parent().html($noscript.html());
    }

    // clean up figure elements with images
    for (const figure of content.find('figure').toArray()) {
        cleanFigureElement($, figure);
    }

    // handle standalone images with figcaption siblings (English version)
    const looseImages = content.find('img').not('figure img').toArray();
    for (const img of looseImages) {
        const $img = $(img);
        processImageAttributes($img);

        // the caption, if any, sits inside the element right after the image
        const $sibling = $img.next();
        const $siblingCaption = $sibling.find('figcaption');
        if ($siblingCaption.length === 0) {
            continue;
        }

        const captionText = $siblingCaption.first().text().trim();
        if (!captionText) {
            continue;
        }

        $img.replaceWith(buildCleanFigure($img.attr('src'), $img.attr('alt') || '', captionText));
        $sibling.remove();
    }

    content.find('[data-component="media-block"] figcaption').prepend('<span>View video in browser: </span>');

    return content.html();
};
128+
129+
// Default export: an object whose only member is ProcessFeed.
export default { ProcessFeed };

0 commit comments

Comments
 (0)