Skip to content

Commit dc68ecb

Browse files
authored
feat(route): add support for f95zone.to (#21042)
* feat(route): add f95zone routes * fix(route): fix f95zone threads route for folo parse * fix(route): parse f95zone thread guid from [YYYY-MM-DD] in title * fix(route): use update date in thread title as pubDate * fix(route/f95zone): revert image URL processing * fix(route/f95zone): reenable image URL processing for telegram-bot * feat(route/f95zone): support post and thread route * feat(route/f95zone): optimize title * feat(route/f95zone): optimize image display * feat(route/f95zone): optimize image display * feat(route/f95zone): reoptimize img-elements processing * feat(route/f95zone): reset to the original route pattern * feat(route/f95zone): optimize routes * feat(route/f95zone): optimize content processing * feat(route/f95zone): reduce code * fix(route/f95zone): use parseDate for proper date handling * fix(route/f95zone): remove broken radar * fix(route/f95zone): remove invalid radar config and improve docs - Remove radar config from post route (postId is in URL hash, cannot be extracted) - Add empty radar array to prevent Folo parsing error - Add detailed URL format documentation for both routes - Explain how to convert browser URL to subscription URL * fix(route/f95zone): fix invalid example URL * fix(route/f95zone): use internally generated UA * fix(route/f95zone): only fetch the first and the last page * fix(route/f95zone): remove unnecessary sorting logic
1 parent 5ae169a commit dc68ecb

File tree

5 files changed

+274
-0
lines changed

5 files changed

+274
-0
lines changed

lib/config.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ type ConfigEnvKeys =
102102
| 'EH_STAR'
103103
| 'EH_IMG_PROXY'
104104
| `EMAIL_CONFIG_${string}`
105+
| 'F95ZONE_COOKIE'
105106
| 'FANBOX_SESSION_ID'
106107
| 'FANFOU_CONSUMER_KEY'
107108
| 'FANFOU_CONSUMER_SECRET'
@@ -392,6 +393,9 @@ export type Config = {
392393
email: {
393394
config: Record<string, string | undefined>;
394395
};
396+
f95zone: {
397+
cookie?: string;
398+
};
395399
fanbox: {
396400
session?: string;
397401
};
@@ -871,6 +875,9 @@ const calculateValue = () => {
871875
email: {
872876
config: email_config,
873877
},
878+
f95zone: {
879+
cookie: envs.F95ZONE_COOKIE,
880+
},
874881
fanbox: {
875882
session: envs.FANBOX_SESSION_ID,
876883
},

lib/routes/f95zone/namespace.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import type { Namespace } from '@/types';
2+
3+
/**
 * Namespace metadata shared by every route under `lib/routes/f95zone`.
 *
 * Declares the display name, the site's host and a short description of the site.
 */
export const namespace: Namespace = {
    name: 'F95zone',
    url: 'f95zone.to',
    description: 'F95zone is a community for adult games and animations.',
};

lib/routes/f95zone/post.ts

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import { load } from 'cheerio';
2+
3+
import { config } from '@/config';
4+
import type { Route } from '@/types';
5+
import ofetch from '@/utils/ofetch';
6+
import { parseDate } from '@/utils/parse-date';
7+
8+
import { processContent } from './utils';
9+
10+
/**
 * Route definition for `/f95zone/post/:thread/:postId`.
 *
 * Produces a feed with a single item that mirrors one forum post. The item's
 * GUID and publication date are derived from the `[yyyy-mm-dd]` marker in the
 * thread title when one is present, so a new marker yields a new feed entry.
 * Without a marker the GUID is the post link and the date is the post's own
 * `datetime` attribute.
 */
export const route: Route = {
    path: '/post/:thread/:postId',
    name: 'Post',
    maintainers: ['wsmbsbbz'],
    example: '/f95zone/post/vicineko-collection-2025-06-14-vicineko.84596/post-5909830',
    categories: ['game'],
    description: `Track content changes of a specific post. Uses the date \`[yyyy-mm-dd]\` in title for update detection.

URL format: \`https://f95zone.to/threads/{thread}/#post-{id}\` → replace \`#\` with \`/\` to get \`/f95zone/post/{thread}/post-{id}\`

Example: \`https://f95zone.to/threads/vicineko-collection-2025-06-14-vicineko.84596/#post-5909830\` → \`/f95zone/post/vicineko-collection-2025-06-14-vicineko.84596/post-5909830\`

Note: This route does not support Radar auto-detection because the post ID is in the URL hash (after \`#\`), which cannot be extracted by Radar. You need to manually construct the subscription URL.`,
    parameters: {
        thread: 'Thread slug with ID',
        postId: 'Post ID with `post-` prefix, replace `#` with `/` from browser URL',
    },
    features: {
        requireConfig: [
            {
                name: 'F95ZONE_COOKIE',
                optional: true,
                description: 'F95zone cookie for accessing restricted content.',
            },
        ],
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
        nsfw: true,
    },
    // Intentionally empty: the post ID lives in the URL hash, which Radar cannot extract.
    radar: [],
    handler: async (ctx) => {
        const { thread, postId } = ctx.req.param();
        const baseUrl = 'https://f95zone.to';

        // Reader-facing permalink. Kept in its `#post-…` form because the item
        // GUID below is built from it and must stay stable for subscribers.
        const link = `${baseUrl}/threads/${thread}/#${postId}`;

        // A URL fragment is never transmitted in the HTTP request, so fetching
        // `link` would always return the first page of the thread and a post
        // on any later page would not be found. Request the post by path
        // instead; this is the same permalink shape the thread route uses for
        // its item links.
        // NOTE(review): relies on the forum redirecting this path to the page
        // that contains the post — confirm against a multi-page thread.
        const fetchUrl = `${baseUrl}/threads/${thread}/${postId}`;

        const response = await ofetch(fetchUrl, {
            headers: {
                referer: baseUrl,
                // Only send a cookie header when one has been configured.
                ...(config.f95zone.cookie ? { cookie: config.f95zone.cookie } : {}),
            },
        });

        const $ = load(response);
        const title = $('h1.p-title-value').text().trim();
        const post = $(`article[data-content="${postId}"]`);
        const content = post.find('.bbWrapper').html() || '';
        const author = post.attr('data-author') || '';
        const postDate = post.find('time.u-dt').first().attr('datetime');
        const tags = $('a.tagItem')
            .toArray()
            .map((el) => $(el).text().trim());

        // Extract [yyyy-mm-dd] from title for update tracking
        const dateMatch = title.match(/\[(\d{4}-\d{2}-\d{2})\]/);
        const updateDate = dateMatch?.[1];

        return {
            title: `[F95zone] ${title}`,
            link,
            item: [
                {
                    title: `[Updated] ${title}`,
                    link,
                    // A changed title date changes the GUID, so the update shows up as a new entry.
                    guid: updateDate ? `${link}_${updateDate}` : link,
                    description: processContent(content),
                    pubDate: updateDate ? parseDate(updateDate) : postDate ? parseDate(postDate) : undefined,
                    author,
                    category: tags,
                },
            ],
        };
    },
};

lib/routes/f95zone/thread.ts

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
import { load } from 'cheerio';
2+
3+
import { config } from '@/config';
4+
import type { DataItem, Route } from '@/types';
5+
import ofetch from '@/utils/ofetch';
6+
import { parseDate } from '@/utils/parse-date';
7+
8+
import { processContent } from './utils';
9+
10+
/**
 * Route definition for `/f95zone/thread/:thread`.
 *
 * Produces one feed item per post found on the first page of the thread and,
 * when the pagination shows more than one page, on the last page as well.
 * Pages in between are not fetched.
 */
export const route: Route = {
    path: '/thread/:thread',
    name: 'Thread',
    maintainers: ['wsmbsbbz'],
    example: '/f95zone/thread/ubermation-collection-2026-01-19-uebermation-uebermation.231247',
    categories: ['game'],
    description: `Track replies in a thread. Fetches the first page and the last page.

URL format: \`https://f95zone.to/threads/{thread}/\` → use \`{thread}\` as the parameter.

Example: \`https://f95zone.to/threads/ubermation-collection-2026-01-19-uebermation-uebermation.231247/\` → \`/f95zone/thread/ubermation-collection-2026-01-19-uebermation-uebermation.231247\`

Note: If you want to track a specific post's content changes (e.g., first post with download links), use the \`/f95zone/post\` route instead.`,
    parameters: {
        thread: 'Thread slug with ID, copy from browser URL after `/threads/`',
    },
    features: {
        requireConfig: [
            {
                name: 'F95ZONE_COOKIE',
                optional: true,
                description: 'F95zone cookie for accessing restricted content.',
            },
        ],
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
        nsfw: true,
    },
    radar: [
        {
            source: ['f95zone.to/threads/:thread/*'],
            target: '/thread/:thread',
        },
    ],
    handler: async (ctx) => {
        const { thread } = ctx.req.param();
        const baseUrl = 'https://f95zone.to';
        const threadLink = `${baseUrl}/threads/${thread}/`;

        const headers = {
            referer: baseUrl,
            // Only send a cookie header when one has been configured.
            ...(config.f95zone.cookie ? { cookie: config.f95zone.cookie } : {}),
        };

        const firstPageResponse = await ofetch(threadLink, { headers });
        const $firstPage = load(firstPageResponse);
        const title = $firstPage('h1.p-title-value').text().trim();

        // The last numbered pagination link carries the page count as `page-N`;
        // with no pagination (or no match) the thread is treated as one page.
        const lastPageLink = $firstPage('ul.pageNav-main li.pageNav-page:last-child a').attr('href');
        const totalPages = lastPageLink ? Number.parseInt(lastPageLink.match(/page-(\d+)/)?.[1] || '1', 10) : 1;

        // Converts every `article.message` on a loaded page into a feed item.
        // Articles without a `data-content` attribute are skipped.
        const extractPosts = ($: ReturnType<typeof load>): DataItem[] =>
            $('article.message')
                .toArray()
                .flatMap((article) => {
                    const $article = $(article);
                    const postId = $article.attr('data-content')?.replace('post-', '');
                    if (!postId) {
                        return [];
                    }

                    // Prefer the linked user name. When the name is not
                    // rendered as a link, fall back to the article's
                    // `data-author` attribute (the one the post route reads)
                    // so the author and title are not left blank.
                    const author = $article.find('.message-name a').text().trim() || $article.attr('data-author') || '';
                    const postDate = $article.find('time.u-dt').attr('datetime');
                    const content = $article.find('.bbWrapper').html() || '';
                    const postLink = `${threadLink}post-${postId}`;

                    // Get post number from the attribution list (e.g., "#717");
                    // falls back to the numeric post ID when that text is empty.
                    const postNumber = $article.find('.message-attribution-opposite--list li:last-child a').text().trim().replace('#', '') || postId;

                    return {
                        title: `#${postNumber} by ${author}`,
                        link: postLink,
                        guid: postLink,
                        description: processContent(content),
                        pubDate: postDate ? parseDate(postDate) : undefined,
                        author,
                    };
                });

        // Extract posts from the first page
        const allPosts: DataItem[] = [...extractPosts($firstPage)];

        // Fetch the last page if there are multiple pages
        if (totalPages > 1) {
            const lastPageResponse = await ofetch(`${threadLink}page-${totalPages}`, { headers });
            allPosts.push(...extractPosts(load(lastPageResponse)));
        }

        return {
            title: `[F95zone] ${title}`,
            link: threadLink,
            item: allPosts,
        };
    },
};

lib/routes/f95zone/utils.ts

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import { load } from 'cheerio';
2+
3+
/** Element names that survive sanitising; any other element is unwrapped (its children are kept). */
const ALLOWED_TAGS = new Set(['div', 'span', 'p', 'br', 'b', 'strong', 'i', 'em', 'u', 's', 'a', 'img', 'ul', 'ol', 'li', 'blockquote', 'hr', 'pre', 'code']);

/** Attributes kept per element name; elements not listed here lose all attributes. */
const ALLOWED_ATTRS: Record<string, string[]> = {
    a: ['href', 'target', 'rel'],
    img: ['src', 'alt', 'title', 'style'],
    div: ['style'],
    span: ['style'],
};

/**
 * Sanitises the HTML of a forum post for use as a feed item description.
 *
 * Steps, in order: rewrite images to their non-thumbnail URL and drop
 * duplicates, delete `button`/`script`/`style`/`noscript` elements, unwrap
 * every element not in `ALLOWED_TAGS`, strip attributes not listed in
 * `ALLOWED_ATTRS`, and remove `div`s with no content.
 *
 * @param html - Raw HTML of the post body.
 * @returns The cleaned HTML, or an empty string when nothing is left.
 */
export const processContent = (html: string): string => {
    const $ = load(html);

    // Images: resolve each one to a single source URL and keep only the first occurrence.
    const emittedSources = new Set<string>();
    for (const node of $('img').toArray()) {
        const $image = $(node);
        const $anchor = $image.parent('a');

        // Preference order: wrapping link target, then `data-src`, then `src`.
        const candidate = $anchor.attr('href') || $image.attr('data-src') || $image.attr('src') || '';
        const source = candidate.replace('/thumb/', '/');

        const usable = Boolean(source) && !source.startsWith('data:') && !emittedSources.has(source);
        if (!usable) {
            $image.remove();
            continue;
        }

        emittedSources.add(source);
        $image.attr('src', source);
        $image.removeAttr('data-src');
        if ($anchor.length) {
            // The link's target is now the image source, so the wrapper is dropped.
            $anchor.replaceWith($image);
        }
    }

    // Elements that are dropped together with their content.
    $('button, script, style, noscript').remove();

    // Unwrap disallowed elements one at a time. The tree is re-queried after
    // every replacement, and the loop ends once a full scan finds none.
    const findDisallowed = () => $('*')
        .toArray()
        .find((node) => node.type === 'tag' && !ALLOWED_TAGS.has(node.name));
    for (let offender = findDisallowed(); offender; offender = findDisallowed()) {
        const $offender = $(offender);
        $offender.replaceWith($offender.html() || '');
    }

    // Strip every attribute that is not listed for the element's tag name.
    for (const node of $('*').toArray()) {
        if (node.type !== 'tag') {
            continue;
        }
        const keep = new Set(ALLOWED_ATTRS[node.name] || []);
        const present = Object.keys(node.attribs || {});
        for (const attrName of present) {
            if (keep.has(attrName)) {
                continue;
            }
            $(node).removeAttr(attrName);
        }
    }

    // Drop divs whose inner HTML is missing or whitespace only.
    const emptyDivs = $('div').filter((_, node) => {
        const inner = $(node).html();
        return !inner?.trim();
    });
    emptyDivs.remove();

    return $.html() || '';
};

0 commit comments

Comments
 (0)