Skip to content

Commit 70576d0

Browse files
authored
feat(route): fetch full content for changelog items (#20953)
* feat(perplexity): fetch full content for changelog items
  - Fetch the full article content for each changelog item
  - Use Puppeteer to render JavaScript-rendered pages
  - Remove the hero image from the description
  - Cache parsed content using cache.tryGet
* fix(perplexity): resolve review comments
  - Simplify the CSS selector for content extraction
  - Fix the dangling-promises issue by properly returning and using Promise results
  - Ensure the cache mechanism works correctly by returning new objects instead of mutating
  - Remove redundant fallback logic
1 parent 803c6fc commit 70576d0

File tree

1 file changed

+46
-4
lines changed

1 file changed

+46
-4
lines changed

lib/routes/perplexity/changelog.ts

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import type { Context } from 'hono';
33

44
import type { Data, DataItem, Route } from '@/types';
55
import { ViewType } from '@/types';
6+
import cache from '@/utils/cache';
67
import logger from '@/utils/logger';
78
import { parseDate } from '@/utils/parse-date';
89
import { getPuppeteerPage } from '@/utils/puppeteer';
@@ -15,17 +16,16 @@ export const handler = async (ctx: Context): Promise<Data> => {
1516

1617
logger.http(`Fetching Perplexity changelog from ${targetUrl}`);
1718

18-
const { page, destory } = await getPuppeteerPage(targetUrl, {
19+
const { page, destory, browser } = await getPuppeteerPage(targetUrl, {
1920
onBeforeLoad: async (page) => {
2021
await page.setRequestInterception(true);
2122
page.on('request', (request) => {
2223
request.resourceType() === 'document' ? request.continue() : request.abort();
2324
});
2425
},
2526
});
26-
const html = await page.evaluate(() => document.documentElement.innerHTML);
27-
await destory();
2827

28+
const html = await page.evaluate(() => document.documentElement.innerHTML);
2929
const $ = load(html);
3030
const language = $('html').attr('lang') ?? 'en';
3131

@@ -94,11 +94,53 @@ export const handler = async (ctx: Context): Promise<Data> => {
9494
})
9595
.filter((item): item is DataItem => item !== null);
9696

97+
// Fetch full content for each item using the same browser session
98+
const resultItems = await Promise.all(
99+
items.slice(0, limit).map(async (item) => {
100+
if (!item.link) {
101+
return item;
102+
}
103+
return await cache.tryGet(item.link, async () => {
104+
logger.http(`Fetching full content for ${item.link!}`);
105+
106+
// Create a new page in the same browser session
107+
const contentPage = await browser.newPage();
108+
109+
// Set request interception for this page
110+
await contentPage.setRequestInterception(true);
111+
contentPage.on('request', (request) => {
112+
request.resourceType() === 'document' ? request.continue() : request.abort();
113+
});
114+
115+
// Navigate to the item link
116+
await contentPage.goto(item.link!, { waitUntil: 'domcontentloaded' });
117+
118+
const contentHtml = await contentPage.evaluate(() => document.documentElement.innerHTML);
119+
await contentPage.close();
120+
121+
const $content = load(contentHtml);
122+
123+
// Find the main article content - RichTextContainer with substantial text
124+
// Look for elements with framer-text class containing actual content
125+
const contentContainer = $content('div#main > div > div > div[data-framer-component-type="RichTextContainer"]').first();
126+
const fullContent = contentContainer.html()?.trim() || '';
127+
128+
return {
129+
...item,
130+
description: fullContent || item.description,
131+
};
132+
});
133+
})
134+
);
135+
136+
// Close the browser session after all requests are done
137+
await destory();
138+
97139
return {
98140
title: $('title').text() || 'Perplexity Changelog',
99141
description: $('meta[name="description"], meta[property="og:description"]').first().attr('content') || 'Latest updates and changes from Perplexity',
100142
link: targetUrl,
101-
item: items.slice(0, limit),
143+
item: resultItems,
102144
allowEmpty: true,
103145
image: $('meta[property="og:image"]').attr('content'),
104146
language: language as 'en',

0 commit comments

Comments
 (0)