Skip to content

Commit 57e0cee

Browse files
committed
Add ChatGPT Atlas release notes route
- Add route: /openai/chatgpt-atlas/release-notes
- Parse release notes from OpenAI help center
- Use Puppeteer to bypass Cloudflare protection
- Extract build version, date, and content sections
- Cache results for 24 hours
- Proper resource cleanup (browser.close())
1 parent ad3acba commit 57e0cee

File tree

1 file changed

+175
-0
lines changed

1 file changed

+175
-0
lines changed

lib/routes/openai/chatgpt-atlas.ts

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
import { load } from 'cheerio';
2+
import dayjs from 'dayjs';
3+
4+
import type { DataItem, Route } from '@/types';
5+
import cache from '@/utils/cache';
6+
import puppeteer from '@/utils/puppeteer';
7+
8+
/**
 * Route metadata for the ChatGPT Atlas release-notes feed.
 *
 * The page is rendered through Puppeteer (`requirePuppeteer`), and the handler
 * treats a missing article body as a probable Cloudflare block, so the route
 * is flagged as targeting a site with anti-crawler measures.
 */
export const route: Route = {
    path: '/chatgpt-atlas/release-notes',
    categories: ['program-update'],
    example: '/openai/chatgpt-atlas/release-notes',
    features: {
        requireConfig: false,
        requirePuppeteer: true,
        // Kept consistent with the handler, which names Cloudflare protection
        // as the likely cause when the article body cannot be found.
        antiCrawler: true,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    name: 'ChatGPT Atlas - Release Notes',
    maintainers: ['xbot'],
    handler,
};
24+
25+
/**
 * Builds the ChatGPT Atlas release-notes feed.
 *
 * The help-center article is rendered with Puppeteer, parsed into one feed
 * item per top-level (`h1`) heading, and the parsed result is cached for
 * 24 hours (86400 seconds) under the article URL.
 *
 * @returns The feed object (`title`, `description`, `link`, `item`).
 * @throws Error when the rendered page has no `.article-content` element.
 */
async function handler() {
    const articleUrl = 'https://help.openai.com/en/articles/12591856-chatgpt-atlas-release-notes';

    const cacheIn = await cache.tryGet(
        articleUrl,
        async () => {
            const html = await fetchArticleHtml(articleUrl);
            return parseReleaseNotes(html, articleUrl);
        },
        86400,
        false
    );

    return {
        title: cacheIn.feedTitle,
        description: cacheIn.feedDesc,
        link: articleUrl,
        item: cacheIn.items,
    };
}

/** A release entry that is still being assembled while walking the article. */
interface ReleaseDraft {
    /** Heading text with the build number removed. */
    title: string;
    /** Release date parsed from the heading, when one was recognised. */
    pubDate?: Date;
    /** HTML of the sections that have already been closed. */
    description: string;
    /** Text of the `h2` currently being filled ('' before the first one). */
    section: string;
    /** HTML accumulated for the current section. */
    sectionHtml: string;
}

/**
 * Loads the article in a headless browser and returns the rendered HTML.
 * The browser is closed in `finally` so it is released even when navigation
 * or evaluation throws.
 */
async function fetchArticleHtml(articleUrl: string): Promise<string> {
    const browser = await puppeteer();
    try {
        const page = await browser.newPage();
        await page.setRequestInterception(true);
        page.on('request', (request) => {
            // Only the document and its scripts are allowed through; every
            // other resource type is aborted.
            const resourceType = request.resourceType();
            if (resourceType === 'document' || resourceType === 'script') {
                request.continue();
            } else {
                request.abort();
            }
        });
        await page.goto(articleUrl, {
            waitUntil: 'domcontentloaded',
        });
        return await page.evaluate(() => document.documentElement.innerHTML);
    } finally {
        await browser.close();
    }
}

/** Escapes plain text so it can be embedded in the generated HTML description. */
function escapeHtml(text: string): string {
    return text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');
}

/** Extracts a date such as "October 21st, 2025" from a release heading. */
function parseHeadingDate(heading: string): Date | undefined {
    const dateMatch = heading.match(/(\w+\s+\d+[stndrh]*,\s+\d{4})/i);
    if (!dateMatch) {
        return undefined;
    }
    // Collapse whitespace and drop the ordinal suffix so the string parses both
    // with the format list below (which needs dayjs's customParseFormat plugin)
    // and with dayjs's plain string parsing when that plugin is not extended.
    const normalized = dateMatch[1].replace(/\s+/g, ' ').replace(/(\d+)[stndrh]+(?=,)/i, '$1');
    const parsedDate = dayjs(normalized, ['MMMM Do, YYYY', 'MMMM D, YYYY'], 'en');
    return parsedDate.isValid() ? parsedDate.toDate() : undefined;
}

/** Starts a new draft from an `h1` heading, splitting off the build number. */
function startRelease(heading: string): ReleaseDraft {
    const buildMatch = heading.match(/(?:Build\s*Number\s*:|Build\s*:)\s*(\d+\.\d+\.\d+\.\d+)/i);
    return {
        title: buildMatch ? heading.replace(buildMatch[0], '').trim() : heading,
        pubDate: parseHeadingDate(heading),
        description: buildMatch ? `<p>Build: ${buildMatch[1]}</p>` : '',
        section: '',
        sectionHtml: '',
    };
}

/**
 * Moves the current section into the draft's description and resets it.
 * Content that appeared before any `h2` is kept, just without a heading.
 */
function closeSection(draft: ReleaseDraft): void {
    if (draft.section) {
        draft.description += `<h3>${escapeHtml(draft.section)}</h3>`;
    }
    draft.description += draft.sectionHtml;
    draft.section = '';
    draft.sectionHtml = '';
}

/** Converts a finished draft into a feed item. */
function finishRelease(draft: ReleaseDraft, articleUrl: string): DataItem {
    closeSection(draft);
    return {
        guid: articleUrl + '#' + (draft.pubDate ? draft.pubDate.getTime() : draft.title),
        title: draft.title,
        link: articleUrl,
        pubDate: draft.pubDate,
        description: draft.description,
    };
}

/**
 * Parses the rendered article into feed metadata and items.
 *
 * Direct children of `.article-content` are walked in order: an `h1` starts a
 * new release, an `h2` starts a section inside it, and `ul` / `p` elements
 * contribute to the current section.
 *
 * @throws Error when `.article-content` is missing from the HTML.
 */
function parseReleaseNotes(html: string, articleUrl: string): { feedTitle: string; feedDesc: string; items: DataItem[] } {
    const $ = load(html);
    const articleContent = $('.article-content');

    if (articleContent.length === 0) {
        throw new Error('Failed to find article content. Possible Cloudflare protection.');
    }

    const feedTitle = $('h1').first().text();
    const feedDesc = 'ChatGPT Atlas Release Notes';
    const items: DataItem[] = [];
    let current: ReleaseDraft | undefined;

    for (const element of articleContent.children().toArray()) {
        const $elem = $(element);
        const text = $elem.text().trim();

        switch (element.tagName) {
            case 'h1':
                // Releases whose title ended up empty are not emitted.
                if (current?.title) {
                    items.push(finishRelease(current, articleUrl));
                }
                current = startRelease(text);
                break;
            case 'h2':
                if (current) {
                    closeSection(current);
                    current.section = text;
                }
                break;
            case 'ul':
                if (current) {
                    const listItems = $elem
                        .find('li')
                        .toArray()
                        .map((li) => {
                            const $li = $(li);
                            const $paragraphs = $li.find('p.no-margin');
                            // Prefer the text of `p.no-margin` descendants when present.
                            const liText = ($paragraphs.length > 0 ? $paragraphs : $li).text().trim();
                            return liText ? `<li>${escapeHtml(liText)}</li>` : '';
                        })
                        .filter(Boolean);
                    if (listItems.length > 0) {
                        current.sectionHtml += `<ul>${listItems.join('')}</ul>`;
                    }
                }
                break;
            case 'p':
                // Paragraphs mentioning the LGPL bundle link are left out of the feed.
                if (current?.title && text && !text.toLowerCase().includes('public link to lgpl bundle')) {
                    current.sectionHtml += `<p>${escapeHtml(text)}</p>`;
                }
                break;
            default:
                break;
        }
    }

    if (current?.title) {
        items.push(finishRelease(current, articleUrl));
    }

    return { feedTitle, feedDesc, items };
}

0 commit comments

Comments
 (0)