Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 44 additions & 71 deletions lib/routes/openai/chatgpt.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
import { load } from 'cheerio';
import dayjs from 'dayjs';
import isSameOrBefore from 'dayjs/plugin/isSameOrBefore.js';

import { config } from '@/config';
import type { Route } from '@/types';
import type { DataItem, Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';

dayjs.extend(isSameOrBefore);
import ofetch from '@/utils/ofetch';

export const route: Route = {
path: '/chatgpt/release-notes',
categories: ['program-update'],
example: '/openai/chatgpt/release-notes',
parameters: {},
features: {
requireConfig: false,
requirePuppeteer: false,
Expand All @@ -23,7 +19,7 @@ export const route: Route = {
supportScihub: false,
},
name: 'ChatGPT - Release Notes',
maintainers: [],
maintainers: ['xbot'],
handler,
};

Expand All @@ -33,79 +29,56 @@ async function handler() {
const cacheIn = await cache.tryGet(
articleUrl,
async () => {
const returns = [];

const pageResponse = await got({
method: 'get',
url: articleUrl,
});

const $ = load(pageResponse.data);
const page = JSON.parse($('script#__NEXT_DATA__').text()); // 页面貌似是用 Next 渲染的,有现成的 JSON 数据可以直接 parse,而避免用 cheerio 去解析主体内容的 HTML
const response = await ofetch(articleUrl);

const feedTitle = page.props.pageProps.articleContent.title;
const feedDesc = page.props.pageProps.articleContent.description;
const $author = page.props.pageProps.articleContent.author;
const authorName = $author.name;
const $ = load(response);
const articleContent = $('.article-content');

const $blocks = page.props.pageProps.articleContent.blocks;

const anchorDay = dayjs();
let heading = null,
articleObj = {};
let year = anchorDay.year();
let prevMonth = -1;
if (articleContent.length === 0) {
throw new Error('Failed to find article content.');
}

for (const block of $blocks) {
const text = (block.text || '').trim();
if (!text) {
continue;
}
if (block.type === 'subheading') {
if (heading !== null) {
articleObj.description = articleObj.description.trim().replaceAll('\n', '<br/>');
returns.push(articleObj);
articleObj = {};
const feedTitle = $('h1').first().text();
const feedDesc = 'ChatGPT Release Notes';

const items = $('h1', articleContent)
.toArray()
.map((element) => {
const $h1 = $(element);
const text = $h1.text().trim();

const dateMatch = text.match(/(\w+\s+\d+[stndrh]*,\s+\d{4})/i);
let pubDate: Date | undefined;
if (dateMatch) {
const dateStr = dateMatch[1];
const parsedDate = dayjs(dateStr, ['MMMM Do, YYYY', 'MMMM D, YYYY'], 'en');
if (parsedDate.isValid()) {
pubDate = parsedDate.toDate();
}
}

heading = text;
const $nextSiblings = $h1.nextUntil('h1');
const $firstH2 = $nextSiblings.filter('h2').first();
const firstH2Text = $firstH2.text().trim();

articleObj.title = heading;
articleObj.author = authorName;
articleObj.category = 'ChatGPT';
articleObj.link = articleUrl + '#' + block.idAttribute;
articleObj.guid = articleUrl + '#' + block.idAttribute;
articleObj.description = '';
const title = firstH2Text || text;

// 目前 ChatGPT Release Notes 页面并没有写入年份,所以只能靠猜
// 当前的正则表达式只支持 (月份英文+空格+日期数字) 的格式
const matchesPubDate = heading.match(/\((\w+\s+\d{1,2})\)$/);
// 实现:当年度交替时,年份减去 1
if (matchesPubDate !== null) {
const curMonth = 1 + 'Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec'.split(',').indexOf(matchesPubDate[1].slice(0, 3));
if (prevMonth !== -1 && prevMonth < curMonth) {
year--; // 年度交替:上一个月份数小于当前月份数;但排除 prevMonth==-1 的初始化情况
}

prevMonth = curMonth;
const pubDay = dayjs(`${matchesPubDate[1]}, ${year}`, ['MMMM D, YYYY', 'MMM D, YYYY'], 'en', true);
// 从 ISO(GMT)时间的字符串(使用字符串替换的方式)替换成 US/Pacific PST 时区的表达
articleObj.pubDate = dayjs(pubDay.toISOString().replace(/\.\d{3}Z$/, '-08:00'));

const linkAnchor = pubDay.format('YYYY_MM_DD');
articleObj.guid = articleUrl + '#' + linkAnchor;
}
} else {
articleObj.description += block.text.trim() + '\n\n';
}
}
const content = $nextSiblings
.toArray()
.map((el) => $(el).prop('outerHTML'))
.join('');
const description = content;

if (heading !== null) {
articleObj.description = articleObj.description.trim().replaceAll('\n', '<br/>');
returns.push(articleObj);
}
return {
guid: `${articleUrl}#${pubDate ? pubDate.getTime() : text}`,
title,
link: articleUrl,
pubDate,
description,
};
}) as DataItem[];

return { feedTitle, feedDesc, items: returns };
return { feedTitle, feedDesc, items };
},
config.cache.routeExpire,
false
Expand Down