Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 42 additions & 74 deletions lib/routes/openai/chatgpt.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
import { load } from 'cheerio';
import dayjs from 'dayjs';
import isSameOrBefore from 'dayjs/plugin/isSameOrBefore.js';

import { config } from '@/config';
import type { Route } from '@/types';
import type { DataItem, Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';

dayjs.extend(isSameOrBefore);
import ofetch from '@/utils/ofetch';

export const route: Route = {
path: '/chatgpt/release-notes',
categories: ['program-update'],
example: '/openai/chatgpt/release-notes',
parameters: {},
features: {
requireConfig: false,
requirePuppeteer: false,
Expand All @@ -23,7 +19,7 @@ export const route: Route = {
supportScihub: false,
},
name: 'ChatGPT - Release Notes',
maintainers: [],
maintainers: ['xbot'],
handler,
};

Expand All @@ -33,79 +29,51 @@ async function handler() {
const cacheIn = await cache.tryGet(
articleUrl,
async () => {
const returns = [];

const pageResponse = await got({
method: 'get',
url: articleUrl,
});

const $ = load(pageResponse.data);
const page = JSON.parse($('script#__NEXT_DATA__').text()); // The page appears to be rendered with Next.js, so its ready-made JSON data can be parsed directly instead of using cheerio to parse the HTML of the main content

const feedTitle = page.props.pageProps.articleContent.title;
const feedDesc = page.props.pageProps.articleContent.description;
const $author = page.props.pageProps.articleContent.author;
const authorName = $author.name;

const $blocks = page.props.pageProps.articleContent.blocks;

const anchorDay = dayjs();
let heading = null,
articleObj = {};
let year = anchorDay.year();
let prevMonth = -1;

for (const block of $blocks) {
const text = (block.text || '').trim();
if (!text) {
continue;
}
if (block.type === 'subheading') {
if (heading !== null) {
articleObj.description = articleObj.description.trim().replaceAll('\n', '<br/>');
returns.push(articleObj);
articleObj = {};
}
const response = await ofetch(articleUrl);

heading = text;
const $ = load(response);
const articleContent = $('.article-content');

articleObj.title = heading;
articleObj.author = authorName;
articleObj.category = 'ChatGPT';
articleObj.link = articleUrl + '#' + block.idAttribute;
articleObj.guid = articleUrl + '#' + block.idAttribute;
articleObj.description = '';
if (articleContent.length === 0) {
throw new Error('Failed to find article content.');
}

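// The ChatGPT Release Notes page currently does not include the year, so it has to be guessed
// The current regular expression only supports the format (English month name + space + day number)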
const matchesPubDate = heading.match(/\((\w+\s+\d{1,2})\)$/);
// Implementation: when the entries cross a year boundary, decrement the year by 1
if (matchesPubDate !== null) {
const curMonth = 1 + 'Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec'.split(',').indexOf(matchesPubDate[1].slice(0, 3));
if (prevMonth !== -1 && prevMonth < curMonth) {
year--; // Year rollover: the previous month number is smaller than the current one; the initial prevMonth == -1 case is excluded
const feedTitle = $('h1').first().text();
const feedDesc = 'ChatGPT Release Notes';

const items = $('h1', articleContent)
.toArray()
.map((element) => {
const $h1 = $(element);
const text = $h1.text().trim();

const dateMatch = text.match(/(\w+\s+\d+[stndrh]*,\s+\d{4})/i);
let pubDate: Date | undefined;
if (dateMatch) {
const dateStr = dateMatch[1];
const parsedDate = dayjs(dateStr, ['MMMM Do, YYYY', 'MMMM D, YYYY'], 'en');
if (parsedDate.isValid()) {
pubDate = parsedDate.toDate();
}

prevMonth = curMonth;
const pubDay = dayjs(`${matchesPubDate[1]}, ${year}`, ['MMMM D, YYYY', 'MMM D, YYYY'], 'en', true);
// Convert the ISO (GMT) time string, via string replacement, into its US/Pacific (PST) time zone representation
articleObj.pubDate = dayjs(pubDay.toISOString().replace(/\.\d{3}Z$/, '-08:00'));

const linkAnchor = pubDay.format('YYYY_MM_DD');
articleObj.guid = articleUrl + '#' + linkAnchor;
}
} else {
articleObj.description += block.text.trim() + '\n\n';
}
}

if (heading !== null) {
articleObj.description = articleObj.description.trim().replaceAll('\n', '<br/>');
returns.push(articleObj);
}

return { feedTitle, feedDesc, items: returns };
const content = $h1
.nextUntil('h1')
.toArray()
.map((el) => $(el).prop('outerHTML'))
.join('');
const description = content;

return {
guid: `${articleUrl}#${pubDate ? pubDate.getTime() : text}`,
title: text,
link: articleUrl,
pubDate,
description,
};
}) as DataItem[];

return { feedTitle, feedDesc, items };
},
config.cache.routeExpire,
false
Expand Down