Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -1,19 +1,15 @@
  
    import { load } from 'cheerio';

    import dayjs from 'dayjs';

    import isSameOrBefore from 'dayjs/plugin/isSameOrBefore.js';

    import { config } from '@/config';

    import type { Route } from '@/types';

    import type { DataItem, Route } from '@/types';

    import cache from '@/utils/cache';

    import got from '@/utils/got';

    dayjs.extend(isSameOrBefore);

    import ofetch from '@/utils/ofetch';

    export const route: Route = {

        path: '/chatgpt/release-notes',

        categories: ['program-update'],

        example: '/openai/chatgpt/release-notes',

        parameters: {},

        features: {

            requireConfig: false,

            requirePuppeteer: false,

    @@ -23,7 +19,7 @@ export const route: Route = {
  
            supportScihub: false,

        },

        name: 'ChatGPT - Release Notes',

        maintainers: [],

        maintainers: ['xbot'],

        handler,

    };

    @@ -33,79 +29,56 @@ async function handler() {
  
        const cacheIn = await cache.tryGet(

            articleUrl,

            async () => {

                const returns = [];

                const pageResponse = await got({

                    method: 'get',

                    url: articleUrl,

                });

                const $ = load(pageResponse.data);

                const page = JSON.parse($('script#__NEXT_DATA__').text()); // 页面貌似是用 Next 渲染的，有现成的 JSON 数据可以直接 parse，而避免用 cheerio 去解析主体内容的 HTML

                const response = await ofetch(articleUrl);

                const feedTitle = page.props.pageProps.articleContent.title;

                const feedDesc = page.props.pageProps.articleContent.description;

                const $author = page.props.pageProps.articleContent.author;

                const authorName = $author.name;

                const $ = load(response);

                const articleContent = $('.article-content');

                const $blocks = page.props.pageProps.articleContent.blocks;

                const anchorDay = dayjs();

                let heading = null,

                    articleObj = {};

                let year = anchorDay.year();

                let prevMonth = -1;

                if (articleContent.length === 0) {

                    throw new Error('Failed to find article content.');

                }

                for (const block of $blocks) {

                    const text = (block.text || '').trim();

                    if (!text) {

                        continue;

                    }

                    if (block.type === 'subheading') {

                        if (heading !== null) {

                            articleObj.description = articleObj.description.trim().replaceAll('\n', '<br/>');

                            returns.push(articleObj);

                            articleObj = {};

                const feedTitle = $('h1').first().text();

                const feedDesc = 'ChatGPT Release Notes';

                const items = $('h1', articleContent)

                    .toArray()

                    .map((element) => {

                        const $h1 = $(element);

                        const text = $h1.text().trim();

                        const dateMatch = text.match(/(\w+\s+\d+[stndrh]*,\s+\d{4})/i);

                        let pubDate: Date | undefined;

                        if (dateMatch) {

                            const dateStr = dateMatch[1];

                            const parsedDate = dayjs(dateStr, ['MMMM Do, YYYY', 'MMMM D, YYYY'], 'en');

                            if (parsedDate.isValid()) {

                                pubDate = parsedDate.toDate();

                            }

                        }

                        heading = text;

                        const $nextSiblings = $h1.nextUntil('h1');

                        const $firstH2 = $nextSiblings.filter('h2').first();

                        const firstH2Text = $firstH2.text().trim();

                        articleObj.title = heading;

                        articleObj.author = authorName;

                        articleObj.category = 'ChatGPT';

                        articleObj.link = articleUrl + '#' + block.idAttribute;

                        articleObj.guid = articleUrl + '#' + block.idAttribute;

                        articleObj.description = '';

                        const title = firstH2Text || text;

                        // 目前 ChatGPT Release Notes 页面并没有写入年份，所以只能靠猜

                        // 当前的正则表达式只支持 (月份英文+空格+日期数字) 的格式

                        const matchesPubDate = heading.match(/\((\w+\s+\d{1,2})\)$/);

                        // 实现：当年度交替时，年份减去 1

                        if (matchesPubDate !== null) {

                            const curMonth = 1 + 'Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec'.split(',').indexOf(matchesPubDate[1].slice(0, 3));

                            if (prevMonth !== -1 && prevMonth < curMonth) {

                                year--; // 年度交替：上一个月份数小于当前月份数；但排除 prevMonth==-1 的初始化情况

                            }

                            prevMonth = curMonth;

                            const pubDay = dayjs(`${matchesPubDate[1]}, ${year}`, ['MMMM D, YYYY', 'MMM D, YYYY'], 'en', true);

                            // 从 ISO（GMT）时间的字符串（使用字符串替换的方式）替换成 US/Pacific PST 时区的表达

                            articleObj.pubDate = dayjs(pubDay.toISOString().replace(/\.\d{3}Z$/, '-08:00'));

                            const linkAnchor = pubDay.format('YYYY_MM_DD');

                            articleObj.guid = articleUrl + '#' + linkAnchor;

                        }

                    } else {

                        articleObj.description += block.text.trim() + '\n\n';

                    }

                }

                        const content = $nextSiblings

                            .toArray()

                            .map((el) => $(el).prop('outerHTML'))

                            .join('');

                        const description = content;

                if (heading !== null) {

                    articleObj.description = articleObj.description.trim().replaceAll('\n', '<br/>');

                    returns.push(articleObj);

                }

                        return {

                            guid: `${articleUrl}#${pubDate ? pubDate.getTime() : text}`,

                            title,

                            link: articleUrl,

                            pubDate,

                            description,

                        };

                    }) as DataItem[];

                return { feedTitle, feedDesc, items: returns };

                return { feedTitle, feedDesc, items };

            },

            config.cache.routeExpire,

            false

fix(route/openai): fix chatgpt release notes extraction #20971

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open

xbot wants to merge 5 commits into DIYgod:master from xbot:fix/openai-chatgpt-release-notes

+44 −71

lib/routes/openai/chatgpt.ts

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

fix(route/openai): fix chatgpt release notes extraction #20971

Diff view

Diff view

There are no files selected for viewing

fix(route/openai): fix chatgpt release notes extraction #20971

Are you sure you want to change the base?

fix(route/openai): fix chatgpt release notes extraction #20971

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing