Skip to content

Commit de48ec5

Browse files
authored
fix(route): CCDI.gov.cn route broken (#10879)
* fix ccdi site * fix doc * upgrade rate limit countermeasure and sort the router alphabetically * set anticrawler flag in docs
1 parent e48187f commit de48ec5

File tree

8 files changed

+113
-56
lines changed

8 files changed

+113
-56
lines changed

docs/government.md

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1337,6 +1337,16 @@ pageClass: routes
13371337

13381338
## 中央纪委国家监委
13391339

1340-
### 审查调查
1340+
### 要闻
13411341

1342-
<Route author="LogicJake" example="/ccdi/scdc" path="/ccdi/scdc"/>
1342+
<Route author="bigfei" example="/gov/ccdi/yaowenn" path="/gov/ccdi/:path+" :paramsDesc="['路径,默认为 要闻']" anticrawler="1">
1343+
1344+
::: tip 提示
1345+
1346+
路径处填写对应页面 URL 中 `http://www.ccdi.gov.cn/` 后的字段。下面是一个例子。
1347+
1348+
若订阅 [审查调查 > 中管干部 > 执纪审查](https://www.ccdi.gov.cn/scdcn/zggb/zjsc/) 则将对应页面 URL <https://www.ccdi.gov.cn/scdcn/zggb/zjsc/> 中 `http://www.ccdi.gov.cn/` 后的字段 `scdcn/zggb/zjsc` 作为路径填入。此时路由为 [`/gov/ccdi/scdcn/zggb/zjsc`](https://rsshub.app/gov/ccdi/scdcn/zggb/zjsc)
1349+
1350+
:::
1351+
1352+
</Route>

lib/router.js

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1801,9 +1801,6 @@ router.get('/cninfo/announcement/:column/:code/:orgId/:category?/:search?', lazy
18011801
// 金十数据
18021802
router.get('/jinshi/index', lazyloadRouteHandler('./routes/jinshi/index'));
18031803

1804-
// 中央纪委国家监委网站
1805-
router.get('/ccdi/scdc', lazyloadRouteHandler('./routes/ccdi/scdc'));
1806-
18071804
// 中华人民共和国农业农村部
18081805
router.get('/gov/moa/sjzxfb', lazyloadRouteHandler('./routes/gov/moa/sjzxfb'));
18091806
router.get('/gov/moa/:suburl(.*)', lazyloadRouteHandler('./routes/gov/moa/moa'));

lib/routes/ccdi/scdc.js

Lines changed: 0 additions & 51 deletions
This file was deleted.

lib/v2/gov/ccdi/index.js

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/* eslint-disable no-await-in-loop */
2+
const { rootUrl, parseNewsList, parseArticle } = require('./utils');
3+
4+
/**
 * Return a pseudo-random integer between two bounds.
 *
 * The bounds are rounded inward first (`min` up, `max` down); the result is
 * then drawn from the half-open range [ceil(min), floor(max)).
 *
 * @param {number} min - Lower bound; inclusive after rounding up.
 * @param {number} max - Upper bound; exclusive after rounding down.
 * @returns {number} An integer `n` with ceil(min) <= n < floor(max).
 */
const getRandomInt = (min, max) => {
    // Work on local copies instead of reassigning the parameters.
    const lower = Math.ceil(min);
    const upper = Math.floor(max);
    return Math.floor(Math.random() * (upper - lower) + lower);
};
9+
10+
module.exports = async (ctx) => {
11+
const defaultPath = '/yaowenn/';
12+
13+
let pathname = ctx.path.replace(/(^\/ccdi|\/$)/g, '');
14+
pathname = pathname === '' ? defaultPath : pathname.endsWith('/') ? pathname : pathname + '/';
15+
const currentUrl = `${rootUrl}${pathname}`;
16+
17+
const { list, title } = await parseNewsList(currentUrl, '.list_news_dl li', ctx);
18+
const items = [];
19+
20+
for (const item of list) {
21+
items.push(await parseArticle(item, ctx));
22+
// sleep randomly for anti rate limit on ccdi site
23+
await new Promise((r) => setTimeout(r, getRandomInt(1000, 2500)));
24+
}
25+
26+
ctx.state.data = {
27+
title,
28+
link: currentUrl,
29+
item: items,
30+
};
31+
};

lib/v2/gov/ccdi/utils.js

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
const cheerio = require('cheerio');
2+
const { parseDate } = require('@/utils/parse-date');
3+
const got = require('@/utils/got');
4+
const timezone = require('@/utils/timezone');
5+
6+
const { CookieJar } = require('tough-cookie');
7+
// Single cookie jar handed to every `got` request in this module
// (list page and article pages).
const cookieJar = new CookieJar();
8+
9+
// Site name; used as the replacement text for the feed title and as the
// `??` fallback for the article author.
const owner = '中央纪委国家监委网站';
10+
// Site origin; exported so the route handler can build list-page URLs.
const rootUrl = 'https://www.ccdi.gov.cn';
11+
12+
/**
 * Fetch a list page and extract its entries and the feed title.
 *
 * @param {string} url - Absolute URL of the list page; also the base for
 *   resolving relative article links.
 * @param {string} selector - Selector matching one element per list entry.
 * @param {object} ctx - Request context; `ctx.query.limit` caps the number of
 *   entries (default 8).
 * @returns {Promise<{list: Array<{title: string, link: string, pubDate: *}>, title: string}>}
 */
const parseNewsList = async (url, selector, ctx) => {
    const response = await got(url, { cookieJar });
    const $ = cheerio.load(response.data);

    // Parse the limit as base 10 and fall back to the default when it is
    // missing, non-numeric, zero or negative; otherwise `slice` would return
    // an empty or unexpectedly truncated list.
    const requestedLimit = Number.parseInt(ctx.query.limit, 10);
    const limit = Number.isInteger(requestedLimit) && requestedLimit > 0 ? requestedLimit : 8;

    const list = $(selector)
        .slice(0, limit)
        .toArray()
        .map((element) => {
            const entry = $(element);
            const anchor = entry.find('a').first();
            return {
                title: anchor.text().trim(),
                // Resolve relative hrefs against the list page URL.
                link: new URL(anchor.attr('href'), url).href,
                pubDate: parseDate(entry.find('span').text(), 'YYYY-MM-DD'),
            };
        });

    // Replaces the first run of non-newline characters of the location text
    // with the site name.
    // NOTE(review): the pattern looks broader than intended — confirm against
    // the page markup.
    const title = $('.other_Location')
        .text()
        .replace(/(.+)/, owner);
    return { list, title };
};
31+
32+
/**
 * Fetch one article page and fill in the entry's author, date and HTML body.
 *
 * The work is wrapped in `ctx.cache.tryGet`, keyed by the article link.
 *
 * @param {object} item - List entry with at least `link` and `pubDate`; it is
 *   updated in place with `author`, `pubDate` and `description`.
 * @param {object} ctx - Request context providing `ctx.cache.tryGet`.
 * @returns {Promise<object>} Whatever `ctx.cache.tryGet` resolves to; the
 *   callback returns the updated `item`.
 */
const parseArticle = async (item, ctx) =>
    await ctx.cache.tryGet(item.link, async () => {
        const response = await got(item.link, { cookieJar });
        const $ = cheerio.load(response.data);

        const title = $('.daty').text().trim();
        // `(.*)` with the `s` flag matches every string, including the empty
        // one, so the capture is never nullish and `??` could never reach the
        // fallbacks. `||` makes an empty capture fall back as intended.
        // NOTE(review): both patterns are the same catch-all as shown here;
        // presumably they were meant to capture separate source and time
        // segments — confirm against the page markup.
        item.author = title.match(/(.*)/s)?.[1].trim() || owner;
        item.pubDate = timezone(parseDate(title.match(/(.*)/s)?.[1].trim() || item.pubDate), +8);

        // Change the img src from relative to absolute for a better compatibility
        $('.content')
            .find('img')
            .each((_, el) => {
                const image = $(el);
                const src = image.attr('src');
                // Skip images without a src; resolving `undefined` would
                // produce a bogus ".../undefined" URL.
                if (src) {
                    image.attr('src', new URL(src, item.link).href);
                }
                // oldsrc is causing freshrss imageproxy not to work correctly
                image.removeAttr('oldsrc').removeAttr('alt');
            });
        item.description = $('.content').html();
        return item;
    });
52+
53+
module.exports = {
54+
rootUrl,
55+
parseNewsList,
56+
parseArticle,
57+
};

lib/v2/gov/maintainer.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
module.exports = {
22
// ministry
3+
'/ccdi/:path+': ['bigfei'],
34
'/cmse/xwzx/zhxw': ['nczitzk'],
45
'/cmse/xwzx/yzjz': ['nczitzk'],
56
'/cmse/gfgg': ['nczitzk'],

lib/v2/gov/radar.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,17 @@ module.exports = {
297297
},
298298
],
299299
},
300+
'ccdi.gov.cn': {
301+
_name: '中央纪委国家监委',
302+
www: [
303+
{
304+
title: '要闻',
305+
docs: 'https://docs.rsshub.app/government.html#zhong-yang-ji-wei-guo-jia-jian-wei-yao-wen',
306+
source: ['/*'],
307+
target: (params, url) => `/gov/ccdi/${new URL(url).href.match(/ccdi\.gov\.cn\/(.*)/)[1]}`,
308+
},
309+
],
310+
},
300311
'cmse.gov.cn': {
301312
_name: '中国载人航天',
302313
www: [

lib/v2/gov/router.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
module.exports = function (router) {
22
// ministry
3+
router.get(/ccdi(\/[\w/-]+)?/, require('./ccdi'));
34
router.get('/cmse/fxrw', require('./cmse/fxrw'));
45
router.get(/cmse(\/[\w/-]+)?/, require('./cmse'));
56
router.get(/cnnic(\/[\w/-]+)?/, require('./cnnic'));

0 commit comments

Comments
 (0)