|
| 1 | +import type { Cheerio, CheerioAPI } from 'cheerio'; |
| 2 | +import { load } from 'cheerio'; |
| 3 | +import type { Element } from 'domhandler'; |
| 4 | +import type { Context } from 'hono'; |
| 5 | + |
| 6 | +import type { Data, DataItem, Route } from '@/types'; |
| 7 | +import { ViewType } from '@/types'; |
| 8 | +import cache from '@/utils/cache'; |
| 9 | +import ofetch from '@/utils/ofetch'; |
| 10 | +import { parseDate } from '@/utils/parse-date'; |
| 11 | + |
/**
 * Builds the CoStar "Press Releases" feed.
 *
 * Fetches the press-release listing page (optionally narrowed by the raw
 * query string passed as the `filter` route parameter), extracts up to
 * `limit` entries from it, and then enriches each entry with the title, body
 * and publication/modification dates found on its detail page. Detail pages
 * are fetched through the cache, keyed by the item link.
 *
 * @param ctx - Hono request context; reads the `filter` route parameter and
 *   the `limit` query parameter (defaults to 10 when absent or invalid).
 * @returns The feed metadata together with the collected items.
 */
export const handler = async (ctx: Context): Promise<Data> => {
    const { filter } = ctx.req.param();

    // A non-numeric or negative `limit` would otherwise turn into
    // `slice(0, NaN)` / `slice(0, -n)` and silently drop items.
    const defaultLimit = 10;
    const parsedLimit = Number.parseInt(ctx.req.query('limit') ?? String(defaultLimit), 10);
    const limit = Number.isNaN(parsedLimit) || parsedLimit < 0 ? defaultLimit : parsedLimit;

    const baseUrl = 'https://www.costar.com';
    const targetUrl = new URL(`products/benchmark/resources/press-releases${filter ? `?${filter}` : ''}`, baseUrl).href;

    const response = await ofetch(targetUrl);
    const $: CheerioAPI = load(response);
    const language = $('html').attr('lang') ?? 'en';

    const listItems: DataItem[] = $('div.views-row article')
        .slice(0, limit)
        .toArray()
        .map((el): DataItem => {
            const $el: Cheerio<Element> = $(el);
            const $aEl: Cheerio<Element> = $el.find('a.coh-link').first();
            const $containers: Cheerio<Element> = $el.find('div.coh-container');

            const title = $aEl.text().trim();
            const description = $containers.eq(3).html() ?? undefined;
            // Trim so that a whitespace-only cell is treated as "no date".
            const pubDateStr = $containers.eq(4).text().trim();
            const href = $aEl.attr('href');
            // Resolve against the site root: a relative href cannot be fetched
            // later and is not a usable feed link. Absolute hrefs are unchanged.
            const link = href ? new URL(href, baseUrl).href : undefined;
            const categories = [
                ...new Set(
                    $el
                        .find('div.coh-style-tags a')
                        .toArray()
                        .map((tagEl) => $(tagEl).text())
                        .filter(Boolean)
                ),
            ];
            // The listing exposes a single date, used for both fields.
            const date = pubDateStr ? parseDate(pubDateStr) : undefined;

            return {
                title,
                description,
                pubDate: date,
                link,
                category: categories,
                content: {
                    html: description,
                    text: description,
                },
                updated: date,
                language,
            };
        });

    const items: DataItem[] = await Promise.all(
        listItems.map((item) => {
            const link = item.link;

            // Without a link there is no detail page to fetch.
            if (!link) {
                return item;
            }

            return cache.tryGet(link, async (): Promise<DataItem> => {
                const detailResponse = await ofetch(link);
                const $$: CheerioAPI = load(detailResponse);

                // Keep the listing title when the detail page has no heading,
                // instead of overwriting it with an empty string.
                const title = $$('h1.coh-heading').text().trim() || item.title;
                const description = $$('div.coh-body').html() ?? item.description;
                // Dates are read from the raw page text by matching these
                // JSON-style keys.
                const pubDateStr: string | undefined = detailResponse.match(/"datePublished": "(.*?)",/)?.[1];
                const updatedStr: string | undefined = detailResponse.match(/"dateModified": "(.*?)",/)?.[1];

                return {
                    ...item,
                    title,
                    description,
                    pubDate: pubDateStr ? parseDate(pubDateStr) : item.pubDate,
                    content: {
                        html: description,
                        text: description,
                    },
                    updated: updatedStr ? parseDate(updatedStr) : item.updated,
                    language,
                };
            });
        })
    );

    return {
        title: $('title').text(),
        description: $('meta[property="og:title"]').attr('content'),
        link: targetUrl,
        item: items,
        allowEmpty: true,
        author: $('meta[property="og:site_name"]').attr('content'),
        language,
        id: targetUrl,
    };
};
| 103 | + |
/**
 * Route definition for the CoStar press-releases feed.
 *
 * The optional `filter` path parameter carries the raw query string of the
 * source page (e.g. `region=406&tag=581`) and is forwarded to the site as-is
 * by the handler.
 */
export const route: Route = {
    path: '/press-releases/:filter{.+}?',
    name: 'Press Releases',
    url: 'www.costar.com',
    maintainers: ['nczitzk'],
    handler,
    example: '/costar/press-releases',
    parameters: {
        filter: {
            description: 'Filter',
        },
    },
    description: `:::tip
To subscribe to [Press Releases - Asia Pacific - Preliminary](https://www.costar.com/products/benchmark/resources/press-releases?region=406&tag=581), where the source URL is \`https://www.costar.com/products/benchmark/resources/press-releases?region=406&tag=581\`, extract the certain parts from this URL to be used as parameters, resulting in the route as [\`/costar/press-releases/region=406&tag=581\`](https://rsshub.app/costar/press-releases/region=406&tag=581).
:::
`,
    categories: ['new-media'],
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportRadar: true,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    radar: [
        {
            // Match the press-releases page itself; the bare host would not
            // match the page whose query string the target reads.
            source: ['www.costar.com/products/benchmark/resources/press-releases'],
            target: (_, url) => {
                // `URL.search` is always a string ('' when there is no query),
                // so only the leading '?' needs stripping.
                const filter: string = new URL(url).search.replace(/^\?/, '');

                return `/costar/press-releases${filter ? `/${filter}` : ''}`;
            },
        },
    ],
    view: ViewType.Articles,
};
0 commit comments