1- import { Page } from 'puppeteer-core' ;
1+ import { Page , ElementHandle } from 'puppeteer-core' ;
22import { OrganicResult , Result , SearchOptions } from '../define' ;
33import { getRedirectUrl } from '../common/get-redirect-url' ;
44import { EventType , LogContext } from '../logs' ;
55import { resolveUrlAndContent } from '../common/get-page-content' ;
66
/**
 * Looks up the first descendant of `el` that matches any of the given selectors.
 *
 * The selectors are tried one at a time, in array order, by calling `el.$`.
 * Each query is awaited before the next one starts, so once a selector
 * produces a match no later selector is queried.
 *
 * @param selectorList - Candidate CSS selectors, listed from highest to lowest priority.
 * @param el - Handle whose subtree is searched.
 * @returns The handle returned for the first matching selector, or `null`
 *          when no selector matches (including when `selectorList` is empty).
 */
async function findElement(selectorList: string[], el: ElementHandle<Element>): Promise<ElementHandle | null> {
  let match: ElementHandle | null = null;

  for (const candidate of selectorList) {
    const handle = await el.$(candidate);
    if (handle) {
      // First hit wins; stop before issuing any further queries.
      match = handle;
      break;
    }
  }

  return match;
}
16+
717async function extractOrganicResults ( page : Page ) : Promise < OrganicResult [ ] > {
818 const resultList = await page . $$ ( '.result.c-container.new-pmd' ) ;
919
@@ -13,7 +23,12 @@ async function extractOrganicResults(page: Page): Promise<OrganicResult[]> {
1323 for ( const el of resultList ) {
1424 pos ++ ;
1525 let currentPosition = pos ;
16- const title = await el . $ ( '.c-title' ) ;
26+
27+ // 提取标题
28+ // 百度可能会调整结构,这里是可能的几个选择器
29+ const titleSelectorList = [ '.c-title' , 'a[data-module="title"]' , 'h3[class^="struct-title_"]' , 'div[class^="title-box_"]' , 'div[class^="title-wrapper_"]' ] ;
30+
31+ const title = await findElement ( titleSelectorList , el ) ;
1732 if ( ! title ) {
1833 continue ;
1934 }
@@ -23,13 +38,19 @@ async function extractOrganicResults(page: Page): Promise<OrganicResult[]> {
2338 }
2439
2540 // const site = await el.$('div[class^="source_"] a[class^="siteLink_"]');
26- const site = await el . $ ( 'a[class^="siteLink_"]' ) ;
41+ // 提取站点名称
42+ const siteSelectorList = [ 'a[class^="siteLink_"]' , '.cosc-source-link' , '.cosc-source-text' ] ;
43+ const site = await findElement ( siteSelectorList , el ) ;
2744 const siteContent = site ? await site . evaluate ( ( el ) => el . textContent ) : '' ;
2845
29- const link = ( await el . $ ( '.c-title a' ) ) ! ;
46+ // 结果对应的跳转链接
47+ const titleIsLink = await title . evaluate ( ( el ) => el . tagName . toLowerCase ( ) === 'a' ) ;
48+ const link = titleIsLink ? ( title as ElementHandle < HTMLAnchorElement > ) : ( await title . $ ( 'a' ) ) ! ;
3049 const linkContent = await link . evaluate ( ( el ) => el . href ) ;
3150
32- const content = ( await el . $ ( 'span[class^="content-right"]' ) ) ! ;
51+ // 页面展示的内容
52+ const contentSelectorList = [ 'span[class^="content-right"]' , 'div[data-module="abstract"]' , 'span[class^="summary-text_"]' ] ;
53+ const content = ( await findElement ( contentSelectorList , el ) ) ! ;
3354 const contentContent = content ? await content . evaluate ( ( el ) => el . textContent ) : '' ;
3455
3556 ret . push ( {
0 commit comments