- English document: https://github.com/coder-hxl/x-crawl/blob/v9.0.0/README.md
- Chinese document: https://github.com/coder-hxl/x-crawl/blob/v9.0.0/docs/cn.md

## Example

Combining the crawler with AI allows them to obtain pictures of highly rated vacation rentals according to our instructions:
```js
import { createCrawl, createCrawlOpenAI } from 'x-crawl'

// Create a crawler application
const crawlApp = createCrawl({
  maxRetry: 3,
  intervalTime: { max: 2000, min: 1000 }
})

// Create an AI application
const crawlOpenAIApp = createCrawlOpenAI({
  clientOptions: { apiKey: process.env['OPENAI_API_KEY'] },
  defaultModel: { chatModel: 'gpt-4-turbo-preview' }
})

// crawlPage is used to crawl pages
crawlApp.crawlPage('https://www.airbnb.cn/s/select_homes').then(async (res) => {
  const { page, browser } = res.data

  // Wait for the target element to appear on the page, then grab its HTML
  const targetSelector = '[data-tracking-id="TOP_REVIEWED_LISTINGS"]'
  await page.waitForSelector(targetSelector)
  const highlyHTML = await page.$eval(targetSelector, (el) => el.innerHTML)

  // Let the AI extract the img URLs and remove duplicates
  const srcResult = await crawlOpenAIApp.parseElements(
    highlyHTML,
    'Get the url of img and remove duplicates'
  )

  browser.close()

  // crawlFile is used to crawl file resources
  crawlApp.crawlFile({
    targets: srcResult.elements.map((item) => item.src),
    storeDirs: './upload'
  })
})
```
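Based on how `srcResult` is used above, the example assumes `parseElements` resolves to an object whose `elements` array holds one entry per matched image, each with a `src` string. Here is a minimal sketch of that assumed shape (placeholder URLs, inferred from the example rather than from x-crawl's documented typings):

```js
// Illustrative only: the result shape this example assumes parseElements
// resolves to, inferred from `srcResult.elements.map((item) => item.src)`
// above. The URLs are placeholders, not real crawl output.
const assumedSrcResult = {
  elements: [
    { src: 'https://example.com/listing-photo-1.jpg' },
    { src: 'https://example.com/listing-photo-2.jpg' }
  ]
}

// Mirrors the `targets` mapping passed to crawlFile in the example
console.log(assumedSrcResult.elements.map((item) => item.src))
```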
The pictures of highly rated vacation rentals that were crawled:

**Warning**: x-crawl is for legal use only. Any illegal activity using this tool is prohibited. Please be sure to comply with the target website's robots.txt rules. This example only demonstrates how to use x-crawl and is not targeted at any specific website.
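As a starting point for that robots.txt check, here is a minimal sketch (plain Node.js `fetch`, not part of x-crawl) that downloads a site's robots.txt so you can review its rules before crawling; the hostname is a placeholder:

```js
// Minimal sketch: fetch a site's robots.txt before crawling it.
// 'https://www.example.com' is a placeholder hostname; inspect the
// returned Disallow rules for the paths you intend to crawl.
const robotsURL = new URL('/robots.txt', 'https://www.example.com').href

fetch(robotsURL)
  .then((res) => res.text())
  .then((rules) => console.log(rules))
  .catch((err) => console.error('Could not fetch robots.txt:', err))
```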