Skip to content

Commit db5aba9

Browse files
committed
feat(docs): add English documents
1 parent b33b8a3 commit db5aba9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+2585
-127
lines changed

docs/.vitepress/config.ts

Lines changed: 166 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ export default defineConfig({
2020

2121
editLink: {
2222
pattern: 'https://github.com/coder-hxl/x-crawl/edit/main/docs/:path',
23-
text: '为此页提供修改建议'
23+
text: 'Suggest changes to this page'
2424
},
2525

2626
socialLinks: [
@@ -191,6 +191,170 @@ export default defineConfig({
191191
}
192192
},
193193

194-
en: { label: 'English', lang: 'en', link: '/en' }
194+
en: {
195+
label: 'English',
196+
lang: 'en',
197+
link: '/en/',
198+
199+
themeConfig: {
200+
nav: [
201+
{ text: 'Guide', link: '/en/guide/index' },
202+
{ text: 'API', link: '/en/api/index' },
203+
{ text: 'Type', link: '/en/type/index' },
204+
{
205+
text: 'About',
206+
items: [
207+
{ text: 'FAQ', link: '/en/about/faq' },
208+
{ text: 'Community', link: '/en/about/community' },
209+
{ text: 'Releases', link: '/en/about/releases' },
210+
{ text: 'Old documents', link: '/en/about/old-docs' },
211+
{ text: 'Issues', link: '/en/about/issues' },
212+
{ text: 'Announcements', link: '/en/about/announcements' }
213+
]
214+
}
215+
],
216+
217+
search: {
218+
provider: 'local'
219+
},
220+
221+
sidebar: {
222+
'/en/guide': [
223+
{
224+
text: 'Getting Started',
225+
items: [
226+
{ text: 'Introduction', link: '/en/guide/index' },
227+
{ text: 'Quick Start', link: '/en/guide/quick-start' }
228+
]
229+
},
230+
{
231+
text: 'Essentials',
232+
items: [
233+
{
234+
text: 'Create Application',
235+
link: '/en/guide/create-crawl-application'
236+
},
237+
{ text: 'Crawl Page', link: '/en/guide/crawl-page' },
238+
{ text: 'Crawl HTML', link: '/en/guide/crawl-html' },
239+
{ text: 'Crawl Data', link: '/en/guide/crawl-data' },
240+
{ text: 'Crawl File', link: '/en/guide/crawl-file' },
241+
{ text: 'Interval Time', link: '/en/guide/interval' },
242+
{ text: 'Retry', link: '/en/guide/retry' },
243+
{ text: 'Proxy', link: '/en/guide/proxy' },
244+
{ text: 'Priority Crawl', link: '/en/guide/priority' },
245+
{ text: 'Terminal Prompt', link: '/en/guide/reporters' },
246+
{ text: 'About the Results', link: '/en/guide/results' },
247+
{ text: 'TypeScript', link: '/en/guide/typescript' }
248+
]
249+
},
250+
{
251+
text: 'AI Assisted',
252+
items: [
253+
{
254+
text: 'Creating AI applications',
255+
link: '/en/guide/create-ai-application'
256+
},
257+
{
258+
text: 'Intelligent on-demand analysis elements',
259+
link: '/en/guide/parse-elements'
260+
},
261+
{
262+
text: 'Smartly generated element selectors',
263+
link: '/en/guide/get-element-selectors'
264+
},
265+
{
266+
text: 'Intelligent reply to crawler questions',
267+
link: '/en/guide/crawl-openai-help'
268+
},
269+
{
270+
text: 'User-defined AI functions',
271+
link: '/en/guide/crawl-openai-custom'
272+
}
273+
]
274+
},
275+
{
276+
text: 'Advanced',
277+
items: [
278+
{ text: 'Crawl mode', link: '/en/guide/crawl-mode' },
279+
{
280+
text: 'device fingerprinting',
281+
link: '/en/guide/fingerprint'
282+
},
283+
{ text: 'configuration', link: '/en/guide/config' }
284+
]
285+
}
286+
],
287+
288+
'/en/api': [
289+
{
290+
text: 'crawler',
291+
items: [
292+
{ text: 'createCrawl', link: '/en/api/index' },
293+
{ text: 'crawlPage', link: '/en/api/crawl-page' },
294+
{ text: 'crawlHTML', link: '/en/api/crawl-html' },
295+
{ text: 'crawlData', link: '/en/api/crawl-data' },
296+
{ text: 'crawlFile', link: '/en/api/crawl-file' }
297+
]
298+
},
299+
{
300+
text: 'AI',
301+
items: [
302+
{
303+
text: 'createCrawlOpenAI',
304+
link: '/en/api/create-crawl-openai'
305+
},
306+
{ text: 'parseElements', link: '/en/api/parse-elements' },
307+
{
308+
text: 'getElementSelectors',
309+
link: '/en/api/get-element-selectors'
310+
},
311+
{ text: 'help', link: '/en/api/help' },
312+
{ text: 'custom', link: '/en/api/custom' }
313+
]
314+
}
315+
],
316+
317+
'/en/type': [
318+
{
319+
text: 'crawler',
320+
items: [
321+
{ text: 'createCrawl', link: '/en/type/index' },
322+
{ text: 'crawlPage', link: '/en/type/crawl-page' },
323+
{ text: 'crawlHTML', link: '/en/type/crawl-html' },
324+
{ text: 'crawlData', link: '/en/type/crawl-data' },
325+
{ text: 'crawlFile', link: '/en/type/crawl-file' },
326+
{
327+
text: 'CrawlOtherConfig',
328+
link: '/en/type/crawl-other-config'
329+
}
330+
]
331+
},
332+
{
333+
text: 'AI',
334+
items: [
335+
{
336+
text: 'createCrawlOpenAI',
337+
link: '/en/type/create-crawl-openai'
338+
},
339+
{ text: 'parseElements', link: '/en/type/parse-elements' },
340+
{
341+
text: 'getElementSelectors',
342+
link: '/en/type/get-element-selectors'
343+
},
344+
{
345+
text: 'CrawlOpenaiOtherConfig',
346+
link: '/en/type/crawl-openai-other-config'
347+
}
348+
]
349+
}
350+
]
351+
},
352+
353+
footer: {
354+
message: 'Released under the MIT license',
355+
copyright: 'Copyright © 2024-present CoderHXL. All rights reserved'
356+
}
357+
}
358+
}
195359
}
196360
})

docs/en/about/announcements.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Precautions
2+
3+
- x-crawl is for legal purposes only. It is prohibited to use this tool to conduct any illegal activities, including but not limited to unauthorized data collection, network attacks, privacy violations, etc.
4+
- Before collecting data, make sure you have explicit authorization from the target website and comply with its robots.txt file and terms of use.
5+
- Do not place excessive load on the target website, so that you neither trigger its anti-crawling measures nor cause server downtime.

docs/en/about/community.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Community
2+
3+
- **Discord Chat:** Ask questions and chat with other x-crawl users in real time via [Discord](https://discord.gg/SF7aaebg4E) (and be among the first to hear x-crawl news).
4+
- **GitHub Discussions:** Use [GitHub Discussions](https://github.com/coder-hxl/x-crawl/discussions) for message board-style questions and discussions.
5+
6+
Questions and discussions related to any illegal activity may not be submitted. x-crawl is for legal purposes only, and it is prohibited to use this tool to conduct any illegal activities, including but not limited to unauthorized data collection, network attacks, privacy violations, etc. Please ensure that your usage behavior always complies with laws, regulations and ethical standards, and jointly maintain a safe and legal network environment.

docs/en/about/faq.md

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Frequently Asked Questions
2+
3+
## The relationship between crawlPage API and puppeteer
4+
5+
The crawlPage API has [puppeteer](https://github.com/puppeteer/puppeteer) built in. You only need to pass in some configuration options and x-crawl will simplify the operation for you, giving you the intact Browser instance and Page instance; x-crawl does not override them.
6+
7+
## Using crawlPage API causes the program to crash
8+
9+
If you need to crawl many pages in one crawlPage call, it is recommended that, after each page is crawled, you use the [onCrawlItemComplete life cycle function](#onCrawlItemComplete) to process the result of each target and close the page instance. If the pages are not closed, the program may crash because too many pages are open (this depends on the performance of the device itself).
10+
11+
```js{11,12,13,14,15,16,17,18,35,36,37,38,39,40}
12+
import { createCrawl } from 'x-crawl'
13+
14+
const crawlApp = createCrawl()
15+
16+
// Recommended when there are few crawl targets
17+
crawlApp
18+
.crawlPage([
19+
'https://www.example.com/page-1',
20+
'https://www.example.com/page-2'
21+
])
22+
.then((results) => {
23+
for (const itemResult of results) {
24+
const { page } = itemResult.data
25+
26+
//Close if no longer used
27+
page.close()
28+
}
29+
})
30+
31+
// Recommended when there are many crawl targets
32+
// Use onCrawlItemComplete from the advanced configuration
33+
crawlApp.crawlPage({
34+
targets: [
35+
'https://www.example.com/page-1',
36+
'https://www.example.com/page-2',
37+
'https://www.example.com/page-3',
38+
'https://www.example.com/page-4',
39+
'https://www.example.com/page-5',
40+
'https://www.example.com/page-6',
41+
'https://www.example.com/page-7',
42+
'https://www.example.com/page-8',
43+
'https://www.example.com/page-9',
44+
'https://www.example.com/page-10'
45+
],
46+
onCrawlItemComplete(crawlPageSingleResult) {
47+
const { page } = crawlPageSingleResult.data
48+
49+
//Close if no longer used
50+
page.close()
51+
}
52+
})
53+
```

docs/en/about/issues.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Issues
2+
3+
If you have **questions, feature requests, or suggestions**, you can open an **issue** in [GitHub Issues](https://github.com/coder-hxl/x-crawl/issues).

docs/en/about/old-docs.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Documentation for older versions
2+
3+
v9.0.0 version: https://github.com/coder-hxl/x-crawl/blob/v9.0.0/docs/cn.md

docs/en/about/releases.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Releases
2+
3+
The complete release history can be viewed on [GitHub](https://github.com/coder-hxl/x-crawl/blob/main/CHANGELOG.md).

0 commit comments

Comments
 (0)