Skip to content

Commit 1a970df

Browse files
committed
feat: src was renamed to packages
1 parent 386341e commit 1a970df

18 files changed

+129
-137
lines changed

jest.config.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ module.exports = {
55
collectCoverage: true,
66
detectOpenHandles: true,
77
moduleNameMapper: {
8-
'^src/(.*)$': '<rootDir>/src/$1',
8+
'^packages/(.*)$': '<rootDir>/packages/$1',
99
'^publish/(.*)$': '<rootDir>/publish/$1'
1010
}
1111
}

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
"flexible",
1717
"multifunction"
1818
],
19-
"main": "src/index.ts",
19+
"main": "packages/index.ts",
2020
"scripts": {
2121
"build": "rollup --config rollup.config.mjs",
2222
"build-dts": "tsc && prettier --write ./publish/src",
File renamed without changes.

src/ai/index.ts renamed to packages/ai/index.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ interface XCrawlOpenAIApp {
8181
custom(): OpenAI
8282
}
8383

84-
function createXCrawlOpenAI(
84+
export function createXCrawlOpenAI(
8585
config: CreateXCrawlOpenAIConfig = {}
8686
): XCrawlOpenAIApp {
8787
const { defaultModel, clientOptions } = config
@@ -200,5 +200,3 @@ function createXCrawlOpenAI(
200200

201201
return app
202202
}
203-
204-
export { createXCrawlOpenAI }

src/api.ts renamed to packages/crawl/api.ts

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ import {
3838
CrawlHTMLDetailTargetConfig,
3939
CrawlHTMLAdvancedConfig
4040
} from './types/api'
41-
import { LogConfig, XCrawlInstanceConfig } from './types'
41+
import { LogOptions, CrawlBaseConfig } from './types'
4242
import { fingerprints } from './default'
4343

4444
/* Types */
@@ -49,7 +49,7 @@ export interface InfoCommonConfig {
4949
serialNumber: string
5050
mode: 'async' | 'sync'
5151
type: 'page' | 'html' | 'data' | 'file'
52-
logConfig: LogConfig
52+
logConfig: LogOptions
5353
intervalTime: IntervalTime | undefined
5454
}
5555

@@ -403,7 +403,7 @@ function loaderPageFingerprintToDetailTarget(
403403
}
404404

405405
function loaderCommonConfigToCrawlConfig(
406-
xCrawlInstanceConfig: XCrawlInstanceConfig,
406+
crawlBaseConfig: CrawlBaseConfig,
407407
advancedDetailTargetsConfig:
408408
| CrawlPageAdvancedDetailTargetsConfig
409409
| CrawlHTMLAdvancedDetailTargetsConfig
@@ -429,16 +429,16 @@ function loaderCommonConfigToCrawlConfig(
429429
detail
430430

431431
// 1.1.baseUrl
432-
if (xCrawlInstanceConfig.baseUrl) {
433-
detail.url = xCrawlInstanceConfig.baseUrl + url
432+
if (crawlBaseConfig.baseUrl) {
433+
detail.url = crawlBaseConfig.baseUrl + url
434434
}
435435

436436
// 1.2.timeout
437437
if (isUndefined(timeout)) {
438438
if (!isUndefined(advancedDetailTargetsConfig.timeout)) {
439439
detail.timeout = advancedDetailTargetsConfig.timeout ?? undefined
440440
} else {
441-
detail.timeout = xCrawlInstanceConfig.timeout
441+
detail.timeout = crawlBaseConfig.timeout
442442
}
443443
}
444444

@@ -447,16 +447,16 @@ function loaderCommonConfigToCrawlConfig(
447447
if (!isUndefined(advancedDetailTargetsConfig.maxRetry)) {
448448
detail.maxRetry = advancedDetailTargetsConfig.maxRetry ?? 0
449449
} else {
450-
detail.maxRetry = xCrawlInstanceConfig.maxRetry
450+
detail.maxRetry = crawlBaseConfig.maxRetry
451451
}
452452
}
453453

454454
// 1.4.proxy
455455
if (isUndefined(proxy)) {
456456
if (!isUndefined(advancedDetailTargetsConfig.proxy)) {
457457
detail.proxy = advancedDetailTargetsConfig.proxy
458-
} else if (!isUndefined(xCrawlInstanceConfig.proxy)) {
459-
detail.proxy = xCrawlInstanceConfig.proxy
458+
} else if (!isUndefined(crawlBaseConfig.proxy)) {
459+
detail.proxy = crawlBaseConfig.proxy
460460
}
461461
}
462462

@@ -503,9 +503,9 @@ function loaderCommonConfigToCrawlConfig(
503503
} else if (
504504
isUndefined(fingerprint) &&
505505
!isArray(advancedDetailTargetsConfig.fingerprints) &&
506-
xCrawlInstanceConfig.enableRandomFingerprint
506+
crawlBaseConfig.enableRandomFingerprint
507507
) {
508-
// xCrawlInstanceConfig
508+
// crawlBaseConfig
509509
const fingerprint = fingerprints[random(fingerprints.length)]
510510

511511
loaderCommonFingerprintToDetailTarget(detail, fingerprint)
@@ -519,9 +519,9 @@ function loaderCommonConfigToCrawlConfig(
519519
crawlConfig.intervalTime = advancedDetailTargetsConfig.intervalTime
520520
if (
521521
isUndefined(advancedDetailTargetsConfig.intervalTime) &&
522-
!isUndefined(xCrawlInstanceConfig.intervalTime)
522+
!isUndefined(crawlBaseConfig.intervalTime)
523523
) {
524-
crawlConfig.intervalTime = xCrawlInstanceConfig.intervalTime
524+
crawlConfig.intervalTime = crawlBaseConfig.intervalTime
525525
}
526526

527527
// 3.onCrawlItemComplete
@@ -540,7 +540,7 @@ function loaderCommonConfigToCrawlConfig(
540540
*/
541541

542542
function createCrawlPageConfig(
543-
xCrawlInstanceConfig: XCrawlInstanceConfig,
543+
crawlBaseConfig: CrawlBaseConfig,
544544
originalConfig: UniteCrawlPageConfig
545545
): CrawlPageConfig {
546546
const crawlPageConfig: CrawlPageConfig = {
@@ -577,7 +577,7 @@ function createCrawlPageConfig(
577577

578578
// 装载公共配置
579579
loaderCommonConfigToCrawlConfig(
580-
xCrawlInstanceConfig,
580+
crawlBaseConfig,
581581
advancedDetailTargetsConfig,
582582
crawlPageConfig
583583
)
@@ -618,7 +618,7 @@ function createCrawlPageConfig(
618618
}
619619

620620
function createCrawlHTMLConfig(
621-
xCrawlInstanceConfig: XCrawlInstanceConfig,
621+
crawlBaseConfig: CrawlBaseConfig,
622622
originalConfig: UniteCrawlHTMLConfig
623623
): CrawlHTMLConfig {
624624
const crawlHTMLConfig: CrawlHTMLConfig = {
@@ -658,7 +658,7 @@ function createCrawlHTMLConfig(
658658
}
659659

660660
loaderCommonConfigToCrawlConfig(
661-
xCrawlInstanceConfig,
661+
crawlBaseConfig,
662662
advancedDetailTargetsConfig,
663663
crawlHTMLConfig
664664
)
@@ -667,7 +667,7 @@ function createCrawlHTMLConfig(
667667
}
668668

669669
function createCrawlDataConfig<T>(
670-
xCrawlInstanceConfig: XCrawlInstanceConfig,
670+
crawlBaseConfig: CrawlBaseConfig,
671671
originalConfig: UniteCrawlDataConfig<T>
672672
): CrawlDataConfig {
673673
const crawlDataConfig: CrawlDataConfig = {
@@ -704,7 +704,7 @@ function createCrawlDataConfig<T>(
704704
}
705705

706706
loaderCommonConfigToCrawlConfig(
707-
xCrawlInstanceConfig,
707+
crawlBaseConfig,
708708
advancedDetailTargetsConfig,
709709
crawlDataConfig
710710
)
@@ -713,7 +713,7 @@ function createCrawlDataConfig<T>(
713713
}
714714

715715
function createCrawlFileConfig(
716-
xCrawlInstanceConfig: XCrawlInstanceConfig,
716+
crawlBaseConfig: CrawlBaseConfig,
717717
originalConfig: UniteCrawlFileConfig
718718
): CrawlFileConfig {
719719
const crawlFileConfig: CrawlFileConfig = {
@@ -748,7 +748,7 @@ function createCrawlFileConfig(
748748
}
749749

750750
loaderCommonConfigToCrawlConfig(
751-
xCrawlInstanceConfig,
751+
crawlBaseConfig,
752752
advancedDetailTargetsConfig,
753753
crawlFileConfig
754754
)
@@ -1074,13 +1074,13 @@ function fileSingleResultHandle(
10741074

10751075
/* Create crawl API */
10761076

1077-
export function createCrawlPage(xCrawlInstanceConfig: XCrawlInstanceConfig) {
1077+
export function createCrawlPage(crawlBaseConfig: CrawlBaseConfig) {
10781078
const {
10791079
id: xId,
10801080
mode,
1081-
logConfig,
1082-
crawlPage: crawlPageConfig
1083-
} = xCrawlInstanceConfig
1081+
logOptions: logConfig,
1082+
crawlPage: crawlPageOptions
1083+
} = crawlBaseConfig
10841084

10851085
let id = 0
10861086
let browser: Browser | null = null
@@ -1109,7 +1109,7 @@ export function createCrawlPage(xCrawlInstanceConfig: XCrawlInstanceConfig) {
11091109
if (!haveCreateBrowser) {
11101110
haveCreateBrowser = true
11111111
createBrowserPending = puppeteer
1112-
.launch(crawlPageConfig?.puppeteerLaunch)
1112+
.launch(crawlPageOptions?.puppeteerLaunchOptions)
11131113
.then((result) => {
11141114
browser = result
11151115
})
@@ -1124,7 +1124,7 @@ export function createCrawlPage(xCrawlInstanceConfig: XCrawlInstanceConfig) {
11241124

11251125
// 创建新配置
11261126
const { detailTargets, intervalTime, onCrawlItemComplete } =
1127-
createCrawlPageConfig(xCrawlInstanceConfig, config)
1127+
createCrawlPageConfig(crawlBaseConfig, config)
11281128

11291129
const infoConfig: InfoPageConfig = {
11301130
serialNumber: `${xId}-${type}-${++id}`,
@@ -1154,8 +1154,8 @@ export function createCrawlPage(xCrawlInstanceConfig: XCrawlInstanceConfig) {
11541154
return crawlPage
11551155
}
11561156

1157-
export function createCrawlHTML(xCrawlInstanceConfig: XCrawlInstanceConfig) {
1158-
const { id: xId, mode, logConfig } = xCrawlInstanceConfig
1157+
export function createCrawlHTML(crawlBaseConfig: CrawlBaseConfig) {
1158+
const { id: xId, mode, logOptions: logConfig } = crawlBaseConfig
11591159
let id = 0
11601160
const type = 'html'
11611161

@@ -1177,7 +1177,7 @@ export function createCrawlHTML(xCrawlInstanceConfig: XCrawlInstanceConfig) {
11771177
config: UniteCrawlHTMLConfig
11781178
): Promise<CrawlHTMLSingleResult | CrawlHTMLSingleResult[]> {
11791179
const { detailTargets, intervalTime, onCrawlItemComplete } =
1180-
createCrawlHTMLConfig(xCrawlInstanceConfig, config)
1180+
createCrawlHTMLConfig(crawlBaseConfig, config)
11811181

11821182
const infoConfig: InfoHTMLConfig = {
11831183
serialNumber: `${xId}-${type}-${++id}`,
@@ -1206,8 +1206,8 @@ export function createCrawlHTML(xCrawlInstanceConfig: XCrawlInstanceConfig) {
12061206
return crawlHTML
12071207
}
12081208

1209-
export function createCrawlData(xCrawlInstanceConfig: XCrawlInstanceConfig) {
1210-
const { id: xId, mode, logConfig } = xCrawlInstanceConfig
1209+
export function createCrawlData(crawlBaseConfig: CrawlBaseConfig) {
1210+
const { id: xId, mode, logOptions: logConfig } = crawlBaseConfig
12111211
let id = 0
12121212
const type = 'data'
12131213

@@ -1229,7 +1229,7 @@ export function createCrawlData(xCrawlInstanceConfig: XCrawlInstanceConfig) {
12291229
config: UniteCrawlDataConfig<T>
12301230
): Promise<CrawlDataSingleResult<T> | CrawlDataSingleResult<T>[]> {
12311231
const { detailTargets, intervalTime, onCrawlItemComplete } =
1232-
createCrawlDataConfig(xCrawlInstanceConfig, config)
1232+
createCrawlDataConfig(crawlBaseConfig, config)
12331233

12341234
const infoConfig: InfoDataConfig<T> = {
12351235
serialNumber: `${xId}-${type}-${++id}`,
@@ -1258,8 +1258,8 @@ export function createCrawlData(xCrawlInstanceConfig: XCrawlInstanceConfig) {
12581258
return crawlData
12591259
}
12601260

1261-
export function createCrawlFile(xCrawlInstanceConfig: XCrawlInstanceConfig) {
1262-
const { id: xId, mode, logConfig } = xCrawlInstanceConfig
1261+
export function createCrawlFile(crawlBaseConfig: CrawlBaseConfig) {
1262+
const { id: xId, mode, logOptions: logConfig } = crawlBaseConfig
12631263
let id = 0
12641264
const type = 'file'
12651265

@@ -1283,7 +1283,7 @@ export function createCrawlFile(xCrawlInstanceConfig: XCrawlInstanceConfig) {
12831283
intervalTime,
12841284
onBeforeSaveItemFile,
12851285
onCrawlItemComplete
1286-
} = createCrawlFileConfig(xCrawlInstanceConfig, config)
1286+
} = createCrawlFileConfig(crawlBaseConfig, config)
12871287

12881288
const infoConfig: InfoFileConfig = {
12891289
serialNumber: `${xId}-${type}-${++id}`,
File renamed without changes.
File renamed without changes.
File renamed without changes.

packages/crawl/index.ts

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import {
2+
createCrawlData,
3+
createCrawlFile,
4+
createCrawlHTML,
5+
createCrawlPage
6+
} from './api'
7+
8+
import { CreateCrawlConfig, CrawlApp, CrawlBaseConfig } from './types'
9+
import { isBoolean, isObject } from './utils'
10+
11+
// Module-level counter; createInstanceConfig pre-increments it to give each
// created base config its own numeric id (the first config gets id 1).
let id = 0
12+
13+
/**
 * Builds the normalized base config for one crawl instance from the
 * user-supplied creation config.
 *
 * Defaults applied when the corresponding field is null/undefined:
 * mode 'async', enableRandomFingerprint false, timeout 10000, maxRetry 0,
 * and all three log flags (start/process/result) enabled.
 * baseUrl, intervalTime, proxy and crawlPage are copied through unchanged.
 *
 * @param config - Creation options supplied by the caller.
 * @returns A new base config carrying a freshly incremented id.
 */
function createInstanceConfig(config: CreateCrawlConfig): CrawlBaseConfig {
14+
const {
15+
mode,
16+
enableRandomFingerprint,
17+
baseUrl,
18+
intervalTime,
19+
log,
20+
crawlPage,
21+
timeout,
22+
proxy,
23+
maxRetry
24+
} = config
25+
26+
const crawlBaseConfig: CrawlBaseConfig = {
27+
// Pre-increment the module-level counter so each config gets a new id.
id: ++id,
28+
29+
// Options with defaults (?? only falls back on null/undefined).
mode: mode ?? 'async',
30+
enableRandomFingerprint: enableRandomFingerprint ?? false,
31+
// NOTE(review): timeout unit is presumably milliseconds; confirm against consumers.
timeout: timeout ?? 10000,
32+
maxRetry: maxRetry ?? 0,
33+
logOptions: { start: true, process: true, result: true },
34+
35+
// Options passed through as given (may be undefined).
baseUrl,
36+
intervalTime,
37+
proxy,
38+
crawlPage
39+
}
40+
41+
// logOptions: `log` may be an object of per-stage flags or a boolean.
// NOTE(review): isObject/isBoolean come from './utils'; their exact semantics
// are not visible here.
42+
if (isObject(log)) {
43+
// Overlay the caller's flags on top of the all-enabled defaults.
crawlBaseConfig.logOptions = {
44+
...crawlBaseConfig.logOptions,
45+
...log
46+
}
47+
} else if (isBoolean(log) && !log) {
48+
// log === false: switch off every log flag.
const keys = Object.keys(crawlBaseConfig.logOptions) as [
49+
'start',
50+
'process',
51+
'result'
52+
]
53+
54+
keys.forEach((key) => (crawlBaseConfig.logOptions[key] = false))
55+
}
56+
57+
return crawlBaseConfig
58+
}
59+
60+
/**
 * Assembles the crawl app object: one crawl function per target kind
 * (page, HTML, data, file), each created from the same base config.
 *
 * NOTE(review): the name looks like a typo of `createApp`; renaming it would
 * also require updating the call site in createCrawl.
 *
 * @param crawlBaseConfig - Base config passed to all four factory functions.
 * @returns The app exposing crawlPage, crawlHTML, crawlData and crawlFile.
 */
function createnApp(crawlBaseConfig: CrawlBaseConfig): CrawlApp {
61+
const app: CrawlApp = {
62+
crawlPage: createCrawlPage(crawlBaseConfig),
63+
crawlHTML: createCrawlHTML(crawlBaseConfig),
64+
crawlData: createCrawlData(crawlBaseConfig),
65+
crawlFile: createCrawlFile(crawlBaseConfig)
66+
}
67+
68+
return app
69+
}
70+
71+
/**
 * Public entry point: creates a crawl app from optional creation options.
 *
 * The options are first normalized by createInstanceConfig (which also
 * assigns a new instance id), then the resulting base config is handed to
 * createnApp to build the crawl functions.
 *
 * @param config - Creation options; defaults to an empty object.
 * @returns The crawl app built from the normalized config.
 */
export function createCrawl(config: CreateCrawlConfig = {}): CrawlApp {
72+
const crawlBaseConfig = createInstanceConfig(config)
73+
74+
const app = createnApp(crawlBaseConfig)
75+
76+
return app
77+
}
File renamed without changes.

0 commit comments

Comments
 (0)