Skip to content

Commit 6a1869d

Browse files
committed
fixes
1 parent 4970c0a commit 6a1869d

File tree

4 files changed

+94
-93
lines changed

4 files changed

+94
-93
lines changed

js/botasaurus-js/package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

js/botasaurus-js/package.json

Lines changed: 44 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,88 +1,88 @@
11
{
22
"name": "botasaurus",
3-
"version": "4.0.120",
3+
"version": "4.0.122",
44
"description": "controls adder for botasaurus.",
55
"main": "dist/index.js",
66
"types": "dist/index.d.ts",
77
"exports": {
88
".": "./dist/index.js",
9-
"./string-utils": "./dist/string-utils.js",
10-
"./task": "./dist/task.js",
11-
"./beep-utils": "./dist/beep-utils.js",
12-
"./env": "./dist/env.js",
13-
"./paths": "./dist/paths.js",
14-
"./increment": "./dist/increment.js",
15-
"./formats": "./dist/formats.js",
169
"./output": "./dist/output.js",
10+
"./list-utils": "./dist/list-utils.js",
11+
"./botasaurus-storage": "./dist/botasaurus-storage.js",
12+
"./decorators-common": "./dist/decorators-common.js",
1713
"./on-close": "./dist/on-close.js",
14+
"./dontcache": "./dist/dontcache.js",
15+
"./formats": "./dist/formats.js",
1816
"./utils": "./dist/utils.js",
19-
"./cache": "./dist/cache.js",
20-
"./page": "./dist/page.js",
21-
"./botasaurus-storage": "./dist/botasaurus-storage.js",
17+
"./beep-utils": "./dist/beep-utils.js",
2218
"./decorators-utils": "./dist/decorators-utils.js",
23-
"./decorators-common": "./dist/decorators-common.js",
19+
"./page": "./dist/page.js",
20+
"./cache": "./dist/cache.js",
21+
"./increment": "./dist/increment.js",
22+
"./paths": "./dist/paths.js",
23+
"./env": "./dist/env.js",
2424
"./playwright": "./dist/playwright.js",
25-
"./list-utils": "./dist/list-utils.js",
26-
"./dontcache": "./dist/dontcache.js"
25+
"./string-utils": "./dist/string-utils.js",
26+
"./task": "./dist/task.js"
2727
},
2828
"typesVersions": {
2929
"*": {
3030
"*": [
3131
"dist/*"
3232
],
33-
"string-utils": [
34-
"dist/string-utils.d.ts"
33+
"output": [
34+
"dist/output.d.ts"
3535
],
36-
"task": [
37-
"dist/task.d.ts"
36+
"list-utils": [
37+
"dist/list-utils.d.ts"
3838
],
39-
"beep-utils": [
40-
"dist/beep-utils.d.ts"
39+
"botasaurus-storage": [
40+
"dist/botasaurus-storage.d.ts"
4141
],
42-
"env": [
43-
"dist/env.d.ts"
42+
"decorators-common": [
43+
"dist/decorators-common.d.ts"
4444
],
45-
"paths": [
46-
"dist/paths.d.ts"
45+
"on-close": [
46+
"dist/on-close.d.ts"
4747
],
48-
"increment": [
49-
"dist/increment.d.ts"
48+
"dontcache": [
49+
"dist/dontcache.d.ts"
5050
],
5151
"formats": [
5252
"dist/formats.d.ts"
5353
],
54-
"output": [
55-
"dist/output.d.ts"
56-
],
57-
"on-close": [
58-
"dist/on-close.d.ts"
59-
],
6054
"utils": [
6155
"dist/utils.d.ts"
6256
],
63-
"cache": [
64-
"dist/cache.d.ts"
57+
"beep-utils": [
58+
"dist/beep-utils.d.ts"
59+
],
60+
"decorators-utils": [
61+
"dist/decorators-utils.d.ts"
6562
],
6663
"page": [
6764
"dist/page.d.ts"
6865
],
69-
"botasaurus-storage": [
70-
"dist/botasaurus-storage.d.ts"
66+
"cache": [
67+
"dist/cache.d.ts"
7168
],
72-
"decorators-utils": [
73-
"dist/decorators-utils.d.ts"
69+
"increment": [
70+
"dist/increment.d.ts"
7471
],
75-
"decorators-common": [
76-
"dist/decorators-common.d.ts"
72+
"paths": [
73+
"dist/paths.d.ts"
74+
],
75+
"env": [
76+
"dist/env.d.ts"
7777
],
7878
"playwright": [
7979
"dist/playwright.d.ts"
8080
],
81-
"list-utils": [
82-
"dist/list-utils.d.ts"
81+
"string-utils": [
82+
"dist/string-utils.d.ts"
8383
],
84-
"dontcache": [
85-
"dist/dontcache.d.ts"
84+
"task": [
85+
"dist/task.d.ts"
8686
]
8787
}
8888
},

js/botasaurus-server-js/package-lock.json

Lines changed: 10 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

js/botasaurus-server-js/src/task-helper.ts

Lines changed: 38 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { TaskStatus } from "./models";
66
import { NDJSONWriteStream } from "./ndjson"
77
import { isLargeFile } from './utils'
88
import { sleep } from 'botasaurus/utils'
9+
import { normalizeItem } from 'botasaurus/output'
910

1011
export function createProjection(ets: string[]) {
1112
return ets.reduce((acc: any, field) => {
@@ -14,16 +15,11 @@ export function createProjection(ets: string[]) {
1415
}, {});
1516
}
1617

17-
function normalizeKeys(firstObjectKeysMapping: any, item: any) {
18-
for (const key of firstObjectKeysMapping) {
19-
item[key] = item[key] === undefined ? null : item[key]
20-
}
21-
}
2218

23-
function populateMissingKeys(item: any, firstObjectKeysMapping: any) {
24-
for (const key of Object.keys(item)) {
25-
if (!(key in firstObjectKeysMapping)) {
26-
firstObjectKeysMapping[key] = null
19+
function populateMissingKeys(newKeys: string[], allKeysMapping: any) {
20+
for (const key of newKeys) {
21+
if (!(key in allKeysMapping)) {
22+
allKeysMapping[key] = null
2723
}
2824
}
2925
}
@@ -91,51 +87,54 @@ function renameTemporaryFile(tempFilePath: string, taskFilePathTemp: string) {
9187
})
9288
}
9389

94-
function arraysEqual(a:any, b:any) {
95-
if (a === b) return true;
96-
if (a == null || b == null) return false;
97-
if (a.length !== b.length) return false;
98-
99-
// If you don't care about the order of the elements inside
100-
// the array, you should sort both arrays here.
101-
// Please note that calling sort on an array will modify that array.
102-
// you might want to clone your array first.
103-
104-
for (var i = 0; i < a.length; ++i) {
105-
if (a[i] !== b[i]) return false;
106-
}
107-
return true;
108-
}
109-
11090
async function normalizeAndDeduplicateChildrenTasks(ids: number[], parentId:number, removeDuplicatesBy: string | null) {
11191
let itemsCount = 0
112-
let shouldNormalize = false
113-
let firstItem:any = null
114-
let firstObjectKeysMapping: any = null
115-
let firstObjectKeys: any = null
92+
let allKeysMapping: any = null
93+
let allKeys: any = null
94+
let firstItemKeyCount = 0
11695
const taskFilePath = TaskResults.generateTaskFilePath(parentId)
11796

97+
// First pass: collect all unique keys from all objects
11898
await TaskResults.streamMultipleTask(ids, (item) => {
119-
if (firstItem === null) {
120-
firstItem = item
121-
firstObjectKeysMapping = createKeyToNullMapping(firstItem)
122-
firstObjectKeys= Object.keys(firstItem)
123-
}
124-
125-
if (!arraysEqual(firstObjectKeys, Object.keys(item))) {
126-
shouldNormalize = true
127-
populateMissingKeys(item, firstObjectKeysMapping)
99+
const itemKeys = Object.keys(item)
100+
101+
if (allKeysMapping === null) {
102+
// First item: initialize with its keys
103+
allKeysMapping = createKeyToNullMapping(item)
104+
firstItemKeyCount = itemKeys.length
105+
} else {
106+
const currentKeyCount = Object.keys(allKeysMapping).length
107+
108+
if (itemKeys.length !== currentKeyCount) {
109+
// Different number of keys - collect new ones
110+
populateMissingKeys(itemKeys, allKeysMapping)
111+
} else {
112+
// Same number of keys, but check if there are any new keys (different keys, not just different order)
113+
for (const key of itemKeys) {
114+
if (!(key in allKeysMapping)) {
115+
// Found a new key, collect all keys from this item
116+
populateMissingKeys(itemKeys, allKeysMapping)
117+
break
118+
}
119+
}
120+
}
128121
}
129122
})
130123

124+
// After first pass, get the complete list of all keys
125+
if (allKeysMapping !== null) {
126+
allKeys = Object.keys(allKeysMapping)
127+
}
128+
129+
const shouldNormalize = allKeys && allKeys.length !== firstItemKeyCount
131130

132131
const tempfile = taskFilePath + '.temp'
133132
const ndjsonWriteStream = new NDJSONWriteStream(tempfile)
134133
const seen = new Set()
135134
try {
136135
await TaskResults.streamMultipleTask(ids, async (item) => {
137136
if (shouldNormalize) {
138-
normalizeKeys(firstObjectKeys, item)
137+
item = normalizeItem(allKeys, item)
139138
}
140139

141140
if (removeDuplicatesBy) {

0 commit comments

Comments
 (0)