Skip to content

Commit ce563b8

Browse files
authored
Improve performance for parsing wf input (#889)
Improve performance of parsing workflow input
1 parent c3f9811 commit ce563b8

File tree

2 files changed

+100
-35
lines changed

2 files changed

+100
-35
lines changed

src/utils/data-formatters/__tests__/format-input-payload.test.ts

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ describe('formatInputPayload', () => {
3232
expect(formatInputPayload(input)).toEqual(expected);
3333
});
3434
// end of empty data checks
35+
test('should handle base64 encoded JSON with boolean value', () => {
36+
const input = { data: btoa(`true`) };
37+
const expected = [true];
38+
expect(formatInputPayload(input)).toEqual(expected);
39+
});
3540

3641
test('should parse base64 encoded JSON lines separated by \n correctly', () => {
3742
const input = {
@@ -106,17 +111,44 @@ describe('formatInputPayload', () => {
106111
expect(formatInputPayload(input)).toEqual(expected);
107112
});
108113

114+
test('should handle base64 encoded JSON with exponential numbers', () => {
115+
const input = {
116+
data: btoa(`[333e-10]\n{"number": 1.23e-10} -1.23e+10\n1e20`),
117+
};
118+
const expected = [[333e-10], { number: 1.23e-10 }, -1.23e10, 1e20];
119+
expect(formatInputPayload(input)).toEqual(expected);
120+
});
121+
109122
test('should handle base64 encoded multiple mixed JSON objects', () => {
110123
const input = {
111124
data: btoa(
112-
`{"a": 1, "b": "text"}\n{"c": [1, 2, 3], "d": null}\n42\n"Hello"`
125+
`{"a": 1, "b": "text"} 0\n{"c": [1, 2, 3], "d": null}\n42\n"Hello"\ntrue`
113126
),
114127
};
115128
const expected = [
116129
{ a: 1, b: 'text' },
130+
0,
117131
{ c: [1, 2, 3], d: null },
118132
42,
119133
'Hello',
134+
true,
135+
];
136+
expect(formatInputPayload(input)).toEqual(expected);
137+
});
138+
139+
test('should handle base64 encoded JSON with nested json string', () => {
140+
const stringifiedJson = JSON.stringify(
141+
`{ street: ['Hello', 1, { a: 'b' }] }`
142+
);
143+
const input = {
144+
data: btoa(`{"name": "John", "age": 30, "address": ${stringifiedJson}}`),
145+
};
146+
const expected = [
147+
{
148+
name: 'John',
149+
age: 30,
150+
address: `{ street: ['Hello', 1, { a: 'b' }] }`,
151+
},
120152
];
121153
expect(formatInputPayload(input)).toEqual(expected);
122154
});
Lines changed: 67 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,95 @@
11
import logger from '@/utils/logger';
22

33
import losslessJsonParse from '../lossless-json-parse';
4+
5+
// Characters that delimit consecutive top-level JSON values in the decoded
// payload; they are skipped between values and end a value only when the
// scanner is not inside an array, object, or string.
const separators = ['\n', ' '];
6+
47
/**
 * Decodes a base64-encoded input payload and parses every JSON value found
 * in it.
 *
 * @param payload - Object whose optional `data` field holds the base64
 *   string; the object itself may be `null` or `undefined`.
 * @returns `null` when `data` is missing or empty, otherwise the array of
 *   values returned by `parseMultipleInputs` for the decoded text.
 * @throws Whatever `atob` throws when `data` is not valid base64.
 */
const formatInputPayload = (
  payload: { data?: string | null } | null | undefined
) => {
  const data = payload?.data;
  if (!data) {
    return null;
  }

  // `atob` returns a "binary string": one UTF-16 code unit per decoded byte.
  // Multi-byte UTF-8 characters would therefore reach the parser as mojibake,
  // so reinterpret the bytes as UTF-8 first.
  const binaryString = atob(data);
  let decodedText = binaryString;
  try {
    const bytes = Uint8Array.from(binaryString, (char) => char.charCodeAt(0));
    decodedText = new TextDecoder('utf-8', { fatal: true }).decode(bytes);
  } catch {
    // The bytes are not valid UTF-8: keep the byte-per-character string so
    // such payloads are handled exactly as they were before.
  }

  return parseMultipleInputs(decodedText);
};
1618

17-
function parseJsonLines(input: string) {
18-
const jsonArray = [];
19-
let currentJson = '';
20-
const separators = ['\n', ' '];
21-
22-
for (let i = 0; i < input.length; i++) {
23-
const char = input[i];
24-
if (separators.includes(char)) {
25-
// Try to parse the current JSON string
26-
if (currentJson) {
27-
try {
28-
const jsonObject = losslessJsonParse(currentJson);
29-
// If successful, add the object to the array
30-
jsonArray.push(jsonObject);
31-
// Reset currentJson for the next JSON object
32-
currentJson = '';
33-
} catch {
34-
// If parsing fails, treat the separator as part of the currentJson and continue with the next char
35-
currentJson += char;
36-
}
37-
}
38-
} else {
39-
currentJson += char;
19+
/**
 * Walks `input` from left to right, extracting and parsing each
 * separator-delimited JSON value in turn.
 *
 * A value that fails to parse is logged and skipped; scanning then resumes
 * right after it, so one bad value does not stop the remaining ones from
 * being collected.
 *
 * @param input - Decoded text containing zero or more JSON values.
 * @returns The successfully parsed values, in the order they appear.
 */
const parseMultipleInputs = (input: string) => {
  const parsedValues = [];
  let cursor = 0;

  for (;;) {
    // Skip any run of separators in front of the next value.
    while (cursor < input.length && separators.includes(input[cursor])) {
      cursor += 1;
    }
    if (cursor >= input.length) {
      break;
    }

    try {
      const extracted = extractJsonValue(input, cursor);

      if (extracted.endIndex <= extracted.startIndex) {
        // Nothing was consumed; step forward so the loop always advances.
        cursor += 1;
        continue;
      }

      // Advance past the value before parsing it, so a parse failure does
      // not cause the same text to be scanned again.
      cursor = extracted.endIndex;
      parsedValues.push(losslessJsonParse(extracted.jsonString));
    } catch (error) {
      logger.error(
        {
          input,
          currentIndex: cursor,
          error,
        },
        'Error parsing JSON string'
      );
    }
  }

  return parsedValues;
};
56+
57+
/**
 * Finds the extent of the next top-level JSON value in `input`, starting at
 * `startIndex`.
 *
 * The scan stops at the first separator that is outside every array, object
 * and string literal, or at the end of `input`. The value is not validated
 * here; the caller is expected to parse `jsonString`.
 *
 * @param input - Text to scan.
 * @param startIndex - Index of the first character of the value.
 * @returns The unchanged `startIndex`, the exclusive `endIndex` where the
 *   scan stopped, and `jsonString`, the slice between the two.
 */
const extractJsonValue = (input: string, startIndex: number) => {
  let endIndex = startIndex;
  let depth = 0; // number of currently open arrays/objects
  let inString = false;
  let escapeNext = false; // true right after a backslash inside a string

  for (; endIndex < input.length; endIndex++) {
    const char = input[endIndex];

    if (inString) {
      // Backslash escapes only have meaning inside string literals.
      if (escapeNext) {
        escapeNext = false;
      } else if (char === '\\') {
        escapeNext = true;
      } else if (char === '"') {
        inString = false;
      }
      continue;
    }

    if (char === '"') {
      inString = true;
    } else if (char === '[' || char === '{') {
      depth++;
    } else if (char === ']' || char === '}') {
      // Never go below zero: a stray closing bracket must not stop later
      // separators from ending this (malformed) value, otherwise every
      // following value would be swallowed into one unparseable chunk.
      if (depth > 0) depth--;
    } else if (depth === 0 && separators.includes(char)) {
      // A separator outside any array or object ends the current value.
      break;
    }
  }

  return {
    startIndex,
    endIndex,
    jsonString: input.slice(startIndex, endIndex),
  };
};
6295
export default formatInputPayload;

0 commit comments

Comments
 (0)