Skip to content

Commit 34f7409

Browse files
authored
Implement webworkers for parallel data parsing (#49)
1 parent fad62be commit 34f7409

File tree

9 files changed

+452
-79
lines changed

9 files changed

+452
-79
lines changed

package-lock.json

Lines changed: 293 additions & 26 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
]
5454
},
5555
"devDependencies": {
56+
"@craco/craco": "^7.1.0",
5657
"tailwindcss": "latest"
5758
}
5859
}

src/Components/Atoms/DropBox.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ const { Dragger } = Upload;
1010

1111
export function DropBox() {
1212
const { setCpuData, setMemoryData, setSwapData, setBlockData, setNetData, setNetErrData, setFileDetails, setDataLoaded } = useDataContext();
13-
1413

1514
const props = { // props for antd upload component
1615
multiple: false,
@@ -25,7 +24,7 @@ export function DropBox() {
2524
try{
2625
if(fileContent.includes("Linux") && fileContent.includes("all")) { // Check if file is a sar file and has the correct content
2726

28-
const dataObj = callParse(fileContent); // Object containing more objects (inception! 🤯)
27+
const dataObj = await callParse(fileContent); // Object containing more objects (inception! 🤯)
2928
// Save data in context
3029
setCpuData(dataObj.cpuObject);
3130
setMemoryData(dataObj.memoryObject);

src/Utils/callParse.js

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
import readDataParallel from "./readDataParallel";
import parseAllParallel from "./parseAllParallel";

/**
 * Read and parse raw sar file content using parallel web workers.
 *
 * @param {string} fileContent - full text of an uploaded sar file
 * @returns {Promise<Object>} object with one parsed data object per sar
 *   section: cpuObject, memoryObject, swapObject, blockObject,
 *   networkObject, networkErrObject, fileDetails
 */
export async function callParse(fileContent) {
  // First pass: split/clean the raw rows across a pool of workers.
  const sarFileData = await readDataParallel(fileContent);
  // Second pass: run each section parser in its own worker, in parallel.
  const results = await parseAllParallel(sarFileData);

  // Destructure explicitly so only the expected keys are returned.
  const {
    cpuObject,
    memoryObject,
    swapObject,
    blockObject,
    networkObject,
    networkErrObject,
    fileDetails,
  } = results;

  return {
    cpuObject,
    memoryObject,
    swapObject,
    blockObject,
    networkObject,
    networkErrObject,
    fileDetails,
  };
}

src/Utils/parseAllParallel.js

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
/**
 * Run every sar section parser in its own module web worker and merge the
 * per-section results into a single object.
 *
 * @param {Array} sarFileData - pre-split/cleaned sar rows
 * @returns {Promise<Object>} combined results keyed by output name
 *   (cpuObject, blockObject, memoryObject, ...)
 */
export default function parseAllParallel(sarFileData) {
  // Output keys; each one is handled by a dedicated worker.
  const resultKeys = [
    "cpuObject",
    "blockObject",
    "memoryObject",
    "swapObject",
    "networkObject",
    "networkErrObject",
    "fileDetails",
  ];

  // Spawn one worker per key and wrap its message round-trip in a promise.
  const runTask = (key) =>
    new Promise((resolve, reject) => {
      // Module worker so parseWorker.js can use import statements.
      const worker = new Worker(new URL("./parseWorker.js", import.meta.url), {
        type: "module",
      });

      worker.onmessage = ({ data }) => {
        const { funcKey, result, error } = data;
        if (error) {
          reject(new Error(`Worker ${funcKey} error: ${error}`));
        } else {
          resolve({ [funcKey]: result });
        }
        worker.terminate();
      };

      worker.onerror = (err) => {
        reject(err);
        worker.terminate();
      };

      // Kick off the parse in the worker.
      worker.postMessage({ funcKey: key, sarData: sarFileData });
    });

  // Merge the single-key partial objects into one combined result object.
  return Promise.all(resultKeys.map(runTask)).then((partials) =>
    partials.reduce((combined, partial) => Object.assign(combined, partial), {})
  );
}

src/Utils/parseWorker.js

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
import {
  parseCPUData,
  parseDiskIO,
  parseMemoryData,
  parseSwapData,
  parseNetworkData,
  parseNetErrorData,
  parseFileDetails,
} from "./parseData";

// Map each result key to its parser. Module-level so the table is built once
// per worker instead of on every message.
const FUNC_MAP = {
  cpuObject: parseCPUData,
  blockObject: parseDiskIO,
  memoryObject: parseMemoryData,
  swapObject: parseSwapData,
  networkObject: parseNetworkData,
  networkErrObject: parseNetErrorData,
  fileDetails: parseFileDetails,
};

/**
 * Worker entry point: expects { funcKey, sarData } and replies with either
 * { funcKey, result } on success or { funcKey, error } on failure, so the
 * spawning side can resolve/reject its promise.
 */
onmessage = ({ data }) => {
  const { funcKey, sarData } = data;

  if (!(funcKey in FUNC_MAP)) {
    postMessage({ funcKey, error: "Unknown parsing function key." });
    return;
  }

  try {
    const result = FUNC_MAP[funcKey](sarData);
    postMessage({ funcKey, result });
  } catch (err) {
    // Error objects don't structured-clone cleanly everywhere; send the text.
    postMessage({ funcKey, error: err.message });
  }
};

src/Utils/readData.js

Lines changed: 0 additions & 36 deletions
This file was deleted.

src/Utils/readDataParallel.js

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
/**
 * readDataParallel
 * Splits raw sar text into chunks and cleans the rows in parallel workers.
 *
 * @param {string} data - raw text data (with \n-separated lines)
 * @returns {Promise<Array>} resolves to parsed and filtered SAR data, in
 *   original row order (chunk results are concatenated in spawn order)
 */
export default function readDataParallel(data) {
  const rows = data.split("\n");
  const total = rows.length;
  // Use available CPU cores, defaulting to 4 when the API is unavailable.
  const numWorkers = navigator.hardwareConcurrency || 4;
  const chunkSize = Math.ceil(total / numWorkers);

  const promises = [];
  for (let i = 0; i < numWorkers; i++) {
    const start = i * chunkSize;
    const end = Math.min(start + chunkSize, total);
    const chunk = rows.slice(start, end);

    promises.push(
      new Promise((resolve, reject) => {
        const worker = new Worker(new URL("./readWorker.js", import.meta.url));
        worker.onmessage = (msg) => resolve(msg.data);
        worker.onerror = (err) => {
          // The worker close()s itself on success; make sure a failed one
          // is torn down too instead of leaking.
          worker.terminate();
          reject(err);
        };
        worker.postMessage(chunk);
      })
    );
  }

  // Flatten per-chunk results back into a single array, preserving order.
  return Promise.all(promises).then((results) => results.flat());
}

src/Utils/readWorker.js

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
/**
 * Clean a single sar text row into an array of non-empty fields.
 * RESTART marker rows are dropped (returns null); a 12-hour timestamp is
 * merged with its AM/PM marker into one field.
 *
 * @param {string} row - one raw line of sar output
 * @returns {Array<string>|null} cleaned fields, or null for RESTART rows
 */
function parseRow(row) {
  if (row.includes("RESTART")) return null;

  // Split on spaces, then re-split on commas (array.toString() joins with
  // ","), and drop blank fields.
  const fields = row
    .split(" ")
    .join(",")
    .split(",")
    .filter((part) => part.trim() !== "");

  // Fold "HH:MM:SS AM"/"PM" into a single timestamp field.
  if (fields[1] === "AM" || fields[1] === "PM") {
    fields[0] = `${fields[0]} ${fields[1]}`;
    fields.splice(1, 1);
  }
  return fields;
}
14+
// Worker entry point: receives a chunk of raw rows, posts back the cleaned
// rows (nulls and empty results dropped), then shuts the worker down.
onmessage = (event) => {
  const rows = event.data;
  const cleaned = rows
    .map(parseRow)
    .filter((r) => r && String(r).trim());
  postMessage(cleaned);
  // eslint-disable-next-line
  close();
};

0 commit comments

Comments
 (0)