Skip to content

Commit 8095c99

Browse files
committed
Fixed directory imputation during batch processing
Signed-off-by: Omkar Phansopkar <[email protected]>
1 parent 19d3d6a commit 8095c99

File tree

1 file changed

+27 -14 lines changed

src/services/workbenchDB.ts

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -309,11 +309,12 @@ export class WorkbenchDB {
309309
const stream = fs.createReadStream(jsonFilePath, { encoding: "utf8" });
310310
let files_count = 0;
311311
let dirs_count = 0;
312-
let index = 0;
313312
let rootPath: string | null = null;
314313
let hasRootPath = false;
315314
const batchSize = 1000;
316315
let files: Resource[] = [];
316+
const parsedFilePaths = new Set<string>();
317+
317318
let progress = 0;
318319
let promiseChain: Promise<unknown> = this.sync;
319320

@@ -387,31 +388,38 @@ export class WorkbenchDB {
387388
}
388389
// @TODO: When/if scancode reports directories in its header, this needs
389390
// to be replaced.
390-
if (index === 0) {
391+
if (parsedFilePaths.size === 0) {
391392
dirs_count = file.dirs_count;
392393
}
393-
file.id = index++;
394+
file.id = parsedFilePaths.size;
394395

395396
primaryPromise._parseLicenseDetections(file, TopLevelData);
396397
primaryPromise._parseLicenseClues(file, TopLevelData);
397398

398399
files.push(file);
400+
parsedFilePaths.add(file.path);
401+
399402
if (files.length >= batchSize) {
400403
// Need to set a new variable before handing to promise
401404
this.pause();
402405

403406
promiseChain = promiseChain
404-
.then(() => this._imputeMissingIntermediateDirectories(files))
407+
.then(() =>
408+
primaryPromise._imputeIntermediateDirectories(
409+
files,
410+
parsedFilePaths
411+
)
412+
)
405413
.then(() => primaryPromise._batchCreateFiles(files))
406414
.then(() => {
407415
const currentProgress = Math.round(
408-
(index / (files_count + dirs_count)) * 100
416+
(parsedFilePaths.size / (files_count + dirs_count)) * 100
409417
);
410418
if (currentProgress > progress) {
411419
progress = currentProgress;
412420
console.info(
413421
`Batch-${++batchCount} completed, \n`,
414-
`JSON Import progress @ ${progress} % -- ${index}/${files_count}+${dirs_count}`
422+
`JSON Import progress @ ${progress} % -- ${parsedFilePaths.size}/${files_count}+${dirs_count}`
415423
);
416424
onProgressUpdate(progress);
417425
}
@@ -436,15 +444,18 @@ export class WorkbenchDB {
436444
type: "directory",
437445
files_count: files_count,
438446
});
447+
parsedFilePaths.add(rootPath);
439448
}
440449
})
441-
.then(() => this._imputeIntermediateDirectories(files))
450+
.then(() =>
451+
this._imputeIntermediateDirectories(files, parsedFilePaths)
452+
)
442453
.then(() => this._batchCreateFiles(files))
443454
.then(() => this.db.Header.create(TopLevelData.parsedHeader))
444455
.then(() => {
445456
console.info(
446457
`Batch-${++batchCount} completed, \n`,
447-
`JSON Import progress @ ${progress} % -- ${index}/${files_count}+${dirs_count}`
458+
`JSON Import progress @ ${progress} % -- ${parsedFilePaths.size}/${files_count}+${dirs_count}`
448459
);
449460
onProgressUpdate(90);
450461
})
@@ -884,31 +895,33 @@ export class WorkbenchDB {
884895
}
885896

886897
// Adds & modifies files array in place, adding missing intermediate directories
887-
_imputeIntermediateDirectories(files: Resource[]) {
888-
const availableFiles = new Set(files.map((file) => file.path));
898+
_imputeIntermediateDirectories(
899+
files: Resource[],
900+
parsedFilePaths: Set<string>
901+
) {
889902
const intermediateDirectories: Resource[] = [];
890903

891904
files.forEach((file) => {
892905
file.parent = parentPath(file.path);
893906

894907
// Add intermediate directories if parent not available in files
895-
if (!availableFiles.has(file.parent)) {
908+
if (!parsedFilePaths.has(file.parent)) {
896909
for (
897910
let currentDir = file.parent;
898911
currentDir !== parentPath(currentDir) &&
899-
!availableFiles.has(currentDir);
912+
!parsedFilePaths.has(currentDir);
900913
currentDir = parentPath(currentDir)
901914
) {
902-
availableFiles.add(currentDir);
903915
intermediateDirectories.push({
916+
id: parsedFilePaths.size,
904917
path: currentDir,
905918
parent: parentPath(currentDir),
906919
name: path.basename(currentDir),
907920
type: "directory",
908921
files_count: 0,
909922
});
923+
parsedFilePaths.add(currentDir);
910924
}
911-
availableFiles.add(file.parent);
912925
}
913926
});
914927
files.push(...intermediateDirectories);

0 commit comments

Comments
 (0)