Skip to content

Commit adcdd5f

Browse files
committed
Pre-compile all regex patterns in cms scan at module load time
1 parent c62ee41 commit adcdd5f

File tree

1 file changed

+67
-73
lines changed
  • libs/core-scanner/src/scans

1 file changed

+67
-73
lines changed

libs/core-scanner/src/scans/cms.ts

Lines changed: 67 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,81 +1,8 @@
11
import * as _ from 'lodash';
22
import { HTTPResponse } from 'puppeteer';
3-
43
import { CmsScan } from 'entities/scan-data.entity';
5-
64
import { Logger } from 'pino';
75

8-
/**
 * Determines the CMS for the page's main response.
 *
 * HTML-body matches take precedence over header matches; in each list the
 * first matching `cmsData` entry is the one reported.
 *
 * @param parentLogger Logger forwarded to the HTML matcher.
 * @param mainResponse Main document response; when falsy no matching is done.
 * @returns `{ cms }` holding the matched entry's `cms`, or `null` when there
 *   is no response or nothing matched.
 */
export async function buildCmsResult(
  parentLogger: Logger,
  mainResponse: HTTPResponse,
): Promise<CmsScan> {
  // Without a response neither matcher has anything to inspect.
  if (!mainResponse) {
    return { cms: null };
  }

  const htmlMatches = await getHtmlMatches(parentLogger, mainResponse);
  const headerMatches = await getHeaderMatches(mainResponse);

  // Header matches are only consulted when the HTML produced none.
  const [winner] = htmlMatches.length > 0 ? htmlMatches : headerMatches;

  return { cms: winner ? winner.cms : null };
}
29-
30-
/**
 * Returns every `cmsData` entry with at least one `html` pattern that matches
 * the response body (case-insensitively).
 *
 * @param logger Accepted for signature compatibility; not used in the body.
 * @param response Response whose text is scanned; a falsy response is treated
 *   as an empty body.
 * @returns The matching entries, in `cmsData` order.
 */
const getHtmlMatches = async (logger: Logger, response: HTTPResponse) => {
  const actualHtml = response ? await response.text() : '';

  return cmsData.filter((obj) => {
    // Entries without an `html` rule can never match on the body.
    if (!obj.html) {
      return false;
    }

    // `html` may be a single pattern or a list of patterns.
    const patterns = Array.isArray(obj.html) ? obj.html : [obj.html];

    // `some` stops at the first hit and always yields a boolean, so every
    // code path of the filter predicate returns an explicit value.
    return patterns.some((pattern) =>
      new RegExp(pattern, 'i').test(actualHtml),
    );
  });
};
51-
52-
/**
 * Returns every `cmsData` entry with at least one `headers` rule satisfied by
 * the response headers.
 *
 * A rule is satisfied when a header with the rule's key (compared
 * lower-cased) is present and either the rule's `value` is the empty string
 * or the header value matches `value` as a case-insensitive regular
 * expression.
 *
 * @param response Response whose headers are inspected; a falsy response is
 *   treated as having no headers.
 * @returns The matching entries, in `cmsData` order.
 */
const getHeaderMatches = async (response: HTTPResponse) => {
  // `headers()` is synchronous, so no `await` is needed here.
  const actualHeaders = response ? response.headers() : {};
  const formattedActualHeaders = _.transform(
    actualHeaders,
    function (result, val, key) {
      result[key.toLowerCase()] = val.toLowerCase();
    },
  );

  // Constant-time presence check, replacing a fresh `Object.keys(...)` array
  // and linear `includes` scan for every rule.
  const hasHeader = (name: string): boolean =>
    Object.prototype.hasOwnProperty.call(formattedActualHeaders, name);

  return cmsData.filter((obj) => {
    if (!obj.headers) {
      return false;
    }

    return obj.headers.some((header) => {
      const formattedKey = header.key.toLowerCase();
      if (!hasHeader(formattedKey)) {
        return false;
      }

      // An empty `value` means "header present is enough"; skip building a
      // regex that would match everything anyway.
      if (header.value === '') {
        return true;
      }

      return new RegExp(header.value, 'i').test(
        formattedActualHeaders[formattedKey],
      );
    });
  });
};
78-
796
const cmsData = [
807
{
818
cms: 'Adobe Experience Manager',
@@ -391,3 +318,70 @@ const cmsData = [
391318
],
392319
},
393320
];
321+
322+
/**
 * `cmsData` with all of its patterns compiled once, when this module is
 * evaluated.
 *
 * Every entry keeps its original fields and additionally carries:
 * - `htmlPatterns`: one case-insensitive `RegExp` per `html` source. `html`
 *   may be a single source or an array of sources; a missing or empty `html`
 *   yields an empty list.
 * - `headerPatterns`: each `headers` item plus `keyLower` (its `key`
 *   lower-cased) and `valueRegex` (a case-insensitive `RegExp` built from
 *   `value`, or `null` when `value` is empty). Entries without `headers` get
 *   an empty list.
 */
const compiledCmsData = cmsData.map((entry) => {
  let htmlPatterns: RegExp[] = [];
  if (entry.html) {
    // Normalise the single-pattern form to a list before compiling.
    const sources = Array.isArray(entry.html) ? entry.html : [entry.html];
    htmlPatterns = sources.map((source) => new RegExp(source, 'i'));
  }

  const headerPatterns = (entry.headers ?? []).map((header) => ({
    ...header,
    keyLower: header.key.toLowerCase(),
    // `null` marks a rule that only requires the header to be present.
    valueRegex: header.value ? new RegExp(header.value, 'i') : null,
  }));

  return { ...entry, htmlPatterns, headerPatterns };
});
336+
337+
/**
 * Determines the CMS for the page's main response and logs how long each
 * matching phase took.
 *
 * The HTML body is checked first; headers are only checked when the body
 * produced no match. The timing line is logged at `info` level on every call,
 * including when `mainResponse` is falsy.
 *
 * @param parentLogger Logger used for the timing line and forwarded to the
 *   HTML matcher.
 * @param mainResponse Main document response; when falsy no matching is done.
 * @returns `{ cms }` holding the matched entry's `cms`, or `null` when
 *   nothing matched.
 */
export async function buildCmsResult(
  parentLogger: Logger,
  mainResponse: HTTPResponse,
): Promise<CmsScan> {
  const startedAt = performance.now();
  const htmlMatch = mainResponse
    ? await getHtmlMatch(parentLogger, mainResponse)
    : null;
  const htmlFinishedAt = performance.now();

  // Headers are a fallback: skip them once the body has identified a CMS.
  const headerMatch =
    htmlMatch || !mainResponse ? null : await getHeaderMatch(mainResponse);
  const finishedAt = performance.now();

  const htmlMs = (htmlFinishedAt - startedAt).toFixed(2);
  const headerMs = (finishedAt - htmlFinishedAt).toFixed(2);
  const totalMs = (finishedAt - startedAt).toFixed(2);
  parentLogger.info(
    `CMS scan timing: getHtmlMatch=${htmlMs}ms, getHeaderMatch=${headerMs}ms, total=${totalMs}ms`,
  );

  // At most one of the two matches is set at this point.
  const detected = htmlMatch ?? headerMatch;

  return { cms: detected?.cms ?? null };
}
358+
359+
/**
 * Finds the first `compiledCmsData` entry with an HTML pattern that matches
 * the response body.
 *
 * @param logger Accepted for signature compatibility; not used in the body.
 * @param response Response whose text is scanned; a falsy response is treated
 *   as an empty body.
 * @returns The first matching entry, or `undefined` when none matches.
 */
const getHtmlMatch = async (logger: Logger, response: HTTPResponse) => {
  let body = '';
  if (response) {
    body = await response.text();
  }

  for (const candidate of compiledCmsData) {
    for (const pattern of candidate.htmlPatterns) {
      if (pattern.test(body)) {
        return candidate;
      }
    }
  }

  return undefined;
};
366+
367+
/**
 * Finds the first `compiledCmsData` entry with a header pattern satisfied by
 * the response headers.
 *
 * A pattern is satisfied when a header named `keyLower` is present (header
 * names are lower-cased before comparison) and either the pattern has no
 * `valueRegex` or `valueRegex` matches the lower-cased header value.
 *
 * @param response Response whose headers are inspected; a falsy response is
 *   treated as having no headers.
 * @returns The first matching entry, or `undefined` when none matches.
 */
const getHeaderMatch = async (response: HTTPResponse) => {
  // `headers()` is synchronous, so no `await` is needed here.
  const actualHeaders = response ? response.headers() : {};
  const formattedActualHeaders = _.transform(
    actualHeaders,
    function (result, val, key) {
      result[key.toLowerCase()] = val.toLowerCase();
    },
  );

  // Constant-time presence check, replacing a fresh `Object.keys(...)` array
  // and linear `includes` scan for every pattern of every entry.
  const hasHeader = (name: string): boolean =>
    Object.prototype.hasOwnProperty.call(formattedActualHeaders, name);

  return compiledCmsData.find((obj) =>
    obj.headerPatterns.some((header) => {
      if (!hasHeader(header.keyLower)) {
        return false;
      }

      // A `null` regex means the header's presence alone is sufficient.
      return header.valueRegex
        ? header.valueRegex.test(formattedActualHeaders[header.keyLower])
        : true;
    }),
  );
};

0 commit comments

Comments
 (0)