Skip to content

Commit 5ebf1ad

Browse files
committed
perf: offload all data processing to web workers with filter-stage caching and debounced compute
1 parent 9cc0efe commit 5ebf1ad

File tree

4 files changed

+146
-98
lines changed

4 files changed

+146
-98
lines changed

dashboard/lib/analytics.worker.ts

Lines changed: 66 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,14 @@ let logs: NginxLog[] = [];
6363
let logsWithoutBots: NginxLog[] = [];
6464
let cachedBaseKey = '';
6565
let cachedFilteredData: NginxLog[] = [];
66+
let cachedVersionKey = '';
67+
let cachedVersionFilteredData: NginxLog[] = [];
68+
let cachedDeviceKey = '';
69+
let cachedDeviceFilteredData: NginxLog[] = [];
70+
let cachedHourKey = '';
71+
let cachedHourFilteredData: NginxLog[] = [];
72+
let cachedDayKey = '';
73+
let cachedDayFilteredData: NginxLog[] = [];
6674

6775
function getDayId(date: Date): number {
6876
return new Date(date).setHours(0, 0, 0, 0);
@@ -118,7 +126,11 @@ self.onmessage = (e: MessageEvent<ComputeMessage | ParseAndStoreMessage>) => {
118126
const parsed = parseNginxLogs(data.rawLogs, data.logFormat);
119127
logs = [...logs, ...parsed];
120128
logsWithoutBots = [...logsWithoutBots, ...parsed.filter(row => !isBotOrCrawler(row.userAgent))];
121-
cachedBaseKey = ''; // invalidate cache when logs change
129+
cachedBaseKey = '';
130+
cachedVersionKey = '';
131+
cachedDeviceKey = '';
132+
cachedHourKey = '';
133+
cachedDayKey = '';
122134
return;
123135
}
124136

@@ -167,26 +179,59 @@ self.onmessage = (e: MessageEvent<ComputeMessage | ParseAndStoreMessage>) => {
167179
cachedBaseKey = baseKey;
168180
}
169181

170-
// Version filter
171-
const versionFilteredData = filter.version === null
172-
? filteredData
173-
: filteredData.filter(row => getVersion(row.path) === filter.version);
174-
175-
// Device filter
176-
let deviceFilteredData = versionFilteredData;
177-
if (filter.client !== null) deviceFilteredData = deviceFilteredData.filter(row => getClient(row.userAgent) === filter.client);
178-
if (filter.os !== null) deviceFilteredData = deviceFilteredData.filter(row => getOS(row.userAgent) === filter.os);
179-
if (filter.deviceType !== null) deviceFilteredData = deviceFilteredData.filter(row => getDevice(row.userAgent) === filter.deviceType);
180-
181-
// Hour filter
182-
const hourFilteredData = filter.hour === null
183-
? deviceFilteredData
184-
: deviceFilteredData.filter(row => row.timestamp?.getHours() === filter.hour);
185-
186-
// Day filter
187-
const dayFilteredData = filter.dayOfWeek === null
188-
? hourFilteredData
189-
: hourFilteredData.filter(row => row.timestamp?.getDay() === filter.dayOfWeek);
182+
// Version filter — cached
183+
const versionKey = `${baseKey}|${filter.version ?? ''}`;
184+
let versionFilteredData: NginxLog[];
185+
if (versionKey === cachedVersionKey) {
186+
versionFilteredData = cachedVersionFilteredData;
187+
} else {
188+
versionFilteredData = filter.version === null
189+
? filteredData
190+
: filteredData.filter(row => getVersion(row.path) === filter.version);
191+
cachedVersionFilteredData = versionFilteredData;
192+
cachedVersionKey = versionKey;
193+
}
194+
195+
// Device filter — cached
196+
const deviceKey = `${versionKey}|${filter.client ?? ''}|${filter.os ?? ''}|${filter.deviceType ?? ''}`;
197+
let deviceFilteredData: NginxLog[];
198+
if (deviceKey === cachedDeviceKey) {
199+
deviceFilteredData = cachedDeviceFilteredData;
200+
} else {
201+
let result = versionFilteredData;
202+
if (filter.client !== null) result = result.filter(row => getClient(row.userAgent) === filter.client);
203+
if (filter.os !== null) result = result.filter(row => getOS(row.userAgent) === filter.os);
204+
if (filter.deviceType !== null) result = result.filter(row => getDevice(row.userAgent) === filter.deviceType);
205+
deviceFilteredData = result;
206+
cachedDeviceFilteredData = result;
207+
cachedDeviceKey = deviceKey;
208+
}
209+
210+
// Hour filter — cached
211+
const hourKey = `${deviceKey}|${filter.hour ?? ''}`;
212+
let hourFilteredData: NginxLog[];
213+
if (hourKey === cachedHourKey) {
214+
hourFilteredData = cachedHourFilteredData;
215+
} else {
216+
hourFilteredData = filter.hour === null
217+
? deviceFilteredData
218+
: deviceFilteredData.filter(row => row.timestamp?.getHours() === filter.hour);
219+
cachedHourFilteredData = hourFilteredData;
220+
cachedHourKey = hourKey;
221+
}
222+
223+
// Day filter — cached
224+
const dayKey = `${hourKey}|${filter.dayOfWeek ?? ''}`;
225+
let dayFilteredData: NginxLog[];
226+
if (dayKey === cachedDayKey) {
227+
dayFilteredData = cachedDayFilteredData;
228+
} else {
229+
dayFilteredData = filter.dayOfWeek === null
230+
? hourFilteredData
231+
: hourFilteredData.filter(row => row.timestamp?.getDay() === filter.dayOfWeek);
232+
cachedDayFilteredData = dayFilteredData;
233+
cachedDayKey = dayKey;
234+
}
190235

191236
// Aggregations
192237
const endpointCounts = new Map<string, number>();

dashboard/lib/components/dashboard.tsx

Lines changed: 39 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,9 @@ import Users from "@/lib/components/users";
1010
import { Version, getVersion } from "@/lib/components/version";
1111
import { Location } from "@/lib/components/location";
1212
import { type Location as LocationType } from "@/lib/location"
13-
import { parseNginxLogs } from "@/lib/parse";
1413
import { useCallback, useEffect, useMemo, useRef, useState, useTransition } from "react";
1514
import type { WorkerResult } from '@/lib/analytics.worker';
15+
import type { ParseWorkerResult } from '@/lib/parse.worker';
1616
import { Device } from "@/lib/components/device/device";
1717
import { type Filter, newFilter } from "@/lib/filter";
1818
import { Period, periodStart } from "@/lib/period";
@@ -32,13 +32,9 @@ import dynamic from "next/dynamic";
3232
const NetworkBackground = dynamic(() => import("./network-background"), { ssr: false });
3333
const FileUpload = dynamic(() => import("./file-upload"));
3434

35-
const PARSE_CHUNK_SIZE = 5000;
36-
3735
export default function Dashboard({ fileUpload, demo, logFormat }: { fileUpload: boolean, demo: boolean, logFormat?: string }) {
3836
const [accessLogs, setAccessLogs] = useState<string[]>([]);
3937
const [logs, setLogs] = useState<NginxLog[]>([]);
40-
const parsedAccessCount = useRef(0);
41-
const parseCancelRef = useRef(false);
4238

4339
const [errorLogs, setErrorLogs] = useState<string[]>([]);
4440

@@ -96,13 +92,15 @@ export default function Dashboard({ fileUpload, demo, logFormat }: { fileUpload:
9692
startTransition(() => setFilter((previous) => ({ ...previous, dayOfWeek })))
9793
}, [])
9894

99-
// Worker ref and result state
95+
// Worker refs and result state
10096
const workerRef = useRef<Worker | null>(null);
10197
const workerSeqRef = useRef(0);
10298
const workerRawCountRef = useRef(0);
99+
const computeDebounceRef = useRef<ReturnType<typeof setTimeout> | null>(null);
100+
const parseWorkerRef = useRef<Worker | null>(null);
103101
const [workerResult, setWorkerResult] = useState<WorkerResult | null>(null);
104102

105-
// Create worker once on mount
103+
// Create workers once on mount
106104
useEffect(() => {
107105
const worker = new Worker(new URL('../analytics.worker.ts', import.meta.url));
108106
workerRef.current = worker;
@@ -111,25 +109,49 @@ export default function Dashboard({ fileUpload, demo, logFormat }: { fileUpload:
111109
setWorkerResult(e.data);
112110
}
113111
};
114-
return () => worker.terminate();
112+
113+
const parseWorker = new Worker(new URL('../parse.worker.ts', import.meta.url));
114+
parseWorkerRef.current = parseWorker;
115+
parseWorker.onmessage = (e: MessageEvent<ParseWorkerResult>) => {
116+
const { logs: newLogs, maxTimestamp, isFirstBatch } = e.data;
117+
if (newLogs.length === 0) return;
118+
if (isFirstBatch && maxTimestamp !== null) {
119+
const maxDate = new Date(maxTimestamp);
120+
if (inPeriod(maxDate, 'week')) setPeriod('week');
121+
else if (inPeriod(maxDate, 'month')) setPeriod('month');
122+
else if (inPeriod(maxDate, '6 months')) setPeriod('6 months');
123+
else setPeriod('all time');
124+
}
125+
setLogs(prev => [...prev, ...newLogs]);
126+
};
127+
128+
return () => {
129+
worker.terminate();
130+
parseWorker.terminate();
131+
};
115132
}, []);
116133

117-
// Send raw logs to worker for parsing (fires before compute effect)
134+
// Send raw logs to both workers for parsing (fires before compute effect)
118135
useEffect(() => {
119-
if (!workerRef.current || accessLogs.length <= workerRawCountRef.current) return;
136+
if (accessLogs.length <= workerRawCountRef.current) return;
120137
const newRawLogs = accessLogs.slice(workerRawCountRef.current);
138+
const isFirstBatch = workerRawCountRef.current === 0;
121139
workerRawCountRef.current = accessLogs.length;
122-
workerRef.current.postMessage({ type: 'parseAndStore', rawLogs: newRawLogs, logFormat });
140+
workerRef.current?.postMessage({ type: 'parseAndStore', rawLogs: newRawLogs, logFormat });
141+
parseWorkerRef.current?.postMessage({ rawLogs: newRawLogs, logFormat, isFirstBatch });
123142
}, [accessLogs]);
124143

125-
// Trigger computation when inputs change
144+
// Trigger computation when inputs change — debounced to avoid redundant work on rapid changes
126145
useEffect(() => {
127146
if (!workerRef.current) return;
128-
const seq = ++workerSeqRef.current;
129-
const locationMapEntries: [string, string][] = filter.location !== null
130-
? Array.from(locationMap.entries()).map(([ip, loc]) => [ip, loc.country])
131-
: [];
132-
workerRef.current.postMessage({ type: 'compute', seq, filter, settings, locationMap: locationMapEntries });
147+
if (computeDebounceRef.current) clearTimeout(computeDebounceRef.current);
148+
computeDebounceRef.current = setTimeout(() => {
149+
const seq = ++workerSeqRef.current;
150+
const locationMapEntries: [string, string][] = filter.location !== null
151+
? Array.from(locationMap.entries()).map(([ip, loc]) => [ip, loc.country])
152+
: [];
153+
workerRef.current?.postMessage({ type: 'compute', seq, filter, settings, locationMap: locationMapEntries });
154+
}, 50);
133155
}, [accessLogs, filter, settings, locationMap]);
134156

135157
useEffect(() => {
@@ -196,63 +218,6 @@ export default function Dashboard({ fileUpload, demo, logFormat }: { fileUpload:
196218
return url;
197219
}
198220

199-
useEffect(() => {
200-
if (accessLogs.length <= parsedAccessCount.current) return;
201-
202-
const newRawLogs = accessLogs.slice(parsedAccessCount.current);
203-
const isFirstBatch = parsedAccessCount.current === 0;
204-
parsedAccessCount.current = accessLogs.length;
205-
206-
const initPeriod = (parsed: ReturnType<typeof parseNginxLogs>) => {
207-
let maxDate = parsed[0].timestamp;
208-
for (const log of parsed) {
209-
if (log.timestamp && (!maxDate || log.timestamp > maxDate)) {
210-
maxDate = log.timestamp;
211-
}
212-
}
213-
if (maxDate) {
214-
if (inPeriod(maxDate, 'week')) setPeriod('week');
215-
else if (inPeriod(maxDate, 'month')) setPeriod('month');
216-
else if (inPeriod(maxDate, '6 months')) setPeriod('6 months');
217-
else setPeriod('all time');
218-
}
219-
};
220-
221-
// Small batches: parse synchronously
222-
if (newRawLogs.length <= PARSE_CHUNK_SIZE) {
223-
const newParsed = parseNginxLogs(newRawLogs, logFormat);
224-
if (newParsed.length === 0) return;
225-
if (isFirstBatch) initPeriod(newParsed);
226-
setLogs(prev => [...prev, ...newParsed]);
227-
return;
228-
}
229-
230-
// Large batches: chunk with setTimeout to avoid blocking the main thread,
231-
// but accumulate all results and update state only once at the end.
232-
parseCancelRef.current = false;
233-
let offset = 0;
234-
const allParsed: ReturnType<typeof parseNginxLogs> = [];
235-
const processChunk = () => {
236-
if (parseCancelRef.current) return;
237-
const chunk = newRawLogs.slice(offset, offset + PARSE_CHUNK_SIZE);
238-
if (chunk.length === 0) return;
239-
const parsed = parseNginxLogs(chunk, logFormat);
240-
if (parsed.length > 0) allParsed.push(...parsed);
241-
offset += PARSE_CHUNK_SIZE;
242-
if (offset < newRawLogs.length) {
243-
setTimeout(processChunk, 0);
244-
} else {
245-
// All chunks done — single state update
246-
if (allParsed.length > 0) {
247-
if (isFirstBatch) initPeriod(allParsed);
248-
setLogs(prev => [...prev, ...allParsed]);
249-
}
250-
}
251-
};
252-
processChunk();
253-
254-
return () => { parseCancelRef.current = true; };
255-
}, [accessLogs])
256221

257222

258223

dashboard/lib/components/location.tsx

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import { Dispatch, SetStateAction, useEffect, useState } from "react";
1+
import { Dispatch, SetStateAction, useEffect, useRef, useState } from "react";
22
import { type Location } from '@/lib/location'
33
import { generateDemoLocations } from "../demo";
44

@@ -26,6 +26,7 @@ export function Location({
2626
}) {
2727
const [loading, setLoading] = useState(false);
2828
const [endpointDisabled, setEndpointDisabled] = useState(false);
29+
const attemptedIPsRef = useRef(new Set<string>());
2930

3031
const fetchLocations = async (ipAddresses: string[]) => {
3132
const response = await fetch('/api/location', {
@@ -77,15 +78,18 @@ export function Location({
7778

7879
useEffect(() => {
7980
if (noFetch || endpointDisabled || unknownIPs.length === 0) return;
81+
const toFetch = unknownIPs.filter(ip => !attemptedIPsRef.current.has(ip));
82+
if (toFetch.length === 0) return;
83+
toFetch.forEach(ip => attemptedIPsRef.current.add(ip));
8084

8185
const fetchData = async () => {
8286
setLoading(true);
8387
try {
8488
let fetchedLocations: Location[];
8589
if (demo) {
86-
fetchedLocations = generateDemoLocations(unknownIPs);
90+
fetchedLocations = generateDemoLocations(toFetch);
8791
} else {
88-
fetchedLocations = await fetchLocations(unknownIPs);
92+
fetchedLocations = await fetchLocations(toFetch);
8993
}
9094
if (fetchedLocations.length > 0) {
9195
setLocationMap((prevMap) => {

dashboard/lib/parse.worker.ts

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,34 @@
1+
/// <reference lib="webworker" />
2+
export type {};
3+
4+
import { parseNginxLogs } from './parse';
5+
import type { NginxLog } from './types';
6+
7+
/** Request accepted by this worker: one batch of raw access-log lines to parse. */
type ParseMessage = {
  /** Raw log lines making up this batch. */
  rawLogs: string[];
  /** Optional log format, forwarded unchanged to `parseNginxLogs`. */
  logFormat?: string;
  /** Sender-supplied flag; when true the worker also scans for the latest timestamp. */
  isFirstBatch: boolean;
};

/** Reply posted back to the sender for every `ParseMessage`. */
export type ParseWorkerResult = {
  /** Everything `parseNginxLogs` returned for the batch. */
  logs: NginxLog[];
  /**
   * Latest valid log timestamp in epoch milliseconds. `null` when the batch
   * was not flagged as the first one, or when no entry carried a valid timestamp.
   */
  maxTimestamp: number | null;
  /** Echo of the request's `isFirstBatch` flag. */
  isFirstBatch: boolean;
};

/**
 * Parses the received batch and posts the parsed logs back. For a batch
 * flagged `isFirstBatch`, the reply also carries the most recent timestamp
 * found in that batch.
 */
self.onmessage = (e: MessageEvent<ParseMessage>) => {
  const { rawLogs, logFormat, isFirstBatch } = e.data;
  const parsed = parseNginxLogs(rawLogs, logFormat);

  let maxTimestamp: number | null = null;
  if (isFirstBatch) {
    for (const log of parsed) {
      if (!log.timestamp) continue;
      const t = log.timestamp.getTime();
      // An Invalid Date reports NaN. Without this guard a NaN seen first would
      // be stored and, because every `t > NaN` comparison is false, would never
      // be replaced by a real timestamp.
      if (Number.isNaN(t)) continue;
      if (maxTimestamp === null || t > maxTimestamp) maxTimestamp = t;
    }
  }

  self.postMessage({ logs: parsed, maxTimestamp, isFirstBatch } satisfies ParseWorkerResult);
};

0 commit comments

Comments
 (0)