Skip to content

Commit 1eee255

Browse files
committed
now with web worker!
1 parent 3a766db commit 1eee255

File tree

3 files changed

+194
-189
lines changed

3 files changed

+194
-189
lines changed

src/lib/custom.d.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
/**
 * Globals that src/lib/geodata.js attaches to `window` so that a single
 * web worker and its table of in-flight requests are shared page-wide.
 */
interface Window {
  /** The shared geodata web worker. */
  geodataWorker: Worker;

  /**
   * Settle callbacks for requests the worker has not answered yet.
   * Keys are either a data-file URL or a query request ID, depending on
   * which kind of message the entry is waiting for.
   */
  geodataWorkerPromises: Record<
    string,
    {
      resolve: (value: any) => void;
      reject: (reason?: any) => void;
    }
  >;
}

src/lib/geodata.js

Lines changed: 46 additions & 189 deletions
Original file line numberDiff line numberDiff line change
@@ -4,206 +4,63 @@ import pako from 'pako';
44
const basePath = import.meta.env.BASE_URL;
55
// Initialize a LokiJS database
66

7-
const ingestedFiles = [];
8-
const db = new Loki('geodata.db', {
9-
autoload: true,
10-
autosave: false, // Disable autosave for better performance
11-
throttledSaves: false
12-
});
137

14-
// Create a collection for geo data with optimized configuration
15-
const geoCollection = db.addCollection('geodata', {
16-
indices: ['geo2', 'category'],
17-
adaptiveBinaryIndices: false, // Disable adaptive indices for bulk operations
18-
transactional: false, // Disable transactions for better performance
19-
clone: false, // Disable object cloning for better performance
20-
disableMeta: true // Disable meta properties for better performance
21-
});
22-
23-
const tinyGeoCollection = db.addCollection('tinygeodata', {
24-
indices: ['geo2', 'category'],
25-
adaptiveBinaryIndices: false,
26-
transactional: false,
27-
clone: false,
28-
disableMeta: true
29-
});
30-
31-
/**
32-
* @typedef {Object} GeoDocument
33-
* @property {string} geo2 - Geohash
34-
* @property {string} category - Category
35-
* @property {number} lat - Latitude
36-
* @property {number} lon - Longitude
37-
* @property {string} [geohash] - Original geohash
38-
* @property {number} [population] - Population
39-
*/
40-
41-
/**
42-
* Add latitude and longitude to rows based on geohash - optimized version using ngeohash
43-
* @param {Array} rows - Array of data objects
44-
*/
45-
function addLatLonToRows(rows) {
46-
// Process all rows in a single loop for better performance
47-
for (let i = 0; i < rows.length; i++) {
48-
const row = rows[i];
49-
if (row.geohash) {
50-
// Use ngeohash decode which is optimized for performance
51-
const latLon = ngeohash.decode(row.geohash);
52-
row.lat = latLon.latitude;
53-
row.lon = latLon.longitude;
54-
row.geo2 = row.geohash.substring(0, 2);
55-
}
56-
}
57-
}
58-
59-
function addIdstoRows(rows) {
60-
for (let i = 0; i < rows.length; i++) {
61-
rows[i].id = `${rows[i].geohash}-${rows[i].page_title}`;
62-
}
63-
}
648

659
/**
66-
* Loads a CSV.gz file from a specified URL and adds it to LokiJS collection
67-
* @param {string} url - URL of the CSV.gz file to load
68-
* @returns {Promise<Object>} LokiJS collection containing the loaded data
10+
* Get geo entries within bounds by sending a query to the geodata web worker
11+
* @param {Object} bounds - Object with minLat, maxLat, minLon, maxLon
12+
* @returns {Promise<Array>} Array of geo entries within bounds
6913
*/
70-
export async function loadCsvGzFile(url) {
71-
try {
72-
// Make sure URL starts with correct path
73-
const fullUrl = url.startsWith('/') ? url : `/${url}`;
74-
75-
// Fetch the gzipped CSV file
76-
const response = await fetch(fullUrl);
77-
78-
if (!response.ok) {
79-
throw new Error(`Failed to download file from ${fullUrl}: ${response.status} ${response.statusText}`);
80-
}
81-
82-
// Parse CSV directly from response text
83-
const buffer = await response.arrayBuffer();
84-
try {
85-
const decompressed = pako.inflate(new Uint8Array(buffer), { to: 'string' });
86-
return parseCsv(decompressed);
87-
} catch (err) {
88-
// If decompression fails, treat as plain text
89-
const text = new TextDecoder().decode(buffer);
90-
return parseCsv(text);
91-
}
92-
} catch (error) {
93-
console.error(`Error loading data from ${url}:`, error);
94-
return []; // Return empty array instead of throwing to make app more resilient
14+
export async function getGeoEntriesInBounds({minLat, maxLat, minLon, maxLon}) {
  // Lazily create the shared worker (and its pending-promise map) on first use.
  if (!window.geodataWorker) {
    initWorker();
  }

  // Key under which the worker's reply is matched back to this call.
  const requestId = `query_${Date.now()}_${Math.random()}`;

  // Park the settle callbacks where the worker's message handler can find them.
  const queryPromise = new Promise((resolve, reject) => {
    window.geodataWorkerPromises[requestId] = { resolve, reject };
  });

  try {
    // Send query to worker
    window.geodataWorker.postMessage({
      type: 'queryBounds',
      requestId,
      bounds: { minLat, maxLat, minLon, maxLon },
      basePath: import.meta.env.BASE_URL
    });
  } catch (err) {
    // The message never reached the worker, so no reply will ever arrive for
    // this requestId. Drop the parked callbacks instead of leaking them, then
    // surface the failure to the caller as a rejection of the returned promise.
    delete window.geodataWorkerPromises[requestId];
    throw err;
  }

  // Settled by the worker's 'queryResults' or 'error' message for requestId.
  return queryPromise;
}
9739

98-
/**
99-
* Parse CSV text into an array of objects - optimized for speed
100-
* @param {string} csvText - The CSV text to parse
101-
* @returns {Array} - Array of objects representing CSV rows
102-
*/
103-
function parseCsv(csvText) {
104-
try {
105-
const startTime = performance.now();
106-
107-
if (!csvText || csvText.trim() === '') {
108-
console.error("Empty CSV text provided to parser");
109-
return [];
110-
}
111-
112-
const lines = csvText.trim().split('\n');
40+
// Create the shared geodata worker and its pending-promise map; callers check window.geodataWorker first so this runs only once
41+
function initWorker() {
42+
window.geodataWorker = new Worker(new URL('./geodataWorker.js', import.meta.url), { type: 'module' });
43+
window.geodataWorkerPromises = {};
44+
45+
window.geodataWorker.addEventListener('message', (event) => {
46+
const { type, url, results, error, requestId } = event.data;
11347

114-
const headers = lines[0].split('\t').map(h => h.trim());
115-
if (headers.length < 5) {
116-
throw new Error("Invalid CSV headers");
48+
if (type === 'fileProcessed' && window.geodataWorkerPromises[url]) {
49+
window.geodataWorkerPromises[url].resolve(results);
50+
delete window.geodataWorkerPromises[url];
51+
}
52+
else if (type === 'queryResults' && window.geodataWorkerPromises[requestId]) {
53+
window.geodataWorkerPromises[requestId].resolve(results);
54+
delete window.geodataWorkerPromises[requestId];
11755
}
118-
119-
// Pre-allocate array for better performance
120-
const rows = new Array(lines.length - 1);
121-
122-
// Process all lines in a single loop - simpler and more efficient
123-
const numLines = lines.length;
124-
const headerLength = headers.length;
125-
126-
for (let i = 1; i < numLines; i++) {
127-
const values = lines[i].split('\t').map(v => v.trim());
128-
const row = {};
129-
130-
for (let k = 0; k < headerLength; k++) {
131-
row[headers[k]] = values[k];
56+
else if (type === 'error') {
57+
const promiseKey = url || requestId;
58+
if (window.geodataWorkerPromises[promiseKey]) {
59+
window.geodataWorkerPromises[promiseKey].reject(new Error(error));
60+
delete window.geodataWorkerPromises[promiseKey];
13261
}
133-
134-
rows[i-1] = row;
13562
}
136-
137-
return rows;
138-
} catch (error) {
139-
console.error("Error parsing CSV:", error);
140-
return [];
141-
}
142-
}
143-
144-
async function downloadMissingData(urls) {
145-
const needDownload = urls.filter(url => !ingestedFiles.includes(url));
146-
if (needDownload.length > 0) {
147-
const loadResults = await Promise.all(needDownload.map(async (url) => {
148-
console.time('load file');
149-
const rows = await loadCsvGzFile(url);
150-
console.timeEnd('load file');
151-
ingestedFiles.push(url);
152-
console.time('add_latlon');
153-
addLatLonToRows(rows);
154-
console.timeEnd('add_latlon');
155-
console.time('add_ids');
156-
addIdstoRows(rows);
157-
console.timeEnd('add_ids');
158-
return rows;
159-
}));
160-
return loadResults.flat();
161-
}
162-
return [];
163-
}
164-
165-
166-
function queryGeoTable(table, minLat, maxLat, minLon, maxLon) {
167-
const geohashes_2 = ngeohash.bboxes(minLat, minLon, maxLat, maxLon, 2);
168-
console.log("geohashes_2", geohashes_2);
169-
// Use LokiJS chaining to filter by geo2 and lat first
170-
// const data = table.find()
171-
// console.log("data", data.length);
172-
// console.log("geo2 only", data.filter(doc => geohashes_2.includes(doc.geo2)).length);
173-
return table.chain()
174-
.find({ geo2: { '$in': geohashes_2 }})
175-
.where(obj => obj.lat >= minLat && obj.lat <= maxLat && obj.lon >= minLon && obj.lon <= maxLon)
176-
.data();
177-
}
178-
179-
/**
180-
* Get geo entries within bounds - optimized version
181-
* @param {Object} bounds - Object with minLat, maxLat, minLon, maxLon
182-
* @returns {Array} Array of geo entries within bounds
183-
*/
184-
export async function getGeoEntriesInBounds({minLat, maxLat, minLon, maxLon}) {
185-
// Handle possible null/undefined bounds
186-
const geohashes_1 = ngeohash.bboxes(minLat, minLon, maxLat, maxLon, 1);
187-
let fileUrls = [];
188-
let table = null;
189-
if (geohashes_1.length > 3) {
190-
table = tinyGeoCollection;
191-
fileUrls = [`${basePath}geodata/geo2_unique.csv.gz`];
192-
} else {
193-
table = geoCollection;
194-
fileUrls = geohashes_1.map(g => `${basePath}geodata/${g}.csv.gz`);
195-
}
196-
197-
const rows = await downloadMissingData(fileUrls);
198-
if (rows.length > 0) {
199-
console.time('insert');
200-
table.insert(rows);
201-
console.timeEnd('insert');
202-
}
203-
console.time('query');
204-
const results = queryGeoTable(table, minLat, maxLat, minLon, maxLon);
205-
console.timeEnd('query');
206-
return results;
63+
});
20764
}
20865

20966
/**

0 commit comments

Comments
 (0)