@@ -4,206 +4,63 @@ import pako from 'pako';
44const basePath = import . meta. env . BASE_URL ;
55// Initialize a LokiJS database
66
7- const ingestedFiles = [ ] ;
8- const db = new Loki ( 'geodata.db' , {
9- autoload : true ,
10- autosave : false , // Disable autosave for better performance
11- throttledSaves : false
12- } ) ;
137
14- // Create a collection for geo data with optimized configuration
15- const geoCollection = db . addCollection ( 'geodata' , {
16- indices : [ 'geo2' , 'category' ] ,
17- adaptiveBinaryIndices : false , // Disable adaptive indices for bulk operations
18- transactional : false , // Disable transactions for better performance
19- clone : false , // Disable object cloning for better performance
20- disableMeta : true // Disable meta properties for better performance
21- } ) ;
22-
23- const tinyGeoCollection = db . addCollection ( 'tinygeodata' , {
24- indices : [ 'geo2' , 'category' ] ,
25- adaptiveBinaryIndices : false ,
26- transactional : false ,
27- clone : false ,
28- disableMeta : true
29- } ) ;
30-
31- /**
32- * @typedef {Object } GeoDocument
33- * @property {string } geo2 - Geohash
34- * @property {string } category - Category
35- * @property {number } lat - Latitude
36- * @property {number } lon - Longitude
37- * @property {string } [geohash] - Original geohash
38- * @property {number } [population] - Population
39- */
40-
41- /**
42- * Add latitude and longitude to rows based on geohash - optimized version using ngeohash
43- * @param {Array } rows - Array of data objects
44- */
45- function addLatLonToRows ( rows ) {
46- // Process all rows in a single loop for better performance
47- for ( let i = 0 ; i < rows . length ; i ++ ) {
48- const row = rows [ i ] ;
49- if ( row . geohash ) {
50- // Use ngeohash decode which is optimized for performance
51- const latLon = ngeohash . decode ( row . geohash ) ;
52- row . lat = latLon . latitude ;
53- row . lon = latLon . longitude ;
54- row . geo2 = row . geohash . substring ( 0 , 2 ) ;
55- }
56- }
57- }
58-
59- function addIdstoRows ( rows ) {
60- for ( let i = 0 ; i < rows . length ; i ++ ) {
61- rows [ i ] . id = `${ rows [ i ] . geohash } -${ rows [ i ] . page_title } ` ;
62- }
63- }
648
/**
 * Get geo entries within bounds by delegating the query to the geodata web worker.
 *
 * Starts the worker on first use, registers a pending `{ resolve, reject }` pair in
 * `window.geodataWorkerPromises` under a unique request ID, and posts a `queryBounds`
 * message. The worker's message listener settles the pair when a reply carrying the
 * same `requestId` arrives.
 *
 * @param {Object} bounds - Object with minLat, maxLat, minLon, maxLon
 * @returns {Promise<Array>} Resolves with the `results` payload of the worker's reply
 *   (geo entries within bounds); rejects if the worker reports an error for this request.
 * @throws {Error} Rethrows whatever `postMessage` throws when the query cannot be sent.
 */
export async function getGeoEntriesInBounds({ minLat, maxLat, minLon, maxLon }) {
  // Make sure worker is initialized
  if (!window.geodataWorker) {
    initWorker();
  }

  // Create a unique request ID so the reply can be matched to this call
  const requestId = `query_${Date.now()}_${Math.random()}`;

  // Create a promise that will be settled when the worker replies
  const queryPromise = new Promise((resolve, reject) => {
    window.geodataWorkerPromises[requestId] = { resolve, reject };
  });

  try {
    // Send query to worker
    window.geodataWorker.postMessage({
      type: 'queryBounds',
      requestId,
      bounds: { minLat, maxLat, minLon, maxLon },
      basePath,
    });
  } catch (error) {
    // The request never reached the worker, so no reply will ever arrive:
    // drop the pending entry instead of leaking it, then surface the failure.
    delete window.geodataWorkerPromises[requestId];
    throw error;
  }

  // Wait for worker to return results
  return queryPromise;
}
9739
98- /**
99- * Parse CSV text into an array of objects - optimized for speed
100- * @param {string } csvText - The CSV text to parse
101- * @returns {Array } - Array of objects representing CSV rows
102- */
103- function parseCsv ( csvText ) {
104- try {
105- const startTime = performance . now ( ) ;
106-
107- if ( ! csvText || csvText . trim ( ) === '' ) {
108- console . error ( "Empty CSV text provided to parser" ) ;
109- return [ ] ;
110- }
111-
112- const lines = csvText . trim ( ) . split ( '\n' ) ;
// Initialize the worker once
/**
 * Create the geodata web worker and wire up reply handling.
 *
 * Stores the worker in `window.geodataWorker` and an empty registry of pending
 * requests in `window.geodataWorkerPromises`. Registry entries are
 * `{ resolve, reject }` pairs keyed by file URL (for `fileProcessed` replies) or by
 * request ID (for `queryResults` replies); `error` replies are matched on whichever
 * of the two keys the message carries.
 *
 * If the worker itself raises an `error` event, every pending request is rejected,
 * because that event cannot be attributed to a single request and the affected
 * request would otherwise never settle.
 */
function initWorker() {
  // Keep `new URL(..., import.meta.url)` inline in the Worker constructor call.
  window.geodataWorker = new Worker(new URL('./geodataWorker.js', import.meta.url), { type: 'module' });
  window.geodataWorkerPromises = {};

  // Settle the pending request stored under `key` (if any) and forget it.
  // Returns whether a pending request was found.
  const settle = (key, method, value) => {
    const pending = window.geodataWorkerPromises[key];
    if (!pending) {
      return false;
    }
    delete window.geodataWorkerPromises[key];
    pending[method](value);
    return true;
  };

  window.geodataWorker.addEventListener('message', (event) => {
    // Tolerate a null/undefined payload instead of throwing inside the listener.
    const { type, url, results, error, requestId } = event.data ?? {};

    if (type === 'fileProcessed') {
      settle(url, 'resolve', results);
    } else if (type === 'queryResults') {
      settle(requestId, 'resolve', results);
    } else if (type === 'error') {
      const promiseKey = url || requestId;
      if (!settle(promiseKey, 'reject', new Error(error))) {
        // Nobody is waiting on this key; log rather than silently drop the failure.
        console.error('Geodata worker error:', error);
      }
    }
  });

  window.geodataWorker.addEventListener('error', (event) => {
    const failure = new Error(event?.message || 'Geodata worker failed');
    for (const key of Object.keys(window.geodataWorkerPromises)) {
      settle(key, 'reject', failure);
    }
  });
}
20865
20966/**
0 commit comments