@@ -107,7 +107,6 @@ function parseCsv(csvText) {
107107 const lines = csvText . trim ( ) . split ( '\n' ) ;
108108
109109 const headers = lines [ 0 ] . split ( '\t' ) . map ( h => h . trim ( ) ) ;
110- console . log ( "headers" , headers ) ;
111110 if ( headers . length < 5 ) {
112111 throw new Error ( "Invalid CSV headers" ) ;
113112 }
@@ -137,21 +136,31 @@ function parseCsv(csvText) {
137136 }
138137}
139138
140- /**
141- * Adds data from a CSV.gz file to the existing geodata collection
142- * @param {string } url - URL of the CSV.gz file to load
143- * @returns {Promise<Object> } Updated LokiJS collection
144- */
145- export async function addDataFromUrl ( url ) {
146- // Load the new data
147- const newRows = await loadCsvGzFile ( url ) ;
148-
149- // Add the new rows to the collection
150- geoCollection . insert ( newRows ) ;
151-
152- return newRows ;
/**
 * Downloads and parses any CSV.gz files that have not been ingested yet.
 * Successfully loaded URLs are recorded in `ingestedFiles` so each file is
 * fetched at most once.
 * @param {string[]} urls - Candidate CSV.gz file URLs.
 * @returns {Promise<Object[]>} Rows from all newly loaded files (empty array if none).
 */
async function downloadMissingData(urls) {
  const needDownload = urls.filter((url) => !ingestedFiles.includes(url));
  if (needDownload.length === 0) {
    return [];
  }
  // Load files in parallel. A single failing file must not reject the whole
  // Promise.all and lose the other files' rows, so each load catches its own
  // error, logs it, and contributes an empty batch. Failed URLs are NOT
  // marked as ingested, so they will be retried on the next call.
  const loadResults = await Promise.all(needDownload.map(async (url) => {
    try {
      const rows = await loadCsvGzFile(url);
      ingestedFiles.push(url);
      return rows;
    } catch (error) {
      console.warn(`Failed to load geodata file ${url}:`, error);
      return [];
    }
  }));
  return loadResults.flat();
}
154152
153+
/**
 * Queries a Loki table for documents inside a bounding box.
 * Precision-2 geohash cells covering the box drive the coarse indexed
 * lookup; candidates are then narrowed to strict lat/lon containment.
 * @param {Object} table - LokiJS collection whose docs carry geo2/lat/lon.
 * @param {number} minLat
 * @param {number} maxLat
 * @param {number} minLon
 * @param {number} maxLon
 * @returns {Object[]} Documents strictly inside the bounds.
 */
function queryGeoTable(table, minLat, maxLat, minLon, maxLon) {
  const coveringCells = ngeohash.bboxes(minLat, minLon, maxLat, maxLon, 2);
  const candidates = table.find({ geo2: { '$in': coveringCells } });
  // Exact containment check — boundaries are exclusive, matching the index query.
  const insideBounds = (doc) =>
    doc.lat > minLat && doc.lat < maxLat && doc.lon > minLon && doc.lon < maxLon;
  return candidates.filter(insideBounds);
}
163+
155164/**
156165 * Get geo entries within bounds - optimized version
157166 * @param {Object } bounds - Object with minLat, maxLat, minLon, maxLon
@@ -160,79 +169,29 @@ export async function addDataFromUrl(url) {
/**
 * Get geo entries within bounds - optimized version.
 * Chooses the coarse ("tiny") dataset when the viewport spans many
 * precision-1 geohash cells, otherwise loads one file per covering cell;
 * newly downloaded rows are inserted into the chosen table before querying.
 * @param {Object} bounds - Object with minLat, maxLat, minLon, maxLon
 * @returns {Promise<Object[]>} Documents strictly inside the bounds.
 */
export async function getGeoEntriesInBounds({ minLat, maxLat, minLon, maxLon }) {
  const geohashes_1 = ngeohash.bboxes(minLat, minLon, maxLat, maxLon, 1);
  // Handle possible null/undefined bounds: with bad bounds ngeohash can
  // yield nothing usable — bail out early instead of proceeding with an
  // empty (or non-array) cell list.
  if (!Array.isArray(geohashes_1) || geohashes_1.length === 0) {
    console.warn('No geohashes found for the current bounds');
    return [];
  }

  let fileUrls = [];
  let table = null;
  if (geohashes_1.length > 3) {
    // Zoomed far out: one coarse dataset instead of many per-cell files.
    // NOTE(review): queryGeoTable filters on `geo2`; confirm addLatLonToRows
    // populates geo2 on the tiny rows, otherwise this path returns nothing
    // (the pre-refactor tiny path scanned the whole table without geo2).
    table = tinyGeoCollection;
    fileUrls = [`${basePath}geodata/geo3_unique.csv.gz`];
  } else {
    // Zoomed in: load one file per covering precision-1 geohash cell.
    table = geoCollection;
    fileUrls = geohashes_1.map((g) => `${basePath}geodata/${g}.csv.gz`);
  }

  const rows = await downloadMissingData(fileUrls);
  if (rows.length > 0) {
    console.time('add_latlon');
    addLatLonToRows(rows);
    console.timeEnd('add_latlon');
    console.time('insert');
    // Single bulk insert of all newly downloaded rows.
    table.insert(rows);
    console.timeEnd('insert');
  }

  console.time('query');
  const results = queryGeoTable(table, minLat, maxLat, minLon, maxLon);
  console.timeEnd('query');
  return results;
}
237196
238197/**
0 commit comments