Skip to content

Commit 80d3602

Browse files
committed
feat: handle new formats for coords columns from csv-detective
1 parent 97b8a2f commit 80d3602

File tree

1 file changed

+25
-1
lines changed

1 file changed

+25
-1
lines changed

udata_hydra/analysis/geojson.py

Lines changed: 25 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -150,7 +150,8 @@ async def csv_to_geojson_and_pmtiles(
150150
) -> tuple[str, int, str, int] | None:
151151
def cast_latlon(latlon: str) -> list[float, float]:
152152
# we can safely do this as the detection was successful
153-
lat, lon = latlon.replace(" ", "").split(",")
153+
# removing potential blank and brackets
154+
lat, lon = latlon.replace(" ", "").replace("[", "").replace("]", "").split(",")
154155
# using the geojson standard: longitude before latitude
155156
return [float(lon), float(lat)]
156157

@@ -178,6 +179,9 @@ def prevent_nan(value):
178179
if "latlon" in detection["format"]:
179180
geo["latlon"] = column
180181
break
182+
if "lonlat" in detection["format"]:
183+
geo["lonlat"] = column
184+
break
181185
if "latitude" in detection["format"]:
182186
geo["lat"] = column
183187
if "longitude" in detection["format"]:
@@ -187,6 +191,8 @@ def prevent_nan(value):
187191
geo = {"geometry": geo["geometry"]}
188192
if "latlon" in geo:
189193
geo = {"latlon": geo["latlon"]}
194+
if "lonlat" in geo:
195+
geo = {"lonlat": geo["lonlat"]}
190196
if not geo or (("lat" in geo and "lon" not in geo) or ("lon" in geo and "lat" not in geo)):
191197
log.debug("No geographical columns found, skipping")
192198
return None
@@ -224,6 +230,24 @@ def prevent_nan(value):
224230
},
225231
}
226232
)
233+
elif "lonlat" in geo:
234+
# ending up here means we either have the exact lon,lat format, or NaN
235+
# skipping row if NaN
236+
if pd.isna(row[geo["lonlat"]]):
237+
continue
238+
template["features"].append(
239+
{
240+
"type": "Feature",
241+
"geometry": {
242+
"type": "Point",
243+
# inverting lon and lat to match the standard
244+
"coordinates": cast_latlon(row[geo["lonlat"]])[::-1],
245+
},
246+
"properties": {
247+
col: prevent_nan(row[col]) for col in df.columns if col != geo["lonlat"]
248+
},
249+
}
250+
)
227251
else:
228252
# skipping row if lat or lon is NaN
229253
if any(pd.isna(coord) for coord in (row[geo["lon"]], row[geo["lat"]])):

0 commit comments

Comments
 (0)