Skip to content

Commit 71dc5fb

Browse files
committed
Enhance logging when stops missing latitude and longitude
1 parent 89f165c commit 71dc5fb

File tree

7 files changed

+162
-12
lines changed

7 files changed

+162
-12
lines changed

functions-python/helpers/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,5 @@ google-cloud-bigquery
2828

2929
# Additional package
3030
pycountry
31-
shapely
31+
shapely
32+
pandas

functions-python/helpers/tests/test_transform.py

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,14 @@
1-
from transform import to_boolean, get_nested_value
1+
import unittest
2+
3+
import pandas as pd
4+
5+
from transform import (
6+
to_boolean,
7+
get_nested_value,
8+
to_float,
9+
get_safe_value,
10+
get_safe_float,
11+
)
212

313

414
def test_to_boolean():
@@ -60,3 +70,87 @@ def test_get_nested_value():
6070
# Test case 9: Non-dictionary data
6171
assert get_nested_value("not a dict", ["a", "b", "c"]) is None
6272
assert get_nested_value("not a dict", ["a", "b", "c"], []) == []
73+
74+
75+
class TestToFloat(unittest.TestCase):
76+
def test_valid_float(self):
77+
self.assertEqual(to_float("3.14"), 3.14)
78+
self.assertEqual(to_float(2.5), 2.5)
79+
self.assertEqual(to_float("0"), 0.0)
80+
self.assertEqual(to_float(0), 0.0)
81+
82+
def test_invalid_float(self):
83+
self.assertIsNone(to_float("abc"))
84+
self.assertIsNone(to_float(None))
85+
self.assertIsNone(to_float(""))
86+
87+
def test_default_value(self):
88+
self.assertEqual(to_float("abc", default_value=1.23), 1.23)
89+
self.assertEqual(to_float(None, default_value=4.56), 4.56)
90+
self.assertEqual(to_float("", default_value=7.89), 7.89)
91+
92+
93+
class TestGetSafeValue(unittest.TestCase):
94+
def test_valid_value(self):
95+
row = {"name": " Alice "}
96+
self.assertEqual(get_safe_value(row, "name"), "Alice")
97+
98+
def test_missing_column(self):
99+
row = {"age": 30}
100+
self.assertIsNone(get_safe_value(row, "name"))
101+
102+
def test_empty_string(self):
103+
row = {"name": " "}
104+
self.assertIsNone(get_safe_value(row, "name"))
105+
106+
def test_nan_value(self):
107+
row = {"name": pd.NA}
108+
self.assertIsNone(get_safe_value(row, "name"))
109+
row = {"name": float("nan")}
110+
self.assertIsNone(get_safe_value(row, "name"))
111+
112+
def test_default_value(self):
113+
row = {"name": ""}
114+
self.assertEqual(
115+
get_safe_value(row, "name", default_value="default"), "default"
116+
)
117+
118+
119+
class TestGetSafeFloat(unittest.TestCase):
120+
def test_valid_float(self):
121+
row = {"value": "3.14"}
122+
self.assertEqual(get_safe_float(row, "value"), 3.14)
123+
row = {"value": 2.5}
124+
self.assertEqual(get_safe_float(row, "value"), 2.5)
125+
row = {"value": "0"}
126+
self.assertEqual(get_safe_float(row, "value"), 0.0)
127+
row = {"value": 0}
128+
self.assertEqual(get_safe_float(row, "value"), 0.0)
129+
130+
def test_missing_column(self):
131+
row = {"other": 1.23}
132+
self.assertIsNone(get_safe_float(row, "value"))
133+
134+
def test_empty_string(self):
135+
row = {"value": " "}
136+
self.assertIsNone(get_safe_float(row, "value"))
137+
138+
def test_nan_value(self):
139+
row = {"value": pd.NA}
140+
self.assertIsNone(get_safe_float(row, "value"))
141+
row = {"value": float("nan")}
142+
self.assertIsNone(get_safe_float(row, "value"))
143+
144+
def test_invalid_float(self):
145+
row = {"value": "abc"}
146+
self.assertIsNone(get_safe_float(row, "value"))
147+
row = {"value": None}
148+
self.assertIsNone(get_safe_float(row, "value"))
149+
150+
def test_default_value(self):
151+
row = {"value": ""}
152+
self.assertEqual(get_safe_float(row, "value", default_value=1.23), 1.23)
153+
row = {"value": "abc"}
154+
self.assertEqual(get_safe_float(row, "value", default_value=4.56), 4.56)
155+
row = {"value": None}
156+
self.assertEqual(get_safe_float(row, "value", default_value=7.89), 7.89)

functions-python/helpers/transform.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,11 @@ def get_safe_value(row, column_name, default_value=None) -> Optional[str]:
9696
import pandas
9797

9898
value = row.get(column_name, None)
99-
if not value or pandas.isna(value) or f"{value}".strip() == "":
99+
if (
100+
value is None
101+
or pandas.isna(value)
102+
or (isinstance(value, str) and value.strip() == "")
103+
):
100104
return default_value
101105
return f"{value}".strip()
102106

@@ -105,12 +109,8 @@ def get_safe_float(row, column_name, default_value=None) -> Optional[float]:
105109
"""
106110
Get a safe float value from the row. If the value is missing or cannot be converted to float,
107111
"""
108-
import pandas
109-
110-
value = row.get(column_name, None)
111-
if not value or pandas.isna(value) or f"{value}".strip() == "":
112-
return default_value
112+
safe_value = get_safe_value(row, column_name)
113113
try:
114-
return float(value)
114+
return float(safe_value)
115115
except (ValueError, TypeError):
116116
return default_value

functions-python/pmtiles_builder/src/csv_cache.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import csv
1717
import os
1818

19+
from gtfs import stop_txt_is_lat_log_required
1920
from shared.helpers.logger import get_logger
2021
from shared.helpers.transform import get_safe_value, get_safe_float
2122

@@ -134,8 +135,18 @@ def get_coordinates_for_stop(self, stop_id) -> tuple[float, float] | None:
134135
row_stop_id = get_safe_value(s, "stop_id")
135136
row_stop_lon = get_safe_float(s, "stop_lon")
136137
row_stop_lat = get_safe_float(s, "stop_lat")
137-
if row_stop_id is None or row_stop_lon is None or row_stop_lat is None:
138-
self.logger.warning("Invalid stop data: %s", s)
138+
if row_stop_id is None:
139+
self.logger.warning("Missing stop id: %s", s)
140+
continue
141+
if row_stop_lon is None or row_stop_lat is None:
142+
if stop_txt_is_lat_log_required(s):
143+
self.logger.warning(
144+
"Missing stop latitude and longitude : %s", s
145+
)
146+
else:
147+
self.logger.debug(
148+
"Missing optional stop latitude and longitude : %s", s
149+
)
139150
continue
140151
self.stop_to_coordinates[row_stop_id] = (row_stop_lon, row_stop_lat)
141152
return self.stop_to_coordinates.get(stop_id, None)
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from shared.helpers.transform import get_safe_value
2+
3+
# TODO: Move this file to a shared folder
4+
5+
6+
def stop_txt_is_lat_log_required(stop_row):
7+
"""
8+
Conditionally Required:
9+
- Required for locations which are stops (location_type=0), stations (location_type=1)
10+
or entrances/exits (location_type=2).
11+
- Optional for locations which are generic nodes (location_type=3) or boarding areas (location_type=4).
12+
13+
Args:
14+
row (dict): The data row to check.
15+
16+
Returns:
17+
bool: True if both latitude and longitude is required, False otherwise.
18+
"""
19+
location_type = get_safe_value(stop_row, "location_type", 0)
20+
return location_type in ("0", "1", "2")

functions-python/pmtiles_builder/src/main.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,11 @@ def _create_routes_geojson(self):
403403
if shape_id:
404404
coordinates = self._get_shape_points(shape_id, shapes_index)
405405
if not coordinates:
406-
# We don't have the coordinates for the shape, fallback on using stops.
406+
logging.info(
407+
"Could not find coordinates for the shape: %s, "
408+
"fallback on using stops.",
409+
shape_id,
410+
)
407411
trip_id = self.csv_cache.get_trip_from_route(route_id)
408412

409413
if trip_id:
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import unittest
2+
3+
from gtfs import stop_txt_is_lat_log_required
4+
5+
6+
class TestStopTxtIsLatLogRequired(unittest.TestCase):
7+
def test_required_cases(self):
8+
self.assertTrue(stop_txt_is_lat_log_required({"location_type": "0"}))
9+
self.assertTrue(stop_txt_is_lat_log_required({"location_type": 0}))
10+
self.assertTrue(stop_txt_is_lat_log_required({"location_type": "2"}))
11+
12+
def test_not_required_cases(self):
13+
self.assertFalse(stop_txt_is_lat_log_required({"location_type": "3"}))
14+
self.assertFalse(stop_txt_is_lat_log_required({"location_type": 3}))
15+
self.assertFalse(stop_txt_is_lat_log_required({"location_type": "4"}))
16+
self.assertFalse(stop_txt_is_lat_log_required({"location_type": 4}))
17+
18+
def test_missing_location_type(self):
19+
self.assertFalse(stop_txt_is_lat_log_required({}))
20+
self.assertFalse(stop_txt_is_lat_log_required({"other": "value"}))

0 commit comments

Comments
 (0)