Skip to content

Commit ee2303d

Browse files
authored
fix: increase batch to 500 and delete visualization join in the CSV export function (#1379)
1 parent f0a52ad commit ee2303d

File tree

5 files changed: +25 additions, −24 deletions

api/src/shared/common/db_utils.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ def get_all_gtfs_feeds(
164164
165165
:return: The GTFS feeds in an iterator.
166166
"""
167-
batch_size = os.getenv("BATCH_SIZE", 100)
167+
batch_size = int(os.getenv("BATCH_SIZE", "500"))
168168
batch_query = db_session.query(Gtfsfeed).order_by(Gtfsfeed.stable_id).yield_per(batch_size)
169169
if published_only:
170170
batch_query = batch_query.filter(Gtfsfeed.operational_status == "published")
@@ -182,7 +182,6 @@ def get_all_gtfs_feeds(
182182
contains_eager(Gtfsfeed.gtfsdatasets)
183183
.joinedload(Gtfsdataset.validation_reports)
184184
.joinedload(Validationreport.features),
185-
joinedload(Gtfsfeed.visualization_dataset),
186185
*get_joinedload_options(include_extracted_location_entities=True),
187186
)
188187
)

functions-python/export_csv/src/main.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,6 @@ def get_gtfs_feed_csv_data(
208208
:return: Dictionary with feed data formatted for CSV output.
209209
"""
210210
joined_features = ""
211-
validated_at = None
212211
bounding_box = None
213212

214213
# First extract the common feed data
@@ -240,18 +239,16 @@ def get_gtfs_feed_csv_data(
240239
if features
241240
else ""
242241
)
243-
if latest_report.validated_at:
244-
validated_at = latest_report.validated_at
245-
if latest_dataset.bounding_box:
246-
shape = to_shape(latest_dataset.bounding_box)
247-
if shape and shape.bounds:
248-
bounding_box = BoundingBox(
249-
minimum_latitude=shape.bounds[1],
250-
maximum_latitude=shape.bounds[3],
251-
minimum_longitude=shape.bounds[0],
252-
maximum_longitude=shape.bounds[2],
253-
extracted_on=validated_at,
254-
)
242+
if feed.bounding_box:
243+
shape = to_shape(feed.bounding_box)
244+
if shape and shape.bounds:
245+
bounding_box = BoundingBox(
246+
minimum_latitude=shape.bounds[1],
247+
maximum_latitude=shape.bounds[3],
248+
minimum_longitude=shape.bounds[0],
249+
maximum_longitude=shape.bounds[2],
250+
extracted_on=feed.bounding_box_dataset.downloaded_at,
251+
)
255252

256253
# Keep the bounding box for that GTFS feed so it can be used in associated real-time feeds, if any
257254
if bounding_box:

functions-python/export_csv/tests/__init__.py

Whitespace-only changes.

functions-python/export_csv/tests/conftest.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ def populate_database(db_session):
156156
hosted_url=f"https://url_prefix/{feed_stable_id}/dataset-{i}_some_fake_hosted_url",
157157
note=f"dataset-{i} Some fake note",
158158
hash=fake.sha256(),
159-
downloaded_at=datetime.utcnow(),
159+
downloaded_at=datetime(2025, 1, 12),
160160
stable_id=f"dataset-{i}",
161161
)
162162
validation_report = Validationreport(
@@ -175,6 +175,9 @@ def populate_database(db_session):
175175
gtfs_dataset.locations = locations
176176

177177
active_gtfs_feeds[feed_index].gtfsdatasets.append(gtfs_dataset)
178+
db_session.flush()
179+
active_gtfs_feeds[feed_index].bounding_box = gtfs_dataset.bounding_box
180+
active_gtfs_feeds[feed_index].bounding_box_dataset_id = gtfs_dataset.id
178181
active_gtfs_feeds[0].locations = locations
179182
active_gtfs_feeds[1].locations = locations
180183

functions-python/export_csv/tests/test_export_csv_main.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# limitations under the License.
1515
#
1616
import io
17+
import unittest
1718

1819
import pandas as pd
1920
import pandas.testing as pdt
@@ -34,13 +35,14 @@
3435
""" # noqa
3536

3637

37-
def test_export_csv():
38-
csv_file_path = "./output.csv"
39-
main.export_csv(csv_file_path)
40-
df_actual = pd.read_csv(csv_file_path)
41-
print(f"Collected data for {len(df_actual)} feeds.")
38+
class TestExportCSV(unittest.TestCase):
39+
def test_export_csv(self):
40+
csv_file_path = "./output.csv"
41+
main.export_csv(csv_file_path)
42+
df_actual = pd.read_csv(csv_file_path)
43+
print(f"Collected data for {len(df_actual)} feeds.")
4244

43-
df_expected = pd.read_csv(io.StringIO(expected_csv))
45+
df_expected = pd.read_csv(io.StringIO(expected_csv))
4446

45-
pdt.assert_frame_equal(df_actual, df_expected)
46-
print("DataFrames are equal.")
47+
pdt.assert_frame_equal(df_actual, df_expected)
48+
print("DataFrames are equal.")

Comments (0)