Skip to content

Commit 9dee1e9

Browse files
authored
feat: added alt name to geopolygons (#1535)
1 parent 10c93d1 commit 9dee1e9

File tree

6 files changed

+291
-0
lines changed

6 files changed

+291
-0
lines changed

functions-python/reverse_geolocation_populate/src/main.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ def fetch_data(admin_level, country_code, location_type, country_name=None):
174174
"name:en": all_tags.get("name:en"),
175175
"name:fr": all_tags.get("name:fr"),
176176
"geometry": row.geometry,
177+
"alt_name": all_tags.get("alt_name"),
178+
"alt_name:en": all_tags.get("alt_name"),
177179
}
178180
)
179181
return data
@@ -203,6 +205,7 @@ def save_to_database(data, db_session=None):
203205
geopolygon.iso_3166_1_code = row["iso3166_1"]
204206
geopolygon.iso_3166_2_code = row["iso3166_2"]
205207
geopolygon.name = row["name:en"] if row["name:en"] else row["name"]
208+
geopolygon.alt_name = row["alt_name"] if row["alt_name"] else row["alt_name:en"]
206209
geopolygon.geometry = WKTElement(row["geometry"], srid=4326)
207210
db_session.commit()
208211

functions-python/reverse_geolocation_populate/tests/test_reverse_geolocation_populate.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,8 @@ def test_save_to_database(self):
172172
"name:en": "Toronto",
173173
"iso3166_1": None,
174174
"iso3166_2": None,
175+
"alt_name": None,
176+
"alt_name:en": None,
175177
},
176178
{
177179
"osm_id": 2,
@@ -181,6 +183,8 @@ def test_save_to_database(self):
181183
"name:en": "Ontario",
182184
"iso3166_1": None,
183185
"iso3166_2": "CA-ON",
186+
"alt_name": None,
187+
"alt_name:en": None,
184188
},
185189
{
186190
"osm_id": 3,
@@ -190,6 +194,8 @@ def test_save_to_database(self):
190194
"name:en": "Canada",
191195
"iso3166_1": "CA",
192196
"iso3166_2": None,
197+
"alt_name": None,
198+
"alt_name:en": None,
193199
},
194200
{
195201
"osm_id": 5,
@@ -199,6 +205,8 @@ def test_save_to_database(self):
199205
"name:en": "Canada",
200206
"iso3166_1": "CA",
201207
"iso3166_2": None,
208+
"alt_name": "Kanada",
209+
"alt_name:en": None,
202210
},
203211
]
204212
db_session = MagicMock()

liquibase/changelog.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,4 +93,13 @@
9393
<include file="changes/feat_1507.sql" relativeToChangelogFile="true"/>
9494
<!-- Added constraint on operational status -->
9595
<include file="changes/fix_operation_status_constraint.sql" relativeToChangelogFile="true"/>
96+
<!-- Adding alt_name to the geopolygon table -->
97+
<include file="changes/feat_1479.sql" relativeToChangelogFile="true"/>
98+
99+
100+
<!-- Centralized materialized view definitions.
101+
Views are rebuilt from source SQL files using runOnChange. -->
102+
<!-- Keep this at the end to ensure all table and schema changes
103+
are applied before materialized views are rebuilt. -->
104+
<include file="materialized_views/materialized_views.xml" relativeToChangelogFile="true"/>
96105
</databaseChangeLog>

liquibase/changes/feat_1479.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
-- Adding alternative names to the geopolygon table
2+
ALTER TABLE geopolygon
3+
ADD COLUMN IF NOT EXISTS alt_name VARCHAR(255);
Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
CREATE MATERIALIZED VIEW FeedSearch AS
2+
SELECT
3+
-- feed
4+
Feed.stable_id AS feed_stable_id,
5+
Feed.id AS feed_id,
6+
Feed.data_type,
7+
Feed.status,
8+
Feed.feed_name,
9+
Feed.note,
10+
Feed.feed_contact_email,
11+
-- source
12+
Feed.producer_url,
13+
Feed.authentication_info_url,
14+
Feed.authentication_type,
15+
Feed.api_key_parameter_name,
16+
Feed.license_url,
17+
Feed.provider,
18+
Feed.operational_status,
19+
-- official status
20+
Feed.official AS official,
21+
-- created_at
22+
Feed.created_at AS created_at,
23+
-- latest_dataset
24+
Latest_dataset.stable_id AS latest_dataset_id,
25+
Latest_dataset.hosted_url AS latest_dataset_hosted_url,
26+
Latest_dataset.downloaded_at AS latest_dataset_downloaded_at,
27+
Latest_dataset.bounding_box AS latest_dataset_bounding_box,
28+
Latest_dataset.hash AS latest_dataset_hash,
29+
Latest_dataset.agency_timezone AS latest_dataset_agency_timezone,
30+
Latest_dataset.service_date_range_start AS latest_dataset_service_date_range_start,
31+
Latest_dataset.service_date_range_end AS latest_dataset_service_date_range_end,
32+
-- Latest dataset features
33+
LatestDatasetFeatures AS latest_dataset_features,
34+
-- Latest dataset validation totals
35+
COALESCE(LatestDatasetValidationReportJoin.total_error, 0) as latest_total_error,
36+
COALESCE(LatestDatasetValidationReportJoin.total_warning, 0) as latest_total_warning,
37+
COALESCE(LatestDatasetValidationReportJoin.total_info, 0) as latest_total_info,
38+
COALESCE(LatestDatasetValidationReportJoin.unique_error_count, 0) as latest_unique_error_count,
39+
COALESCE(LatestDatasetValidationReportJoin.unique_warning_count, 0) as latest_unique_warning_count,
40+
COALESCE(LatestDatasetValidationReportJoin.unique_info_count, 0) as latest_unique_info_count,
41+
-- external_ids
42+
ExternalIdJoin.external_ids,
43+
-- redirect_ids
44+
RedirectingIdJoin.redirect_ids,
45+
-- feed gtfs_rt references
46+
FeedReferenceJoin.feed_reference_ids,
47+
-- feed gtfs_rt entities
48+
EntityTypeFeedJoin.entities,
49+
-- locations
50+
FeedLocationJoin.locations,
51+
-- osm locations grouped
52+
OsmLocationJoin.osm_locations,
53+
-- gbfs versions
54+
COALESCE(GbfsVersionsJoin.versions, '[]'::jsonb) AS versions,
55+
56+
-- full-text searchable document
57+
setweight(to_tsvector('english', coalesce(unaccent(Feed.feed_name), '')), 'C') ||
58+
setweight(to_tsvector('english', coalesce(unaccent(Feed.provider), '')), 'C') ||
59+
setweight(to_tsvector('english', coalesce(unaccent((
60+
SELECT string_agg(
61+
coalesce(location->>'country_code', '') || ' ' ||
62+
coalesce(location->>'country', '') || ' ' ||
63+
coalesce(location->>'subdivision_name', '') || ' ' ||
64+
coalesce(location->>'municipality', ''),
65+
' '
66+
)
67+
FROM json_array_elements(FeedLocationJoin.locations) AS location
68+
)), '')), 'A') ||
69+
setweight(to_tsvector('english', coalesce(unaccent(OsmLocationNamesJoin.osm_location_names), '')), 'A')
70+
AS document
71+
FROM Feed
72+
73+
-- Latest dataset
74+
LEFT JOIN gtfsfeed gtf ON gtf.id = Feed.id AND Feed.data_type = 'gtfs'
75+
LEFT JOIN gtfsdataset Latest_dataset ON Latest_dataset.id = gtf.latest_dataset_id
76+
77+
-- Latest dataset features
78+
LEFT JOIN (
79+
SELECT
80+
GtfsDataset.id AS FeatureGtfsDatasetId,
81+
array_agg(DISTINCT FeatureValidationReport.feature) AS LatestDatasetFeatures
82+
FROM GtfsDataset
83+
JOIN ValidationReportGtfsDataset
84+
ON ValidationReportGtfsDataset.dataset_id = GtfsDataset.id
85+
JOIN (
86+
-- Pick latest ValidationReport per dataset based on validated_at
87+
SELECT DISTINCT ON (ValidationReportGtfsDataset.dataset_id)
88+
ValidationReportGtfsDataset.dataset_id,
89+
ValidationReport.id AS latest_validation_report_id
90+
FROM ValidationReportGtfsDataset
91+
JOIN ValidationReport
92+
ON ValidationReport.id = ValidationReportGtfsDataset.validation_report_id
93+
ORDER BY
94+
ValidationReportGtfsDataset.dataset_id,
95+
ValidationReport.validated_at DESC
96+
) AS LatestReports
97+
ON LatestReports.latest_validation_report_id = ValidationReportGtfsDataset.validation_report_id
98+
JOIN FeatureValidationReport
99+
ON FeatureValidationReport.validation_id = ValidationReportGtfsDataset.validation_report_id
100+
GROUP BY FeatureGtfsDatasetId
101+
) AS LatestDatasetFeaturesJoin ON Latest_dataset.id = FeatureGtfsDatasetId
102+
103+
-- Latest dataset validation report
104+
LEFT JOIN (
105+
SELECT
106+
GtfsDataset.id AS ValidationReportGtfsDatasetId,
107+
ValidationReport.total_error,
108+
ValidationReport.total_warning,
109+
ValidationReport.total_info,
110+
ValidationReport.unique_error_count,
111+
ValidationReport.unique_warning_count,
112+
ValidationReport.unique_info_count
113+
FROM GtfsDataset
114+
JOIN ValidationReportGtfsDataset
115+
ON ValidationReportGtfsDataset.dataset_id = GtfsDataset.id
116+
JOIN (
117+
-- Pick latest ValidationReport per dataset based on validated_at
118+
SELECT DISTINCT ON (ValidationReportGtfsDataset.dataset_id)
119+
ValidationReportGtfsDataset.dataset_id,
120+
ValidationReport.id AS latest_validation_report_id
121+
FROM ValidationReportGtfsDataset
122+
JOIN ValidationReport
123+
ON ValidationReport.id = ValidationReportGtfsDataset.validation_report_id
124+
ORDER BY
125+
ValidationReportGtfsDataset.dataset_id,
126+
ValidationReport.validated_at DESC
127+
) AS LatestReports
128+
ON LatestReports.latest_validation_report_id = ValidationReportGtfsDataset.validation_report_id
129+
JOIN ValidationReport
130+
ON ValidationReport.id = ValidationReportGtfsDataset.validation_report_id
131+
) AS LatestDatasetValidationReportJoin ON Latest_dataset.id = ValidationReportGtfsDatasetId
132+
133+
-- External ids
134+
LEFT JOIN (
135+
SELECT
136+
feed_id,
137+
json_agg(json_build_object('external_id', associated_id, 'source', source)) AS external_ids
138+
FROM externalid
139+
GROUP BY feed_id
140+
) AS ExternalIdJoin ON ExternalIdJoin.feed_id = Feed.id
141+
142+
-- feed reference ids
143+
LEFT JOIN (
144+
SELECT
145+
gtfs_rt_feed_id,
146+
array_agg(FeedReferenceJoinInnerQuery.stable_id) AS feed_reference_ids
147+
FROM FeedReference
148+
LEFT JOIN Feed AS FeedReferenceJoinInnerQuery ON FeedReferenceJoinInnerQuery.id = FeedReference.gtfs_feed_id
149+
GROUP BY gtfs_rt_feed_id
150+
) AS FeedReferenceJoin ON FeedReferenceJoin.gtfs_rt_feed_id = Feed.id AND Feed.data_type = 'gtfs_rt'
151+
152+
-- Redirect ids
153+
-- Redirect ids
154+
LEFT JOIN (
155+
SELECT
156+
r.target_id,
157+
json_agg(json_build_object('target_id', f.stable_id, 'comment', r.redirect_comment)) AS redirect_ids
158+
FROM RedirectingId r
159+
JOIN Feed f ON r.target_id = f.id
160+
GROUP BY r.target_id
161+
) AS RedirectingIdJoin ON RedirectingIdJoin.target_id = Feed.id
162+
-- Feed locations
163+
LEFT JOIN (
164+
SELECT
165+
LocationFeed.feed_id,
166+
json_agg(json_build_object('country', country, 'country_code', country_code, 'subdivision_name',
167+
subdivision_name, 'municipality', municipality)) AS locations
168+
FROM Location
169+
LEFT JOIN LocationFeed ON LocationFeed.location_id = Location.id
170+
GROUP BY LocationFeed.feed_id
171+
) AS FeedLocationJoin ON FeedLocationJoin.feed_id = Feed.id
172+
173+
-- Entity types
174+
LEFT JOIN (
175+
SELECT
176+
feed_id,
177+
array_agg(entity_name) AS entities
178+
FROM EntityTypeFeed
179+
GROUP BY feed_id
180+
) AS EntityTypeFeedJoin ON EntityTypeFeedJoin.feed_id = Feed.id AND Feed.data_type = 'gtfs_rt'
181+
182+
-- OSM locations
183+
LEFT JOIN (
184+
WITH locations_per_group AS (
185+
SELECT
186+
fog.feed_id,
187+
olg.group_name,
188+
jsonb_agg(
189+
DISTINCT jsonb_build_object(
190+
'admin_level', gp.admin_level,
191+
'name', gp.name,
192+
'alt_name', gp.alt_name
193+
)
194+
) AS locations
195+
FROM FeedOsmLocationGroup fog
196+
JOIN OsmLocationGroup olg ON olg.group_id = fog.group_id
197+
JOIN OsmLocationGroupGeopolygon olgg ON olgg.group_id = olg.group_id
198+
JOIN Geopolygon gp ON gp.osm_id = olgg.osm_id
199+
GROUP BY fog.feed_id, olg.group_name
200+
)
201+
SELECT
202+
feed_id,
203+
jsonb_agg(
204+
jsonb_build_object(
205+
'group_name', group_name,
206+
'locations', locations
207+
)
208+
)::json AS osm_locations
209+
FROM locations_per_group
210+
GROUP BY feed_id
211+
) AS OsmLocationJoin ON OsmLocationJoin.feed_id = Feed.id
212+
213+
-- OSM location names
214+
LEFT JOIN (
215+
SELECT
216+
fog.feed_id,
217+
string_agg(
218+
DISTINCT concat_ws(' ', gp.name, gp.alt_name),
219+
' '
220+
) AS osm_location_names
221+
FROM FeedOsmLocationGroup fog
222+
JOIN OsmLocationGroup olg ON olg.group_id = fog.group_id
223+
JOIN OsmLocationGroupGeopolygon olgg ON olgg.group_id = olg.group_id
224+
JOIN Geopolygon gp ON gp.osm_id = olgg.osm_id
225+
WHERE gp.name IS NOT NULL
226+
GROUP BY fog.feed_id
227+
) AS OsmLocationNamesJoin ON OsmLocationNamesJoin.feed_id = Feed.id
228+
229+
-- GBFS versions
230+
LEFT JOIN (
231+
SELECT
232+
Feed.id AS feed_id,
233+
to_jsonb(array_agg(DISTINCT GbfsVersion.version ORDER BY GbfsVersion.version)) AS versions
234+
FROM Feed
235+
JOIN GbfsFeed ON GbfsFeed.id = Feed.id
236+
JOIN GbfsVersion ON GbfsVersion.feed_id = GbfsFeed.id
237+
WHERE Feed.data_type = 'gbfs'
238+
GROUP BY Feed.id
239+
) AS GbfsVersionsJoin ON GbfsVersionsJoin.feed_id = Feed.id;
240+
241+
242+
-- Unique index required by REFRESH MATERIALIZED VIEW CONCURRENTLY, which refreshes the view without locking out concurrent reads
243+
CREATE UNIQUE INDEX idx_unique_feed_id ON FeedSearch(feed_id);
244+
245+
-- Indices for feedsearch view optimization
246+
CREATE INDEX feedsearch_document_idx ON FeedSearch USING GIN(document);
247+
CREATE INDEX feedsearch_feed_stable_id ON FeedSearch(feed_stable_id);
248+
CREATE INDEX feedsearch_data_type ON FeedSearch(data_type);
249+
CREATE INDEX feedsearch_status ON FeedSearch(status);
250+
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<databaseChangeLog
3+
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
4+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5+
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
6+
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.1.xsd">
7+
8+
<changeSet id="rebuild-feed-search-mv" author="you" runOnChange="true">
9+
<sql>DROP MATERIALIZED VIEW IF EXISTS feedsearch;</sql>
10+
11+
<sqlFile
12+
path="feed_search.sql"
13+
relativeToChangelogFile="true"
14+
splitStatements="false"
15+
endDelimiter=";"/>
16+
</changeSet>
17+
18+
</databaseChangeLog>

0 commit comments

Comments
 (0)