Skip to content

Commit 2780b4c

Browse files
authored
fix: 1396 search endpoint returning uid as id instead of stable id for latest dataset and redirects (#1466)
* updated feedsearch view to return stable_ids instead of unique ids
1 parent 8c98eb4 commit 2780b4c

File tree

2 files changed

+254
-0
lines changed

2 files changed

+254
-0
lines changed

liquibase/changelog.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,4 +80,6 @@
8080
<include file="changes/feat_1412.sql" relativeToChangelogFile="true"/>
8181
<include file="changes/feat_1343.sql" relativeToChangelogFile="true"/>
8282
<include file="changes/feat_1249-2.sql" relativeToChangelogFile="true"/>
83+
<!-- Materialized view recreated to return stable_ids not UIDs -->
84+
<include file="changes/feat_1396.sql" relativeToChangelogFile="true"/>
8385
</databaseChangeLog>

liquibase/changes/feat_1396.sql

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
-- latest_dataset_id should return the dataset's stable_id, not the UID of the dataset
2+
3+
-- Remove any previous definition so the view can be recreated below with the
-- new column sources; IF EXISTS keeps this a no-op when the view is absent.
DROP MATERIALIZED VIEW IF EXISTS FeedSearch;
4+
5+
6+
-- Recreate the FeedSearch materialized view.
--
-- Flattens each Feed together with its latest GTFS dataset, that dataset's most
-- recent validation report, external ids, redirects, GTFS-RT references and
-- entities, locations, OSM locations, GBFS versions, and a weighted full-text
-- `document` column. Every joined subquery is aggregated per feed (or per
-- dataset) so the result is intended to hold one row per Feed; a unique index
-- on feed_id is created right after this statement.
CREATE MATERIALIZED VIEW FeedSearch AS
SELECT
    -- feed
    Feed.stable_id AS feed_stable_id,
    Feed.id AS feed_id,
    Feed.data_type,
    Feed.status,
    Feed.feed_name,
    Feed.note,
    Feed.feed_contact_email,
    -- source
    Feed.producer_url,
    Feed.authentication_info_url,
    Feed.authentication_type,
    Feed.api_key_parameter_name,
    Feed.license_url,
    Feed.provider,
    Feed.operational_status,
    -- official status
    Feed.official AS official,
    -- created_at
    Feed.created_at AS created_at,
    -- latest_dataset: exposes the dataset's stable_id, not its internal id
    Latest_dataset.stable_id AS latest_dataset_id,
    Latest_dataset.hosted_url AS latest_dataset_hosted_url,
    Latest_dataset.downloaded_at AS latest_dataset_downloaded_at,
    Latest_dataset.bounding_box AS latest_dataset_bounding_box,
    Latest_dataset.hash AS latest_dataset_hash,
    Latest_dataset.agency_timezone AS latest_dataset_agency_timezone,
    Latest_dataset.service_date_range_start AS latest_dataset_service_date_range_start,
    Latest_dataset.service_date_range_end AS latest_dataset_service_date_range_end,
    -- Latest dataset features
    LatestDatasetFeaturesJoin.LatestDatasetFeatures AS latest_dataset_features,
    -- Latest dataset validation totals (0 when there is no validation report)
    COALESCE(LatestDatasetValidationReportJoin.total_error, 0) AS latest_total_error,
    COALESCE(LatestDatasetValidationReportJoin.total_warning, 0) AS latest_total_warning,
    COALESCE(LatestDatasetValidationReportJoin.total_info, 0) AS latest_total_info,
    COALESCE(LatestDatasetValidationReportJoin.unique_error_count, 0) AS latest_unique_error_count,
    COALESCE(LatestDatasetValidationReportJoin.unique_warning_count, 0) AS latest_unique_warning_count,
    COALESCE(LatestDatasetValidationReportJoin.unique_info_count, 0) AS latest_unique_info_count,
    -- external_ids
    ExternalIdJoin.external_ids,
    -- redirect_ids
    RedirectingIdJoin.redirect_ids,
    -- feed gtfs_rt references
    FeedReferenceJoin.feed_reference_ids,
    -- feed gtfs_rt entities
    EntityTypeFeedJoin.entities,
    -- locations
    FeedLocationJoin.locations,
    -- osm locations grouped
    OsmLocationJoin.osm_locations,
    -- gbfs versions (empty JSON array when the feed has none)
    COALESCE(GbfsVersionsJoin.versions, '[]'::jsonb) AS versions,

    -- full-text searchable document:
    -- feed name and provider carry weight 'C', location text carries weight 'A'
    setweight(to_tsvector('english', coalesce(unaccent(Feed.feed_name), '')), 'C') ||
    setweight(to_tsvector('english', coalesce(unaccent(Feed.provider), '')), 'C') ||
    setweight(to_tsvector('english', coalesce(unaccent((
        SELECT string_agg(
            coalesce(location->>'country_code', '') || ' ' ||
            coalesce(location->>'country', '') || ' ' ||
            coalesce(location->>'subdivision_name', '') || ' ' ||
            coalesce(location->>'municipality', ''),
            ' '
        )
        FROM json_array_elements(FeedLocationJoin.locations) AS location
    )), '')), 'A') ||
    setweight(to_tsvector('english', coalesce(unaccent(OsmLocationNamesJoin.osm_location_names), '')), 'A')
    AS document
FROM Feed

-- Latest dataset: resolved through gtfsfeed.latest_dataset_id, only for feeds
-- whose data_type is 'gtfs'
LEFT JOIN gtfsfeed gtf ON gtf.id = Feed.id AND Feed.data_type = 'gtfs'
LEFT JOIN gtfsdataset Latest_dataset ON Latest_dataset.id = gtf.latest_dataset_id

-- Latest dataset features
LEFT JOIN (
    SELECT
        GtfsDataset.id AS FeatureGtfsDatasetId,
        array_agg(DISTINCT FeatureValidationReport.feature) AS LatestDatasetFeatures
    FROM GtfsDataset
    JOIN ValidationReportGtfsDataset
        ON ValidationReportGtfsDataset.dataset_id = GtfsDataset.id
    JOIN (
        -- Pick latest ValidationReport per dataset based on validated_at;
        -- the id tiebreaker keeps the choice deterministic and identical to
        -- the one made by the validation-totals subquery further down.
        SELECT DISTINCT ON (ValidationReportGtfsDataset.dataset_id)
            ValidationReportGtfsDataset.dataset_id,
            ValidationReport.id AS latest_validation_report_id
        FROM ValidationReportGtfsDataset
        JOIN ValidationReport
            ON ValidationReport.id = ValidationReportGtfsDataset.validation_report_id
        ORDER BY
            ValidationReportGtfsDataset.dataset_id,
            ValidationReport.validated_at DESC,
            ValidationReport.id DESC
    ) AS LatestReports
        -- Match on the dataset as well as the report: a report linked to
        -- several datasets must only count for the dataset it is latest for.
        ON LatestReports.dataset_id = ValidationReportGtfsDataset.dataset_id
        AND LatestReports.latest_validation_report_id = ValidationReportGtfsDataset.validation_report_id
    JOIN FeatureValidationReport
        ON FeatureValidationReport.validation_id = ValidationReportGtfsDataset.validation_report_id
    GROUP BY GtfsDataset.id
) AS LatestDatasetFeaturesJoin
    ON Latest_dataset.id = LatestDatasetFeaturesJoin.FeatureGtfsDatasetId

-- Latest dataset validation report
LEFT JOIN (
    SELECT
        GtfsDataset.id AS ValidationReportGtfsDatasetId,
        ValidationReport.total_error,
        ValidationReport.total_warning,
        ValidationReport.total_info,
        ValidationReport.unique_error_count,
        ValidationReport.unique_warning_count,
        ValidationReport.unique_info_count
    FROM GtfsDataset
    JOIN ValidationReportGtfsDataset
        ON ValidationReportGtfsDataset.dataset_id = GtfsDataset.id
    JOIN (
        -- Pick latest ValidationReport per dataset based on validated_at
        -- (same ordering as in the features subquery above).
        SELECT DISTINCT ON (ValidationReportGtfsDataset.dataset_id)
            ValidationReportGtfsDataset.dataset_id,
            ValidationReport.id AS latest_validation_report_id
        FROM ValidationReportGtfsDataset
        JOIN ValidationReport
            ON ValidationReport.id = ValidationReportGtfsDataset.validation_report_id
        ORDER BY
            ValidationReportGtfsDataset.dataset_id,
            ValidationReport.validated_at DESC,
            ValidationReport.id DESC
    ) AS LatestReports
        -- Dataset must match too, otherwise a shared report could yield
        -- several rows per dataset and break the unique index on feed_id.
        ON LatestReports.dataset_id = ValidationReportGtfsDataset.dataset_id
        AND LatestReports.latest_validation_report_id = ValidationReportGtfsDataset.validation_report_id
    JOIN ValidationReport
        ON ValidationReport.id = ValidationReportGtfsDataset.validation_report_id
) AS LatestDatasetValidationReportJoin
    ON Latest_dataset.id = LatestDatasetValidationReportJoin.ValidationReportGtfsDatasetId

-- External ids
LEFT JOIN (
    SELECT
        feed_id,
        json_agg(json_build_object('external_id', associated_id, 'source', source)) AS external_ids
    FROM externalid
    GROUP BY feed_id
) AS ExternalIdJoin ON ExternalIdJoin.feed_id = Feed.id

-- Feed reference ids: stable ids of the GTFS feeds a gtfs_rt feed refers to
LEFT JOIN (
    SELECT
        gtfs_rt_feed_id,
        array_agg(FeedReferenceJoinInnerQuery.stable_id) AS feed_reference_ids
    FROM FeedReference
    LEFT JOIN Feed AS FeedReferenceJoinInnerQuery ON FeedReferenceJoinInnerQuery.id = FeedReference.gtfs_feed_id
    GROUP BY gtfs_rt_feed_id
) AS FeedReferenceJoin ON FeedReferenceJoin.gtfs_rt_feed_id = Feed.id AND Feed.data_type = 'gtfs_rt'

-- Redirect ids: reported with the feed's stable_id instead of its internal id.
-- NOTE(review): both the grouping and the outer join key on target_id, so the
-- 'target_id' emitted for a feed is that feed's own stable_id. Confirm whether
-- the outer join was meant to use the redirect's source feed instead.
LEFT JOIN (
    SELECT
        r.target_id,
        json_agg(json_build_object('target_id', f.stable_id, 'comment', r.redirect_comment)) AS redirect_ids
    FROM RedirectingId r
    JOIN Feed f ON r.target_id = f.id
    GROUP BY r.target_id
) AS RedirectingIdJoin ON RedirectingIdJoin.target_id = Feed.id

-- Feed locations
LEFT JOIN (
    SELECT
        LocationFeed.feed_id,
        json_agg(json_build_object('country', country, 'country_code', country_code, 'subdivision_name',
                                   subdivision_name, 'municipality', municipality)) AS locations
    -- Driven from LocationFeed with an inner join: locations attached to no
    -- feed would only produce a NULL feed_id group that can never match a Feed.
    FROM LocationFeed
    JOIN Location ON Location.id = LocationFeed.location_id
    GROUP BY LocationFeed.feed_id
) AS FeedLocationJoin ON FeedLocationJoin.feed_id = Feed.id

-- Entity types (joined for gtfs_rt feeds only)
LEFT JOIN (
    SELECT
        feed_id,
        array_agg(entity_name) AS entities
    FROM EntityTypeFeed
    GROUP BY feed_id
) AS EntityTypeFeedJoin ON EntityTypeFeedJoin.feed_id = Feed.id AND Feed.data_type = 'gtfs_rt'

-- OSM locations: one JSON object per location group, each listing the
-- distinct (admin_level, name) pairs of its geopolygons
LEFT JOIN (
    WITH locations_per_group AS (
        SELECT
            fog.feed_id,
            olg.group_name,
            jsonb_agg(
                DISTINCT jsonb_build_object(
                    'admin_level', gp.admin_level,
                    'name', gp.name
                )
            ) AS locations
        FROM FeedOsmLocationGroup fog
        JOIN OsmLocationGroup olg ON olg.group_id = fog.group_id
        JOIN OsmLocationGroupGeopolygon olgg ON olgg.group_id = olg.group_id
        JOIN Geopolygon gp ON gp.osm_id = olgg.osm_id
        GROUP BY fog.feed_id, olg.group_name
    )
    SELECT
        feed_id,
        jsonb_agg(
            jsonb_build_object(
                'group_name', group_name,
                'locations', locations
            )
        )::json AS osm_locations
    FROM locations_per_group
    GROUP BY feed_id
) AS OsmLocationJoin ON OsmLocationJoin.feed_id = Feed.id

-- OSM location names: space-separated distinct names, used only to build `document`
LEFT JOIN (
    SELECT
        fog.feed_id,
        string_agg(DISTINCT gp.name, ' ') AS osm_location_names
    FROM FeedOsmLocationGroup fog
    JOIN OsmLocationGroup olg ON olg.group_id = fog.group_id
    JOIN OsmLocationGroupGeopolygon olgg ON olgg.group_id = olg.group_id
    JOIN Geopolygon gp ON gp.osm_id = olgg.osm_id
    WHERE gp.name IS NOT NULL
    GROUP BY fog.feed_id
) AS OsmLocationNamesJoin ON OsmLocationNamesJoin.feed_id = Feed.id

-- GBFS versions: sorted, de-duplicated version list per gbfs feed
LEFT JOIN (
    SELECT
        Feed.id AS feed_id,
        to_jsonb(array_agg(DISTINCT GbfsVersion.version ORDER BY GbfsVersion.version)) AS versions
    FROM Feed
    JOIN GbfsFeed ON GbfsFeed.id = Feed.id
    JOIN GbfsVersion ON GbfsVersion.feed_id = GbfsFeed.id
    WHERE Feed.data_type = 'gbfs'
    GROUP BY Feed.id
) AS GbfsVersionsJoin ON GbfsVersionsJoin.feed_id = Feed.id;
242+
243+
244+
-- Unique index on feed_id: lets the materialized view be refreshed
-- concurrently, avoiding table locks during the refresh.
CREATE UNIQUE INDEX idx_unique_feed_id
    ON feedsearch (feed_id);

-- Indexes backing feedsearch lookups: full-text search over `document`,
-- plus filters on stable id, data type and status.
CREATE INDEX feedsearch_document_idx
    ON feedsearch USING gin (document);
CREATE INDEX feedsearch_feed_stable_id
    ON feedsearch (feed_stable_id);
CREATE INDEX feedsearch_data_type
    ON feedsearch (data_type);
CREATE INDEX feedsearch_status
    ON feedsearch (status);
252+

0 commit comments

Comments
 (0)