Skip to content

Commit 22fb4f4

Browse files
authored
Merge pull request #1128 from MobilityData/1083-include-official-realtime-feeds-in-search
fix: should use Feed.official in materialized view
2 parents a8d3964 + a315aee commit 22fb4f4

File tree

2 files changed

+237
-0
lines changed

2 files changed

+237
-0
lines changed

liquibase/changelog.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,6 @@
5050
<include file="changes/feat_997.sql" relativeToChangelogFile="true"/>
5151
<!-- Materialized view updated. Added features and totals. -->
5252
<include file="changes/feat_993.sql" relativeToChangelogFile="true"/>
53+
<!-- Materialized view updated. The official status now comes from the Feed.official field. -->
54+
<include file="changes/feat_1083.sql" relativeToChangelogFile="true"/>
5355
</databaseChangeLog>

liquibase/changes/feat_1083.sql

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
-- Rebuilds the FeedSearch materialized view so that its `official` column is
-- read directly from Feed.official.
--
-- The view exposes one row per Feed with: core feed and source fields, the
-- latest GTFS dataset and its most recent validation report (features and
-- notice totals), external ids, redirects, GTFS-RT references and entities,
-- locations (plain and OSM-grouped) and a weighted full-text `document`.
--
-- Dropping the view also drops any indexes defined on it, so they have to be
-- recreated after this statement.
DROP MATERIALIZED VIEW IF EXISTS FeedSearch;

CREATE MATERIALIZED VIEW FeedSearch AS
SELECT
    -- feed
    Feed.stable_id AS feed_stable_id,
    Feed.id AS feed_id,
    Feed.data_type,
    Feed.status,
    Feed.feed_name,
    Feed.note,
    Feed.feed_contact_email,
    -- source
    Feed.producer_url,
    Feed.authentication_info_url,
    Feed.authentication_type,
    Feed.api_key_parameter_name,
    Feed.license_url,
    Feed.provider,
    Feed.operational_status,
    -- official status
    Feed.official AS official,
    -- latest_dataset (only populated for data_type = 'gtfs', see the join below)
    Latest_dataset.id AS latest_dataset_id,
    Latest_dataset.hosted_url AS latest_dataset_hosted_url,
    Latest_dataset.downloaded_at AS latest_dataset_downloaded_at,
    Latest_dataset.bounding_box AS latest_dataset_bounding_box,
    Latest_dataset.hash AS latest_dataset_hash,
    Latest_dataset.agency_timezone AS latest_dataset_agency_timezone,
    Latest_dataset.service_date_range_start AS latest_dataset_service_date_range_start,
    Latest_dataset.service_date_range_end AS latest_dataset_service_date_range_end,
    -- Latest dataset features
    LatestDatasetFeaturesJoin.LatestDatasetFeatures AS latest_dataset_features,
    -- Latest dataset validation totals (0 when the dataset has no validation report)
    COALESCE(LatestDatasetValidationReportJoin.total_error, 0) AS latest_total_error,
    COALESCE(LatestDatasetValidationReportJoin.total_warning, 0) AS latest_total_warning,
    COALESCE(LatestDatasetValidationReportJoin.total_info, 0) AS latest_total_info,
    COALESCE(LatestDatasetValidationReportJoin.unique_error_count, 0) AS latest_unique_error_count,
    COALESCE(LatestDatasetValidationReportJoin.unique_warning_count, 0) AS latest_unique_warning_count,
    COALESCE(LatestDatasetValidationReportJoin.unique_info_count, 0) AS latest_unique_info_count,
    -- external_ids
    ExternalIdJoin.external_ids,
    -- redirect_ids
    RedirectingIdJoin.redirect_ids,
    -- feed gtfs_rt references
    FeedReferenceJoin.feed_reference_ids,
    -- feed gtfs_rt entities
    EntityTypeFeedJoin.entities,
    -- locations
    FeedLocationJoin.locations,
    -- osm locations grouped
    OsmLocationJoin.osm_locations,

    -- full-text searchable document:
    -- feed name and provider get weight 'C', location text gets weight 'A'
    setweight(to_tsvector('english', coalesce(unaccent(Feed.feed_name), '')), 'C') ||
    setweight(to_tsvector('english', coalesce(unaccent(Feed.provider), '')), 'C') ||
    setweight(to_tsvector('english', coalesce(unaccent((
        SELECT string_agg(
            coalesce(location->>'country_code', '') || ' ' ||
            coalesce(location->>'country', '') || ' ' ||
            coalesce(location->>'subdivision_name', '') || ' ' ||
            coalesce(location->>'municipality', ''),
            ' '
        )
        FROM json_array_elements(FeedLocationJoin.locations) AS location
    )), '')), 'A') ||
    setweight(to_tsvector('english', coalesce(unaccent(OsmLocationNamesJoin.osm_location_names), '')), 'A')
    AS document
FROM Feed

-- Latest dataset
-- Only the columns the view actually reads are selected.
-- NOTE(review): assumes at most one gtfsdataset row per feed has latest = true;
-- more than one would yield several rows for the same feed_id - confirm.
LEFT JOIN (
    SELECT
        id,
        feed_id,
        hosted_url,
        downloaded_at,
        bounding_box,
        hash,
        agency_timezone,
        service_date_range_start,
        service_date_range_end
    FROM gtfsdataset
    WHERE latest = true
) AS Latest_dataset ON Latest_dataset.feed_id = Feed.id AND Feed.data_type = 'gtfs'

-- Latest dataset features
LEFT JOIN (
    SELECT
        GtfsDataset.id AS FeatureGtfsDatasetId,
        array_agg(DISTINCT FeatureValidationReport.feature) AS LatestDatasetFeatures
    FROM GtfsDataset
    JOIN ValidationReportGtfsDataset
        ON ValidationReportGtfsDataset.dataset_id = GtfsDataset.id
    JOIN (
        -- Pick the latest ValidationReport per dataset based on validated_at.
        -- NULLS LAST keeps a report without a timestamp from outranking dated ones
        -- (DESC sorts NULLs first by default); the id tie-breaker makes the pick
        -- deterministic, so this subquery and the totals subquery below always
        -- choose the same report.
        SELECT DISTINCT ON (ValidationReportGtfsDataset.dataset_id)
            ValidationReportGtfsDataset.dataset_id,
            ValidationReport.id AS latest_validation_report_id
        FROM ValidationReportGtfsDataset
        JOIN ValidationReport
            ON ValidationReport.id = ValidationReportGtfsDataset.validation_report_id
        ORDER BY
            ValidationReportGtfsDataset.dataset_id,
            ValidationReport.validated_at DESC NULLS LAST,
            ValidationReport.id DESC
    ) AS LatestReports
        -- Match on the dataset as well as the report: a report linked to several
        -- datasets must only count for the datasets where it is the latest one.
        ON LatestReports.dataset_id = ValidationReportGtfsDataset.dataset_id
        AND LatestReports.latest_validation_report_id = ValidationReportGtfsDataset.validation_report_id
    JOIN FeatureValidationReport
        ON FeatureValidationReport.validation_id = ValidationReportGtfsDataset.validation_report_id
    GROUP BY GtfsDataset.id
) AS LatestDatasetFeaturesJoin ON Latest_dataset.id = LatestDatasetFeaturesJoin.FeatureGtfsDatasetId

-- Latest dataset validation report
LEFT JOIN (
    SELECT
        GtfsDataset.id AS ValidationReportGtfsDatasetId,
        ValidationReport.total_error,
        ValidationReport.total_warning,
        ValidationReport.total_info,
        ValidationReport.unique_error_count,
        ValidationReport.unique_warning_count,
        ValidationReport.unique_info_count
    FROM GtfsDataset
    JOIN ValidationReportGtfsDataset
        ON ValidationReportGtfsDataset.dataset_id = GtfsDataset.id
    JOIN (
        -- Same "latest report per dataset" selection as in the features join
        SELECT DISTINCT ON (ValidationReportGtfsDataset.dataset_id)
            ValidationReportGtfsDataset.dataset_id,
            ValidationReport.id AS latest_validation_report_id
        FROM ValidationReportGtfsDataset
        JOIN ValidationReport
            ON ValidationReport.id = ValidationReportGtfsDataset.validation_report_id
        ORDER BY
            ValidationReportGtfsDataset.dataset_id,
            ValidationReport.validated_at DESC NULLS LAST,
            ValidationReport.id DESC
    ) AS LatestReports
        -- Without the dataset_id match, a report shared between datasets can
        -- produce more than one row per dataset here, which in turn duplicates
        -- the feed row in the view.
        ON LatestReports.dataset_id = ValidationReportGtfsDataset.dataset_id
        AND LatestReports.latest_validation_report_id = ValidationReportGtfsDataset.validation_report_id
    JOIN ValidationReport
        ON ValidationReport.id = ValidationReportGtfsDataset.validation_report_id
) AS LatestDatasetValidationReportJoin
    ON Latest_dataset.id = LatestDatasetValidationReportJoin.ValidationReportGtfsDatasetId

-- External ids
LEFT JOIN (
    SELECT
        feed_id,
        json_agg(json_build_object('external_id', associated_id, 'source', source)) AS external_ids
    FROM externalid
    GROUP BY feed_id
) AS ExternalIdJoin ON ExternalIdJoin.feed_id = Feed.id

-- feed reference ids (stable ids of the GTFS feeds referenced by a gtfs_rt feed)
LEFT JOIN (
    SELECT
        gtfs_rt_feed_id,
        array_agg(FeedReferenceJoinInnerQuery.stable_id) AS feed_reference_ids
    FROM FeedReference
    LEFT JOIN Feed AS FeedReferenceJoinInnerQuery ON FeedReferenceJoinInnerQuery.id = FeedReference.gtfs_feed_id
    GROUP BY gtfs_rt_feed_id
) AS FeedReferenceJoin ON FeedReferenceJoin.gtfs_rt_feed_id = Feed.id AND Feed.data_type = 'gtfs_rt'

-- Redirect ids
-- NOTE(review): rows are grouped by target_id and joined on target_id = Feed.id,
-- so every 'target_id' value inside redirect_ids equals the feed's own id.
-- Confirm this is intended; left unchanged here.
LEFT JOIN (
    SELECT
        target_id,
        json_agg(json_build_object('target_id', target_id, 'comment', redirect_comment)) AS redirect_ids
    FROM RedirectingId
    GROUP BY target_id
) AS RedirectingIdJoin ON RedirectingIdJoin.target_id = Feed.id

-- Feed locations
-- Driven from LocationFeed with an inner join so that locations not attached to
-- any feed do not build a throwaway NULL feed_id group.
LEFT JOIN (
    SELECT
        LocationFeed.feed_id,
        json_agg(json_build_object('country', country, 'country_code', country_code, 'subdivision_name',
                                   subdivision_name, 'municipality', municipality)) AS locations
    FROM LocationFeed
    JOIN Location ON Location.id = LocationFeed.location_id
    GROUP BY LocationFeed.feed_id
) AS FeedLocationJoin ON FeedLocationJoin.feed_id = Feed.id

-- Entity types (only attached to gtfs_rt feeds)
LEFT JOIN (
    SELECT
        feed_id,
        array_agg(entity_name) AS entities
    FROM EntityTypeFeed
    GROUP BY feed_id
) AS EntityTypeFeedJoin ON EntityTypeFeedJoin.feed_id = Feed.id AND Feed.data_type = 'gtfs_rt'

-- OSM locations: one JSON object per location group, each holding the distinct
-- (admin_level, name) pairs of the group's geopolygons
LEFT JOIN (
    WITH locations_per_group AS (
        SELECT
            fog.feed_id,
            olg.group_name,
            jsonb_agg(
                DISTINCT jsonb_build_object(
                    'admin_level', gp.admin_level,
                    'name', gp.name
                )
            ) AS locations
        FROM FeedOsmLocationGroup fog
        JOIN OsmLocationGroup olg ON olg.group_id = fog.group_id
        JOIN OsmLocationGroupGeopolygon olgg ON olgg.group_id = olg.group_id
        JOIN Geopolygon gp ON gp.osm_id = olgg.osm_id
        GROUP BY fog.feed_id, olg.group_name
    )
    SELECT
        feed_id,
        jsonb_agg(
            jsonb_build_object(
                'group_name', group_name,
                'locations', locations
            )
        )::json AS osm_locations
    FROM locations_per_group
    GROUP BY feed_id
) AS OsmLocationJoin ON OsmLocationJoin.feed_id = Feed.id

-- OSM location names: distinct geopolygon names per feed, space separated,
-- used only to feed the full-text document
LEFT JOIN (
    SELECT
        fog.feed_id,
        string_agg(DISTINCT gp.name, ' ') AS osm_location_names
    FROM FeedOsmLocationGroup fog
    JOIN OsmLocationGroup olg ON olg.group_id = fog.group_id
    JOIN OsmLocationGroupGeopolygon olgg ON olgg.group_id = olg.group_id
    JOIN Geopolygon gp ON gp.osm_id = olgg.osm_id
    WHERE gp.name IS NOT NULL
    GROUP BY fog.feed_id
) AS OsmLocationNamesJoin ON OsmLocationNamesJoin.feed_id = Feed.id;
226+
227+
228+
-- Unique index on feed_id: PostgreSQL needs a unique index on a materialized
-- view before it can be refreshed with REFRESH ... CONCURRENTLY, i.e. without
-- locking readers out during the refresh.
CREATE UNIQUE INDEX idx_unique_feed_id
    ON FeedSearch (feed_id);

-- Secondary indexes for querying the view:
-- a GIN index for the full-text document, plain indexes for the filter columns.
CREATE INDEX feedsearch_document_idx
    ON FeedSearch USING gin (document);

CREATE INDEX feedsearch_feed_stable_id
    ON FeedSearch (feed_stable_id);

CREATE INDEX feedsearch_data_type
    ON FeedSearch (data_type);

CREATE INDEX feedsearch_status
    ON FeedSearch (status);

0 commit comments

Comments
 (0)