Skip to content

Commit 5b6a11d

Browse files
committed
should use Feed.official in materialized view
1 parent b0dac39 commit 5b6a11d

File tree

3 files changed

+161
-1
lines changed

3 files changed

+161
-1
lines changed

api/src/feeds/impl/search_api_impl.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from feeds_gen.models.search_feeds200_response import SearchFeeds200Response
1212
from middleware.request_context import is_user_email_restricted
1313
from sqlalchemy import or_
14+
import logging
1415

1516
feed_search_columns = [column for column in t_feedsearch.columns if column.name != "document"]
1617

@@ -54,8 +55,10 @@ def add_search_query_filters(query, search_query, data_type, feed_id, status, is
5455
query = query.where(t_feedsearch.c.status.in_([s.strip().lower() for s in status_list]))
5556
if is_official is not None:
5657
if is_official:
57-
query = query.where(t_feedsearch.c.official.is_(True))
58+
logging.debug("is_official is true")
59+
query = query.where(t_feedsearch.c.official.is_not(None))
5860
else:
61+
logging.debug("is_official is false")
5962
query = query.where(or_(t_feedsearch.c.official.is_(False), t_feedsearch.c.official.is_(None)))
6063
if search_query and len(search_query.strip()) > 0:
6164
query = query.filter(

liquibase/changelog.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,5 @@
4646
<include file="changes/feat_1046.sql" relativeToChangelogFile="true"/>
4747
<include file="changes/feat_951.sql" relativeToChangelogFile="true"/>
4848
<include file="changes/feat_1055.sql" relativeToChangelogFile="true"/>
49+
<include file="changes/feat_1083.sql" relativeToChangelogFile="true"/>
4950
</databaseChangeLog>

liquibase/changes/feat_1083.sql

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
-- Rebuilds the FeedSearch materialized view (one row per Feed row, plus joined aggregates).
-- 1. official is read directly from Feed.official.
-- 2. osm_locations is a JSON list of OsmLocationGroup names and their locations.
-- 3. The location names are added to the document used for full-text search.
-- NOTE: dropping the view also drops its indexes; they must be recreated afterwards.
DROP MATERIALIZED VIEW IF EXISTS FeedSearch;

CREATE MATERIALIZED VIEW FeedSearch AS
SELECT
    -- feed
    Feed.stable_id AS feed_stable_id,
    Feed.id AS feed_id,
    Feed.data_type,
    Feed.status,
    Feed.feed_name,
    Feed.note,
    Feed.feed_contact_email,
    -- source
    Feed.producer_url,
    Feed.authentication_info_url,
    Feed.authentication_type,
    Feed.api_key_parameter_name,
    Feed.license_url,
    Feed.provider,
    Feed.operational_status,
    -- official status (nullable as far as this view is concerned; passed through unchanged)
    Feed.official AS official,
    -- latest_dataset (only populated for data_type = 'gtfs', see join condition below)
    Latest_dataset.id AS latest_dataset_id,
    Latest_dataset.hosted_url AS latest_dataset_hosted_url,
    Latest_dataset.downloaded_at AS latest_dataset_downloaded_at,
    Latest_dataset.bounding_box AS latest_dataset_bounding_box,
    Latest_dataset.hash AS latest_dataset_hash,
    Latest_dataset.service_date_range_start AS latest_dataset_service_date_range_start,
    Latest_dataset.service_date_range_end AS latest_dataset_service_date_range_end,
    -- external_ids
    ExternalIdJoin.external_ids,
    -- redirect_ids
    RedirectingIdJoin.redirect_ids,
    -- feed gtfs_rt references
    FeedReferenceJoin.feed_reference_ids,
    -- feed gtfs_rt entities
    EntityTypeFeedJoin.entities,
    -- locations
    FeedLocationJoin.locations,
    -- osm locations grouped
    OsmLocationJoin.osm_locations,

    -- full-text searchable document:
    --   weight 'C' -> feed name and provider
    --   weight 'A' -> feed location fields and OSM location names
    -- Every input is unaccented and coalesced to '' so a NULL never nulls the whole document.
    setweight(to_tsvector('english', coalesce(unaccent(Feed.feed_name), '')), 'C') ||
    setweight(to_tsvector('english', coalesce(unaccent(Feed.provider), '')), 'C') ||
    setweight(to_tsvector('english', coalesce(unaccent((
        -- flatten the aggregated locations JSON back into one space-separated string
        SELECT string_agg(
            coalesce(location->>'country_code', '') || ' ' ||
            coalesce(location->>'country', '') || ' ' ||
            coalesce(location->>'subdivision_name', '') || ' ' ||
            coalesce(location->>'municipality', ''),
            ' '
        )
        FROM json_array_elements(FeedLocationJoin.locations) AS location
    )), '')), 'A') ||
    setweight(to_tsvector('english', coalesce(unaccent(OsmLocationNamesJoin.osm_location_names), '')), 'A')
        AS document
FROM Feed
-- Latest dataset: only the columns the view exposes (plus the join key) are selected.
LEFT JOIN (
    SELECT
        id,
        feed_id,
        hosted_url,
        downloaded_at,
        bounding_box,
        hash,
        service_date_range_start,
        service_date_range_end
    FROM gtfsdataset
    WHERE latest = true
) AS Latest_dataset ON Latest_dataset.feed_id = Feed.id AND Feed.data_type = 'gtfs'
-- External ids aggregated per feed.
LEFT JOIN (
    SELECT
        feed_id,
        json_agg(json_build_object('external_id', associated_id, 'source', source)) AS external_ids
    FROM externalid
    GROUP BY feed_id
) AS ExternalIdJoin ON ExternalIdJoin.feed_id = Feed.id
-- Stable ids of the feeds referenced by each gtfs_rt feed.
LEFT JOIN (
    SELECT
        gtfs_rt_feed_id,
        array_agg(FeedReferenceJoinInnerQuery.stable_id) AS feed_reference_ids
    FROM FeedReference
    LEFT JOIN Feed AS FeedReferenceJoinInnerQuery ON FeedReferenceJoinInnerQuery.id = FeedReference.gtfs_feed_id
    GROUP BY gtfs_rt_feed_id
) AS FeedReferenceJoin ON FeedReferenceJoin.gtfs_rt_feed_id = Feed.id AND Feed.data_type = 'gtfs_rt'
-- Redirect entries aggregated per target_id.
LEFT JOIN (
    SELECT
        target_id,
        json_agg(json_build_object('target_id', target_id, 'comment', redirect_comment)) AS redirect_ids
    FROM RedirectingId
    GROUP BY target_id
) AS RedirectingIdJoin ON RedirectingIdJoin.target_id = Feed.id
-- Locations aggregated per feed. INNER JOIN: a location with no LocationFeed row would
-- only form a NULL feed_id group, which can never match Feed.id.
LEFT JOIN (
    SELECT
        LocationFeed.feed_id,
        json_agg(json_build_object(
            'country', country,
            'country_code', country_code,
            'subdivision_name', subdivision_name,
            'municipality', municipality
        )) AS locations
    FROM Location
    INNER JOIN LocationFeed ON LocationFeed.location_id = Location.id
    GROUP BY LocationFeed.feed_id
) AS FeedLocationJoin ON FeedLocationJoin.feed_id = Feed.id
-- Entity names aggregated per gtfs_rt feed.
LEFT JOIN (
    SELECT
        feed_id,
        array_agg(entity_name) AS entities
    FROM EntityTypeFeed
    GROUP BY feed_id
) AS EntityTypeFeedJoin ON EntityTypeFeedJoin.feed_id = Feed.id AND Feed.data_type = 'gtfs_rt'
-- OSM locations: first collect the distinct (admin_level, name) pairs per feed and group,
-- then fold the groups into one JSON array per feed.
LEFT JOIN (
    WITH locations_per_group AS (
        SELECT
            fog.feed_id,
            olg.group_name,
            jsonb_agg(
                DISTINCT jsonb_build_object(
                    'admin_level', gp.admin_level,
                    'name', gp.name
                )
            ) AS locations
        FROM FeedOsmLocationGroup fog
        INNER JOIN OsmLocationGroup olg ON olg.group_id = fog.group_id
        INNER JOIN OsmLocationGroupGeopolygon olgg ON olgg.group_id = olg.group_id
        INNER JOIN Geopolygon gp ON gp.osm_id = olgg.osm_id
        GROUP BY fog.feed_id, olg.group_name
    )
    SELECT
        feed_id,
        jsonb_agg(
            jsonb_build_object(
                'group_name', group_name,
                'locations', locations
            )
        )::json AS osm_locations
    FROM locations_per_group
    GROUP BY feed_id
) AS OsmLocationJoin ON OsmLocationJoin.feed_id = Feed.id
-- Distinct, non-NULL OSM polygon names per feed, space separated, for the search document.
LEFT JOIN (
    SELECT
        fog.feed_id,
        string_agg(DISTINCT gp.name, ' ') AS osm_location_names
    FROM FeedOsmLocationGroup fog
    INNER JOIN OsmLocationGroup olg ON olg.group_id = fog.group_id
    INNER JOIN OsmLocationGroupGeopolygon olgg ON olgg.group_id = olg.group_id
    INNER JOIN Geopolygon gp ON gp.osm_id = olgg.osm_id
    WHERE gp.name IS NOT NULL
    GROUP BY fog.feed_id
) AS OsmLocationNamesJoin ON OsmLocationNamesJoin.feed_id = Feed.id;
144+
145+
146+
-- Unique index on feed_id: REFRESH MATERIALIZED VIEW CONCURRENTLY needs a unique index
-- on the view, which lets the refresh run without locking out readers.
CREATE UNIQUE INDEX idx_unique_feed_id
    ON FeedSearch (feed_id);

-- Lookup indexes for the FeedSearch view.
-- GIN index backing full-text matching on the tsvector document column.
CREATE INDEX feedsearch_document_idx
    ON FeedSearch USING GIN (document);
-- B-tree indexes on columns of the view: stable id, data type and status.
CREATE INDEX feedsearch_feed_stable_id
    ON FeedSearch (feed_stable_id);
CREATE INDEX feedsearch_data_type
    ON FeedSearch (data_type);
CREATE INDEX feedsearch_status
    ON FeedSearch (status);
154+
155+
-- Remove the location_with_translations_en view and the translation table when present.
-- NOTE(review): the view is dropped first, presumably because it reads from translation;
-- confirm before reordering these statements.
DROP VIEW IF EXISTS location_with_translations_en;
DROP TABLE IF EXISTS translation;

0 commit comments

Comments
 (0)