Skip to content

Commit 93e2740

Browse files
committed
[E] Background node extrapolation
Related to MNFLD-1100
1 parent ad397ce commit 93e2740

14 files changed

+155
-60
lines changed

api/app/jobs/text_section_nodes/backport_search_index_job.rb

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,16 @@ class BackportSearchIndexJob < ApplicationJob
99
unique :until_executed, lock_ttl: 2.days, on_conflict: :log
1010

1111
def build_enumerator(cursor:)
12-
enumerator_builder.active_record_on_batch_relations(
12+
enumerator_builder.active_record_on_records(
1313
TextSectionNode.sans_search_indexed,
14-
cursor:,
15-
batch_size: 1000
14+
cursor:
1615
)
1716
end
1817

19-
# @param [ActiveRecord::Relation<TextSectionNode>] batch_relation
18+
# @param [TextSectionNode] text_section_node
2019
# @return [void]
21-
def each_iteration(batch_relation)
22-
batch_relation.backport_search_index!
20+
def each_iteration(text_section_node)
21+
text_section_node.index_contained_content!
2322
end
2423
end
2524
end

api/app/jobs/text_sections/index_current_node_content_job.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ class IndexCurrentNodeContentJob < ApplicationJob
1111

1212
queue_as :low_priority
1313

14+
queue_with_priority 500
15+
1416
# @param [TextSection] text_section
1517
# @param [String, nil] cursor
1618
# @return [Enumerator]
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# frozen_string_literal: true
2+
3+
module TextSections
4+
# @see TextSections::IndexNodes
5+
class IndexNodesJob < ApplicationJob
6+
queue_as :default
7+
8+
queue_with_priority 100
9+
10+
# @param [TextSection] text_section
11+
# @return [void]
12+
def perform(text_section)
13+
text_section.index_nodes!
14+
end
15+
end
16+
end

api/app/models/text_section.rb

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,7 @@ class TextSection < ApplicationRecord
7777

7878
before_validation :update_body_json
7979
after_destroy :remove_linked_toc_entries
80-
after_save :extrapolate_nodes!
81-
after_save_commit :asynchronously_index_current_node_content!
80+
after_save_commit :asynchronously_index_nodes!
8281
after_commit :maybe_adopt_or_orphan_annotations!, on: [:update, :destroy]
8382

8483
scope :in_texts, ->(texts) { where(text: texts) }
@@ -196,6 +195,12 @@ def asynchronously_index_current_node_content!
196195
TextSections::IndexCurrentNodeContentJob.perform_later self
197196
end
198197

198+
# @api private
199+
# @return [void]
200+
def asynchronously_index_nodes!
201+
TextSections::IndexNodesJob.perform_later self
202+
end
203+
199204
# @api private
200205
# @return [void]
201206
def extrapolate_nodes!
@@ -216,6 +221,11 @@ def index_contained_content!
216221
ManifoldApi::Container["text_sections.index_contained_content"].(self).value!
217222
end
218223

224+
# @return [void]
225+
def index_nodes!
226+
ManifoldApi::Container["text_sections.index_nodes"].(self).value!
227+
end
228+
219229
private
220230

221231
def extract_content_from!(source, data: [])

api/app/models/text_section_node.rb

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -136,42 +136,6 @@ def hit_search_for(keyword, text_section_ids: [])
136136
end
137137
end
138138

139-
# @api private
140-
# @see TextSectionNodes::BackportSearchIndexJob
141-
# @return [void]
142-
def backport_search_index!
143-
ids = all.where_values_hash.fetch("id", [])
144-
145-
raise "must be called in a batch relation" if ids.blank?
146-
147-
tsn = arel_table.alias("pn")
148-
149-
id_in_batch = tsn[:id].in(ids).to_sql
150-
151-
connection.exec_update(<<~SQL)
152-
WITH contained AS (
153-
SELECT
154-
pn.id AS id,
155-
array_agg(cn.node_uuid ORDER BY cn.node_indices) FILTER (WHERE cn.node_uuid IS NOT NULL) AS contained_node_uuids,
156-
string_agg(cn.content, ' ' ORDER BY cn.node_indices) FILTER (WHERE cn.content IS NOT NULL AND cn.content ~ '[^[:space:]]+') AS contained_content
157-
FROM text_section_nodes pn
158-
INNER JOIN text_section_nodes cn ON pn.node_path @> cn.node_path
159-
WHERE
160-
#{id_in_batch}
161-
GROUP BY 1
162-
)
163-
UPDATE text_section_nodes tsn SET
164-
contained_node_uuids = COALESCE(c.contained_node_uuids, '{}'::text[]),
165-
contained_content = CASE WHEN char_length(c.contained_content) <= 4096 THEN c.contained_content ELSE '' END,
166-
search_indexed_at = CURRENT_TIMESTAMP,
167-
search_indexed = TRUE
168-
FROM contained c
169-
WHERE
170-
c.id = tsn.id
171-
;
172-
SQL
173-
end
174-
175139
private
176140

177141
def arel_content_highlighted_for(keyword, node_hits:)

api/app/operations/text_sections/extrapolate_nodes.rb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,13 +170,14 @@ class ExtrapolateNodes
170170
SQL
171171

172172
def call(**args)
173-
corrected = yield correct_intermediate_nodes.(**args)
173+
# We only need this if we end up changing the nodes that count as intermediate.
174+
# corrected = yield correct_intermediate_nodes.(**args)
174175

175-
return Success(upserted: 0, corrected:) if args[:text_section].present? && args[:text_section].body_json.blank?
176+
return Success(upserted: 0) if args[:text_section].present? && args[:text_section].body_json.blank?
176177

177178
upserted = sql_update! FIRST_PART, interpolate(**args), SECOND_PART, FINAL_PART
178179

179-
Success(upserted:, corrected:)
180+
Success(upserted:)
180181
end
181182

182183
private
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# frozen_string_literal: true
2+
3+
module TextSections
4+
# An operation to extrapolate and prune {TextSectionNode}s for a {TextSection}.
5+
# Because this operation can be very large, we run it in the background,
6+
# with a separate task to actually index the content for full-text search.
7+
# @see TextSections::IndexCurrentNodeContentJob
8+
class IndexNodes
9+
include Dry::Monads[:result, :do]
10+
include ManifoldApi::Deps[
11+
extrapolate_nodes: "text_sections.extrapolate_nodes",
12+
prune_nodes: "text_sections.prune_nodes",
13+
]
14+
15+
# @param [TextSection] text_section
16+
# @return [Dry::Monads::Result]
17+
def call(text_section)
18+
results = yield extrapolate_nodes.(text_section:)
19+
20+
results => { upserted:, }
21+
22+
pruned = yield prune_nodes.(text_section)
23+
24+
text_section.asynchronously_index_current_node_content!
25+
26+
counts = { upserted:, pruned:, }
27+
28+
Success counts
29+
end
30+
end
31+
end
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# frozen_string_literal: true
2+
3+
module TextSections
4+
class PruneNodes
5+
include Dry::Monads[:result]
6+
7+
# @param [TextSection] text_section
8+
# @return [Dry::Monads::Success(Integer)]
9+
def call(text_section)
10+
# :nocov:
11+
# For now, we preserve old nodes whenever the text section has any
12+
# annotations. In the future, we need to determine whether the
13+
# annotations actually reference the nodes being kept.
14+
return Success(0) if text_section.annotations.exists?
15+
# :nocov:
16+
17+
pruned = text_section.text_section_nodes.orphaned.delete_all
18+
19+
Success pruned
20+
end
21+
end
22+
end
Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,8 @@
11
# frozen_string_literal: true
22

33
class AddContainedContentHelperIndex < ActiveRecord::Migration[7.0]
4-
# Needed for CREATE INDEX CONCURRENTLY
5-
disable_ddl_transaction!
6-
74
def change
8-
change_table :text_section_nodes do |t|
9-
t.index %i[text_section_id body_hash node_path id],
10-
name: "index_text_section_nodes_contained_content_indexing",
11-
opclass: { node_path: "gist_ltree_ops(siglen=100)" },
12-
algorithm: :concurrently,
13-
using: :gist
14-
end
5+
# Intentionally left blank. Old migration file kept
6+
# but supplanted by ImproveTextSectionNodeIndex.
157
end
168
end
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# frozen_string_literal: true
2+
3+
class ImproveTextSectionNodeIndex < ActiveRecord::Migration[7.0]
4+
# Needed for CREATE INDEX CONCURRENTLY
5+
disable_ddl_transaction!
6+
7+
def change
8+
reversible do |dir|
9+
dir.up do
10+
execute <<~SQL.squish
11+
DROP INDEX CONCURRENTLY IF EXISTS index_text_section_nodes_contained_content_indexing;
12+
SQL
13+
end
14+
end
15+
16+
change_table :text_section_nodes do |t|
17+
t.index %i[text_section_id body_hash node_path id],
18+
name: "index_text_section_nodes_contained_content_indexing",
19+
opclass: { node_path: "gist_ltree_ops(siglen=24)" },
20+
algorithm: :concurrently,
21+
using: :gist
22+
end
23+
end
24+
end

0 commit comments

Comments
 (0)