Skip to content

Commit a3cbdd9

Browse files
authored
Batch xml generation (#403)
2 parents 096787e + 85c53b8 commit a3cbdd9

File tree

6 files changed

+37
-18
lines changed

6 files changed

+37
-18
lines changed

app/jobs/file_upload_job.rb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
class FileUploadJob < ApplicationJob
22
# Consider removing concurrency limits due to SolidQueue blocking issues
33
# or use a more specific key to avoid blocking all jobs for a language
4-
limits_concurrency key: ->(language_id, content_id, content_type) { "#{language_id}-#{content_type}-#{content_id}" }
4+
limits_concurrency to: 3, key: ->(_language_id, content_id, _content_type) { "hard-limit" }
55

66
retry_on AzureFileShares::Errors::ApiError, wait: :exponentially_longer, attempts: 3
77
retry_on Timeout::Error, wait: :exponentially_longer, attempts: 2

app/jobs/language_files_job.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
class LanguageFilesJob < ApplicationJob
2+
limits_concurrency to: 1, key: ->(language_id) { "hard-limit" }
3+
24
def perform(language_id)
35
language = Language.find(language_id)
46
LanguageContentProcessor.new(language).perform

app/services/xml_generator/all_providers.rb

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,22 @@ def initialize(language, **args)
77
attr_reader :language, :args
88

99
def xml_content(xml)
10-
language.providers
11-
.select("providers.id, providers.name, topics.id AS topic_id, topics.title AS topic_title, topics.created_at AS topic_created_at")
12-
.joins(:topics)
13-
.merge(Topic.with_attached_documents)
14-
.each do |provider|
15-
xml << provider_xml(provider)
10+
# Avoid building a massive join result; iterate provider ids in small slices.
11+
ActiveRecord::Base.uncached do
12+
provider_ids_in_language_in_batches do |ids|
13+
Provider.where(id: ids).order(:id).each do |provider|
14+
xml << provider_xml(provider) # provider_xml should eager-load topics/attachments per provider
15+
end
1616
end
17+
end
18+
end
19+
20+
private
21+
22+
# Yields slices of provider ids that have topics in this language.
23+
def provider_ids_in_language_in_batches(batch_size: 500)
24+
Topic.where(language_id: language.id).distinct.pluck(:provider_id).each_slice(batch_size) do |ids|
25+
yield ids
26+
end
1727
end
1828
end

app/services/xml_generator/single_provider.rb

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,10 @@ def provider_xml(provider)
3535
end
3636
end
3737
end
38-
title_element << Ox::Element.new("topic_tags").tap { |tags| tags << topic.current_tags_list.join(", ") }
38+
title_element << Ox::Element.new("topic_tags").tap do |tags|
39+
names = topic.taggings.map { |tg| tg.tag&.name }.compact.uniq
40+
tags << names.join(", ")
41+
end
3942
end
4043
end
4144
end
@@ -45,12 +48,16 @@ def provider_xml(provider)
4548
end
4649

4750
def grouped_topics(prov)
48-
topic_scope(prov).group_by { |topic| [ topic.created_at.year, topic.created_at.strftime("%m_%B") ] }
51+
topic_scope(prov).group_by { |topic| [ topic.published_at.year, topic.published_at.strftime("%m_%B") ] }
4952
end
5053

5154
def topic_scope(prov)
52-
return prov.topics.where("created_at > ?", 1.month.ago) if args.fetch(:recent, false)
53-
54-
prov.topics
55+
scope = prov.topics
56+
scope = scope.where("published_at > ?", 1.month.ago) if args.fetch(:recent, false)
57+
scope
58+
.select(:id, :title, :published_at, :language_id, :provider_id)
59+
.includes(:language, taggings: :tag) # eager-load language and taggings->tag
60+
.with_attached_documents # eager-load Active Storage attachments + blobs
61+
.order(published_at: :desc)
5562
end
5663
end

spec/services/xml_generator/all_providers_spec.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
<?xml version="1.0"?>
2525
<cmes>
2626
<content_provider name="#{provider1.name}">
27-
<topic_year year="#{topic1.created_at.year}">
28-
<topic_month month="#{topic1.created_at.strftime("%m_%B")}">
27+
<topic_year year="#{topic1.published_at.year}">
28+
<topic_month month="#{topic1.published_at.strftime("%m_%B")}">
2929
<title name="#{topic1.title}">
3030
<topic_id>#{topic1.id}</topic_id>
3131
<topic_files files="Files"/>
@@ -35,8 +35,8 @@
3535
</topic_year>
3636
</content_provider>
3737
<content_provider name="#{provider2.name}">
38-
<topic_year year="#{topic2.created_at.year}">
39-
<topic_month month="#{topic2.created_at.strftime("%m_%B")}">
38+
<topic_year year="#{topic2.published_at.year}">
39+
<topic_month month="#{topic2.published_at.strftime("%m_%B")}">
4040
<title name="#{topic2.title}">
4141
<topic_id>#{topic2.id}</topic_id>
4242
<topic_files files="Files">

spec/services/xml_generator/single_provider_spec.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@
4040
<?xml version="1.0"?>
4141
<cmes>
4242
<content_provider name="#{provider.name}">
43-
<topic_year year="#{topic.created_at.year}">
44-
<topic_month month="#{topic.created_at.strftime("%m_%B")}">
43+
<topic_year year="#{topic.published_at.year}">
44+
<topic_month month="#{topic.published_at.strftime("%m_%B")}">
4545
<title name="#{topic.title}">
4646
<topic_id>#{topic.id}</topic_id>
4747
<topic_files files="Files">

0 commit comments

Comments
 (0)