Skip to content

Commit 37deb39

Browse files
authored
Limit topic scope by language (#414)
2 parents aa23059 + 2b84143 commit 37deb39

File tree

3 files changed

+68
-1
lines changed

3 files changed

+68
-1
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ db/structure.sql
1818
!/log/.keep
1919
!/tmp/.keep
2020

21+
.trunk/
22+
2123
# Ignore pidfiles, but keep the directory.
2224
/tmp/pids/*
2325
!/tmp/pids/

app/services/xml_generator/single_provider.rb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ def grouped_topics(prov)
6363
end
6464

6565
def topic_scope(prov)
66-
scope = prov.topics
66+
scope = prov.topics
67+
# When this generator responds to `language` and one is set (e.g. when invoked from AllProviders), restrict topics to that language
68+
scope = scope.where(language_id: language.id) if respond_to?(:language) && language.present?
6769
scope = scope.where("published_at > ?", 1.month.ago) if args.fetch(:recent, false)
6870
scope
6971
.select(:id, :title, :published_at, :language_id, :provider_id)

spec/services/xml_generator/all_providers_spec.rb

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,67 @@
6262
expect(tnode.at_xpath("./topic_tags").text).to eq(topic.current_tags_list.join(", "))
6363
end
6464
end
65+
66+
it "does not duplicate topic_year per provider across batches" do
67+
# Create a third provider, then topics spanning multiple months and years for provider1
68+
provider3 = create(:provider)
69+
70+
# Provider 1: two topics in the same month (Jan 2024), one in another month (Mar 2024), and one in another year (Jul 2023)
71+
create(:topic, language:, provider: provider1, published_at: Time.zone.parse("2024-01-15"))
72+
create(:topic, language:, provider: provider1, published_at: Time.zone.parse("2024-01-20"))
73+
create(:topic, language:, provider: provider1, published_at: Time.zone.parse("2024-03-05"))
74+
create(:topic, language:, provider: provider1, published_at: Time.zone.parse("2023-07-01"))
75+
76+
# Topics for the other providers, which are spread across the stubbed batches below
77+
create(:topic, language:, provider: provider2, published_at: Time.zone.parse("2023-01-01"))
78+
create(:topic, language:, provider: provider3, published_at: Time.zone.parse("2024-02-01"))
79+
80+
# Stub batching to yield two slices, simulating multiple provider-id batches
81+
allow_any_instance_of(described_class)
82+
.to receive(:provider_ids_in_language_in_batches)
83+
.and_yield([ provider1.id, provider2.id ])
84+
.and_yield([ provider3.id ])
85+
86+
xml = subject.perform
87+
doc = Nokogiri::XML(xml)
88+
89+
pnode = doc.at_xpath("//cmes/content_provider[@name='#{provider1.name}']")
90+
expect(pnode).to be_present
91+
92+
years = pnode.xpath("./topic_year/@year").map(&:value)
93+
# Expect no duplicate year nodes for the provider
94+
expect(years.tally.values.max).to eq(1)
95+
96+
# Specifically ensure 2024 appears once with months Jan and Mar once each
97+
y2024 = pnode.at_xpath("./topic_year[@year='2024']")
98+
expect(y2024).to be_present
99+
100+
months_2024 = y2024.xpath("./topic_month/@month").map(&:value)
101+
expect(months_2024).to include("01_January", "03_March")
102+
expect(months_2024.tally["01_January"]).to eq(1)
103+
expect(months_2024.tally["03_March"]).to eq(1)
104+
end
105+
106+
it "does not duplicate topic_month within a year for a provider" do
107+
# Create two topics for provider1 in the same month (Feb 2024) and one in another month of the same year (Mar 2024)
108+
create(:topic, language:, provider: provider1, published_at: Time.zone.parse("2024-02-01"))
109+
create(:topic, language:, provider: provider1, published_at: Time.zone.parse("2024-02-10"))
110+
create(:topic, language:, provider: provider1, published_at: Time.zone.parse("2024-03-05"))
111+
112+
xml = subject.perform
113+
doc = Nokogiri::XML(xml)
114+
115+
pnode = doc.at_xpath("//cmes/content_provider[@name='#{provider1.name}']")
116+
expect(pnode).to be_present
117+
118+
y2024 = pnode.at_xpath("./topic_year[@year='2024']")
119+
expect(y2024).to be_present
120+
121+
months = y2024.xpath("./topic_month/@month").map(&:value)
122+
# months should be unique (no duplicate month nodes)
123+
expect(months.uniq.size).to eq(months.size)
124+
expect(months).to include("02_February", "03_March")
125+
expect(months.tally["02_February"]).to eq(1)
126+
expect(months.tally["03_March"]).to eq(1)
127+
end
65128
end

0 commit comments

Comments
 (0)