Skip to content

Commit 614f88b

Browse files
committed
CV2-6038: pass team_slug and batch_size to rake task (#2245)
1 parent dec6bf1 commit 614f88b

File tree

1 file changed

+30
-6
lines changed

1 file changed

+30
-6
lines changed

lib/tasks/migrate/20230130074014_add_mapping_for_keyword_search_fields.rake

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,45 @@
11
namespace :check do
22
namespace :migrate do
3-
task index_fact_check_fields: :environment do
3+
# Parses the extra rake arguments into a Hash with String keys.
#
# Each argument is a string of `key:value` pairs joined by `&`, e.g.
#   ['slug:my-team&batch_size:100'] => { 'slug' => 'my-team', 'batch_size' => '100' }
#
# Only the first `:` of a pair separates the key from the value, so a value
# may itself contain colons. A pair without a value (`'slug'` or `'slug:'`)
# maps the key to nil, which lets callers fall back to a default with `||`.
# Pairs with an empty key are ignored. When a key is repeated, the last
# occurrence wins.
#
# @param args [Array<String>, nil] raw arguments, e.g. `args.extras`
# @return [Hash{String => String, nil}] parsed options; empty for nil/empty input
def parse_args(args)
  Array(args).each_with_object({}) do |arg, output|
    arg.to_s.split('&').each do |pair|
      # Limit of 2 keeps any further ':' characters inside the value.
      key, value = pair.split(':', 2)
      next if key.nil? || key.empty?

      # An empty value is treated as "not given" (nil), never as ''.
      output[key] = value == '' ? nil : value
    end
  end
end
15+
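# Usage: bundle exec rails check:migrate:index_fact_check_fields['slug:team_slug&batch_size:batch_size']
# Both pairs are optional: without `slug`, teams after the last cached team id are processed; `batch_size` defaults to 500.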
16+
task index_fact_check_fields: :environment do |_t, args|
417
# This rake task indexes the following fields:
518
# 1) claim_description [content, context]
619
# 2) fact_check [title, summary, url, language]
720
started = Time.now.to_i
21+
data_args = parse_args args.extras
22+
batch_size = data_args['batch_size'] || 500
23+
batch_size = batch_size.to_i
24+
# Add team condition
25+
slug = data_args['slug']
26+
team_condition = {}
27+
if slug.blank?
28+
last_team_id = Rails.cache.read('check:migrate:index_fact_check_fields:team_id') || 0
29+
else
30+
last_team_id = 0
31+
team_condition = { slug: slug }
32+
end
833
index_alias = CheckElasticSearchModel.get_index_alias
934
client = $repository.client
10-
last_team_id = Rails.cache.read('check:migrate:index_fact_check_fields:team_id') || 0
11-
Team.where('id > ?', last_team_id).find_each do |team|
12-
team.claim_descriptions.joins(:project_media).find_in_batches(:batch_size => 1000) do |cds|
35+
Team.where('id > ?', last_team_id).where(team_condition).find_each do |team|
36+
team.claim_descriptions.joins(:project_media).find_in_batches(:batch_size => batch_size) do |cds|
1337
es_body = []
1438
ids = cds.map(&:id)
1539
ClaimDescription.select('claim_descriptions.project_media_id as pm_id, claim_descriptions.description, claim_descriptions.context, fact_checks.*')
1640
.where(id: ids)
1741
.joins(:fact_check)
18-
.find_in_batches(:batch_size => 1000) do |items|
42+
.find_in_batches(:batch_size => batch_size) do |items|
1943
print '.'
2044
items.each do |item|
2145
doc_id = Base64.encode64("ProjectMedia/#{item['pm_id']}")
@@ -32,7 +56,7 @@ namespace :check do
3256
end
3357
client.bulk body: es_body unless es_body.blank?
3458
end
35-
Rails.cache.write('check:migrate:index_fact_check_fields:team_id', team.id)
59+
Rails.cache.write('check:migrate:index_fact_check_fields:team_id', team.id) if slug.blank?
3660
end
3761
minutes = ((Time.now.to_i - started) / 60).to_i
3862
puts "[#{Time.now}] Done in #{minutes} minutes."

0 commit comments

Comments
 (0)