11namespace :check do
22 namespace :migrate do
3- task index_fact_check_fields : :environment do
# Parses extra rake arguments written as 'key:value' pairs into a Hash.
#
# Each element of +args+ may hold several pairs joined by '&'
# (e.g. 'slug:my-team&batch_size:100'). Keys and values stay Strings.
# Only the first ':' in a pair separates key from value, so a value may
# itself contain ':' (e.g. a URL). A pair with no value ('slug' or 'slug:')
# maps its key to nil, and pairs with an empty key are ignored. When a key
# appears more than once, the last occurrence wins.
#
# @param args [Array<String>, nil] raw arguments
# @return [Hash{String => String, nil}] parsed options; empty for nil/empty input
def parse_args(args)
  Array(args).each_with_object({}) do |raw, output|
    raw.split('&').each do |pair|
      # Limit the split to 2 so ':' inside the value is preserved.
      key, value = pair.split(':', 2)
      next if key.nil? || key.empty?

      # Keep "no value" as nil so callers' `|| default` fallbacks still apply.
      value = nil if value.to_s.empty?
      output[key] = value
    end
  end
end
15+ # bundle exec rails check:migrate:index_fact_check_fields['slug:team_slug&batch_size:batch_size']
16+ task index_fact_check_fields : :environment do |_t , args |
417 # This rake task indexes the following fields:
518 # 1) claim_description [content, context]
619 # 2) fact_check [title, summary, url, language]
720 started = Time . now . to_i
21+ data_args = parse_args args . extras
22+ batch_size = data_args [ 'batch_size' ] || 500
23+ batch_size = batch_size . to_i
24+ # Add team condition
25+ slug = data_args [ 'slug' ]
26+ team_condition = { }
27+ if slug . blank?
28+ last_team_id = Rails . cache . read ( 'check:migrate:index_fact_check_fields:team_id' ) || 0
29+ else
30+ last_team_id = 0
31+ team_condition = { slug : slug }
32+ end
833 index_alias = CheckElasticSearchModel . get_index_alias
934 client = $repository. client
10- last_team_id = Rails . cache . read ( 'check:migrate:index_fact_check_fields:team_id' ) || 0
11- Team . where ( 'id > ?' , last_team_id ) . find_each do |team |
12- team . claim_descriptions . joins ( :project_media ) . find_in_batches ( :batch_size => 1000 ) do |cds |
35+ Team . where ( 'id > ?' , last_team_id ) . where ( team_condition ) . find_each do |team |
36+ team . claim_descriptions . joins ( :project_media ) . find_in_batches ( :batch_size => batch_size ) do |cds |
1337 es_body = [ ]
1438 ids = cds . map ( &:id )
1539 ClaimDescription . select ( 'claim_descriptions.project_media_id as pm_id, claim_descriptions.description, claim_descriptions.context, fact_checks.*' )
1640 . where ( id : ids )
1741 . joins ( :fact_check )
18- . find_in_batches ( :batch_size => 1000 ) do |items |
42+ . find_in_batches ( :batch_size => batch_size ) do |items |
1943 print '.'
2044 items . each do |item |
2145 doc_id = Base64 . encode64 ( "ProjectMedia/#{ item [ 'pm_id' ] } " )
@@ -32,7 +56,7 @@ namespace :check do
3256 end
3357 client . bulk body : es_body unless es_body . blank?
3458 end
35- Rails . cache . write ( 'check:migrate:index_fact_check_fields:team_id' , team . id )
59+ Rails . cache . write ( 'check:migrate:index_fact_check_fields:team_id' , team . id ) if slug . blank?
3660 end
3761 minutes = ( ( Time . now . to_i - started ) / 60 ) . to_i
3862 puts "[#{ Time . now } ] Done in #{ minutes } minutes."
0 commit comments