Skip to content

Commit 6b2f30e

Browse files
committed
Added favorite script
1 parent a858be2 commit 6b2f30e

File tree

1 file changed

+216
-0
lines changed

1 file changed

+216
-0
lines changed

bin/favorite.pl

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
use strict;
2+
use warnings;
3+
use v5.36;
4+
5+
use Getopt::Long;
6+
use Digest::file qw< digest_file_hex >;
7+
use MetaCPAN::Logger qw< :log :dlog >;
8+
9+
use MetaCPAN::ES;
10+
use MetaCPAN::Ingest qw< minion >;
11+
12+
# Command-line arguments.
#
#   --age N            restrict to favorites whose date is >= "now-<N>m"
#                      (passed verbatim into the ES range filter below)
#   --check_missing    look for files without a dist_fav_count instead of
#                      updating counts
#   --count N          explicit favorite count; only valid together with
#                      --distribution
#   --distribution D   restrict the run to a single distribution
#   --limit N          page size for the age-filtered favorite scroll and
#                      cap on the number of missing distributions handled
#   --queue            enqueue index_favorite jobs instead of updating inline
my ( $age, $check_missing, $count, $distribution, $limit, $queue );

# NOTE(review): $dry_run is declared but never bound to an option or read
# anywhere in this script; kept so the file-level interface is unchanged.
my $dry_run;

# GetOptions returns false when it meets an unknown or malformed option.
# Abort in that case: silently continuing with e.g. a mistyped
# --distribution would turn a single-distribution run into a run over
# every distribution.
GetOptions(
    "age=i"          => \$age,
    "check_missing"  => \$check_missing,
    "count=i"        => \$count,
    "distribution=s" => \$distribution,
    "limit=i"        => \$limit,
    "queue"          => \$queue,
) or die "Error in command line arguments\n";

# --count is a per-distribution value, so it is meaningless without one.
if ( $count and !$distribution ) {
    die
        "Cannot set count in a distribution search mode, this flag only applies to a single distribution. please use together with --distribution DIST";
}

# The missing-check scans files across distributions and cannot be combined
# with a single-distribution filter.
if ( $check_missing and $distribution ) {
    die
        "check_missing doesn't work in filtered mode - please remove other flags";
}

index_favorites();

log_info {'done'};

1;
39+
40+
###
41+
42+
# index_favorites()
#
# Counts favorites per distribution and propagates the count into the
# dist_fav_count field of that distribution's "file" documents.
#
# Reads the file-level option variables ($age, $check_missing, $count,
# $distribution, $limit, $queue); takes no arguments and returns nothing.
#
# Phases:
#   1. Build the favorite query: a single distribution, or (with --age) the
#      set of distributions that received a favorite inside the age window.
#   2. Count favorites per distribution (or trust --count when given).
#   3. With --check_missing: report, or with --queue enqueue, distributions
#      whose files have no dist_fav_count, then return without updating.
#   4. Otherwise update every file of every counted distribution, either
#      inline through a bulk helper or by enqueueing one index_favorite job
#      per distribution when --queue is set.
sub index_favorites () {
    my $body;
    my $age_filter;

    # Set when --age was requested but matched no favorite at all. Without
    # this flag an empty result is indistinguishable from "no filter", and
    # the counting scroll below would walk every favorite in the index.
    my $no_recent_favs = 0;

    # Job queue handle, created on first use and shared by all enqueue calls
    # instead of being rebuilt for every distribution.
    my $minion;

    if ($age) {
        $age_filter
            = { range => { date => { gte => sprintf( 'now-%dm', $age ) } } };
    }

    if ($distribution) {
        $body = {
            query => {
                term => { distribution => $distribution }
            }
        };
    }
    elsif ($age) {
        my $es   = MetaCPAN::ES->new( type => "favorite" );
        my $favs = $es->scroll(
            scroll => '5m',
            fields => [qw< distribution >],
            body   => {
                query => $age_filter,
                ( $limit ? ( size => $limit ) : () )
            }
        );

        # Collect the distinct distributions favorited inside the window.
        my %recent_dists;

        while ( my $fav = $favs->next ) {
            my $dist = $fav->{fields}{distribution}[0];
            $recent_dists{$dist}++ if $dist;
        }

        my @keys = keys %recent_dists;
        if (@keys) {
            $body = {
                query => {
                    terms => { distribution => \@keys }
                }
            };
        }
        else {
            $no_recent_favs = 1;
            log_debug {
                "No favorites found inside the age window, skipping count"
            };
        }
        $es->index_refresh;
    }

    # get total fav counts for distributions

    my %dist_fav_count;

    if ($count) {
        $dist_fav_count{$distribution} = $count;
    }
    elsif ( !$no_recent_favs ) {
        my $es   = MetaCPAN::ES->new( type => "favorite" );
        my $favs = $es->scroll(
            scroll => '30s',
            fields => [qw< distribution >],
            ( $body ? ( body => $body ) : () ),
        );

        # One favorite document per hit, so counting hits per distribution
        # yields the distribution's favorite total.
        while ( my $fav = $favs->next ) {
            my $dist = $fav->{fields}{distribution}[0];
            $dist_fav_count{$dist}++ if $dist;
        }

        $es->index_refresh;
        log_debug {"Done counting favs for distributions"};
    }

    # Report missing distributions if requested

    if ($check_missing) {
        my %missing;
        my @age_filter;
        if ($age) {
            @age_filter = ( must => [$age_filter] );
        }

        my $es    = MetaCPAN::ES->new( type => "file" );
        my $files = $es->scroll(
            scroll => '15m',
            fields => [qw< id distribution >],
            size   => 500,
            body   => {
                query => {
                    bool => {
                        must_not => [
                            { range => { dist_fav_count => { gte => 1 } } }
                        ],
                        @age_filter,
                    }
                }
            },
        );

        while ( my $file = $files->next ) {
            my $dist = $file->{fields}{distribution}[0];
            next unless $dist;

            # Handle each distribution once, and skip those already counted.
            next if exists $missing{$dist} or exists $dist_fav_count{$dist};

            if ($queue) {
                log_debug {"Queueing: $dist"};
                $minion //= minion();

                # Only pass --count when a non-zero count is actually known.
                my $known_count = $count || $dist_fav_count{$dist};
                my @count_flag
                    = $known_count ? ( '--count', $known_count ) : ();

                $minion->enqueue(
                    index_favorite =>
                        [ '--distribution', $dist, @count_flag ],
                    { priority => 0, attempts => 10 }
                );
            }
            else {
                log_debug {"Found missing: $dist"};
            }

            $missing{$dist} = 1;
            last if $limit and scalar( keys %missing ) >= $limit;
        }

        my $total_missing = scalar( keys %missing );
        log_debug {"Total missing: $total_missing"} unless $queue;

        $es->index_refresh;
        return;
    }

    # Update fav counts for files per distributions

    for my $dist ( keys %dist_fav_count ) {
        log_debug {"Dist $dist"};

        my $cnt = $dist_fav_count{$dist};

        if ($queue) {
            $minion //= minion();
            $minion->enqueue(
                index_favorite => [
                    '--distribution', $dist, '--count',
                    ( $count ? $count : $cnt )
                ],
                { priority => 0, attempts => 10 }
            );
        }
        else {
            my $es    = MetaCPAN::ES->new( type => "file" );
            my $bulk  = $es->bulk( timeout => '120m' );
            my $files = $es->scroll(
                scroll => '15s',
                fields => [qw< id >],
                body   => {
                    query => { term => { distribution => $dist } }
                },
            );

            while ( my $file = $files->next ) {
                my $id = $file->{fields}{id}[0];

                log_debug {"Updating file id $id with fav_count $cnt"};

                $bulk->update( {
                    id  => $id,
                    doc => { dist_fav_count => $cnt },
                } );
            }

            # Push any buffered updates for this distribution before moving on.
            $bulk->flush;
        }
    }

    return;
}
215+
216+
1;

0 commit comments

Comments
 (0)