Skip to content

Commit e5ec140

Browse files
authored
Merge pull request #62 from metacpan/mickey/es_syntax
More ES syntax updates
2 parents 8cc107e + 526f702 commit e5ec140

File tree

12 files changed

+169
-145
lines changed

12 files changed

+169
-145
lines changed

bin/backpan.pl

Lines changed: 19 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -38,14 +38,17 @@ ()
3838
log_info {"find_releases"};
3939

4040
my $scroll = $es_release->scroll(
41-
fields => [qw< author archive name >],
42-
body => get_release_query(),
41+
body => {
42+
%{ get_release_query() },
43+
size => 500,
44+
_source => [qw< author archive name >],
45+
},
4346
);
4447

4548
while ( my $release = $scroll->next ) {
46-
my $author = $release->{fields}{author}[0];
47-
my $archive = $release->{fields}{archive}[0];
48-
my $name = $release->{fields}{name}[0];
49+
my $author = $release->{_source}{author};
50+
my $archive = $release->{_source}{archive};
51+
my $name = $release->{_source}{name};
4952
next unless $name; # bypass some broken releases
5053

5154
$release_status{$author}{$name} = [
@@ -64,8 +67,10 @@ ()
6467
unless ($undo) {
6568
return +{
6669
query => {
67-
not => { term => { status => 'backpan' } }
68-
}
70+
bool => {
71+
must_not => [ { term => { status => 'backpan' } }, ],
72+
},
73+
},
6974
};
7075
}
7176

@@ -118,23 +123,24 @@ ( $author, $author_releases )
118123

119124
my $scroll_file = $es_file->scroll(
120125
scroll => '5m',
121-
fields => [qw< release >],
122126
body => {
123127
query => {
124128
bool => {
125129
must => [
126130
{ term => { author => $author } },
127-
{ terms => { release => $author_releases } }
128-
]
129-
}
130-
}
131+
{ terms => { release => $author_releases } },
132+
],
133+
},
134+
},
135+
size => 500,
136+
_source => [qw< release >],
131137
},
132138
);
133139

134140
$bulk{file} ||= $es_file->bulk( timeout => '5m' );
135141

136142
while ( my $file = $scroll_file->next ) {
137-
my $release = $file->{fields}{release}[0];
143+
my $release = $file->{_source}{release};
138144
$bulk{file}->update( {
139145
id => $file->{_id},
140146
doc => {

bin/backup.pl

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
use Try::Tiny qw< catch try >;
1313

1414
use MetaCPAN::ES;
15-
use MetaCPAN::Ingest qw< home >;
15+
use MetaCPAN::Ingest qw< home true >;
1616

1717
# config
1818

@@ -82,7 +82,7 @@ ()
8282
$bulk_store{$key} ||= $es->bulk( max_count => $batch_size );
8383
my $bulk = $bulk_store{$key};
8484

85-
my $parent = $raw->{fields}{_parent};
85+
my $parent = $raw->{_parent};
8686

8787
if ( $raw->{_type} eq 'author' ) {
8888

@@ -169,9 +169,12 @@ sub run_backup {
169169
( $type ? ( type => $type ) : () )
170170
);
171171
my $scroll = $es->scroll(
172-
size => $size,
173-
fields => [qw< _parent _source >],
174172
scroll => '1m',
173+
body => {
174+
_source => true,
175+
size => $size,
176+
sort => '_doc',
177+
},
175178
);
176179

177180
log_info { 'Backing up ', $scroll->total, ' documents' };

bin/check.pl

Lines changed: 41 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -38,11 +38,7 @@
3838

3939
# look up this module in ElasticSearch and see what we have on it
4040
my $results = $es_file->search(
41-
size => 100, # shouldn't get more than this
42-
fields => [
43-
qw< name release author distribution version authorized indexed maturity date >
44-
],
45-
query => {
41+
query => {
4642
bool => {
4743
must => [
4844
{ term => { 'module.name' => $pkg } },
@@ -51,22 +47,38 @@
5147
],
5248
},
5349
},
50+
size => 100, # shouldn't get more than this
51+
_source => [ qw<
52+
name
53+
release
54+
author
55+
distribution
56+
version
57+
authorized
58+
indexed
59+
maturity
60+
date
61+
> ],
62+
5463
);
5564
my @files = @{ $results->{hits}{hits} };
5665

5766
# now find the first latest releases for these files
5867
foreach my $file (@files) {
5968
my $release_results = $es_release->search(
60-
size => 1,
61-
fields => [qw< name status authorized version id date >],
62-
query => {
69+
query => {
6370
bool => {
6471
must => [
65-
{ term => { name => $file->{fields}{release} } },
72+
{
73+
term =>
74+
{ name => $file->{_source}{release} }
75+
},
6676
{ term => { status => 'latest' } },
6777
],
6878
},
6979
},
80+
size => 1,
81+
_source => [qw< name status authorized version id date >],
7082
);
7183

7284
push @releases, $release_results->{hits}{hits}[0]
@@ -78,16 +90,20 @@
7890
if ( !@releases ) {
7991
foreach my $file (@files) {
8092
my $release_results = $es_release->search(
81-
size => 1,
82-
fields =>
83-
[qw< name status authorized version id date >],
84-
query => {
93+
query => {
8594
bool => {
8695
must => [
87-
{ term => { name => $file->{fields}{release} } },
96+
{
97+
term => {
98+
name => $file->{_source}{release}
99+
}
100+
},
88101
],
89102
},
90103
},
104+
size => 1,
105+
_source =>
106+
[qw< name status authorized version id date >],
91107
);
92108

93109
push @releases, @{ $release_results->{hits}{hits} };
@@ -97,22 +113,22 @@
97113
# if we found the releases tell them about it
98114
if (@releases) {
99115
if ( @releases == 1
100-
and $releases[0]->{fields}{status} eq 'latest' )
116+
and $releases[0]->{_source}{status} eq 'latest' )
101117
{
102118
log_info {
103-
"Found latest release $releases[0]->{fields}{name} for $pkg"
119+
"Found latest release $releases[0]->{_source}{name} for $pkg"
104120
}
105121
unless $errors_only;
106122
}
107123
else {
108124
log_error {"Could not find latest release for $pkg"};
109125
foreach my $rel (@releases) {
110-
log_warn {" Found release $rel->{fields}{name}"};
111-
log_warn {" STATUS : $rel->{fields}{status}"};
126+
log_warn {" Found release $rel->{_source}{name}"};
127+
log_warn {" STATUS : $rel->{_source}{status}"};
112128
log_warn {
113-
" AUTORIZED : $rel->{fields}{authorized}"
129+
" AUTORIZED : $rel->{_source}{authorized}"
114130
};
115-
log_warn {" DATE : $rel->{fields}{date}"};
131+
log_warn {" DATE : $rel->{_source}{date}"};
116132
}
117133

118134
$error_count++;
@@ -123,13 +139,13 @@
123139
"Module $pkg doesn't have any releases in ElasticSearch!"
124140
};
125141
foreach my $file (@files) {
126-
log_warn {" Found file $file->{fields}{name}"};
127-
log_warn {" RELEASE : $file->{fields}{release}"};
128-
log_warn {" AUTHOR : $file->{fields}{author}"};
142+
log_warn {" Found file $file->{_source}{name}"};
143+
log_warn {" RELEASE : $file->{_source}{release}"};
144+
log_warn {" AUTHOR : $file->{_source}{author}"};
129145
log_warn {
130-
" AUTHORIZED : $file->{fields}{authorized}"
146+
" AUTHORIZED : $file->{_source}{authorized}"
131147
};
132-
log_warn {" DATE : $file->{fields}{date}"};
148+
log_warn {" DATE : $file->{_source}{date}"};
133149
}
134150
$error_count++;
135151
}

bin/checksum.pl

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -32,11 +32,11 @@
3232
not => {
3333
exists => {
3434
field => "checksum_md5"
35-
}
36-
}
37-
}
35+
},
36+
},
37+
},
38+
_source => [qw< id name download_url >],
3839
},
39-
fields => [qw< id name download_url >],
4040
);
4141

4242
log_warn { "Found " . $scroll->total . " releases" };
@@ -50,11 +50,11 @@
5050
last;
5151
}
5252

53-
log_info { "Adding checksums for " . $p->{fields}{name}[0] };
53+
log_info { "Adding checksums for " . $p->{_source}{name} };
5454

55-
if ( my $download_url = $p->{fields}{download_url} ) {
55+
if ( my $download_url = $p->{_source}{download_url} ) {
5656
my $file
57-
= cpan_dir . "/authors" . $p->{fields}{download_url}[0]
57+
= cpan_dir . "/authors" . $p->{_source}{download_url}
5858
=~ s/^.*authors//r;
5959
my $checksum_md5 = digest_file_hex( $file, 'MD5' );
6060
my $checksum_sha256 = digest_file_hex( $file, 'SHA-256' );
@@ -75,7 +75,7 @@
7575
}
7676
}
7777
else {
78-
log_info { $p->{fields}{name}[0] . " is missing a download_url" };
78+
log_info { $p->{_source}{name} . " is missing a download_url" };
7979
}
8080
}
8181

bin/cve.pl

Lines changed: 14 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -134,32 +134,29 @@
134134

135135
if (@filters) {
136136
my $query = {
137-
query => {
138-
bool => {
139-
must => [
140-
{ term => { distribution => $dist } }, @filters,
141-
]
142-
}
143-
},
137+
bool => {
138+
must =>
139+
[ { term => { distribution => $dist } }, @filters, ]
140+
}
144141
};
145142

146143
my $releases = $es->search(
147-
index => 'cpan',
148-
type => 'release',
149-
body => $query,
150-
fields => [ "version", "name", "author", ],
151-
size => 2000,
144+
index => 'cpan',
145+
type => 'release',
146+
body => {
147+
query => $query,
148+
_source => [qw< version name author >],
149+
size => 2000,
150+
},
152151
);
153152

154153
if ( $releases->{hits}{total} ) {
155154
## no critic (ControlStructures::ProhibitMutatingListFunctions)
156155
@matches = map { $_->[0] }
157156
sort { $a->[1] <=> $b->[1] }
158-
map {
159-
my %fields = %{ $_->{fields} };
160-
ref $_ and $_ = $_->[0] for values %fields;
161-
[ \%fields, numify_version( $fields{version} ) ];
162-
} @{ $releases->{hits}{hits} };
157+
map { [ $_->{_source},
158+
numify_version( $_->{_source}{version} ) ] }
159+
@{ $releases->{hits}{hits} };
163160
}
164161
else {
165162
log_debug {

0 commit comments

Comments
 (0)