@@ -280,44 +280,38 @@ def automated_validation
280
280
# IMPORTANT: Notes are soft-registered, remember to +save+ to make them
281
281
# persistent
282
282
def check_pdf_files
283
- has_acc = false
284
283
inames = Hash [ names . map { |n | [ n , false ] } ]
285
284
anames = Hash [ names . map { |n | [ n , false ] } ]
286
285
[ publication_pdf , supplementary_pdf ] . each do |as |
287
- break if has_acc && inames . values . all?
288
286
next unless as . attached?
287
+ break if anames . values . all? && inames . values . all?
289
288
290
289
as . open do |file |
291
290
render = PDF ::Reader . new ( file . path )
292
291
render . pages . each do |page |
293
- txt = page . text
294
- has_acc = true if txt . index ( accession )
295
- inames . each_key do |n |
296
- bn = n . base_name
297
- cn = n . corrigendum_from
298
- inames [ n ] = true if txt . index ( bn ) || ( cn && txt . index ( cn ) )
299
- anames [ n ] = true if txt . index ( n . seqcode_url ( false ) )
292
+ txt = page . text . unicode_normalize ( :nfkc )
293
+ anames . each { |n , _ | anames [ n ] = true } if txt . index ( accession )
294
+ names . each do |n |
295
+ inames [ n ] ||= n . pdf_variants . find { |i | txt . index ( i ) } . present?
296
+ anames [ n ] ||= txt . index ( n . seqcode_url ( false ) ) . present?
300
297
end
301
- break if ( has_acc || anames . values . all? ) && inames . values . all?
298
+ break if anames . values . all? && inames . values . all?
302
299
end
303
300
end
304
301
end
305
302
306
303
names . each do |n |
307
- v = has_acc || anames [ n ]
308
- Check . create_with ( pass : v ) . find_or_create_by (
304
+ Check . create_with ( pass : anames [ n ] ) . find_or_create_by (
309
305
name : n , kind : :effective_publication_missing_accession
310
- ) . update ( pass : v )
306
+ ) . update ( pass : anames [ n ] )
311
307
312
- v = inames [ n ]
313
- Check . create_with ( pass : v ) . find_or_create_by (
308
+ Check . create_with ( pass : inames [ n ] ) . find_or_create_by (
314
309
name : n , kind : :name_missing_in_effective_publication
315
- ) . update ( pass : v )
310
+ ) . update ( pass : inames [ n ] )
316
311
end
317
312
318
313
add_note ( 'The effective publication files have been parsed' )
319
-
320
- has_acc && inames . values . all?
314
+ anames . values . all? && inames . values . all?
321
315
rescue => e
322
316
add_note ( 'ERROR: The effective publication files could not be parsed' )
323
317
raise e
0 commit comments