Skip to content

Commit 5162a47

Browse files
committed
Remove external logging, as the CSV file has a smaller scope
1 parent dbc173b commit 5162a47

File tree

2 files changed

+9
-20
lines changed

2 files changed

+9
-20
lines changed

lib/tasks/detect_duplicates.rake

Lines changed: 9 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,10 @@
11
# frozen_string_literal: true
22

33
require "csv"
4-
require "logger"
5-
64
namespace :duplicates do
75
desc "Detect and list duplicates according to Rule 3"
86
task detect: :environment do
9-
logger = Logger.new("log/isilon-duplicates-detect.log")
10-
log = lambda do |message|
11-
puts message
12-
logger.info(message)
13-
end
14-
15-
log.call("Starting Rule 3 duplicate detection...")
7+
puts "Starting Rule 3 duplicate detection..."
168

179
# Find all assets outside main areas with non-empty checksums
1810
output_path = "log/isilon-duplicate-paths.csv"
@@ -23,8 +15,8 @@ namespace :duplicates do
2315
slow_seconds = ENV.fetch("DUPLICATES_SLOW_SECONDS", "10").to_f
2416
large_group_size = ENV.fetch("DUPLICATES_LARGE_GROUP_SIZE", "20000").to_i
2517

26-
log.call("Scanning assets with matching checksums...")
27-
log.call("Processing in batches of #{batch_size}...")
18+
puts "Scanning assets with matching checksums..."
19+
puts "Processing in batches of #{batch_size}..."
2820

2921
main_volume_names = %w[Deposit Media-Repository]
3022

@@ -90,25 +82,25 @@ namespace :duplicates do
9082
elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
9183
global_index = processed + index + 1
9284
if (global_index % log_every == 0) || elapsed >= slow_seconds || asset_ids.length >= large_group_size
93-
log.call("Processed checksum #{global_index}/#{duplicate_checksums.length} (assets=#{asset_ids.length}) in #{format('%.2f', elapsed)}s")
85+
puts "Processed checksum #{global_index}/#{duplicate_checksums.length} (assets=#{asset_ids.length}) in #{format('%.2f', elapsed)}s"
9486
end
9587
end
9688

9789
processed += checksum_batch.size
9890
GC.start
9991

10092
if processed % progress_interval == 0
101-
log.call("Processed #{processed} checksum groups...")
93+
puts "Processed #{processed} checksum groups..."
10294
end
10395

10496
batch_elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - batch_started_at
105-
log.call("Batch complete (#{checksum_batch.size} checksums) in #{format('%.2f', batch_elapsed)}s")
97+
puts "Batch complete (#{checksum_batch.size} checksums) in #{format('%.2f', batch_elapsed)}s"
10698
end
10799
end
108100

109-
log.call("\n✓ Complete!")
110-
log.call("Processed: #{processed} checksum groups")
111-
log.call("Duplicate paths exported to #{output_path} (#{written} rows)")
101+
puts "\n✓ Complete!"
102+
puts "Processed: #{processed} checksum groups"
103+
puts "Duplicate paths exported to #{output_path} (#{written} rows)"
112104
end
113105

114106
desc "Show duplicate statistics"

spec/tasks/duplicates_rake_spec.rb

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,11 +19,8 @@
1919
let!(:deposit_folder) { create(:isilon_folder, volume: deposit_volume, full_path: "/Deposit/project") }
2020
let!(:media_folder) { create(:isilon_folder, volume: media_volume, full_path: "/Media-Repository/project") }
2121
let(:export_path) { Rails.root.join("log/isilon-duplicate-paths.csv") }
22-
let(:detect_log_path) { Rails.root.join("log/isilon-duplicates-detect.log") }
23-
2422
after do
2523
File.delete(export_path) if File.exist?(export_path)
26-
File.delete(detect_log_path) if File.exist?(detect_log_path)
2724
end
2825

2926
it "groups assets with matching checksums" do

0 commit comments

Comments
 (0)