|
| 1 | +# frozen_string_literal: true |
| 2 | + |
| 3 | +require 'benchmark' |
| 4 | +require 'logger' |
| 5 | +require 'open3' |
| 6 | +require 'pathname' |
| 7 | +require_relative '../lib/receiptisan' |
| 8 | + |
| 9 | +# rubocop:disable Metrics/ModuleLength |
| 10 | +module MasterLoadingBenchmark |
# Namespace that provides the Loader and ResourceResolver used by build_loader.
MASTER = Receiptisan::Model::ReceiptComputer::Master
# Master version that every benchmark in this module loads.
VERSION = MASTER::Version::V2024_R06
# Loader types that benchmark_by_loader_type measures one at a time, in this order.
LOADER_TYPES = %i[shinryou_koui iyakuhin tokutei_kizai comment shoubyoumei shuushokugo].freeze
| 21 | + |
# Accumulates per-loader timing totals reported by the instrumented foreach.
class ForeachCollector
  # One row of totals for a single loader name.
  Result = Struct.new(:loader, :io_and_encoding, :string_processing, :object_building, :lines, keyword_init: true)

  def initialize
    @results = {}
  end

  # Adds the given measurements to the running totals for +loader_name+,
  # creating a zeroed entry the first time a loader is seen.
  #
  # Returns the updated line total for that loader.
  def record(loader_name, io_and_encoding:, string_processing:, object_building:, lines:)
    entry = (@results[loader_name] ||= zeroed_result(loader_name))
    increments = {
      io_and_encoding:   io_and_encoding,
      string_processing: string_processing,
      object_building:   object_building,
      lines:             lines,
    }
    increments.each { | field, amount | entry[field] += amount }
    entry.lines
  end

  # Returns every collected Result, ordered by loader name.
  def to_a
    @results.each_value.sort_by { | entry | entry.loader }
  end

  private

  # Builds a Result for +loader_name+ with all counters at zero.
  def zeroed_result(loader_name)
    Result.new(
      loader:            loader_name,
      io_and_encoding:   0.0,
      string_processing: 0.0,
      object_building:   0.0,
      lines:             0
    )
  end
end
| 49 | + |
# Module-level slot for the collector the instrumented foreach reports to.
# While the slot is nil, foreach records nothing.
module ForeachProfiler
  singleton_class.attr_accessor :collector
end
| 55 | + |
# Instrumented +foreach+ that is prepended onto the loader trait by +run+.
# It relies on the host object providing +logger+ and +resolve_load_path+.
module LoaderTraitInstrument
  # Reads each CSV, yields the split fields of every row, and times three
  # phases separately: file read + transcoding, row string processing, and
  # the caller's block. Totals go to ForeachProfiler.collector when one is set.
  #
  # csv_paths - objects responding to +to_path+.
  #
  # Returns whatever the collector's +record+ returns, or nil without a collector.
  def foreach(csv_paths)
    logger.info format('prepare to load following CSV %d files:', csv_paths.length)
    logger.info csv_paths.map(&:to_path)

    totals = { io_and_encoding: 0.0, string_processing: 0.0, object_building: 0.0, lines: 0 }

    csv_paths.each do | csv_path |
      load_path, read_encoding = resolve_load_path(csv_path)

      contents = nil
      totals[:io_and_encoding] += Benchmark.realtime do
        contents = File.read(load_path, mode: "r:#{read_encoding}:UTF-8")
      end

      rows = contents.split("\n")
      rows.each do | row |
        values = nil
        # NOTE(review): these string calls are presumably meant to mirror the
        # production parsing being profiled, so they are kept exactly as-is.
        totals[:string_processing] += Benchmark.realtime do
          values = row.delete_suffix("\r").tr('"', '').split(',')
        end
        totals[:object_building] += Benchmark.realtime { yield values }
        totals[:lines] += 1
      end

      logger.info "#{load_path}(#{rows.length} lines) was loaded."
    end

    # Report under the bare class name (namespace stripped).
    ForeachProfiler.collector&.record(self.class.name.split('::').last, **totals)
  end
end
| 97 | + |
| 98 | + module_function |
| 99 | + |
# Entry point: installs the foreach instrumentation on the loader trait,
# prints the header, then runs the three benchmark sections in order.
def run
  loader_trait = Receiptisan::Model::ReceiptComputer::Master::Loader::LoaderTrait
  loader_trait.prepend(LoaderTraitInstrument)

  puts '=== Master CSV Loading Benchmark ===', "Version: #{VERSION}", ''

  benchmark_full_loading
  benchmark_by_loader_type
  benchmark_search_command
end
| 110 | + |
# Section [1]: times three full Loader#load runs with a fresh loader each time
# and prints per-run wall time and RSS change, followed by a summary.
def benchmark_full_loading
  puts '[1] Full load benchmark (Loader#load)'

  samples = Array.new(3) do | index |
    rss_before = rss_kb
    seconds    = Benchmark.realtime { build_loader.load(VERSION) }
    rss_delta  = rss_kb - rss_before
    puts format(' run%-2d: %.3fs (RSS %+d KB)', index + 1, seconds, rss_delta)
    [seconds, rss_delta]
  end
  times, rss_diffs = samples.transpose

  # The first run is reported separately as "cold"; the rest are averaged as "warm".
  puts format(' cold run: %.3fs', times.first)
  puts format(' warm avg: %.3fs', average(times.drop(1)))
  puts format(' RSS diff avg: %.1f KB', average(rss_diffs))
  puts
end
| 130 | + |
# Sections [2][3]: for every loader type, times three load_type runs and prints
# the foreach phase totals gathered by a per-type ForeachCollector.
def benchmark_by_loader_type
  puts '[2][3] Breakdown by loader type and foreach phases'
  LOADER_TYPES.each do | type |
    # One collector per type, shared by all three runs, so the phase figures
    # printed below are totals across the runs.
    collector = ForeachCollector.new

    elapsed_times = (1..3).map do | run_number |
      ForeachProfiler.collector = collector
      Benchmark.realtime { build_loader.load_type(VERSION, type) }.tap do | elapsed |
        puts format(' %-15s run%-2d: %.3fs', type, run_number, elapsed)
      end
    end

    cold, *warm = elapsed_times
    puts format(' %-15s cold: %.3fs / warm avg: %.3fs', type, cold, average(warm))
    collector.to_a.each do | result |
      phases = [result.io_and_encoding, result.string_processing, result.object_building]
      puts format(
        ' %-18s io+enc: %.3fs | split: %.3fs | build: %.3fs | lines: %d',
        result.loader, *phases, result.lines
      )
    end
  ensure
    # Always detach the collector so later loads are not recorded into it.
    ForeachProfiler.collector = nil
  end
  puts
end
| 160 | + |
# Sections [4][5]: times the `receiptisan search` command end-to-end in a
# subprocess, three runs per scenario, executed from the repository root.
#
# Raises RuntimeError when a search run exits unsuccessfully.
def benchmark_search_command
  puts '[4][5] receiptisan search end-to-end'
  scenarios = {
    'max-shinryou' => %w[--type shinryou-koui --name 初診 --month 202406],
    'mid-iyakuhin' => %w[--type iyakuhin --name アセト --month 202406],
    'small-kizai'  => %w[--type tokutei-kizai --name カテーテル --month 202406],
  }
  command = %w[bundle exec ruby exe/receiptisan search]

  scenarios.each do | name, args |
    times = (1..3).map do | run_number |
      elapsed = Benchmark.realtime do
        stdout, stderr, status = Open3.capture3(*command, *args, chdir: repo_root)
        raise "search failed (#{name}): #{stderr}\n#{stdout}" unless status.success?
      end
      puts format(' %-12s run%-2d: %.3fs', name, run_number, elapsed)
      elapsed
    end

    puts format(' %-12s cold: %.3fs / warm avg: %.3fs', name, times.first, average(times.drop(1)))
  end
  puts
end
| 189 | + |
# Builds a fresh master loader with a new resource resolver and a logger
# created with a nil log device.
def build_loader
  resolver = MASTER::ResourceResolver.new
  null_logger = Logger.new(nil)
  MASTER::Loader.new(resolver, null_logger)
end
| 193 | + |
# Returns the integer that follows the "VmRSS:" label in +status_path+
# (reported in kB by Linux procfs), or 0 when the file has no such line.
#
# When the status file cannot be read at all (for example on hosts without
# procfs such as macOS), falls back to asking `ps` instead of raising, so the
# benchmark still runs there.
#
# status_path - procfs status file to parse; defaults to the current process.
def rss_kb(status_path = '/proc/self/status')
  line = File.foreach(status_path).find { | l | l.start_with?('VmRSS:') }
  return 0 unless line

  line.split[1].to_i
rescue SystemCallError
  rss_kb_via_ps
end

# Fallback RSS lookup through `ps -o rss=`; returns 0 when ps is unavailable
# or prints nothing parseable.
def rss_kb_via_ps
  IO.popen(['ps', '-o', 'rss=', '-p', Process.pid.to_s], err: File::NULL, &:read).to_i
rescue SystemCallError
  0
end
| 200 | + |
# Arithmetic mean of +values+ as a Float; an empty collection averages to 0.0.
def average(values)
  values.empty? ? 0.0 : values.sum(0.0) / values.size
end
| 206 | + |
# Absolute path (String) of the directory one level above this file's directory.
def repo_root
  File.expand_path('..', __dir__)
end
| 210 | + # rubocop:enable Metrics/ModuleLength |
| 211 | +end |
| 212 | + |
# Run the benchmark only when this file is executed directly, not when it is required.
MasterLoadingBenchmark.run if __FILE__ == $PROGRAM_NAME
0 commit comments