diff --git a/.rubocop.yml b/.rubocop.yml index b1c8b19..144320f 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -11,11 +11,5 @@ Style: Layout: Enabled: false -Metrics/BlockLength: - Max: 50 - -Metrics/MethodLength: - Max: 40 - -Metrics/AbcSize: - Max: 50 +Metrics: + Enabled: false diff --git a/app/helpers/messages_helper.rb b/app/helpers/messages_helper.rb index 5981d9b..94ef84a 100644 --- a/app/helpers/messages_helper.rb +++ b/app/helpers/messages_helper.rb @@ -7,18 +7,18 @@ def without_list_prefix(subject) def search_snippet(body, keyword) snippet = '' - offset = 0 - while (i = body.index(keyword, offset)) - start = [i - MARGIN, offset].max - len = keyword.length + MARGIN - snippet += body[start, len] - offset = start + len - end + offset = 0 + while (i = body.index(keyword, offset)) + start = [i - MARGIN, offset].max + len = keyword.length + MARGIN + snippet += body[start, len] + offset = start + len + end - if snippet.empty? - return body[0, MARGIN * 2] - else - snippet - end + if snippet.empty? + return body[0, MARGIN * 2] + else + snippet + end end end diff --git a/app/models/list.rb b/app/models/list.rb index 641da36..d87c3cb 100644 --- a/app/models/list.rb +++ b/app/models/list.rb @@ -1,16 +1,16 @@ class List def initialize(name, id) @name = name - @id = id + @id = id end attr_reader :name, :id - # Ordered by the established dates. ruby-list was started in 1995. + # Ordered by the date each list was established. ruby-list was started in 1995. 
LISTS = [ - List.new('ruby-list', 1), - List.new('ruby-dev', 2), - List.new('ruby-core', 3), - List.new('ruby-talk', 4), + List.new('ruby-list', 1), + List.new('ruby-dev', 2), + List.new('ruby-core', 3), + List.new('ruby-talk', 4), ] def self.find_by_name(name) diff --git a/app/models/message.rb b/app/models/message.rb index 45390d1..1f5151b 100644 --- a/app/models/message.rb +++ b/app/models/message.rb @@ -4,28 +4,29 @@ require 'kconv' class Message < ApplicationRecord - # Not really sure we will utlize this configuration, - # but I don't want to make this column. - # https://blade.ruby-lang.org/ruby-talk/1 is JST. - # https://blade.ruby-lang.org/ruby-talk/410000 is not. + # Not really sure we will utilize this configuration, + # but I don't want to make this column. + # https://blade.ruby-lang.org/ruby-talk/1 is JST. + # https://blade.ruby-lang.org/ruby-talk/410000 is not. self.skip_time_zone_conversion_for_attributes = [:published_at] - def self.from_s3(list_name, list_seq, s3_client = Aws::S3::Client.new(region: BLADE_BUCKET_REGION)) - obj = s3_client.get_object(bucket: BLADE_BUCKET_NAME, key: "#{list_name}/#{list_seq}") + class << self + def from_s3(list_name, list_seq, s3_client = Aws::S3::Client.new(region: BLADE_BUCKET_REGION)) + obj = s3_client.get_object(bucket: BLADE_BUCKET_NAME, key: "#{list_name}/#{list_seq}") m = self.from_string(obj.body.read) m.list_id = List.find_by_name(list_name).id m.list_seq = list_seq m - end + end - def self.from_string(str) + def from_string(str) # There are a few hacks to import messages from blade.ruby-lang.org's # S3 bucket. # Need to call String#b. There are messages that have headers in non-UTF8, # but the body is in UTF-8, such as ruby-list:2882. - headers_str, body = str.b.split(/\n\n/, 2) + headers_str, body = str.b.split(/\n\n/, 2) # ruby-list:2840 doesn't have a proper From header. 
headers_str = Kconv.toutf8(headers_str).gsub(/\r\n/, '') @@ -37,21 +38,22 @@ def self.from_string(str) published_at = DateTime.strptime(headers['Date'], '%Y-%m-%dT%H:%M:%S%:z') self.new( - body: Kconv.toutf8(body), - subject: headers['Subject'], - from: headers['From'], - published_at: published_at, + body: Kconv.toutf8(body), + subject: headers['Subject'], + from: headers['From'], + published_at: published_at, ) + end end def reload_from_s3(s3_client = Aws::S3::Client.new(region: BLADE_BUCKET_REGION)) m = Message.from_s3(List.find_by_id(self.list_id).name, self.list_seq, s3_client) - self.body = m.body - self.subject = m.subject - self.from = from - self.published_at = m.published_at + self.body = m.body + self.subject = m.subject + self.from = m.from + self.published_at = m.published_at - m + m end end diff --git a/import.rb b/import.rb index 7b151a5..71e4d92 100644 --- a/import.rb +++ b/import.rb @@ -3,21 +3,24 @@ params = {} OptionParser.new do |opts| opts.on('--list LIST') - opts.on('--from FROM', Integer) - opts.on('--to TO', Integer) + opts.on('--from FROM', Integer) + opts.on('--to TO', Integer) end.parse!(into: params) list = params[:list] -(params[:from]..params[:to]).each do |seq| - begin - message = Message.from_s3(list, seq) - message.save -rescue ActiveRecord::RecordNotUnique - STDERR.puts("#{list}:#{seq} already exists in Postgres") -rescue Aws::S3::Errors::NoSuchKey - STDERR.puts("#{list}:#{seq} doesn't exist in S3") -rescue StandardError => e - STDERR.puts("failed to import #{list}:#{seq}: #{e}") +Message.transaction do + (params[:from]..params[:to]).each do |seq| + begin + message = Message.from_s3(list, seq) + # Savepoint per message: a failed INSERT must not abort the enclosing Postgres transaction. + Message.transaction(requires_new: true) { message.save! } + rescue ActiveRecord::RecordNotUnique + STDERR.puts("#{list}:#{seq} already exists in Postgres") + rescue Aws::S3::Errors::NoSuchKey + STDERR.puts("#{list}:#{seq} doesn't exist in S3") + rescue StandardError => e + STDERR.puts("failed to import #{list}:#{seq}: #{e}") + end end end