diff --git a/app/helpers/messages_helper.rb b/app/helpers/messages_helper.rb
index 94ef84a..66beb1e 100644
--- a/app/helpers/messages_helper.rb
+++ b/app/helpers/messages_helper.rb
@@ -1,6 +1,6 @@
 module MessagesHelper
   def without_list_prefix(subject)
-    subject.sub(/^\[.+?\]\s*/, '')
+    subject&.sub(/^\[.+?\]\s*/, '')
   end
 
   MARGIN = 50
diff --git a/app/models/message.rb b/app/models/message.rb
index 1f5151b..c6d051f 100644
--- a/app/models/message.rb
+++ b/app/models/message.rb
@@ -11,6 +11,61 @@ class Message < ApplicationRecord
   self.skip_time_zone_conversion_for_attributes = [:published_at]
 
   class << self
+    # Builds an unsaved Message from a parsed mail.
+    #
+    # mail     - parsed mail (bin/import_mails passes the result of Mail.read_from_string)
+    # list     - record supplying #name and #id
+    # list_seq - Integer sequence number of the mail within the list
+    #
+    # Returns a new Message that has not been saved.
+    def from_mail(mail, list, list_seq)
+      body = Kconv.toutf8 mail.body.raw_source
+      # NUL characters are removed from the bodies of these specific mails.
+      if ((list.name == 'ruby-dev') && list_seq.in?([13859, 26229, 39731, 39734])) || ((list.name == 'ruby-core') && list_seq.in?([5231])) || ((list.name == 'ruby-list') && list_seq.in?([29637, 29711, 30148])) || ((list.name == 'ruby-talk') && list_seq.in?([5198, 61316]))
+        body.gsub!("\u0000", '')
+      end
+      # The raw Subject value of these specific mails loses its last character.
+      if (list.name == 'ruby-list') && list_seq.in?([37565, 38116, 43106])
+        mail.header[:subject].value.chop!
+      end
+      # The raw Subject value of these specific mails is converted to UTF-8 before it is read.
+      if (list.name == 'ruby-list') && (list_seq.in?([41850, 43710]))
+        mail.header[:subject].value = Kconv.toutf8 mail.header[:subject].value
+      end
+      subject = mail.subject
+      subject = Kconv.toutf8 subject if subject
+      # Kconv.toutf8 raises TypeError for nil, so convert only when a raw From value exists;
+      # otherwise the ruby-core fallback below could never run.
+      raw_from = mail.from_address&.raw
+      from = Kconv.toutf8 raw_from if raw_from
+      if !from && (list.name == 'ruby-core') && (list_seq == 161)
+        from = mail.from.encode Encoding::UTF_8, Encoding::KOI8_R
+      end
+
+      message_id = mail.message_id&.encode Encoding::UTF_8, invalid: :replace, undef: :replace
+
+      # mail.in_reply_to returns strange Array object in some cases (?), so let's use the raw value
+      parent_message_id_header = extract_message_id_from_in_reply_to(mail.header[:in_reply_to]&.value)
+      parent_message_id = Message.where(message_id_header: parent_message_id_header).pick(:id) if parent_message_id_header
+      # NOTE(review): when In-Reply-To gives no match, the first References entry that matches
+      # a stored message becomes the parent - confirm that first (not last) is intended.
+      if !parent_message_id && (String === mail.references)
+        parent_message_id = Message.where(message_id_header: mail.references).pick(:id)
+      end
+      if !parent_message_id && (Array === mail.references)
+        mail.references.compact.each do |ref|
+          break if (parent_message_id = Message.where(message_id_header: ref).pick(:id))
+        end
+      end
+
+      new list_id: list.id, list_seq: list_seq, body: body, subject: subject, from: from, published_at: mail.date, message_id_header: message_id, parent_id: parent_message_id
+    end
+
+    # Returns the first <...>-bracketed id found in the raw In-Reply-To value, or nil when there is none.
+    private def extract_message_id_from_in_reply_to(header)
+      header && header.strip.scan(/<([^>]+)>/).flatten.first
+    end
+
     def from_s3(list_name, list_seq, s3_client = Aws::S3::Client.new(region: BLADE_BUCKET_REGION))
       obj = s3_client.get_object(bucket: BLADE_BUCKET_NAME, key: "#{list_name}/#{list_seq}")
 
diff --git a/bin/import_mails b/bin/import_mails
new file mode 100755
index 0000000..a5e2a2f
--- /dev/null
+++ b/bin/import_mails
@@ -0,0 +1,58 @@
+#!/usr/bin/env ruby
+
+# Imports the mails stored as tmp/<list name>/<seq> files, for seq in --from..--to, as Message records.
+#
+# NOTE(review): Rails, List and Message are used without loading the application here, so this
+# is presumably meant to be run through `bin/rails runner` - confirm.
+
+require 'optparse'
+require 'mail'
+
+BASE_DIR = Rails.root.join('tmp')
+
+params = {}
+OptionParser.new do |opts|
+  opts.on('--list LIST')
+  opts.on('--from FROM', Integer)
+  opts.on('--to TO', Integer)
+end.parse!(into: params)
+
+# Fail fast with a readable message instead of a NoMethodError or bad range further down.
+missing = %i[list from to].reject { |key| params[key] }
+abort "missing required option(s): #{missing.map { |key| "--#{key}" }.join(', ')}" if missing.any?
+
+list = List.find_by_name(params[:list])
+abort "unknown list: #{params[:list]}" unless list
+
+errors = []
+
+Rails.logger.level = Logger::INFO
+
+Message.transaction do
+  (params[:from]..params[:to]).each do |seq|
+    begin
+      filepath = BASE_DIR.join(list.name, seq.to_s)
+      next unless filepath.exist?
+
+      str = File.binread filepath
+      next if str.blank?
+
+      mail = Mail.read_from_string str
+      # Each message gets its own savepoint: on PostgreSQL a failed statement aborts the
+      # enclosing transaction, so without it one rescued error would make every later
+      # statement fail and the whole batch roll back.
+      Message.transaction(requires_new: true) do
+        message = Message.from_mail mail, list, seq
+        message.save!
+      end
+    rescue ActiveRecord::RecordNotUnique
+      STDERR.puts("#{list.name}:#{seq} already exists in Postgres")
+    rescue StandardError => e
+      errors << [seq, e]
+      STDERR.puts("failed to import #{list.name}:#{seq}: #{e}")
+    end
+  end
+end
+
+pp errors if errors.any?
diff --git a/db/migrate/20251017161507_add_index_messages_message_id_header.rb b/db/migrate/20251017161507_add_index_messages_message_id_header.rb
new file mode 100644
index 0000000..b92d7d6
--- /dev/null
+++ b/db/migrate/20251017161507_add_index_messages_message_id_header.rb
@@ -0,0 +1,7 @@
+class AddIndexMessagesMessageIdHeader < ActiveRecord::Migration[8.0]
+  # NOTE(review): `change` is empty, so this migration creates no index despite its name.
+  # If the index is still wanted, add `add_index :messages, :message_id_header` here and
+  # regenerate db/schema.rb - TODO confirm.
+  def change
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 4ebda35..9b743e7 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema[8.0].define(version: 2025_10_10_175060) do
+ActiveRecord::Schema[8.0].define(version: 2025_10_17_161507) do
   # These are extensions that must be enabled in order to support this database
   enable_extension "pg_catalog.plpgsql"
   enable_extension "pg_trgm"
diff --git a/test/models/message_test.rb b/test/models/message_test.rb
index ce97450..58b647d 100644
--- a/test/models/message_test.rb
+++ b/test/models/message_test.rb
@@ -1,6 +1,20 @@
 require "test_helper"
 
 class MessageTest < ActiveSupport::TestCase
+  test 'from_mail' do
+    mail = Mail.read_from_string(<