Skip to content

Commit 8a29b98

Browse files
authored
Merge pull request #63 from amatsuda/import_from_raw_mail
Yet another email importer that imports from raw mail text
2 parents ceacc98 + 7fdc658 commit 8a29b98

File tree

6 files changed

+101
-2
lines changed

6 files changed

+101
-2
lines changed

app/helpers/messages_helper.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
module MessagesHelper
22
# Removes a bracketed list tag (e.g. "[ruby-list:1] ") from the subject.
#
# Only the first tag that sits at the beginning of a line is removed, together
# with the whitespace that follows it. Returns nil when +subject+ is nil.
def without_list_prefix(subject)
  subject&.sub(/^\[.+?\]\s*/, '')
end
55

66
MARGIN = 50

app/models/message.rb

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,45 @@ class Message < ApplicationRecord
1111
self.skip_time_zone_conversion_for_attributes = [:published_at]
1212

1313
class << self
14+
# Builds a new, unsaved record from a parsed raw mail.
#
# mail     - parsed mail object; body, headers, subject, sender, date,
#            message id and references are read from it
# list     - list record; its name selects per-message workarounds and its
#            id is stored as list_id
# list_seq - sequence number of the mail within the list
#
# parent_id is filled in when a stored message has a message_id_header equal
# to the In-Reply-To id or, failing that, to one of the References ids.
def from_mail(mail, list, list_seq)
  body = Kconv.toutf8 mail.body.raw_source

  # Specific messages whose body has NUL characters removed.
  nul_bodies = {
    'ruby-dev' => [13859, 26229, 39731, 39734],
    'ruby-core' => [5231],
    'ruby-list' => [29637, 29711, 30148],
    'ruby-talk' => [5198, 61316]
  }
  body.gsub!("\u0000", '') if nul_bodies[list.name]&.include?(list_seq)

  if list.name == 'ruby-list'
    # Specific messages whose Subject header value loses its last character.
    mail.header[:subject].value.chop! if [37565, 38116, 43106].include?(list_seq)

    # Specific messages whose Subject header value is converted before
    # mail.subject is read.
    if [41850, 43710].include?(list_seq)
      mail.header[:subject].value = Kconv.toutf8 mail.header[:subject].value
    end
  end

  subject = mail.subject
  subject &&= Kconv.toutf8(subject)

  # Convert only when a raw From value exists, so that `from` stays nil
  # otherwise and the fallback below can actually run.
  raw_from = mail.from_address&.raw
  from = Kconv.toutf8(raw_from) if raw_from
  if !from && (list.name == 'ruby-core') && (list_seq == 161)
    from = mail.from.encode Encoding::UTF_8, Encoding::KOI8_R
  end

  message_id = mail.message_id&.encode Encoding::UTF_8, invalid: :replace, undef: :replace

  # mail.in_reply_to returns strange Array object in some cases (?), so let's use the raw value
  parent_message_id_header = extract_message_id_from_in_reply_to(mail.header[:in_reply_to]&.value)
  parent_message_id = Message.where(message_id_header: parent_message_id_header).pick(:id) if parent_message_id_header

  # Fall back to References: a single id, or the first listed id that is stored.
  if !parent_message_id && (String === mail.references)
    parent_message_id = Message.where(message_id_header: mail.references).pick(:id)
  end
  if !parent_message_id && (Array === mail.references)
    mail.references.compact.each do |ref|
      break if (parent_message_id = Message.where(message_id_header: ref).pick(:id))
    end
  end

  new list_id: list.id, list_seq: list_seq, body: body, subject: subject, from: from, published_at: mail.date, message_id_header: message_id, parent_id: parent_message_id
end
48+
49+
# Returns the text inside the first <...> pair of a raw In-Reply-To header
# value, or nil when the header is nil or contains no such pair.
private def extract_message_id_from_in_reply_to(header)
  header && header[/<([^>]+)>/, 1]
end
52+
1453
def from_s3(list_name, list_seq, s3_client = Aws::S3::Client.new(region: BLADE_BUCKET_REGION))
1554
obj = s3_client.get_object(bucket: BLADE_BUCKET_NAME, key: "#{list_name}/#{list_seq}")
1655

bin/import_mails

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#!/usr/bin/env ruby

# Imports raw mail files found at tmp/<list name>/<seq> as Message records.
#
# Options: --list LIST --from FROM --to TO (FROM..TO is the inclusive range
# of sequence numbers to try; missing or blank files are skipped).
#
# NOTE(review): Rails, List and Message must already be loaded when this runs;
# nothing in this script loads the application environment — confirm how it
# is invoked.

require 'optparse'
require 'mail'

BASE_DIR = Rails.root.join('tmp')

params = {}
OptionParser.new do |opts|
  opts.on('--list LIST')
  opts.on('--from FROM', Integer)
  opts.on('--to TO', Integer)
end.parse!(into: params)

# Fail early instead of iterating a nil range or a nil list.
unless params[:list] && params[:from] && params[:to]
  abort "usage: #{$PROGRAM_NAME} --list LIST --from FROM --to TO"
end

list = List.find_by_name(params[:list])
abort "unknown list: #{params[:list]}" unless list

errors = []

Rails.logger.level = Logger::INFO

Message.transaction do
  (params[:from]..params[:to]).each do |seq|
    filepath = BASE_DIR.join(list.name, seq.to_s)
    next unless filepath.exist?

    str = File.binread filepath
    next if str.blank?

    mail = Mail.read_from_string str

    # Each message gets its own nested transaction (savepoint): a database
    # error such as a duplicate key would otherwise leave the surrounding
    # transaction unusable for every following message.
    Message.transaction(requires_new: true) do
      message = Message.from_mail mail, list, seq
      message.save!
    end
  rescue ActiveRecord::RecordNotUnique
    STDERR.puts("#{list.name}:#{seq} already exists in Postgres")
  rescue StandardError => e
    errors << [seq, e]
    STDERR.puts("failed to import #{list.name}:#{seq}: #{e}")
  end
end

pp errors if errors.any?
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Indexes the message_id_header column, which Message.from_mail filters on
# when it looks up a parent message.
#
# NOTE(review): the table name :messages is assumed from the Message model's
# conventional table name — confirm. if_not_exists keeps the migration safe on
# databases where the index was already created by other means.
class AddIndexMessagesMessageIdHeader < ActiveRecord::Migration[8.0]
  def change
    add_index :messages, :message_id_header, if_not_exists: true
  end
end

db/schema.rb

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/models/message_test.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
11
require "test_helper"
22

33
class MessageTest < ActiveSupport::TestCase
4+
# Parses a small raw mail with Mail.read_from_string, passes it to
# Message.from_mail for the 'ruby-list' list with sequence number 1, and checks
# the resulting body and published_at.
test 'from_mail' do
5+
mail = Mail.read_from_string(<<END_OF_BODY)
6+
Subject: [ruby-list:1] Hello
7+
8+
Date: 2005-12-15T19:32:40+09:00
9+
10+
Hello, world!
11+
END_OF_BODY
12+
m = Message.from_mail(mail, List.find_by_name('ruby-list'), 1)
13+
assert_equal "Hello, world!\r\n", m.body
14+
15+
assert_equal DateTime.parse('2005-12-15T19:32:40+09:00'), m.published_at
16+
end
17+
418
test 'from_string' do
519
m = Message.from_string(<<END_OF_BODY)
620
Subject: [ruby-list:1] Hello

0 commit comments

Comments
 (0)