Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions app/controllers/messages_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ def search(query)
# %> and <-> are defined by pg_trgm.
# https://www.postgresql.org/docs/17/pgtrgm.html
message_where = if Rails.env.production?
Message.where('body %> ? AND list_id IN (?)', query, list_ids)
.order(Arel.sql('body <-> ?', query))
Message.where('body %> ? AND list_id IN (?)', query, list_ids)
.order(Arel.sql('body <-> ?', query))
else
Message.where('body LIKE ? AND list_id IN (?)', "%#{query}%", list_ids)
end
Expand Down
38 changes: 19 additions & 19 deletions app/helpers/messages_helper.rb
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
# View helpers for rendering messages and search results.
module MessagesHelper
  # Number of characters of context kept in front of a keyword match; the
  # fallback excerpt is twice this long.
  MARGIN = 50

  # Removes a leading bracketed tag (and the whitespace after it) from a
  # subject line.
  #
  # @param subject [String]
  # @return [String] a copy of subject without the leading "[...]" tag
  def without_list_prefix(subject)
    subject.sub(/^\[.+?\]\s*/, '')
  end

  # Builds an excerpt of body around the occurrences of keyword.
  #
  # Each occurrence contributes a piece of keyword.length + MARGIN characters
  # that starts MARGIN characters before the match, or where the previous
  # piece ended if that is later. The pieces are concatenated in order.
  #
  # @param body [String] text to excerpt
  # @param keyword [String, nil] exact (case-sensitive) text to look for
  # @return [String] the concatenated pieces, or the first MARGIN * 2
  #   characters of body when there is no match or no keyword
  def search_snippet(body, keyword)
    # An empty keyword matches at every offset, which would turn the
    # "snippet" into the whole body; treat it like a search with no hits.
    return body[0, MARGIN * 2] if keyword.nil? || keyword.empty?

    snippet = +''
    offset = 0
    while (i = body.index(keyword, offset))
      # Never start before the end of the previous piece, so text is not
      # repeated when matches are close together.
      start = [i - MARGIN, offset].max
      len = keyword.length + MARGIN
      snippet << body[start, len]
      offset = start + len
    end

    snippet.empty? ? body[0, MARGIN * 2] : snippet
  end
end
34 changes: 17 additions & 17 deletions app/models/list.rb
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
# A list identified by a name and a numeric id, plus the fixed registry of
# all known lists.
class List
  attr_reader :name, :id

  # @param name [String] list name, e.g. 'ruby-core'
  # @param id [Integer] numeric identifier of the list
  def initialize(name, id)
    @name = name
    @id = id
  end

  # Ordered by the established dates. ruby-list was started in 1995.
  # Frozen so the registry cannot be modified by accident at runtime.
  LISTS = [
    List.new('ruby-list', 1),
    List.new('ruby-dev', 2),
    List.new('ruby-core', 3),
    List.new('ruby-talk', 4),
  ].freeze

  # @param name [String]
  # @return [List, nil] the list with that name, or nil when there is none
  def self.find_by_name(name)
    LISTS.find { |list| list.name == name }
  end

  # @param id [Integer]
  # @return [List, nil] the list with that id, or nil when there is none
  def self.find_by_id(id)
    LISTS.find { |list| list.id == id }
  end
end
70 changes: 35 additions & 35 deletions app/models/message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,50 +8,50 @@ class Message < ApplicationRecord
# but I don't want to make this column.
# https://blade.ruby-lang.org/ruby-talk/1 is JST.
# https://blade.ruby-lang.org/ruby-talk/410000 is not.
self.skip_time_zone_conversion_for_attributes = [:published_at]
self.skip_time_zone_conversion_for_attributes = [:published_at]

# Downloads the object stored under "<list_name>/<list_seq>" in the blade
# bucket, parses it with from_string and tags the result with its list id
# and sequence number.
#
# @param list_name [String] name of the list, also the key prefix in the bucket
# @param list_seq [Integer] sequence number of the message within the list
# @param s3_client [Aws::S3::Client] client used for the download
# @return the parsed message with list_id and list_seq assigned
def self.from_s3(list_name, list_seq, s3_client = Aws::S3::Client.new(region: BLADE_BUCKET_REGION))
  response = s3_client.get_object(bucket: BLADE_BUCKET_NAME, key: "#{list_name}/#{list_seq}")

  from_string(response.body.read).tap do |message|
    message.list_id = List.find_by_name(list_name).id
    message.list_seq = list_seq
  end
end

# Builds a new, unsaved message from raw text made of header lines, a blank
# line, and then the body.
#
# Header lines that do not have the "Name: value" shape are ignored rather
# than aborting the whole parse. Raises when the Date header is missing or
# does not match the expected timestamp format.
#
# @param str [String] raw message text
# @return a new instance with body, subject, from and published_at set
def self.from_string(str)
  # There are a few hacks to import messages from blade.ruby-lang.org's
  # S3 bucket.

  # Need to call String#b. There are messages that have headers in non-UTF8,
  # but the body is in UTF-8, such as ruby-list:2882.
  headers_str, body = str.b.split(/\n\n/, 2)

  # ruby-list:2840 doesn't have a proper From header.
  headers_str = Kconv.toutf8(headers_str).gsub(/\r\n/, '')

  headers = headers_str.split(/\n/).filter_map { |line|
    name, value = line.split(/:\s+/, 2)
    # A line without a ": " separator has no value part; passing such a
    # short pair on to to_h would raise and lose the whole message.
    [name, value] if value
  }.to_h

  published_at = DateTime.strptime(headers['Date'], '%Y-%m-%dT%H:%M:%S%:z')

  new(
    body: Kconv.toutf8(body),
    subject: headers['Subject'],
    from: headers['From'],
    published_at: published_at,
  )
end

# Re-fetches this message from S3 and overwrites the in-memory body, subject,
# from and published_at with the freshly parsed values. The record is not
# saved here.
#
# @param s3_client [Aws::S3::Client] client used for the download
# @return the freshly parsed message the attributes were copied from
def reload_from_s3(s3_client = Aws::S3::Client.new(region: BLADE_BUCKET_REGION))
  list = List.find_by_id(list_id)
  fresh = Message.from_s3(list.name, list_seq, s3_client)

  self.body = fresh.body
  self.subject = fresh.subject
  # Take the sender from the re-parsed message. Assigning this record's own
  # `from` back to itself would leave the stale value in place.
  self.from = fresh.from
  self.published_at = fresh.published_at

  fresh
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ def change
end

private
# Primary key type configured in the generator options of the current ORM,
# or :primary_key when none is configured.
def primary_key_type
  generators = Rails.configuration.generators
  orm_options = generators.options[generators.orm]
  orm_options[:primary_key_type] || :primary_key
end

# Type of the primary key column of the active_storage_blobs table: :bigint
# when the column reports itself as bigint, otherwise the column's own type.
def blobs_primary_key_type
  table = :active_storage_blobs
  pkey_name = connection.primary_key(table)
  pkey_column = connection.columns(table).find { |column| column.name == pkey_name }
  return :bigint if pkey_column.bigint?

  pkey_column.type
end
end
22 changes: 11 additions & 11 deletions import.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,22 @@

# Imports messages --from..--to (inclusive sequence numbers) of --list from S3
# into the database, reporting each failure on stderr and continuing with the
# next sequence number.
params = {}
OptionParser.new do |opts|
  opts.on('--list LIST')
  opts.on('--from FROM', Integer)
  opts.on('--to TO', Integer)
end.parse!(into: params)

# All three options are needed to build the key range below; fail with a
# readable message instead of an error from the Range.
missing = %i[list from to].reject { |name| params.key?(name) }
abort("missing required option(s): #{missing.map { |name| "--#{name}" }.join(', ')}") unless missing.empty?

list = params[:list]

(params[:from]..params[:to]).each do |seq|
  # save! rather than save: a record that cannot be saved is then reported by
  # the generic rescue below instead of being skipped without any output.
  Message.from_s3(list, seq).save!
rescue ActiveRecord::RecordNotUnique
  $stderr.puts("#{list}:#{seq} already exists in Postgres")
rescue Aws::S3::Errors::NoSuchKey
  $stderr.puts("#{list}:#{seq} doesn't exist in S3")
rescue StandardError => e
  $stderr.puts("failed to import #{list}:#{seq}: #{e}")
end