From bdd20090778e95cef42fa2810c894135552db6e0 Mon Sep 17 00:00:00 2001 From: Kazuyoshi Kato Date: Fri, 11 Oct 2024 21:53:58 -0700 Subject: [PATCH] Workaround ruby-list:2882 --- app/models/message.rb | 11 +++++++---- import.rb | 8 ++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/app/models/message.rb b/app/models/message.rb index 6886765..80d2364 100644 --- a/app/models/message.rb +++ b/app/models/message.rb @@ -21,11 +21,14 @@ def self.from_s3(list_name, list_seq) end def self.from_string(str) - headers_str, body = str.split(/\n\n/, 2) + # There are a few hacks to import messages from blade.ruby-lang.org's + # S3 bucket. - # Not really sure this is from the original email, or while making - # blade.ruby-lang.org's S3 archive, but there are emails without - # a proper Form header, such as ruby-list:2840. + # Need to call String#b. There are messages that have headers in non-UTF8, + # but the body is in UTF-8, such as ruby-list:2882. + headers_str, body = str.b.split(/\n\n/, 2) + + # ruby-list:2840 doesn't have a proper From header. headers_str = Kconv.toutf8(headers_str).gsub(/\r\n/, '') headers = headers_str.split(/\n/).map { |line| diff --git a/import.rb b/import.rb index ab4dfc8..0abc87a 100644 --- a/import.rb +++ b/import.rb @@ -14,10 +14,10 @@ message = Message.from_s3(list, seq) message.save rescue ActiveRecord::RecordNotUnique - STDERR.puts("#{list}:#{seq} already exists") + STDERR.puts("#{list}:#{seq} already exists in Postgres") rescue Aws::S3::Errors::NoSuchKey - STDERR.puts("#{list}:#{seq} doesn't exist") - rescue - STDERR.puts("failed to import #{list}:#{seq}") + STDERR.puts("#{list}:#{seq} doesn't exist in S3") + rescue StandardError => e + STDERR.puts("failed to import #{list}:#{seq}: #{e}") end end