diff --git a/app/controllers/import_reports_controller.rb b/app/controllers/import_reports_controller.rb
new file mode 100644
index 00000000..9b6c5ca1
--- /dev/null
+++ b/app/controllers/import_reports_controller.rb
@@ -0,0 +1,30 @@
+class ImportReportsController < ApplicationController
+ include Pagy::Backend
+
+ before_action :set_import_report, only: [ :show ]
+
+ def index
+ @pagy, @import_reports = pagy(scope.includes(:import_errors))
+
+ # Optional filtering by status
+ @import_reports = @import_reports.where(status: params[:status]) if params[:status].present?
+
+ # Collect unique statuses and import types for filter dropdowns
+ @available_statuses = ImportReport.statuses.keys
+ @available_import_types = scope.distinct.pluck(:import_type).compact.sort
+ end
+
+ def show
+ @import_errors = @import_report.import_errors
+ end
+
+ private
+
+ def set_import_report
+ @import_report = ImportReport.includes(:import_errors).find(params[:id])
+ end
+
+ def scope
+ @scope ||= ImportReport.all
+ end
+end
diff --git a/app/models/import_error.rb b/app/models/import_error.rb
new file mode 100644
index 00000000..0b20b124
--- /dev/null
+++ b/app/models/import_error.rb
@@ -0,0 +1,32 @@
+# == Schema Information
+#
+# Table name: import_errors
+#
+# id :bigint not null, primary key
+# error_message :text
+# error_type :string not null
+# file_name :string
+# metadata :json
+# created_at :datetime not null
+# updated_at :datetime not null
+# import_report_id :bigint not null
+# topic_id :integer
+#
+# Indexes
+#
+# index_import_errors_on_error_type (error_type)
+# index_import_errors_on_file_name (file_name)
+# index_import_errors_on_import_report_id (import_report_id)
+#
+# Foreign Keys
+#
+# fk_rails_... (import_report_id => import_reports.id)
+#
+class ImportError < ApplicationRecord
+ belongs_to :import_report
+
+ validates :error_type, presence: true
+
+ scope :by_type, ->(type) { where(error_type: type) }
+ scope :with_files, -> { where.not(file_name: nil) }
+end
diff --git a/app/models/import_report.rb b/app/models/import_report.rb
new file mode 100644
index 00000000..101f6ba5
--- /dev/null
+++ b/app/models/import_report.rb
@@ -0,0 +1,29 @@
+# == Schema Information
+#
+# Table name: import_reports
+#
+# id :bigint not null, primary key
+# completed_at :datetime
+# error_details :json
+# import_type :string not null
+# started_at :datetime
+# status :string default("pending")
+# summary_stats :json
+# unmatched_files :json
+# created_at :datetime not null
+# updated_at :datetime not null
+#
+# Indexes
+#
+# index_import_reports_on_import_type (import_type)
+#
+class ImportReport < ApplicationRecord
+ has_many :import_errors, dependent: :destroy
+
+ validates :import_type, presence: true
+
+ enum :status, { pending: "pending", planned: "planned", completed: "completed", failed: "failed" }
+
+ scope :recent, -> { order(created_at: :desc) }
+ scope :by_type, ->(type) { where(import_type: type) }
+end
diff --git a/app/views/import_reports/index.html.erb b/app/views/import_reports/index.html.erb
new file mode 100644
index 00000000..b9c70555
--- /dev/null
+++ b/app/views/import_reports/index.html.erb
@@ -0,0 +1,79 @@
+<% content_for :title, "Import Reports" %>
+
+
+
+
+
+
+
+
+
View and monitor import operations and their status.
+
+
+
+
+ <%= form_with url: import_reports_path, method: :get, local: true, class: "d-flex gap-2" do |form| %>
+ <%= form.select :status, options_for_select([["All Statuses", ""]] + @available_statuses.map { |status| [status.humanize, status] }, params[:status]), {}, { class: "form-select", onchange: "this.form.submit();" } %>
+ <% end %>
+
+
+
+
+
+
+
+ | ID |
+ Import Type |
+ Status |
+ Started At |
+ Completed At |
+ Errors |
+ Actions |
+
+
+
+ <% @import_reports.each do |import_report| %>
+
+ | <%= import_report.id %> |
+ <%= import_report.import_type.humanize %> |
+
+ ">
+ <%= import_report.status.humanize %>
+
+ |
+
+ <%= import_report.started_at&.strftime("%m/%d/%Y %I:%M %p") || "Not started" %>
+ |
+
+ <%= import_report.completed_at&.strftime("%m/%d/%Y %I:%M %p") || "Not completed" %>
+ |
+
+ <%# size reads the import_errors preloaded by the controller; count would run one COUNT query per row %>
+ <% if import_report.import_errors.any? %>
+ <%= import_report.import_errors.size %>
+ <% else %>
+ 0
+ <% end %>
+ |
+
+ <%= link_to import_report_path(import_report), class: "btn btn-secondary btn-sm" do %>
+ View
+ <% end %>
+ |
+
+ <% end %>
+
+
+
+
+
+
+ <%== pagy_nav(@pagy) if @pagy.pages > 1 %>
+
+
+
+
+
+
+
diff --git a/app/views/import_reports/show.html.erb b/app/views/import_reports/show.html.erb
new file mode 100644
index 00000000..b0bc5a67
--- /dev/null
+++ b/app/views/import_reports/show.html.erb
@@ -0,0 +1,132 @@
+<% content_for :title, "Import Report ##{@import_report.id}" %>
+
+
+
Import Report: #<%= @import_report.id %>
+
+
+
+
+
Status:
+
+
+ <%= @import_report.status %>
+
+
+
+
+
Import Type:
+
<%= @import_report.import_type.humanize %>
+
+
+
Started At:
+
+ <%= @import_report.started_at&.strftime('%m/%d/%Y %I:%M %p') || "Not started" %>
+
+
+
+
Completed At:
+
+ <%= @import_report.completed_at&.strftime('%m/%d/%Y %I:%M %p') || "Not completed" %>
+
+
+ <% if @import_report.summary_stats.present? %>
+
+
Summary Stats:
+
+
<%= JSON.pretty_generate(@import_report.summary_stats) %>
+
+
+ <% end %>
+
+
+
+
+
+
+
+
+ Import Errors
+
+ <%# size uses the errors already loaded with the report; count would issue an extra COUNT query %>
+ <%= @import_errors.size %>
+
+
+
+
+ <% if @import_errors.any? %>
+
+
+
+
+
+
+
+ | Error Type |
+ File Name |
+ Error Message |
+ Topic ID |
+
+
+
+ <% @import_errors.each do |error| %>
+
+ |
+ <%= error.error_type %>
+ |
+
+
+ <%= error.file_name || "N/A" %>
+
+ |
+
+ <% if error.error_message.present? %>
+
+ <%= error.error_message %>
+
+ <% else %>
+ N/A
+ <% end %>
+ |
+
+ <%= error.topic_id || "N/A" %>
+ |
+
+ <% if error.metadata.present? %>
+
+
+ Metadata:
+ <%= JSON.pretty_generate(error.metadata) %>
+ |
+
+ <% end %>
+ <% end %>
+
+
+
+
+
+
+ <% else %>
+
+
+
+
+
+
No Errors Found
+
This import completed successfully without any errors.
+
+
+ <% end %>
+
+
+
+ <%= link_to "Back to Import Reports", import_reports_path, class: "btn btn-secondary" %>
+
diff --git a/app/views/layouts/_sidebar.html.erb b/app/views/layouts/_sidebar.html.erb
index 813ead02..f50d2964 100644
--- a/app/views/layouts/_sidebar.html.erb
+++ b/app/views/layouts/_sidebar.html.erb
@@ -88,6 +88,13 @@
Users
<% end %>
+
+
<% end %>
diff --git a/config/routes.rb b/config/routes.rb
index 99145c9c..c338683c 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -17,6 +17,7 @@
get :tags
end
end
+ resources :import_reports, only: %i[index show]
resource :settings, only: [] do
put :provider, on: :collection
end
diff --git a/db/migrate/20250625101008_create_import_reports.rb b/db/migrate/20250625101008_create_import_reports.rb
new file mode 100644
index 00000000..239ba154
--- /dev/null
+++ b/db/migrate/20250625101008_create_import_reports.rb
@@ -0,0 +1,17 @@
+class CreateImportReports < ActiveRecord::Migration[8.0]
+ def change
+ create_table :import_reports do |t|
+ t.string :import_type, null: false
+ t.datetime :started_at
+ t.datetime :completed_at
+ t.json :summary_stats
+ t.json :unmatched_files
+ t.json :error_details
+ t.string :status, default: 'pending'
+
+ t.timestamps
+ end
+
+ add_index :import_reports, :import_type
+ end
+end
diff --git a/db/migrate/20250625101109_create_import_errors.rb b/db/migrate/20250625101109_create_import_errors.rb
new file mode 100644
index 00000000..ec984499
--- /dev/null
+++ b/db/migrate/20250625101109_create_import_errors.rb
@@ -0,0 +1,17 @@
+class CreateImportErrors < ActiveRecord::Migration[7.0]
+ def change
+ create_table :import_errors do |t|
+ t.references :import_report, null: false, foreign_key: true
+ t.string :error_type, null: false
+ t.string :file_name
+ t.integer :topic_id
+ t.text :error_message
+ t.json :metadata
+
+ t.timestamps
+ end
+
+ add_index :import_errors, :error_type
+ add_index :import_errors, :file_name
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 6d21a792..aec9c135 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema[8.0].define(version: 2025_03_29_174617) do
+ActiveRecord::Schema[8.0].define(version: 2025_06_25_101109) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_catalog.plpgsql"
@@ -60,6 +60,33 @@
t.index ["user_id"], name: "index_contributors_on_user_id"
end
+ create_table "import_errors", force: :cascade do |t|
+ t.bigint "import_report_id", null: false
+ t.string "error_type", null: false
+ t.string "file_name"
+ t.integer "topic_id"
+ t.text "error_message"
+ t.json "metadata"
+ t.datetime "created_at", null: false
+ t.datetime "updated_at", null: false
+ t.index ["error_type"], name: "index_import_errors_on_error_type"
+ t.index ["file_name"], name: "index_import_errors_on_file_name"
+ t.index ["import_report_id"], name: "index_import_errors_on_import_report_id"
+ end
+
+ create_table "import_reports", force: :cascade do |t|
+ t.string "import_type", null: false
+ t.datetime "started_at"
+ t.datetime "completed_at"
+ t.json "summary_stats"
+ t.json "unmatched_files"
+ t.json "error_details"
+ t.string "status", default: "pending"
+ t.datetime "created_at", null: false
+ t.datetime "updated_at", null: false
+ t.index ["import_type"], name: "index_import_reports_on_import_type"
+ end
+
create_table "languages", force: :cascade do |t|
t.string "name"
t.datetime "created_at", null: false
@@ -157,6 +184,7 @@
add_foreign_key "active_storage_attachments", "active_storage_blobs", column: "blob_id"
add_foreign_key "active_storage_variant_records", "active_storage_blobs", column: "blob_id"
+ add_foreign_key "import_errors", "import_reports"
add_foreign_key "sessions", "users"
add_foreign_key "tag_cognates", "tags"
add_foreign_key "tag_cognates", "tags", column: "cognate_id"
diff --git a/lib/autorequire/data_import.rb b/lib/autorequire/data_import.rb
index cb98f9e7..cc2608f1 100644
--- a/lib/autorequire/data_import.rb
+++ b/lib/autorequire/data_import.rb
@@ -8,6 +8,21 @@ def self.reset
self.import_all
end
+ # Imports all data except for training documents.
+ def self.quick_reset
+ self.destroy_all_data
+ self.import_regions
+ self.import_providers
+ self.import_languages
+ self.import_topics
+ self.import_tags
+ self.import_topic_tags
+ self.restore_default_users
+ end
+
+  # WARNING: this method destroys all data in the database.
+  # Use with caution!
def self.destroy_all_data
TagCognate.destroy_all
ActsAsTaggableOn::Tagging.destroy_all
@@ -187,17 +202,41 @@ def self.restore_default_users
end
def self.import_training_documents
- csv_data = load_training_documents_csv
- import_stats = initialize_import_stats
+ report = ImportReport.create!(
+ import_type: "training_documents",
+ started_at: Time.current,
+ status: "pending"
+ )
- valid_csv_rows = filter_rows_with_existing_topics(csv_data, import_stats)
- azure_files = fetch_azure_files
- importable_rows = match_csv_with_azure_files(valid_csv_rows, azure_files)
+ begin
+ csv_data = load_training_documents_csv
+ import_stats = initialize_import_stats
+
+ valid_csv_rows = filter_rows_with_existing_topics(csv_data, import_stats)
+ azure_files = fetch_azure_files
+ importable_rows = match_csv_with_azure_files(valid_csv_rows, azure_files)
+ unmatched_files = collect_unmatched_files(csv_data, azure_files, importable_rows, report)
+
+ report.update!(
+ summary_stats: build_summary_stats(import_stats, csv_data, azure_files),
+ unmatched_files: unmatched_files,
+ status: "planned"
+ )
- log_import_summary(valid_csv_rows, azure_files, importable_rows)
+ process_document_attachments(importable_rows, import_stats, report)
- process_document_attachments(importable_rows, import_stats)
- log_final_results(import_stats)
+ report.update!(
+ completed_at: Time.current,
+ status: "completed",
+ summary_stats: build_summary_stats(import_stats, csv_data, azure_files),
+ unmatched_files: unmatched_files
+ )
+
+ log_final_results(import_stats)
+ rescue => e
+ report.update!(status: "failed", error_details: [ { error: e.message } ])
+ raise
+ end
end
private
@@ -233,16 +272,16 @@ def self.match_csv_with_azure_files(csv_rows, azure_files)
end
end
- def self.process_document_attachments(rows, stats)
+ # Calls attach_document_to_topic for every row whose "Topic_ID" matches a Topic.
+ # Rows without a matching Topic are skipped here without recording anything.
+ # +stats+ and +report+ are passed through unchanged to attach_document_to_topic.
+ def self.process_document_attachments(rows, stats, report)
 rows.each do |row|
 topic = Topic.find_by(id: row["Topic_ID"])
 next unless topic
- attach_document_to_topic(topic, row, stats)
+ attach_document_to_topic(topic, row, stats, report)
 end
 end
- def self.attach_document_to_topic(topic, row, stats)
+ def self.attach_document_to_topic(topic, row, stats, report)
file_path = get_file_path(topic.state, topic.language.name)
filename = row["File_Name"]
@@ -264,7 +303,7 @@ def self.attach_document_to_topic(topic, row, stats)
end
rescue AzureFileShares::Errors::ApiError, URI::InvalidURIError => e
- handle_attachment_error(topic, filename, e, stats)
+ handle_attachment_error(topic, filename, e, stats, report)
end
end
@@ -277,20 +316,66 @@ def self.download_azure_file(file_path, filename)
)
end
- def self.handle_attachment_error(topic, filename, error, stats)
- error_info = {
+  # Records a failed attachment in three places: an ImportError row on +report+,
+  # an entry in stats[:error_files], and a line on stdout.
+  #
+  # The report is passed as the association (not just its id) so the required
+  # belongs_to validation can use the already-loaded record instead of looking
+  # the report up again for every error.
+  def self.handle_attachment_error(topic, filename, error, stats, report)
+    ImportError.create!(
+      import_report: report,
+      topic_id: topic.id,
+      file_name: filename,
+      error_type: error.class.to_s,
+      error_message: error.message
+    )
+
+    stats[:error_files] << {
 topic: topic,
 file: filename,
 error: error.message,
 }
- stats[:error_files] << error_info
 puts "Error with file: #{filename} for topic #{topic.title} - #{error.message}"
 end
- def self.log_import_summary(csv_rows, azure_files, importable_rows)
- puts "CSV rows with topics: #{csv_rows.size}"
- puts "Azure files found: #{azure_files.size}"
- puts "Importable files: #{importable_rows.size}"
+ def self.collect_unmatched_files(csv_data, azure_files, importable_rows, report)
+ csv_file_names = csv_data.map { |row| row["File_Name"] }
+ azure_file_names = azure_files.map { |file| file[:name] }
+ matched_file_names = importable_rows.map { |row| row["File_Name"] }
+
+ r = {
+ csv_without_azure_files: csv_file_names - azure_file_names,
+ azure_files_without_csv: azure_file_names - csv_file_names,
+ total_unmatched: (csv_file_names + azure_file_names - matched_file_names).uniq,
+ }
+
+ r[:csv_without_azure_files].each do |csv_row_name|
+ ImportError.create!(
+ import_report_id: report.id,
+ file_name: csv_row_name,
+ error_type: "CSV::Row::File not found in Azure",
+ error_message: "CSV Row File not found in Azure",
+ topic_id: csv_row_name.split("_").first
+ )
+ end
+
+ r[:azure_files_without_csv].each do |azure_file_name|
+ ImportError.create!(
+ import_report_id: report.id,
+ file_name: azure_file_name,
+ error_type: "Azure::File not found in CSV",
+ error_message: "Azure::File not found in CSV",
+ topic_id: azure_file_name.split("_").first
+ )
+ end
+
+ r
+ end
+
+ def self.build_summary_stats(import_stats, csv_data, azure_files)
+ {
+ total_csv_files: csv_data.size,
+ total_azure_files: azure_files.size,
+ successful_attachments: import_stats[:successful_attachments].size,
+ failed_attachments: import_stats[:failed_attachments].size,
+ topics_without_csv: import_stats[:topics_without_csv].size,
+ error_files: import_stats[:error_files].size,
+ }
end
def self.log_final_results(stats)