diff --git a/app/controllers/import_reports_controller.rb b/app/controllers/import_reports_controller.rb
new file mode 100644
index 00000000..9b6c5ca1
--- /dev/null
+++ b/app/controllers/import_reports_controller.rb
@@ -0,0 +1,37 @@
+# Read-only listing and detail pages for ImportReport records.
+class ImportReportsController < ApplicationController
+  include Pagy::Backend
+
+  before_action :set_import_report, only: [ :show ]
+
+  # Lists import reports, newest first, optionally narrowed by params[:status].
+  def index
+    # Filter and order BEFORE paginating: pagy counts and slices the relation
+    # it is given, so a filter applied afterwards would leave the page count
+    # wrong and could produce short or empty pages.
+    reports = scope.recent.includes(:import_errors)
+    reports = reports.where(status: params[:status]) if params[:status].present?
+    @pagy, @import_reports = pagy(reports)
+
+    # Collect unique statuses and import types for filter dropdowns
+    @available_statuses = ImportReport.statuses.keys
+    @available_import_types = scope.distinct.pluck(:import_type).compact.sort
+  end
+
+  # Shows a single report together with its errors.
+  def show
+    @import_errors = @import_report.import_errors
+  end
+
+  private
+
+  # Loads the requested report with its errors preloaded.
+  def set_import_report
+    @import_report = ImportReport.includes(:import_errors).find(params[:id])
+  end
+
+  # Base relation for the listing and the filter dropdown values.
+  def scope
+    @scope ||= ImportReport.all
+  end
+end
diff --git a/app/models/import_error.rb b/app/models/import_error.rb
new file mode 100644
index 00000000..0b20b124
--- /dev/null
+++ b/app/models/import_error.rb
@@ -0,0 +1,32 @@
+# == Schema Information
+#
+# Table name: import_errors
+#
+#  id               :bigint           not null, primary key
+#  error_message    :text
+#  error_type       :string           not null
+#  file_name        :string
+#  metadata         :json
+#  created_at       :datetime         not null
+#  updated_at       :datetime         not null
+#  import_report_id :bigint           not null
+#  topic_id         :integer
+#
+# Indexes
+#
+#  index_import_errors_on_error_type        (error_type)
+#  index_import_errors_on_file_name         (file_name)
+#  index_import_errors_on_import_report_id  (import_report_id)
+#
+# Foreign Keys
+#
+#  fk_rails_...
(import_report_id => import_reports.id)
+#
+class ImportError < ApplicationRecord # one problem recorded while running an import
+  belongs_to :import_report
+
+  validates :error_type, presence: true
+
+  scope :by_type, ->(type) { where(error_type: type) } # errors whose error_type equals +type+
+  scope :with_files, -> { where.not(file_name: nil) } # errors that reference a file name
+end
diff --git a/app/models/import_report.rb b/app/models/import_report.rb
new file mode 100644
index 00000000..101f6ba5
--- /dev/null
+++ b/app/models/import_report.rb
@@ -0,0 +1,29 @@
+# == Schema Information
+#
+# Table name: import_reports
+#
+#  id              :bigint           not null, primary key
+#  completed_at    :datetime
+#  error_details   :json
+#  import_type     :string           not null
+#  started_at      :datetime
+#  status          :string           default("pending")
+#  summary_stats   :json
+#  unmatched_files :json
+#  created_at      :datetime         not null
+#  updated_at      :datetime         not null
+#
+# Indexes
+#
+#  index_import_reports_on_import_type  (import_type)
+#
+class ImportReport < ApplicationRecord # one run of an import with its status and collected results
+  has_many :import_errors, dependent: :destroy # destroying a report also destroys its errors
+
+  validates :import_type, presence: true
+
+  enum :status, { pending: "pending", planned: "planned", completed: "completed", failed: "failed" } # stored as the string names
+
+  scope :recent, -> { order(created_at: :desc) } # newest reports first
+  scope :by_type, ->(type) { where(import_type: type) } # reports whose import_type equals +type+
+end
diff --git a/app/views/import_reports/index.html.erb b/app/views/import_reports/index.html.erb
new file mode 100644
index 00000000..b9c70555
--- /dev/null
+++ b/app/views/import_reports/index.html.erb
@@ -0,0 +1,79 @@
+<% content_for :title, "Import Reports" %>
+
+
+
+
+
+

Import Reports

+
+
+
+

View and monitor import operations and their status.

+ + +
+
+ <%= form_with url: import_reports_path, method: :get, local: true, class: "d-flex gap-2" do |form| %> + <%= form.select :status, options_for_select([["All Statuses", ""]] + @available_statuses.map { |status| [status.humanize, status] }, params[:status]), {}, { class: "form-select", onchange: "this.form.submit();" } %> + <% end %> +
+
+ +
+ + + + + + + + + + + + + + <% @import_reports.each do |import_report| %> + + + + + + + + + + <% end %> + +
IDImport TypeStatusStarted AtCompleted AtErrorsActions
<%= import_report.id %><%= import_report.import_type.humanize %> + "> + <%= import_report.status.humanize %> + + + <%= import_report.started_at&.strftime("%m/%d/%Y %I:%M %p") || "Not started" %> + + <%= import_report.completed_at&.strftime("%m/%d/%Y %I:%M %p") || "Not completed" %> + + <% if import_report.import_errors.any? %> + <%# size reads the preloaded association; count would issue one COUNT query per row %><%= import_report.import_errors.size %> + <% else %> + 0 + <% end %> + + <%= link_to import_report_path(import_report), class: "btn btn-secondary btn-sm" do %> + View + <% end %> +
+
+ + +
+ <%== pagy_nav(@pagy) if @pagy.pages > 1 %> +
+
+
+
+
+
+
diff --git a/app/views/import_reports/show.html.erb b/app/views/import_reports/show.html.erb new file mode 100644 index 00000000..b0bc5a67 --- /dev/null +++ b/app/views/import_reports/show.html.erb @@ -0,0 +1,132 @@ +<% content_for :title, "Import Report ##{@import_report.id}" %> + +
+

Import Report: #<%= @import_report.id %>

+
+
+
+

<%= @import_report.import_type.humanize %> Import

+
+
+
+
+
Status:
+
+ + <%= @import_report.status %> + +
+
+
+
Import Type:
+
<%= @import_report.import_type.humanize %>
+
+
+
Started At:
+
+ <%= @import_report.started_at&.strftime('%m/%d/%Y %I:%M %p') || "Not started" %> +
+
+
+
Completed At:
+
+ <%= @import_report.completed_at&.strftime('%m/%d/%Y %I:%M %p') || "Not completed" %> +
+
+ <% if @import_report.summary_stats.present? %> +
+
Summary Stats:
+
+
<%= JSON.pretty_generate(@import_report.summary_stats) %>
+
+
+ <% end %> +
+ +
+
+ +
+
+

+ Import Errors + + <%# size reuses the errors preloaded by set_import_report instead of running a COUNT query %><%= @import_errors.size %> + +

+
+ + <% if @import_errors.any? %> +
+
+
+
+ + + + + + + + + + + <% @import_errors.each do |error| %> + + + + + + + <% if error.metadata.present? %> + + + + <% end %> + <% end %> + +
Error TypeFile NameError MessageTopic ID
+ <%= error.error_type %> + +
+ <%= error.file_name || "N/A" %> +
+
+ <% if error.error_message.present? %> +
+ <%= error.error_message %> +
+ <% else %> + N/A + <% end %> +
+ <%= error.topic_id || "N/A" %> +
+ Metadata: +
<%= JSON.pretty_generate(error.metadata) %>
+
+
+
+
+
+ <% else %> +
+
+
+ +
+

No Errors Found

+

This import completed successfully without any errors.

+
+
+ <% end %> +
+ +
+ <%= link_to "Back to Import Reports", import_reports_path, class: "btn btn-secondary" %> +
diff --git a/app/views/layouts/_sidebar.html.erb b/app/views/layouts/_sidebar.html.erb index 813ead02..f50d2964 100644 --- a/app/views/layouts/_sidebar.html.erb +++ b/app/views/layouts/_sidebar.html.erb @@ -88,6 +88,13 @@ Users <% end %> + + <% end %> diff --git a/config/routes.rb b/config/routes.rb index 99145c9c..c338683c 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -17,6 +17,7 @@ get :tags end end + resources :import_reports, only: %i[index show] resource :settings, only: [] do put :provider, on: :collection end diff --git a/db/migrate/20250625101008_create_import_reports.rb b/db/migrate/20250625101008_create_import_reports.rb new file mode 100644 index 00000000..239ba154 --- /dev/null +++ b/db/migrate/20250625101008_create_import_reports.rb @@ -0,0 +1,17 @@ +class CreateImportReports < ActiveRecord::Migration[8.0] + def change + create_table :import_reports do |t| + t.string :import_type, null: false + t.datetime :started_at + t.datetime :completed_at + t.json :summary_stats + t.json :unmatched_files + t.json :error_details + t.string :status, default: 'pending' + + t.timestamps + end + + add_index :import_reports, :import_type + end +end diff --git a/db/migrate/20250625101109_create_import_errors.rb b/db/migrate/20250625101109_create_import_errors.rb new file mode 100644 index 00000000..ec984499 --- /dev/null +++ b/db/migrate/20250625101109_create_import_errors.rb @@ -0,0 +1,17 @@ +class CreateImportErrors < ActiveRecord::Migration[8.0] + def change + create_table :import_errors do |t| + t.references :import_report, null: false, foreign_key: true + t.string :error_type, null: false + t.string :file_name + t.integer :topic_id + t.text :error_message + t.json :metadata + + t.timestamps + end + + add_index :import_errors, :error_type + add_index :import_errors, :file_name + end +end diff --git a/db/schema.rb b/db/schema.rb index 6d21a792..aec9c135 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you 
check this file into your version control system. -ActiveRecord::Schema[8.0].define(version: 2025_03_29_174617) do +ActiveRecord::Schema[8.0].define(version: 2025_06_25_101109) do # These are extensions that must be enabled in order to support this database enable_extension "pg_catalog.plpgsql" @@ -60,6 +60,33 @@ t.index ["user_id"], name: "index_contributors_on_user_id" end + create_table "import_errors", force: :cascade do |t| + t.bigint "import_report_id", null: false + t.string "error_type", null: false + t.string "file_name" + t.integer "topic_id" + t.text "error_message" + t.json "metadata" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["error_type"], name: "index_import_errors_on_error_type" + t.index ["file_name"], name: "index_import_errors_on_file_name" + t.index ["import_report_id"], name: "index_import_errors_on_import_report_id" + end + + create_table "import_reports", force: :cascade do |t| + t.string "import_type", null: false + t.datetime "started_at" + t.datetime "completed_at" + t.json "summary_stats" + t.json "unmatched_files" + t.json "error_details" + t.string "status", default: "pending" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["import_type"], name: "index_import_reports_on_import_type" + end + create_table "languages", force: :cascade do |t| t.string "name" t.datetime "created_at", null: false @@ -157,6 +184,7 @@ add_foreign_key "active_storage_attachments", "active_storage_blobs", column: "blob_id" add_foreign_key "active_storage_variant_records", "active_storage_blobs", column: "blob_id" + add_foreign_key "import_errors", "import_reports" add_foreign_key "sessions", "users" add_foreign_key "tag_cognates", "tags" add_foreign_key "tag_cognates", "tags", column: "cognate_id" diff --git a/lib/autorequire/data_import.rb b/lib/autorequire/data_import.rb index cb98f9e7..cc2608f1 100644 --- a/lib/autorequire/data_import.rb +++ b/lib/autorequire/data_import.rb 
@@ -8,6 +8,21 @@ def self.reset
     self.import_all
   end
 
+  # Resets the database without importing training documents: runs
+  # destroy_all_data, then the region, provider, language, topic, tag and
+  # topic-tag imports, and finally restore_default_users.
+  def self.quick_reset
+    self.destroy_all_data
+    self.import_regions
+    self.import_providers
+    self.import_languages
+    self.import_topics
+    self.import_tags
+    self.import_topic_tags
+    self.restore_default_users
+  end
+
+  # Destroys all data in the database. Use with caution!
   def self.destroy_all_data
     TagCognate.destroy_all
     ActsAsTaggableOn::Tagging.destroy_all
@@ -187,17 +202,48 @@ def self.restore_default_users
   end
 
+  # Imports training documents and tracks the run in an ImportReport.
+  #
+  # The report is created as "pending", becomes "planned" once CSV rows have
+  # been matched against the Azure files, and "completed" after the
+  # attachments were processed. Any StandardError marks it "failed" and is
+  # re-raised.
   def self.import_training_documents
-    csv_data = load_training_documents_csv
-    import_stats = initialize_import_stats
+    report = ImportReport.create!(
+      import_type: "training_documents",
+      started_at: Time.current,
+      status: "pending"
+    )
 
-    valid_csv_rows = filter_rows_with_existing_topics(csv_data, import_stats)
-    azure_files = fetch_azure_files
-    importable_rows = match_csv_with_azure_files(valid_csv_rows, azure_files)
+    begin
+      csv_data = load_training_documents_csv
+      import_stats = initialize_import_stats
+
+      valid_csv_rows = filter_rows_with_existing_topics(csv_data, import_stats)
+      azure_files = fetch_azure_files
+      importable_rows = match_csv_with_azure_files(valid_csv_rows, azure_files)
+      unmatched_files = collect_unmatched_files(csv_data, azure_files, importable_rows, report)
+
+      report.update!(
+        summary_stats: build_summary_stats(import_stats, csv_data, azure_files),
+        unmatched_files: unmatched_files,
+        status: "planned"
+      )
 
-    log_import_summary(valid_csv_rows, azure_files, importable_rows)
+      process_document_attachments(importable_rows, import_stats, report)
 
-    process_document_attachments(importable_rows, import_stats)
-    log_final_results(import_stats)
+      report.update!(
+        completed_at: Time.current,
+        status: "completed",
+        summary_stats: build_summary_stats(import_stats, csv_data, azure_files),
+        unmatched_files: unmatched_files
+      )
+
+      log_final_results(import_stats)
+    rescue => e
+      # Keep the exception class next to the message so failures can be told apart
+      report.update!(status: "failed", error_details: [ { error: e.message, error_type: e.class.to_s } ])
+      raise
+    end
   end
 
   private
@@ -233,16 +279,16 @@ def self.match_csv_with_azure_files(csv_rows, azure_files)
     end
   end
 
-  def self.process_document_attachments(rows, stats)
+  def self.process_document_attachments(rows, stats, report)
     rows.each do |row|
       topic = Topic.find_by(id: row["Topic_ID"])
       next unless topic
 
-      attach_document_to_topic(topic, row, stats)
+      attach_document_to_topic(topic, row, stats, report)
     end
   end
 
-  def self.attach_document_to_topic(topic, row, stats)
+  def self.attach_document_to_topic(topic, row, stats, report)
     file_path = get_file_path(topic.state, topic.language.name)
     filename = row["File_Name"]
 
@@ -264,7 +310,7 @@ def self.attach_document_to_topic(topic, row, stats)
       end
 
     rescue AzureFileShares::Errors::ApiError, URI::InvalidURIError => e
-      handle_attachment_error(topic, filename, e, stats)
+      handle_attachment_error(topic, filename, e, stats, report)
     end
   end
 
@@ -277,20 +323,87 @@ def self.download_azure_file(file_path, filename)
     )
   end
 
-  def self.handle_attachment_error(topic, filename, error, stats)
-    error_info = {
+  # Persists an attachment failure as an ImportError on +report+, appends it
+  # to stats[:error_files] and prints it.
+  def self.handle_attachment_error(topic, filename, error, stats, report)
+    ImportError.create!(
+      import_report_id: report.id,
+      topic_id: topic.id,
+      file_name: filename,
+      error_type: error.class.to_s,
+      error_message: error.message
+    )
+
+    stats[:error_files] << {
       topic: topic,
       file: filename,
       error: error.message,
     }
-    stats[:error_files] << error_info
     puts "Error with file: #{filename} for topic #{topic.title} - #{error.message}"
   end
 
-  def self.log_import_summary(csv_rows, azure_files, importable_rows)
-    puts "CSV rows with topics: #{csv_rows.size}"
-    puts "Azure files found: #{azure_files.size}"
-    puts "Importable files: #{importable_rows.size}"
+  # Compares the file names in the CSV with those found in Azure and stores an
+  # ImportError on +report+ for every name that exists on only one side.
+  #
+  # Returns a hash with :csv_without_azure_files, :azure_files_without_csv and
+  # :total_unmatched (all known names that are not among importable_rows).
+  def self.collect_unmatched_files(csv_data, azure_files, importable_rows, report)
+    # filter_map drops missing names, which would otherwise crash the topic id
+    # lookup below and abort the whole import.
+    csv_file_names = csv_data.filter_map { |row| row["File_Name"] }
+    azure_file_names = azure_files.filter_map { |file| file[:name] }
+    matched_file_names = importable_rows.filter_map { |row| row["File_Name"] }
+
+    unmatched = {
+      csv_without_azure_files: csv_file_names - azure_file_names,
+      azure_files_without_csv: azure_file_names - csv_file_names,
+      total_unmatched: (csv_file_names + azure_file_names - matched_file_names).uniq,
+    }
+
+    unmatched[:csv_without_azure_files].each do |file_name|
+      record_unmatched_file(report, file_name, "CSV::Row::File not found in Azure", "CSV Row File not found in Azure")
+    end
+
+    unmatched[:azure_files_without_csv].each do |file_name|
+      record_unmatched_file(report, file_name, "Azure::File not found in CSV", "Azure::File not found in CSV")
+    end
+
+    unmatched
+  end
+
+  # Stores one ImportError for a file name that exists on only one side.
+  def self.record_unmatched_file(report, file_name, error_type, error_message)
+    ImportError.create!(
+      import_report_id: report.id,
+      file_name: file_name,
+      error_type: error_type,
+      error_message: error_message,
+      topic_id: topic_id_from_file_name(file_name)
+    )
+  end
+
+  # Reads the leading digits of a file name as a topic id (assumes names
+  # look like "<topic id>_..." -- TODO confirm). Returns nil when there is no
+  # numeric prefix or it exceeds the 4-byte integer topic_id column, so a
+  # stray file name can neither be stored as topic 0 nor raise on save.
+  def self.topic_id_from_file_name(file_name)
+    digits = file_name.to_s[/\A\d+/]
+    return unless digits
+
+    topic_id = digits.to_i
+    topic_id if topic_id <= 2_147_483_647
+  end
+
+  # Builds the counters that import_training_documents stores in summary_stats.
+  def self.build_summary_stats(import_stats, csv_data, azure_files)
+    {
+      total_csv_files: csv_data.size,
+      total_azure_files: azure_files.size,
+      successful_attachments: import_stats[:successful_attachments].size,
+      failed_attachments: import_stats[:failed_attachments].size,
+      topics_without_csv: import_stats[:topics_without_csv].size,
+      error_files: import_stats[:error_files].size,
+    }
   end
 
   def self.log_final_results(stats)