Skip to content

Commit a00da48

Browse files
committed
wip: save non matching data and errors #216
1 parent 528d493 commit a00da48

File tree

1 file changed

+52
-8
lines changed

1 file changed

+52
-8
lines changed

lib/autorequire/data_import.rb

Lines changed: 52 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -187,17 +187,38 @@ def self.restore_default_users
187187
end
188188

189189
# Runs the training-document import and records its outcome in an ImportReport.
#
# An ImportReport row is created up front (status "pending"), then the CSV is
# loaded, rows are filtered to those with existing topics, matched against the
# Azure file listing, and the matched rows are handed to
# process_document_attachments. On success the report is updated with
# "completed", summary statistics, the unmatched-file breakdown and the
# collected error files.
#
# Raises whatever StandardError the import pipeline raises; before re-raising,
# the report is marked "failed" with the error message. A failure of
# ImportReport.create! itself propagates untouched (no report exists yet).
def self.import_training_documents
  report = ImportReport.create!(
    import_type: "training_documents",
    started_at: Time.current,
    status: "pending"
  )

  begin
    csv_data = load_training_documents_csv
    import_stats = initialize_import_stats

    valid_csv_rows = filter_rows_with_existing_topics(csv_data, import_stats)
    azure_files = fetch_azure_files
    importable_rows = match_csv_with_azure_files(valid_csv_rows, azure_files)
    # Uses the unfiltered csv_data so rows dropped by the topic filter still
    # show up in the unmatched report.
    unmatched_files = collect_unmatched_files(csv_data, azure_files, importable_rows)

    log_import_summary(valid_csv_rows, azure_files, importable_rows)

    process_document_attachments(importable_rows, import_stats)

    report.update!(
      completed_at: Time.current,
      status: "completed",
      summary_stats: build_summary_stats(import_stats, csv_data, azure_files),
      unmatched_files: unmatched_files,
      error_details: import_stats[:error_files]
    )

    log_final_results(import_stats)
  rescue => e
    record_import_failure(report, e)
    # Bare raise re-raises the original import error with its backtrace.
    raise
  end
end

# Marks +report+ as failed with +error+'s message. Any StandardError raised
# while saving the report is logged and swallowed so that it cannot replace
# the original import error the caller is about to re-raise.
def self.record_import_failure(report, error)
  report.update!(status: "failed", error_details: [ { error: error.message } ])
rescue => update_error
  puts "Could not mark import report as failed: #{update_error.message}"
end
202223

203224
private
@@ -287,6 +308,29 @@ def self.handle_attachment_error(topic, filename, error, stats)
287308
puts "Error with file: #{filename} for topic #{topic.title} - #{error.message}"
288309
end
289310

311+
# Builds the unmatched-file breakdown stored on the import report.
#
# csv_data        - enumerable of rows responding to ["File_Name"]
# azure_files     - enumerable of hashes with a :name entry
# importable_rows - the CSV rows that were matched for import
#
# Returns a Hash with three arrays of file names:
#   :csv_without_azure - names listed in the CSV but absent from Azure
#   :azure_without_csv - names present in Azure but absent from the CSV
#   :total_unmatched   - every known name that is not among the matched rows
#
# Names are compared by exact equality.
# NOTE(review): assumes CSV "File_Name" and Azure :name use the same form
# (no path prefix / case differences) — confirm against the matcher.
def self.collect_unmatched_files(csv_data, azure_files, importable_rows)
  # Drop rows without a file name and collapse duplicates so every list in
  # the report contains each real file name at most once.
  csv_file_names = csv_data.map { |row| row["File_Name"] }.compact.uniq
  azure_file_names = azure_files.map { |file| file[:name] }.compact.uniq
  matched_file_names = importable_rows.map { |row| row["File_Name"] }

  {
    csv_without_azure: csv_file_names - azure_file_names,
    azure_without_csv: azure_file_names - csv_file_names,
    # Array#| is an order-preserving union, so CSV names come first.
    total_unmatched: (csv_file_names | azure_file_names) - matched_file_names
  }
end
322+
323+
# Condenses the import bookkeeping into plain counts for the import report.
#
# import_stats - Hash keyed by :successful_attachments, :failed_attachments,
#                :topics_without_csv and :error_files
# csv_data     - the loaded CSV rows (only its size is used)
# azure_files  - the Azure file listing (only its size is used)
#
# Returns a Hash of Integer counts. A stats entry that is missing or nil is
# counted as 0 instead of raising, so a gap in the bookkeeping cannot turn an
# otherwise finished import into a failure while the report is being written.
#
# NOTE(review): assumes every stats entry is a collection (responds to #size
# with an element count) — confirm against initialize_import_stats.
def self.build_summary_stats(import_stats, csv_data, azure_files)
  count_of = ->(key) { import_stats[key]&.size || 0 }

  {
    total_csv_files: csv_data.size,
    total_azure_files: azure_files.size,
    successful_attachments: count_of.call(:successful_attachments),
    failed_attachments: count_of.call(:failed_attachments),
    topics_without_csv: count_of.call(:topics_without_csv),
    error_files: count_of.call(:error_files)
  }
end
333+
290334
def self.log_import_summary(csv_rows, azure_files, importable_rows)
291335
puts "CSV rows with topics: #{csv_rows.size}"
292336
puts "Azure files found: #{azure_files.size}"

0 commit comments

Comments
 (0)