Skip to content

Commit ad08252

Browse files
authored
Import training documents (#210)
2 parents efd1510 + 5c8a5a7 commit ad08252

File tree

9 files changed

+347
-49
lines changed

9 files changed

+347
-49
lines changed

Makefile

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ RESET := \033[0m
1010

1111
# Commands configuration
1212
COMPOSE_CMD = COMPOSE_PROJECT_NAME=$(PROJECT_NAME) docker compose -f docker-compose.dev.yml
13-
DOCKER_TEST_CMD = $(COMPOSE_CMD) exec app bundle exec bash -c "export RAILS_ENV=test && rspec --format documentation"
1413
EXEC_CMD = $(COMPOSE_CMD) exec app
1514

1615
.PHONY: help build rebuild stop start restart logs shell console format test test_fast db_reset migrate clean clean_volumes
@@ -69,10 +68,10 @@ format:
6968
$(EXEC_CMD) bundle exec rubocop --autocorrect-all
7069

7170
test:
72-
$(DOCKER_TEST_CMD)
71+
$(COMPOSE_CMD) exec app bundle exec bash -c "export RAILS_ENV=test && rspec --format documentation"
7372

7473
test_fast:
75-
$(DOCKER_TEST_CMD) --fail-fast
74+
$(COMPOSE_CMD) exec app bundle exec bash -c "export RAILS_ENV=test && rspec --format documentation --fail-fast"
7675

7776
migrate:
7877
$(EXEC_CMD) bundle exec rails db:migrate

app/controllers/topics_controller.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
class TopicsController < ApplicationController
2+
include ActiveStorage::SetCurrent
23
include Pagy::Backend
34

45
before_action :set_topic, only: [ :show, :edit, :tags, :update, :destroy, :archive ]

app/helpers/topics_helper.rb

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# View helpers for previewing the documents attached to a topic.
module TopicsHelper
  # Renders an inline preview for an attached file, picked by content type.
  #
  # Images, PDFs, video and audio each get an embedded viewer/player; any
  # other (or missing) content type falls back to a plain link to the file.
  #
  # @param file [#content_type, #url, #filename] the attachment to preview
  # @return the markup produced by the matching tag helper
  def card_preview_media(file)
    case file.content_type
    in /image/ then render_image(file)
    in /pdf/ then render_pdf(file)
    in /video/ then render_video(file)
    in /audio/ then render_audio(file)
    else render_download_link(file)
    end
  end

  private

  # Full-width responsive image.
  def render_image(file)
    image_tag(file.url, class: "img-fluid w-100")
  end

  # Embeds the PDF through <object>, with an <iframe> and finally a download
  # link as progressively simpler fallbacks.
  def render_pdf(file)
    content_tag(:div, class: "embed-responsive embed-responsive-item embed-responsive-16by9 w-100") do
      content_tag(:object, data: file.url, type: "application/pdf", width: "100%", height: "400px") do
        content_tag(:iframe, src: file.url, width: "100%", height: "100%", style: "border: none;") do
          content_tag(:p) do
            # safe_join keeps the link markup intact; interpolating link_to into
            # a plain String made content_tag escape the <a> tag into visible text.
            safe_join([ "Your browser does not support PDF viewing. ", link_to("Download the PDF", file.url) ])
          end
        end
      end
    end
  end

  # Full-width video player. `controls: true` is required for the browser to
  # show play/pause UI, matching the audio player below.
  def render_video(file)
    video_tag(file.url, controls: true, style: "width: 100%")
  end

  # Full-width audio player with browser controls.
  def render_audio(file)
    audio_tag(file.url, controls: true, style: "width: 100%")
  end

  # Fallback for types without an inline preview: a link labelled with the filename.
  def render_download_link(file)
    link_to file.filename, file.url
  end
end

app/models/topic.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@ class Topic < ApplicationRecord
2626
include Taggable
2727

2828
STATES = %i[active archived].freeze
29-
CONTENT_TYPES = %w[image/jpeg image/png image/svg+xml image/webp image/avif image/gif video/mp4 application/pdf].freeze
29+
CONTENT_TYPES = %w[image/jpeg image/png image/svg+xml image/webp image/avif image/gif video/mp4 application/pdf audio/mpeg].freeze
3030

3131
belongs_to :language
3232
belongs_to :provider
3333
has_many_attached :documents
3434

3535
validates :title, :language_id, :provider_id, :published_at, presence: true
36-
validates :documents, content_type: CONTENT_TYPES, size: { less_than: 10.megabytes }
36+
validates :documents, content_type: CONTENT_TYPES, size: { less_than: 200.megabytes }
3737

3838
enum :state, STATES.map.with_index.to_h
3939

app/views/topics/_topic.html.erb

Lines changed: 72 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,79 @@
11
<div id="<%= dom_id topic %>">
2-
<p>
3-
<strong>UID:</strong>
4-
<%= topic.uid %>
5-
</p>
6-
7-
<p>
8-
<strong>Title:</strong>
9-
<%= topic.title %>
10-
</p>
11-
12-
<p>
13-
<strong>Description:</strong>
14-
<%= topic.description %>
15-
</p>
16-
17-
<p>
18-
<strong>Provider:</strong>
19-
<%= link_to topic.provider.name, topic.provider %>
20-
</p>
21-
22-
<p>
23-
<strong>Language:</strong>
24-
<%= link_to topic.language.name, topic.language %>
25-
</p>
26-
27-
<p>
28-
<strong>Publishing at:</strong>
29-
<%= topic.published_at.strftime('%m/%d/%Y') %>
30-
</p>
31-
32-
<div>
33-
<p>
34-
<strong>Tags:</strong>
35-
<% topic.current_tags.each do |tag| %>
36-
<%= link_to tag.name, tag_path(tag), class: "badge bg-success", target: "_blank" %>
37-
<% end %>
38-
</p>
2+
<div class="section">
3+
<h3 class="mb-4">Topic: <%= topic.id %></h3>
4+
<div class="card mb-6">
5+
<div class="card-header">
6+
<div class="card-title">
7+
<h3><%= topic.title %></h3>
8+
</div>
9+
</div>
10+
<div class="card-body">
11+
<div class="row mb-2">
12+
<div class="col-md-3"><strong>UID:</strong></div>
13+
<div class="col-md-9"><%= topic.uid %></div>
14+
</div>
15+
<div class="row mb-2">
16+
<div class="col-md-3"><strong>Description:</strong></div>
17+
<div class="col-md-9"><%= topic.description %></div>
18+
</div>
19+
<div class="row mb-2">
20+
<div class="col-md-3"><strong>Provider:</strong></div>
21+
<div class="col-md-9"><%= link_to topic.provider.name, topic.provider, class: "text-decoration-none" %></div>
22+
</div>
23+
<div class="row mb-2">
24+
<div class="col-md-3"><strong>Language:</strong></div>
25+
<div class="col-md-9"><%= link_to topic.language.name, topic.language, class: "text-decoration-none" %></div>
26+
</div>
27+
<div class="row mb-2">
28+
<div class="col-md-3"><strong>Publishing at:</strong></div>
29+
<div class="col-md-9"><%= topic.published_at.strftime('%m/%d/%Y') %></div>
30+
</div>
31+
</div>
32+
<div class="card-footer">
33+
<strong>Tags:</strong>
34+
<% topic.current_tags.each do |tag| %>
35+
<%= link_to tag.name, tag_path(tag), class: "badge bg-success text-decoration-none me-1", target: "_blank" %>
36+
<% end %>
37+
</div>
38+
</div>
3939
</div>
4040

41-
<div>
42-
<strong>Documents:</strong>
43-
<ul>
41+
<div class="section">
42+
<div class="col-12">
43+
<h3 class="mb-4">Documents</h3>
44+
</div>
45+
<div>
4446
<% topic.documents.each do |document| %>
45-
<li><%= link_to document.filename, rails_blob_path(document), target: "_blank"%></li>
47+
<div class="card">
48+
<div class="card-content">
49+
<div class="card-body">
50+
<div class="card-title">
51+
<h4><%= document.filename %></h4>
52+
</div>
53+
</div>
54+
55+
<%= card_preview_media(document) %>
56+
57+
<div class="card-body">
58+
<div class="d-flex justify-content-between">
59+
<div>
60+
<span class="btn btn-sm btn-outline-secondary">
61+
<i class="bi bi-calendar-date"></i>
62+
<%= document.created_at.strftime('%m/%d/%Y') %>
63+
</span>
64+
<span class="btn btn-sm btn-outline-secondary">
65+
<i class="bi bi-clipboard-data"></i>
66+
<%= number_to_human_size(document.byte_size) %>
67+
</span>
68+
</div>
69+
<%= link_to rails_blob_path(document), target: "_blank", class: "btn btn-primary" do %>
70+
<i class="bi bi-file-arrow-down"></i> Download
71+
<% end %>
72+
</div>
73+
</div>
74+
</div>
75+
</div>
4676
<% end %>
47-
</ul>
77+
</div>
4878
</div>
4979
</div>

lib/autorequire/data_import.rb

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def self.import_all
2626
import_topics
2727
import_tags
2828
import_topic_tags
29+
import_training_documents
2930
restore_default_users
3031
end
3132

@@ -184,4 +185,163 @@ def self.restore_default_users
184185

185186
Provider.first.users << me unless Provider.first.users.include?(me)
186187
end
188+
189+
# Entry point for importing training documents.
#
# Reads the documents CSV, keeps the rows whose topic exists, pairs them with
# the files listed on the Azure share, attaches each paired file to its topic
# and prints a summary before and after the attachment pass.
def self.import_training_documents
  stats = initialize_import_stats
  rows_with_topics = filter_rows_with_existing_topics(load_training_documents_csv, stats)
  remote_files = fetch_azure_files
  attachable_rows = match_csv_with_azure_files(rows_with_topics, remote_files)

  log_import_summary(rows_with_topics, remote_files, attachable_rows)
  process_document_attachments(attachable_rows, stats)
  log_final_results(stats)
end
202+
203+
private
204+
205+
# Parses "CMEFiles.csv" (located through `file_path`) with the first line
# treated as the header row.
def self.load_training_documents_csv
  csv_path = file_path("CMEFiles.csv")
  CSV.read(csv_path, headers: true)
end
208+
209+
# Builds the empty accumulator used while importing: one independent, empty
# array per outcome bucket.
#
# @return [Hash{Symbol => Array}]
def self.initialize_import_stats
  %i[topics_without_csv successful_attachments failed_attachments error_files].to_h do |bucket|
    [ bucket, [] ]
  end
end
217+
218+
# Keeps only the CSV rows whose "Topic_ID" refers to an existing Topic.
#
# The ids of rows that are dropped are appended to stats[:topics_without_csv]
# (once per dropped row, in CSV order).
#
# Existing ids are resolved with a single query instead of one lookup per row.
#
# @param csv_data [Enumerable] rows responding to ["Topic_ID"]
# @param stats [Hash] import accumulator; :topics_without_csv is mutated
# @return [Array] the rows whose topic exists, in their original order
def self.filter_rows_with_existing_topics(csv_data, stats)
  # String#to_i / nil.to_i turn blank or non-numeric ids into 0.
  rows_by_topic_id = csv_data.map { |row| [ row["Topic_ID"].to_i, row ] }
  candidate_ids = rows_by_topic_id.map(&:first).uniq
  existing_ids = Topic.where(id: candidate_ids).pluck(:id).to_h { |id| [ id, true ] }

  rows_by_topic_id.filter_map do |topic_id, row|
    next row if existing_ids.key?(topic_id)

    stats[:topics_without_csv] << topic_id
    nil
  end
end
229+
230+
# Pairs every Azure file with the first CSV row whose "File_Name" equals the
# file's :name, dropping files that have no row.
#
# The result follows the order of azure_files; a file listed twice yields its
# row twice. Rows are indexed by name once so the match is linear rather than
# a full scan of csv_rows per file.
#
# NOTE(review): when several rows share one File_Name only the first is ever
# returned — confirm that is intended if a file can belong to several topics.
#
# @param csv_rows [Enumerable] rows responding to ["File_Name"]
# @param azure_files [Enumerable<Hash>] entries with a :name key
# @return [Array] matched CSV rows
def self.match_csv_with_azure_files(csv_rows, azure_files)
  first_row_by_name = csv_rows.each_with_object({}) do |row, index|
    name = row["File_Name"]
    index[name] = row unless index.key?(name)
  end

  azure_files.filter_map { |file| first_row_by_name[file[:name]] }
end
235+
236+
# Walks the importable rows and attaches each row's file to the Topic named by
# its "Topic_ID"; rows whose topic cannot be found are silently skipped.
def self.process_document_attachments(rows, stats)
  rows.each do |row|
    topic = Topic.find_by(id: row["Topic_ID"])
    attach_document_to_topic(topic, row, stats) if topic
  end
end
244+
245+
# Downloads one file from the Azure share and attaches it to the topic.
#
# The outcome is recorded in stats:
# * :successful_attachments — the topic saved with the new document
# * :failed_attachments     — the topic did not save (e.g. validation failed)
# * :error_files            — the download raised an Azure API / URI error
#
# @param topic [Topic] topic receiving the document
# @param row [#[]] CSV row; "File_Name" is read
# @param stats [Hash] import accumulator, mutated
def self.attach_document_to_topic(topic, row, stats)
  file_path = get_file_path(topic.state, topic.language.name)
  filename = row["File_Name"]

  puts "Requesting: #{file_path}/#{filename}"

  begin
    file_content = download_azure_file(file_path, filename)

    topic.documents.attach(
      io: StringIO.new(file_content),
      filename: filename,
      # detect_content_type inspects a file extension, so it must be given the
      # file name; the "File_Type" column was passed here before.
      content_type: detect_content_type(filename)
    )

    # `save` (not `save!`) so an invalid record is counted as a failed
    # attachment; `save!` raises instead of returning false, which made the
    # failure branch unreachable and aborted the whole import.
    outcome = topic.save ? :successful_attachments : :failed_attachments
    stats[outcome] << [ row, topic ]
  rescue AzureFileShares::Errors::ApiError, URI::InvalidURIError => e
    handle_attachment_error(topic, filename, e, stats)
  end
end
270+
271+
# Downloads a file from the Azure share named by ENV["AZURE_STORAGE_SHARE_NAME"].
#
# The file name is percent-encoded before being handed to the client.
# URI.encode_www_form_component produces form encoding, where a space becomes
# "+"; in a URL path "+" is a literal plus sign, so spaces are emitted as
# "%20" instead (a real "+" in the name is already encoded as "%2B").
#
# NOTE(review): assumes the client places the name into the request path
# without encoding it itself — confirm against the client library.
#
# @param file_path [String] directory inside the share
# @param filename [String] raw (unencoded) file name
# @return whatever the client's download_file returns
def self.download_azure_file(file_path, filename)
  encoded_filename = URI.encode_www_form_component(filename).gsub("+", "%20")
  AzureFileShares.client.files.download_file(
    ENV["AZURE_STORAGE_SHARE_NAME"],
    file_path,
    encoded_filename
  )
end
279+
280+
# Records a failed download in stats[:error_files] (topic, file name and the
# error's message) and prints a one-line notice.
def self.handle_attachment_error(topic, filename, error, stats)
  message = error.message
  stats[:error_files] << { topic: topic, file: filename, error: message }
  puts "Error with file: #{filename} for topic #{topic.title} - #{message}"
end
289+
290+
# Prints how many CSV rows, Azure files and matched (importable) rows were
# found, one count per line.
def self.log_import_summary(csv_rows, azure_files, importable_rows)
  puts(
    "CSV rows with topics: #{csv_rows.size}",
    "Azure files found: #{azure_files.size}",
    "Importable files: #{importable_rows.size}"
  )
end
295+
296+
# Prints the size of every bucket in the import accumulator, one per line.
def self.log_final_results(stats)
  puts(
    "Topics not found: #{stats[:topics_without_csv].size}",
    "Successful attachments: #{stats[:successful_attachments].size}",
    "Failed attachments: #{stats[:failed_attachments].size}",
    "Files with errors: #{stats[:error_files].size}"
  )
end
302+
303+
private
304+
305+
# Maps a topic state and language name to the directory used on the Azure
# share. Only the four combinations below are known; any other pair raises
# NoMatchingPatternError.
def self.get_file_path(state, language)
  case [ state, language ]
  in [ "active", "english" ] then "CMES-Pi/assets/Content"
  in [ "archived", "english" ] then "CMES-Pi_Archive"
  in [ "active", "spanish" ] then "SP_CMES-Pi/assets/Content"
  in [ "archived", "spanish" ] then "SP_CMES-Pi_Archive"
  end
end
317+
318+
# Lists the four known directories of the Azure share (active/archived ×
# english/spanish, in that order) and returns their :files entries as one
# flat array.
def self.fetch_azure_files
  client = AzureFileShares.client
  locations = [
    %w[active english],
    %w[active spanish],
    %w[archived english],
    %w[archived spanish],
  ]

  # Issue every listing request first, then pull out the file entries.
  listings = locations.map do |state, language|
    client.files.list(ENV["AZURE_STORAGE_SHARE_NAME"], get_file_path(state, language))
  end

  listings.map { |listing| listing[:files] }.flatten
end
332+
333+
# Guesses a MIME type from a file name's extension (case-insensitive).
#
# Covers the extensions of the document content types the Topic model accepts
# (audio/mpeg, video/mp4, application/pdf and the image types); anything else,
# including a nil or extension-less name, yields "application/octet-stream".
#
# @param filename [String, nil] file name or path
# @return [String] MIME type
def self.detect_content_type(filename)
  # to_s so a missing name falls through to the default instead of raising.
  case File.extname(filename.to_s).downcase
  when ".mp3" then "audio/mpeg"
  when ".mp4" then "video/mp4"
  when ".pdf" then "application/pdf"
  when ".jpg", ".jpeg" then "image/jpeg"
  when ".png" then "image/png"
  when ".gif" then "image/gif"
  when ".svg" then "image/svg+xml"
  when ".webp" then "image/webp"
  when ".avif" then "image/avif"
  else "application/octet-stream"
  end
end
187347
end

0 commit comments

Comments
 (0)