Skip to content

Commit 8d1dbc1

Browse files
committed
chore: Extract CSV parsing logic and optimize site import flow
- Introduce `CsvSiteParser`
- Simplify `SiteUpload` model
- Add `ProcessSiteUploadJob` and `ProcessSingleSiteJob` for parallelized site processing
- Fix audit callback to use `after_create` instead of `after_create_commit`
1 parent 9d3e51b commit 8d1dbc1

File tree

7 files changed

+98
-51
lines changed

7 files changed

+98
-51
lines changed

app/controllers/sites_controller.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def create
4242
def upload
4343
@upload = SiteUpload.new(site_upload_params)
4444
if @upload.save
45-
redirect_to sites_path, notice: t(".uploaded", count: @upload.count)
45+
redirect_to sites_path, notice: t(".started")
4646
else
4747
render :new, status: :unprocessable_content
4848
end
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
class ProcessSingleSiteJob < ApplicationJob
2+
queue_as :default
3+
4+
def perform(site_data, team_id, tag_ids)
5+
team = Team.find(team_id)
6+
7+
url = site_data["url"]
8+
name = site_data["name"]
9+
tag_names = site_data["tag_names"] || []
10+
11+
row_tag_ids = tag_names.map { |tag_name| team.tags.find_or_create_by(name: tag_name).id }
12+
combined_tag_ids = (tag_ids + row_tag_ids).uniq
13+
14+
site = team.sites.find_by_url(url: url)
15+
16+
if site
17+
site.tag_ids = combined_tag_ids.union(site.tag_ids)
18+
site.name = name if name.present? && site.name.blank?
19+
site.save!
20+
site.audit!
21+
else
22+
Site.create!(url: url, team: team, name: name, tag_ids: combined_tag_ids)
23+
end
24+
end
25+
end
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Fans a parsed site upload out into one ProcessSingleSiteJob per row,
# enqueued in a single bulk call.
class ProcessSiteUploadJob < ApplicationJob
  queue_as :default

  # sites_data - Array of site hashes produced by CsvSiteParser#parse.
  # team_id    - id of the Team the sites belong to.
  # tag_ids    - tag ids selected on the upload form, applied to every row.
  def perform(sites_data, team_id, tag_ids)
    # Build the jobs with map (no manual accumulation), then enqueue them
    # all at once — one queue round trip instead of one per row.
    site_jobs = sites_data.map { |site_data| ProcessSingleSiteJob.new(site_data, team_id, tag_ids) }

    ActiveJob.perform_all_later(site_jobs) if site_jobs.any?
  end
end

app/models/audit.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ class Audit < ApplicationRecord
22
belongs_to :site, touch: true, counter_cache: true
33
has_many :checks, -> { prioritized }, dependent: :destroy
44

5-
after_create_commit :create_checks
5+
after_create :create_checks
66
after_create_commit :schedule
77

88
validates :url, presence: true, url: true

app/models/site_upload.rb

Lines changed: 7 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -15,32 +15,23 @@ class SiteUpload
1515
].freeze
1616
MAX_FILE_SIZE = 5.megabytes
1717
REQUIRED_HEADERS = ["url"].freeze
18-
SUPPORTED_SEPARATORS = [",", ";"].freeze
19-
BOM = /^\xEF\xBB\xBF/
2018

21-
attr_accessor :file, :team, :tag_ids, :tags, :new_sites, :existing_sites
19+
attr_accessor :file, :team, :tag_ids
2220

2321
validates :file, :team, presence: true
2422
validate :valid_file_size, :valid_file_format, :valid_headers, if: :file
2523

26-
delegate :create!, :transaction, :human, to: :Site
27-
2824
def initialize(attributes = {})
2925
super
3026
@tag_ids ||= []
31-
@new_sites = {}
32-
@existing_sites = {}
3327
end
3428

3529
def save
3630
return false unless valid?
31+
sites_data = parser.parse
3732

38-
parse_sites
33+
ProcessSiteUploadJob.perform_later(sites_data, team.id, tag_ids)
3934

40-
transaction do
41-
create!(new_sites.values) if new_sites.any?
42-
existing_sites.values.each { |site| site.save && site.audit! }
43-
end
4435
true
4536
end
4637

@@ -58,43 +49,13 @@ def assign_attributes(attributes)
5849
end
5950

6051
def count
61-
(new_sites&.length || 0) + (existing_sites&.length || 0)
62-
end
63-
64-
def parse_sites
65-
require "csv"
66-
67-
CSV.foreach(file.path, headers: true, encoding: "bom|utf-8", col_sep:) do |row|
68-
row = row.to_h.transform_keys { |header| header.to_s.downcase } # Case-insensitive headers
69-
70-
url = Link.normalize(row["url"])
71-
name = row["nom"] || row["name"]
72-
tag_names = row["tags"].present? ? row["tags"].split(",").map(&:strip).compact_blank.uniq : []
73-
74-
row_tag_ids = tag_names.map { |n| team.tags.find_or_create_by(name: n).id }
75-
combined_tag_ids = (tag_ids + row_tag_ids).uniq
76-
existing_site = team.sites.find_by_url(url:)
77-
78-
if existing_site
79-
existing_site.assign_attributes(tag_ids: combined_tag_ids.union(existing_site.tag_ids))
80-
existing_site.assign_attributes(name:) unless existing_site.name
81-
self.existing_sites[url] = existing_site
82-
else
83-
self.new_sites[url] = { url:, team:, name:, tag_ids: combined_tag_ids }
84-
end
85-
end
52+
file ? parser.count : 0
8653
end
8754

8855
private
8956

90-
def first_line
91-
@first_line ||= File.open(file.path, &:gets)&.strip&.sub(BOM, "") || ""
92-
end
93-
94-
def col_sep
95-
SUPPORTED_SEPARATORS.max_by { |sep| first_line.count(sep) }
96-
rescue StandardError
97-
SUPPORTED_SEPARATORS.first
57+
def parser
58+
@parser ||= CsvSiteParser.new(file)
9859
end
9960

10061
def valid_file_size
@@ -107,10 +68,7 @@ def valid_file_format
10768
end
10869

10970
def valid_headers
110-
headers = CSV.parse_line(first_line, col_sep:) || []
111-
missing_headers = REQUIRED_HEADERS - headers.compact.collect(&:downcase)
71+
missing_headers = REQUIRED_HEADERS - parser.headers
11272
errors.add(:file, :invalid_headers) unless missing_headers.empty?
113-
rescue CSV::MalformedCSVError, StandardError
114-
errors.add(:file, :invalid_headers)
11573
end
11674
end

app/services/csv_site_parser.rb

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Parses an uploaded CSV of sites into an array of plain, job-serializable
# hashes: { "url" => String, "name" => String or nil, "tag_names" => Array }.
# Supports "," or ";" separators, a UTF-8 BOM, and case-insensitive headers.
class CsvSiteParser
  require "csv"

  # Strips a leading UTF-8 byte-order mark from the header line.
  BOM = /^\xEF\xBB\xBF/
  SUPPORTED_SEPARATORS = [",", ";"].freeze

  # file - an uploaded file object responding to #path.
  def initialize(file)
    @file = file
  end

  # Parses every row. Rows sharing a normalized URL are merged: their tag
  # names are unioned and the first non-blank name wins.
  def parse
    sites_by_url = {}

    CSV.foreach(@file.path, headers: true, encoding: "bom|utf-8", col_sep: detect_col_sep) do |row|
      row = row.to_h.transform_keys { |header| header.to_s.downcase } # case-insensitive headers

      url = Link.normalize(row["url"]).to_s
      name = row["nom"] || row["name"]
      tag_names = row["tags"].present? ? row["tags"].split(",").map(&:strip).compact_blank.uniq : []

      if (existing = sites_by_url[url])
        existing["tag_names"] = (existing["tag_names"] + tag_names).uniq
        # Fix: a duplicate row's name used to be discarded even when the
        # first occurrence had none — keep the first non-blank name.
        existing["name"] = name if existing["name"].blank?
      else
        sites_by_url[url] = {
          "url" => url,
          "name" => name,
          "tag_names" => tag_names
        }
      end
    end

    sites_by_url.values
  end

  # Number of distinct sites in the file. Required by SiteUpload#count,
  # which previously raised NoMethodError because this was missing.
  def count
    parse.size
  end

  # Downcased, non-blank header names from the first line. Returns [] when
  # the line is not parseable CSV, so callers can report invalid headers
  # instead of crashing (the pre-refactor code rescued this error).
  def headers
    @headers ||= (CSV.parse_line(first_line, col_sep: detect_col_sep) || []).compact_blank.map(&:downcase)
  rescue CSV::MalformedCSVError
    @headers = []
  end

  private

  def first_line
    @first_line ||= File.open(@file.path, &:gets)&.strip&.sub(BOM, "") || ""
  end

  # Picks whichever supported separator occurs most often in the header
  # line; memoized so the file's first line is only inspected once.
  def detect_col_sep
    @detect_col_sep ||=
      begin
        SUPPORTED_SEPARATORS.max_by { |sep| first_line.count(sep) }
      rescue StandardError
        SUPPORTED_SEPARATORS.first
      end
  end
end

config/locales/fr.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ fr:
291291
new_audit: Site existant, nouvel audit programmé
292292
upload:
293293
title: Importer des sites
294+
started: Import démarré
294295
uploaded:
295296
zero: Aucun site ajouté
296297
one: Un site ajouté

0 commit comments

Comments
 (0)