Skip to content

Commit cbb5b73

Browse files
committed
chore: Extract CSV parsing logic and optimize site import flow
- Introduce `CsvSiteParser` - Simplify `SiteUpload` model - Add `ProcessSiteUploadJob` and `ProcessSingleSiteJob` for parallelized site processing - Fix audit callback to use `after_create` instead of `after_create_commit`
1 parent b2260f3 commit cbb5b73

File tree

14 files changed

+401
-254
lines changed

14 files changed

+401
-254
lines changed

app/controllers/sites_controller.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def create
4242
def upload
4343
@upload = SiteUpload.new(site_upload_params)
4444
if @upload.save
45-
redirect_to sites_path, notice: t(".uploaded", count: @upload.count)
45+
redirect_to sites_path, notice: t(".started")
4646
else
4747
render :new, status: :unprocessable_content
4848
end
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
class ProcessSingleSiteJob < ApplicationJob
2+
queue_as :default
3+
4+
def perform(site_data, team_id, tag_ids)
5+
team = Team.find(team_id)
6+
7+
url = site_data["url"]
8+
name = site_data["name"]
9+
tag_names = site_data["tag_names"] || []
10+
11+
row_tag_ids = tag_names.map { |tag_name| team.tags.find_or_create_by(name: tag_name).id }
12+
combined_tag_ids = (tag_ids + row_tag_ids).uniq
13+
14+
site = team.sites.find_by_url(url: url)
15+
16+
if site
17+
site.tag_ids = combined_tag_ids.union(site.tag_ids)
18+
site.name = name if name.present? && site.name.blank?
19+
site.save!
20+
site.audit!
21+
else
22+
Site.create!(url: url, team: team, name: name, tag_ids: combined_tag_ids)
23+
end
24+
end
25+
end
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
class ProcessSiteUploadJob < ApplicationJob
2+
queue_as :default
3+
4+
def perform(sites_data, team_id, tag_ids)
5+
site_jobs = []
6+
7+
sites_data.each do |site_data|
8+
site_jobs << ProcessSingleSiteJob.new(site_data, team_id, tag_ids)
9+
end
10+
11+
ActiveJob.perform_all_later(site_jobs) if site_jobs.any?
12+
end
13+
end

app/models/audit.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ class Audit < ApplicationRecord
22
belongs_to :site, touch: true, counter_cache: true
33
has_many :checks, -> { prioritized }, dependent: :destroy
44

5-
after_create_commit :create_checks
5+
after_create :create_checks
66
after_create_commit :schedule
77

88
validates :url, presence: true, url: true

app/models/site.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def url=(new_url)
4343
def url_without_scheme_and_www = Link.url_without_scheme_and_www(audit.url)
4444

4545
def name_with_fallback = name.presence || url_without_scheme_and_www
46+
4647
alias to_title name_with_fallback
4748
alias to_s name_with_fallback
4849

app/models/site_upload.rb

Lines changed: 6 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -15,32 +15,23 @@ class SiteUpload
1515
].freeze
1616
MAX_FILE_SIZE = 5.megabytes
1717
REQUIRED_HEADERS = ["url"].freeze
18-
SUPPORTED_SEPARATORS = [",", ";"].freeze
19-
BOM = /^\xEF\xBB\xBF/
2018

21-
attr_accessor :file, :team, :tag_ids, :tags, :new_sites, :existing_sites
19+
attr_accessor :file, :team, :tag_ids
2220

2321
validates :file, :team, presence: true
2422
validate :valid_file_size, :valid_file_format, :valid_headers, if: :file
2523

26-
delegate :create!, :transaction, :human, to: :Site
27-
2824
def initialize(attributes = {})
2925
super
3026
@tag_ids ||= []
31-
@new_sites = {}
32-
@existing_sites = {}
3327
end
3428

3529
def save
3630
return false unless valid?
31+
sites_data = parser.parse
3732

38-
parse_sites
33+
ProcessSiteUploadJob.perform_later(sites_data, team.id, tag_ids)
3934

40-
transaction do
41-
create!(new_sites.values) if new_sites.any?
42-
existing_sites.values.each { |site| site.save && site.audit! }
43-
end
4435
true
4536
end
4637

@@ -57,44 +48,10 @@ def assign_attributes(attributes)
5748
super(attributes.slice(:team).merge(attributes))
5849
end
5950

60-
def count
61-
(new_sites&.length || 0) + (existing_sites&.length || 0)
62-
end
63-
64-
def parse_sites
65-
require "csv"
66-
67-
CSV.foreach(file.path, headers: true, encoding: "bom|utf-8", col_sep:) do |row|
68-
row = row.to_h.transform_keys { |header| header.to_s.downcase } # Case-insensitive headers
69-
70-
url = Link.normalize(row["url"])
71-
name = row["nom"] || row["name"]
72-
tag_names = row["tags"].present? ? row["tags"].split(",").map(&:strip).compact_blank.uniq : []
73-
74-
row_tag_ids = tag_names.map { |n| team.tags.find_or_create_by(name: n).id }
75-
combined_tag_ids = (tag_ids + row_tag_ids).uniq
76-
existing_site = team.sites.find_by_url(url:)
77-
78-
if existing_site
79-
existing_site.assign_attributes(tag_ids: combined_tag_ids.union(existing_site.tag_ids))
80-
existing_site.assign_attributes(name:) unless existing_site.name
81-
self.existing_sites[url] = existing_site
82-
else
83-
self.new_sites[url] = { url:, team:, name:, tag_ids: combined_tag_ids }
84-
end
85-
end
86-
end
87-
8851
private
8952

90-
def first_line
91-
@first_line ||= File.open(file.path, &:gets)&.strip&.sub(BOM, "") || ""
92-
end
93-
94-
def col_sep
95-
SUPPORTED_SEPARATORS.max_by { |sep| first_line.count(sep) }
96-
rescue StandardError
97-
SUPPORTED_SEPARATORS.first
53+
def parser
54+
@parser ||= CsvSiteParser.new(file)
9855
end
9956

10057
def valid_file_size
@@ -107,8 +64,7 @@ def valid_file_format
10764
end
10865

10966
def valid_headers
110-
headers = CSV.parse_line(first_line, col_sep:) || []
111-
missing_headers = REQUIRED_HEADERS - headers.compact.collect(&:downcase)
67+
missing_headers = REQUIRED_HEADERS - parser.headers
11268
errors.add(:file, :invalid_headers) unless missing_headers.empty?
11369
rescue CSV::MalformedCSVError, StandardError
11470
errors.add(:file, :invalid_headers)

app/services/csv_site_parser.rb

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
class CsvSiteParser
2+
require "csv"
3+
4+
BOM = /^\xEF\xBB\xBF/
5+
SUPPORTED_SEPARATORS = [",", ";"].freeze
6+
7+
def initialize(file)
8+
@file = file
9+
end
10+
11+
def parse
12+
sites_by_url = {}
13+
14+
CSV.foreach(@file.path, headers: true, encoding: "bom|utf-8", col_sep: detect_col_sep) do |row|
15+
row = row.to_h.transform_keys { |header| header.to_s.downcase }
16+
17+
url = Link.normalize(row["url"]).to_s
18+
name = row["nom"] || row["name"]
19+
tag_names = row["tags"].present? ? row["tags"].split(",").map(&:strip).compact_blank.uniq : []
20+
21+
if sites_by_url[url]
22+
sites_by_url[url]["tag_names"] = (sites_by_url[url]["tag_names"] + tag_names).uniq
23+
else
24+
sites_by_url[url] = {
25+
"url" => url,
26+
"name" => name,
27+
"tag_names" => tag_names
28+
}
29+
end
30+
end
31+
32+
sites_by_url.values
33+
end
34+
35+
def headers
36+
@headers ||= (CSV.parse_line(first_line, col_sep: detect_col_sep) || []).compact_blank.map(&:downcase)
37+
end
38+
39+
private
40+
41+
def first_line
42+
@first_line ||= File.open(@file.path, &:gets)&.strip&.sub(BOM, "") || ""
43+
end
44+
45+
def detect_col_sep
46+
SUPPORTED_SEPARATORS.max_by { |sep| first_line.count(sep) }
47+
rescue StandardError
48+
SUPPORTED_SEPARATORS.first
49+
end
50+
end

config/locales/fr.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ fr:
328328
new_audit: Site existant, nouvel audit programmé
329329
upload:
330330
title: Importer des sites
331+
started: Import démarré
331332
uploaded:
332333
zero: Aucun site ajouté
333334
one: Un site ajouté
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
require "rails_helper"
2+
3+
RSpec.describe ProcessSingleSiteJob do
4+
subject(:job) { described_class.new }
5+
6+
let(:team) { create(:team) }
7+
let(:site_data) { { "url" => "https://example.com/", "name" => "Example Site", "tag_names" => ["tag1", "tag2"] } }
8+
let(:tag_ids) { [] }
9+
10+
describe "#perform" do
11+
context "when site does not exist" do
12+
it "creates a new site with tags" do
13+
expect {
14+
job.perform(site_data, team.id, tag_ids)
15+
}.to change(Site, :count).by(1)
16+
17+
site = Site.last
18+
expect(site.url).to eq("https://example.com/")
19+
expect(site.name).to eq("Example Site")
20+
expect(site.tags.pluck(:name)).to match_array(["tag1", "tag2"])
21+
end
22+
23+
it "creates tags if they don't exist" do
24+
expect {
25+
job.perform(site_data, team.id, tag_ids)
26+
}.to change(Tag, :count).by(2)
27+
end
28+
29+
context "with form tag_ids" do
30+
let!(:form_tag) { create(:tag, name: "form_tag", team: team) }
31+
let(:tag_ids) { [form_tag.id] }
32+
33+
it "combines CSV tags with form tags" do
34+
job.perform(site_data, team.id, tag_ids)
35+
36+
site = Site.last
37+
expect(site.tags.pluck(:name)).to match_array(["tag1", "tag2", "form_tag"])
38+
end
39+
end
40+
41+
context "without name in CSV" do
42+
let(:site_data) { { "url" => "https://example.com/", "name" => nil, "tag_names" => [] } }
43+
44+
it "creates site without name" do
45+
job.perform(site_data, team.id, tag_ids)
46+
47+
site = Site.last
48+
expect(site.name).to be_nil
49+
end
50+
end
51+
end
52+
53+
context "when site already exists" do
54+
let!(:existing_site) { create(:site, url: "https://example.com/", name: nil, team: team) }
55+
let!(:existing_tag) { create(:tag, name: "existing_tag", team: team) }
56+
57+
before do
58+
existing_site.tags << existing_tag
59+
end
60+
61+
it "does not create a new site" do
62+
expect {
63+
job.perform(site_data, team.id, tag_ids)
64+
}.not_to change(Site, :count)
65+
end
66+
67+
it "merges tags with existing tags" do
68+
job.perform(site_data, team.id, tag_ids)
69+
70+
existing_site.reload
71+
expect(existing_site.tags.pluck(:name)).to match_array(["existing_tag", "tag1", "tag2"])
72+
end
73+
74+
it "sets name if site has no name" do
75+
job.perform(site_data, team.id, tag_ids)
76+
77+
existing_site.reload
78+
expect(existing_site.name).to eq("Example Site")
79+
end
80+
81+
it "does not overwrite existing name" do
82+
existing_site.update!(name: "Old Name")
83+
84+
job.perform(site_data, team.id, tag_ids)
85+
86+
existing_site.reload
87+
expect(existing_site.name).to eq("Old Name")
88+
end
89+
90+
it "triggers audit on existing site" do
91+
expect(existing_site).to receive(:audit!)
92+
93+
job.perform(site_data, team.id, tag_ids)
94+
end
95+
96+
context "with form tag_ids" do
97+
let!(:form_tag) { create(:tag, name: "form_tag", team: team) }
98+
let(:tag_ids) { [form_tag.id] }
99+
100+
it "combines all tags" do
101+
job.perform(site_data, team.id, tag_ids)
102+
103+
existing_site.reload
104+
expect(existing_site.tags.pluck(:name)).to match_array(["existing_tag", "tag1", "tag2", "form_tag"])
105+
end
106+
end
107+
end
108+
109+
context "with duplicate tags in CSV" do
110+
let(:site_data) { { "url" => "https://example.com/", "name" => "Example Site", "tag_names" => ["tag1", "tag1", "tag2"] } }
111+
112+
it "deduplicates tags" do
113+
expect {
114+
job.perform(site_data, team.id, tag_ids)
115+
}.to change(Tag, :count).by(1)
116+
117+
site = Site.last
118+
expect(site.tags.pluck(:name)).to match_array(["tag1", "tag2"])
119+
end
120+
end
121+
122+
context "with existing tags" do
123+
let!(:existing_tag) { create(:tag, name: "tag1", team: team) }
124+
125+
it "reuses existing tags instead of creating new ones" do
126+
expect {
127+
job.perform(site_data, team.id, tag_ids)
128+
}.to change(Tag, :count).by(1)
129+
130+
site = Site.last
131+
expect(site.tags).to include(existing_tag)
132+
end
133+
end
134+
end
135+
end

0 commit comments

Comments
 (0)