Skip to content

Commit f270a56

Browse files
Josyannfreesteph
authored and committed
chore: Extract CSV parsing logic and optimize site import flow
- Introduce `CsvSiteParser` - Simplify `SiteUpload` model - Add `ProcessSiteUploadJob` and `ProcessSingleSiteJob` for parallelized site processing - Fix audit callback to use `after_create` instead of `after_create_commit` Signed-off-by: Manon Budin <manon.budin@beta.gouv.fr>
1 parent cbd934a commit f270a56

File tree

13 files changed

+327
-260
lines changed

13 files changed

+327
-260
lines changed

app/controllers/sites_controller.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def create
4242
def upload
4343
@upload = SiteUpload.new(site_upload_params)
4444
if @upload.save
45-
redirect_to sites_path, notice: t(".uploaded", count: @upload.count)
45+
redirect_to sites_path, notice: t(".started")
4646
else
4747
render :new, status: :unprocessable_content
4848
end
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Imports one site from a single parsed CSV row: creates the site when its
# URL is new to the team, otherwise merges tags (and backfills a blank name)
# on the existing record and re-audits it.
class ProcessSingleSiteJob < ApplicationJob
  queue_as :default

  # site_data - hash with "url", "name" and optional "tag_names" (Array of String)
  # team_id   - id of the owning Team
  # tag_ids   - tag ids selected at upload time, applied to every row
  def perform(site_data, team_id, tag_ids)
    owner = Team.find(team_id)

    site_url = site_data["url"]
    site_name = site_data["name"]
    csv_tag_names = site_data["tag_names"] || []

    # Resolve CSV tag names to ids (creating missing tags on the team),
    # then merge with the upload-wide tag selection.
    csv_tag_ids = csv_tag_names.map { |label| owner.tags.find_or_create_by(name: label).id }
    all_tag_ids = (tag_ids + csv_tag_ids).uniq

    existing = owner.sites.find_by_url(url: site_url)

    if existing.nil?
      Site.create!(url: site_url, team: owner, name: site_name, tag_ids: all_tag_ids)
    else
      existing.tag_ids = all_tag_ids.union(existing.tag_ids)
      # Only fill in a name when the record has none; never overwrite.
      existing.name = site_name if site_name.present? && existing.name.blank?
      existing.save!
      existing.audit!
    end
  end
end
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Fans a parsed CSV upload out into one ProcessSingleSiteJob per row so the
# rows are processed in parallel by the queue workers.
class ProcessSiteUploadJob < ApplicationJob
  queue_as :default

  # sites_data - array of row hashes produced by CsvSiteParser#parse_data!
  # team_id    - id of the owning Team, forwarded to each row job
  # tag_ids    - upload-wide tag ids, forwarded to each row job
  def perform(sites_data, team_id, tag_ids)
    row_jobs = sites_data.map { |row| ProcessSingleSiteJob.new(row, team_id, tag_ids) }

    # perform_all_later enqueues every job in a single bulk call; skip when
    # the upload produced no rows.
    ActiveJob.perform_all_later(row_jobs) if row_jobs.any?
  end
end

app/models/audit.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ class Audit < ApplicationRecord
22
belongs_to :site, touch: true, counter_cache: true
33
has_many :checks, -> { prioritized }, dependent: :destroy
44

5-
after_create_commit :fetch_resources!, :create_checks
5+
after_create_commit :fetch_resources!, :create_checks, :schedule
66

77
validates :url, presence: true, url: true
88
normalizes :url, with: ->(url) { Link.normalize(url).to_s }
@@ -33,6 +33,10 @@ def page(kind)
3333
build_page(kind, page_url)
3434
end
3535

36+
# Enqueues background processing for this audit, tagging the job with a
# per-audit group key ("audit_<id>") for the queue adapter.
def schedule
  ProcessAuditJob.set(group: "audit_#{id}").perform_later(self)
end
39+
3640
def all_checks
3741
Check.names.map { |name| public_send(name) }
3842
end

app/models/site_upload.rb

Lines changed: 6 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -15,32 +15,23 @@ class SiteUpload
1515
].freeze
1616
MAX_FILE_SIZE = 5.megabytes
1717
REQUIRED_HEADERS = ["url"].freeze
18-
SUPPORTED_SEPARATORS = [",", ";"].freeze
19-
BOM = /^\xEF\xBB\xBF/
2018

21-
attr_accessor :file, :team, :tag_ids, :tags, :new_sites, :existing_sites
19+
attr_accessor :file, :team, :tag_ids
2220

2321
validates :file, :team, presence: true
2422
validate :valid_file_size, :valid_file_format, :valid_headers, if: :file
2523

26-
delegate :create!, :transaction, :human, to: :Site
27-
2824
def initialize(attributes = {})
2925
super
3026
@tag_ids ||= []
31-
@new_sites = {}
32-
@existing_sites = {}
3327
end
3428

3529
def save
3630
return false unless valid?
31+
sites_data = parser.parse_data!
3732

38-
parse_sites
33+
ProcessSiteUploadJob.perform_later(sites_data, team.id, tag_ids)
3934

40-
transaction do
41-
create!(new_sites.values) if new_sites.any?
42-
existing_sites.values.each { |site| site.save && site.audit! }
43-
end
4435
true
4536
end
4637

@@ -59,44 +50,10 @@ def assign_attributes(attributes)
5950
super(attributes.slice(:team).merge(attributes))
6051
end
6152

62-
def count
63-
(new_sites&.length || 0) + (existing_sites&.length || 0)
64-
end
65-
66-
def parse_sites
67-
require "csv"
68-
69-
CSV.foreach(file.path, headers: true, encoding: "bom|utf-8", col_sep:) do |row|
70-
row = row.to_h.transform_keys { |header| header.to_s.downcase } # Case-insensitive headers
71-
72-
url = Link.normalize(row["url"])
73-
name = row["nom"] || row["name"]
74-
tag_names = row["tags"].present? ? row["tags"].split(",").map(&:strip).compact_blank.uniq : []
75-
76-
row_tag_ids = tag_names.map { |n| team.tags.find_or_create_by(name: n).id }
77-
combined_tag_ids = (tag_ids + row_tag_ids).uniq
78-
existing_site = team.sites.find_by_url(url:)
79-
80-
if existing_site
81-
existing_site.assign_attributes(tag_ids: combined_tag_ids.union(existing_site.tag_ids))
82-
existing_site.assign_attributes(name:) unless existing_site.name
83-
self.existing_sites[url] = existing_site
84-
else
85-
self.new_sites[url] = { url:, team:, name:, tag_ids: combined_tag_ids }
86-
end
87-
end
88-
end
89-
9053
private
9154

92-
def first_line
93-
@first_line ||= File.open(file.path, &:gets)&.strip&.sub(BOM, "") || ""
94-
end
95-
96-
def col_sep
97-
SUPPORTED_SEPARATORS.max_by { |sep| first_line.count(sep) }
98-
rescue StandardError
99-
SUPPORTED_SEPARATORS.first
55+
def parser
56+
@parser ||= CsvSiteParser.new(file)
10057
end
10158

10259
def valid_file_size
@@ -109,8 +66,7 @@ def valid_file_format
10966
end
11067

11168
def valid_headers
112-
headers = CSV.parse_line(first_line, col_sep:) || []
113-
missing_headers = REQUIRED_HEADERS - headers.compact.collect(&:downcase)
69+
missing_headers = REQUIRED_HEADERS - parser.headers
11470
errors.add(:file, :invalid_headers) unless missing_headers.empty?
11571
rescue CSV::MalformedCSVError, StandardError
11672
errors.add(:file, :invalid_headers)

app/services/csv_site_parser.rb

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Parses an uploaded CSV of sites into plain hashes suitable for enqueuing
# as background-job arguments. Detects "," vs ";" separators, tolerates a
# UTF-8 BOM and case-insensitive headers, and de-duplicates rows by URL.
class CsvSiteParser
  require "csv"

  BOM = /^\xEF\xBB\xBF/
  SUPPORTED_SEPARATORS = [",", ";"].freeze

  # file - an uploaded file object responding to #path
  def initialize(file)
    @file = file
  end

  # Reads the whole file and returns one hash per unique URL:
  #   { "url" => String, "name" => String/nil, "tag_names" => [String] }
  # Rows sharing a URL are merged: tag names are unioned, and a later row's
  # name backfills an earlier blank one (previously the name was dropped).
  # Raises CSV::MalformedCSVError on invalid input.
  def parse_data!
    sites_by_url = {}

    CSV.foreach(@file.path, headers: true, encoding: "bom|utf-8", col_sep: detect_col_sep) do |row|
      row = row.to_h.transform_keys { |header| header.to_s.downcase } # case-insensitive headers

      url = Link.normalize(row["url"]).to_s
      name = row["nom"] || row["name"] # accept French or English header
      tag_names = row["tags"].present? ? row["tags"].split(",").map(&:strip).compact_blank.uniq : []

      if (seen = sites_by_url[url])
        seen["tag_names"] = (seen["tag_names"] + tag_names).uniq
        # Fix: keep the first non-blank name instead of losing later ones.
        seen["name"] = name if seen["name"].blank? && name.present?
      else
        sites_by_url[url] = {
          "url" => url,
          "name" => name,
          "tag_names" => tag_names
        }
      end
    end

    sites_by_url.values
  end

  # Downcased, blank-stripped header names from the file's first line.
  def headers
    @headers ||= (CSV.parse_line(first_line, col_sep: detect_col_sep) || []).compact_blank.map(&:downcase)
  end

  private

  # First physical line of the file, BOM- and whitespace-stripped; "" when
  # the file is empty.
  def first_line
    @first_line ||= File.open(@file.path, &:gets)&.strip&.sub(BOM, "") || ""
  end

  # Picks the supported separator occurring most often in the first line.
  # Memoized: previously this re-scanned the first line on every call.
  def detect_col_sep
    @detect_col_sep ||= SUPPORTED_SEPARATORS.max_by { |sep| first_line.count(sep) }
  rescue StandardError
    @detect_col_sep = SUPPORTED_SEPARATORS.first
  end
end

config/locales/fr.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ fr:
328328
new_audit: Site existant, nouvel audit programmé
329329
upload:
330330
title: Importer des sites
331+
started: Import démarré
331332
uploaded:
332333
zero: Aucun site ajouté
333334
one: Un site ajouté
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
require "rails_helper"

# Specs for ProcessSingleSiteJob: one job run per parsed CSV row, covering
# site creation, merging into an existing site, and tag de-duplication.
RSpec.describe ProcessSingleSiteJob do
  describe "#perform" do
    subject(:run_process_single_site_job) { described_class.new.perform(site_data, team.id, extra_tag_ids) }

    let(:team) { create(:team) }
    # Shape matches the row hashes produced by CsvSiteParser#parse_data!.
    let(:site_data) do
      {
        "url" => "https://example.com/",
        "name" => "New Name",
        "tag_names" => ["tag_1", "tag_2"]
      }
    end
    let(:extra_tag_ids) { [] }

    context "when the site does not exist" do
      it "creates a new site" do
        expect { run_process_single_site_job }.to change(Site, :count).by(1)
      end

      it "sets the correct attributes" do
        run_process_single_site_job

        site = Site.last
        expect(site.url).to eq("https://example.com/")
        expect(site.name).to eq("New Name")
        expect(site.team).to eq(team)
      end

      it "creates and associates the tags" do
        run_process_single_site_job

        site = Site.last
        expect(site.tags.map(&:name)).to contain_exactly("tag_1", "tag_2")
      end

      # extra_tag_ids simulates tags picked in the upload form, applied on
      # top of the per-row CSV tags.
      context "with extra tags provided" do
        let(:extra_tag) { create(:tag, team: team, name: "extra_tag") }
        let(:extra_tag_ids) { [extra_tag.id] }

        it "associates both CSV tags and extra tags" do
          run_process_single_site_job

          expect(Site.last.tags.map(&:name)).to contain_exactly("tag_1", "tag_2", "extra_tag")
        end
      end

      context "when the name is missing in the CSV" do
        let(:site_data) { { "url" => "https://example.com/", "tag_names" => [] } }

        it "creates the site without a name" do
          run_process_single_site_job

          expect(Site.last.name).to be_nil
        end
      end
    end

    context "when the site already exists" do
      let!(:site) { create(:site, team: team, url: "https://example.com/", name: "Original Name") }
      let!(:existing_tag) { create(:tag, team: team, name: "existing_tag") }

      before do
        site.tags << existing_tag
      end

      it "does not create a new site" do
        expect { run_process_single_site_job }.not_to change(Site, :count)
      end

      # Only tag_1 and tag_2 are new; existing_tag must be reused.
      it "creates only new tags" do
        expect { run_process_single_site_job }.to change(Tag, :count).by(2)
      end

      it "merges the new tags with the existing ones" do
        run_process_single_site_job

        expect(site.reload.tags.map(&:name)).to contain_exactly("existing_tag", "tag_1", "tag_2")
      end

      context "when the site name is blank" do
        before { site.update!(name: nil) }

        it "updates the name from the CSV" do
          expect { run_process_single_site_job }.to change { site.reload.name }.from(nil).to("New Name")
        end
      end

      context "when the site name is already present" do
        it "does not overwrite the existing name" do
          expect { run_process_single_site_job }.not_to change { site.reload.name }
        end
      end
    end

    # Same tag arrives via the CSV row (twice) and via extra_tag_ids; only
    # one association may result.
    context "with duplicate tags" do
      let(:site_data) do
        {
          "url" => "https://example.com/",
          "tag_names" => ["tag", "tag"]
        }
      end
      let(:duplicate_tag) { create(:tag, team: team, name: "tag") }
      let(:extra_tag_ids) { [duplicate_tag.id] }

      it "deduplicates the tags" do
        run_process_single_site_job

        expect(Site.last.tags.map(&:name)).to contain_exactly("tag")
      end
    end
  end
end
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
require "rails_helper"

# Specs for ProcessSiteUploadJob: verifies the fan-out job enqueues one
# ProcessSingleSiteJob per parsed CSV row.
RSpec.describe ProcessSiteUploadJob do
  subject(:job) { described_class.new }

  let(:team) { create(:team) }
  # Two rows in the shape produced by CsvSiteParser#parse_data!.
  let(:sites_data) do
    [
      { "url" => "https://example.com/", "name" => "Example Site", "tag_names" => ["tag1"] },
      { "url" => "https://test.com/", "name" => "Test Site", "tag_names" => ["tag2"] }
    ]
  end
  let(:tag_ids) { [] }

  describe "#perform" do
    it "creates ProcessSingleSiteJob for each site" do
      expect { job.perform(sites_data, team.id, tag_ids) }
        .to have_enqueued_job(ProcessSingleSiteJob).exactly(:twice)
    end
  end
end

spec/models/check_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
end
5757

5858
describe "#run" do
59-
let(:check) { build(:check, :accessibility_mention) }
59+
let(:check) { create(:check, :accessibility_mention) }
6060

6161
context "when the analyze method goes well" do
6262
before do

0 commit comments

Comments
 (0)