Skip to content

Commit d2d3b5d

Browse files
Rework batch CSV generator for multiple versions
The CSV generator now takes a submission query, and creates a set of CSVs based on how many different versions it can pick out. If two submissions have different versions but would still have the same CSV headers, then they will be part of the same CSV. Co-authored-by: Stephen Daly <stephen.daly@digital.cabinet-office.gov.uk>
1 parent 21fcaa3 commit d2d3b5d

File tree

8 files changed

+199
-165
lines changed

8 files changed

+199
-165
lines changed

app/jobs/send_submission_batch_job.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def perform(form_id:, mode_string:, date:, delivery:)
1818
mode = Mode.new(mode_string)
1919
set_submission_batch_logging_attributes(form:, mode:)
2020

21-
message_id = AwsSesSubmissionBatchService.new(submissions:, form:, date:, mode:).send_batch
21+
message_id = AwsSesSubmissionBatchService.new(submissions_query: submissions, form:, date:, mode:).send_batch
2222

2323
delivery.update!(
2424
delivery_reference: message_id,

app/lib/csv_generator.rb

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,22 @@ def generate_submission(submission:, is_s3_submission:)
99
end
1010
end
1111

12-
def generate_batched_submissions(submissions:, is_s3_submission:)
13-
CSV.generate do |csv|
14-
csv << headers(submissions.first, is_s3_submission)
15-
submissions.each do |submission|
16-
csv << values_for_submission(submission, is_s3_submission)
12+
def generate_batched_submissions(submissions_query:, is_s3_submission:)
13+
sorted_submissions = submissions_query.ordered_by_form_version_and_date
14+
15+
rows_by_version = []
16+
17+
sorted_submissions.each do |submission|
18+
current_headers = rows_by_version.last&.first
19+
unless current_headers == headers(submission, is_s3_submission)
20+
# Start a new CSV if the headers are different to the previous submission
21+
rows_by_version << [headers(submission, is_s3_submission)]
1722
end
23+
rows_by_version.last << values_for_submission(submission, is_s3_submission)
24+
end
25+
26+
rows_by_version.map do |rows|
27+
CSV.generate { |csv| rows.each { |line| csv << line } }
1828
end
1929
end
2030

app/models/submission.rb

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -42,25 +42,6 @@ def self.sent?(reference)
4242
submission&.single_submission_delivery&.delivery_reference&.present?
4343
end
4444

45-
def self.group_by_form_version(submissions)
46-
submission_by_version = {}
47-
last_version = nil
48-
49-
# For forms that have the same updated_at timestamp, we know they will be identical. If two forms have different
50-
# updated_at timestamps, we check to see if their steps are the same. If they are, we group those forms' submissions
51-
# together.
52-
submissions.group_by { |submission| submission.form.updated_at }.sort.to_h.each do |updated_at, submissions|
53-
if last_version && last_version.steps == submissions.first.form.steps
54-
submission_by_version[last_version.updated_at].push(*submissions)
55-
else
56-
submission_by_version[updated_at] = submissions
57-
last_version = submissions.first.form
58-
end
59-
end
60-
61-
submission_by_version
62-
end
63-
6445
private
6546

6647
def mode_object

app/services/aws_ses_submission_batch_service.rb

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
class AwsSesSubmissionBatchService
2-
def initialize(submissions:, form:, date:, mode:)
3-
@submissions = submissions
2+
def initialize(submissions_query:, form:, date:, mode:)
3+
@submissions_query = submissions_query
44
@form = form
55
@date = date
66
@mode = mode
@@ -24,11 +24,11 @@ def send_batch
2424
def deliver_batch_email
2525
files = {}
2626

27-
submissions_by_version = Submission.group_by_form_version(@submissions)
28-
submissions_by_version.each_value.with_index(1) do |submissions, version_number|
29-
form_version = submissions_by_version.size > 1 ? version_number : nil
30-
filename = SubmissionFilenameGenerator.batch_csv_filename(form_name: @form.name, date: @date, mode: @mode, form_version: form_version)
31-
files[filename] = CsvGenerator.generate_batched_submissions(submissions: submissions, is_s3_submission: false)
27+
csvs = CsvGenerator.generate_batched_submissions(submissions_query: @submissions_query, is_s3_submission: false)
28+
csvs.each.with_index(1) do |csv, index|
29+
csv_version = csvs.size > 1 ? index : nil
30+
filename = SubmissionFilenameGenerator.batch_csv_filename(form_name: @form.name, date: @date, mode: @mode, form_version: csv_version)
31+
files[filename] = csv
3232
end
3333

3434
mail = AwsSesSubmissionBatchMailer.batch_submission_email(form: @form, date: @date, mode: @mode, files:).deliver_now

spec/factories/v2_step.rb

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,25 @@
8686
end
8787
end
8888

89+
trait :with_name_settings do
90+
transient do
91+
input_type { "first_and_last_name" }
92+
title_needed { "false" }
93+
end
94+
95+
answer_type { "name" }
96+
answer_settings do
97+
DataStruct.new(
98+
input_type:,
99+
title_needed:,
100+
)
101+
end
102+
end
103+
104+
trait :with_file_upload_settings do
105+
answer_type { "file" }
106+
end
107+
89108
trait :with_repeatable do
90109
is_repeatable { true }
91110
end

spec/lib/csv_generator_spec.rb

Lines changed: 124 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,23 @@
11
require "rails_helper"
22

33
RSpec.describe CsvGenerator do
4-
let(:form_document) { build(:v2_form_document, form_id: 42, available_languages:) }
5-
let(:submission) { build(:submission, form_document:, created_at: timestamp, reference: submission_reference, mode:, submission_locale:) }
6-
let(:page) { build :page }
7-
let(:text_question) { build :text, :with_answer, question_text: "What is the meaning of life?" }
8-
let(:name_question) { build :first_middle_last_name_question, question_text: "What is your name?" }
9-
let(:file_question) { build :file, :with_uploaded_file, question_text: "Upload a file", original_filename: "test.txt" }
10-
let(:first_step) { build :step, question: text_question }
11-
let(:second_step) { build :step, question: name_question }
12-
let(:third_step) { build :step, question: file_question }
13-
let(:all_steps) { [first_step, second_step, third_step] }
14-
let(:journey) { instance_double(Flow::Journey, all_steps:, completed_file_upload_questions: [file_question]) }
4+
let(:form_document) { build(:v2_form_document, form_id: 42, available_languages:, steps: [text_step, name_step, file_upload_step], start_page: text_step[:id]) }
5+
let(:submission) { create(:submission, form_document:, created_at: timestamp, reference: submission_reference, mode:, submission_locale:, answers:) }
6+
let(:text_step) { build :v2_question_page_step, :with_text_settings, question_text: "What is the meaning of life?", next_step_id: name_step[:id] }
7+
let(:name_step) { build :v2_question_page_step, :with_name_settings, question_text: "What is your name?", next_step_id: file_upload_step[:id] }
8+
let(:file_upload_step) { build :v2_question_page_step, :with_file_upload_settings, question_text: "Upload a file" }
9+
let(:text_answer) { "blue" }
10+
let(:first_name_answer) { "Alice" }
11+
let(:last_name_answer) { "Smith" }
12+
let(:file_upload_answer) { "file.txt" }
13+
let(:answers) do
14+
{
15+
text_step[:id] => { "text" => text_answer },
16+
name_step[:id] => { "first_name" => first_name_answer, "last_name" => last_name_answer },
17+
file_upload_step[:id] => { "original_filename" => file_upload_answer },
18+
}
19+
end
20+
1521
let(:mode) { "form" }
1622
let(:submission_reference) { Faker::Alphanumeric.alphanumeric(number: 8).upcase }
1723
let(:submission_locale) { :en }
@@ -20,10 +26,6 @@
2026
Time.use_zone("London") { Time.zone.local(2022, 9, 14, 8, 0o0, 0o0) }
2127
end
2228

23-
before do
24-
allow(Flow::Journey).to receive(:new).and_return(journey)
25-
end
26-
2729
describe "#generate_submission" do
2830
subject(:csv) { described_class.generate_submission(submission:, is_s3_submission:) }
2931

@@ -38,27 +40,38 @@
3840
expect(CSV.parse(csv)).to eq(
3941
[
4042
["Reference", "Submitted at", "What is the meaning of life?", "What is your name? - First name", "What is your name? - Last name", "Upload a file"],
41-
[submission_reference, "2022-09-14T08:00:00+01:00", text_question.text, name_question.first_name, name_question.last_name, "test_#{submission_reference}.txt"],
43+
[submission_reference, "2022-09-14T08:00:00+01:00", text_answer, first_name_answer, last_name_answer, "file_#{submission_reference}.txt"],
4244
],
4345
)
4446
end
4547

4648
context "when a question is optional and answer is not provided" do
47-
let(:text_question) { build :text, question_text: "What is the meaning of life?", is_optional: true, text: nil }
49+
let(:text_answer) { "" }
4850

4951
it "generates a CSV including blank column for unanswered optional question" do
5052
expect(CSV.parse(csv)).to eq(
5153
[
5254
["Reference", "Submitted at", "What is the meaning of life?", "What is your name? - First name", "What is your name? - Last name", "Upload a file"],
53-
[submission_reference, "2022-09-14T08:00:00+01:00", "", name_question.first_name, name_question.last_name, "test_#{submission_reference}.txt"],
55+
[submission_reference, "2022-09-14T08:00:00+01:00", "", first_name_answer, last_name_answer, "file_#{submission_reference}.txt"],
5456
],
5557
)
5658
end
5759
end
5860

5961
context "when there are repeated steps" do
60-
let(:name_question_repeated) { build :first_middle_last_name_question, question_text: "What is your name?" }
61-
let(:second_step) { build :repeatable_step, questions: [name_question, name_question_repeated] }
62+
let(:name_step) { build :v2_question_page_step, :with_name_settings, question_text: "What is your name?", next_step_id: file_upload_step[:id], is_repeatable: true }
63+
let(:another_first_name_answer) { "John" }
64+
let(:another_last_name_answer) { "Smith" }
65+
let(:answers) do
66+
{
67+
text_step[:id] => { "text" => text_answer },
68+
name_step[:id] => [
69+
{ "first_name" => first_name_answer, "last_name" => last_name_answer },
70+
{ "first_name" => another_first_name_answer, "last_name" => another_last_name_answer },
71+
],
72+
file_upload_step[:id] => { "original_filename" => file_upload_answer },
73+
}
74+
end
6275

6376
it "generates a CSV with headers containing suffixes for the repeated steps" do
6477
expect(CSV.parse(csv)).to eq(
@@ -76,12 +89,12 @@
7689
[
7790
submission_reference,
7891
"2022-09-14T08:00:00+01:00",
79-
text_question.text,
80-
name_question.first_name,
81-
name_question.last_name,
82-
name_question_repeated.first_name,
83-
name_question_repeated.last_name,
84-
"test_#{submission_reference}.txt",
92+
text_answer,
93+
first_name_answer,
94+
last_name_answer,
95+
another_first_name_answer,
96+
another_last_name_answer,
97+
"file_#{submission_reference}.txt",
8598
],
8699
],
87100
)
@@ -95,7 +108,7 @@
95108
expect(CSV.parse(csv)).to eq(
96109
[
97110
["Reference", "Submitted at", "What is the meaning of life?", "What is your name? - First name", "What is your name? - Last name", "Upload a file", "Language"],
98-
[submission_reference, "2022-09-14T08:00:00+01:00", text_question.text, name_question.first_name, name_question.last_name, "test_#{submission_reference}.txt", "en"],
111+
[submission_reference, "2022-09-14T08:00:00+01:00", text_answer, first_name_answer, last_name_answer, "file_#{submission_reference}.txt", "en"],
99112
],
100113
)
101114
end
@@ -109,57 +122,112 @@
109122
expect(CSV.parse(csv)).to eq(
110123
[
111124
["Reference", "Submitted at", "What is the meaning of life?", "What is your name? - First name", "What is your name? - Last name", "Upload a file"],
112-
[submission_reference, "2022-09-14T08:00:00+01:00", text_question.text, name_question.first_name, name_question.last_name, file_question.original_filename],
125+
[submission_reference, "2022-09-14T08:00:00+01:00", text_answer, first_name_answer, last_name_answer, file_upload_answer],
113126
],
114127
)
115128
end
116129
end
117130
end
118131

119132
describe "#generate_batched_submissions" do
120-
subject(:csv) { described_class.generate_batched_submissions(submissions:, is_s3_submission:) }
133+
subject(:csv_list) { described_class.generate_batched_submissions(submissions_query:, is_s3_submission:) }
121134

122-
let(:submission_2) { build(:submission, form_document:, created_at: timestamp + 1.hour, reference: submission_reference_2, mode:, submission_locale: :cy) }
123135
let(:submission_reference_2) { Faker::Alphanumeric.alphanumeric(number: 8).upcase }
124-
let(:submissions) { [submission, submission_2] }
125136
let(:is_s3_submission) { false }
137+
let(:submissions_query) { Submission.all }
126138

127-
it "generates a CSV with multiple rows for the submissions" do
128-
expect(CSV.parse(csv)).to eq(
129-
[
130-
["Reference", "Submitted at", "What is the meaning of life?", "What is your name? - First name", "What is your name? - Last name", "Upload a file"],
131-
[submission_reference, "2022-09-14T08:00:00+01:00", text_question.text, name_question.first_name, name_question.last_name, "test_#{submission_reference}.txt"],
132-
[submission_reference_2, "2022-09-14T09:00:00+01:00", text_question.text, name_question.first_name, name_question.last_name, "test_#{submission_reference_2}.txt"],
133-
],
134-
)
135-
end
139+
context "when all submissions result in the same CSV headers" do
140+
before do
141+
submission
142+
create(:submission, form_document:, created_at: timestamp + 1.hour, reference: submission_reference_2, mode:, submission_locale: :cy, answers:)
143+
end
136144

137-
context "when the form has multiple available languages" do
138-
let(:available_languages) { %w[en cy] }
145+
it "returns an array with a single CSV string" do
146+
expect(csv_list).to be_a(Array)
147+
expect(csv_list.size).to eq(1)
148+
expect(csv_list.first).to be_a(String)
149+
end
139150

140-
it "generates a CSV with a language column" do
141-
expect(CSV.parse(csv)).to eq(
151+
it "generates a CSV with multiple rows for the submissions" do
152+
csv = CSV.parse(csv_list.first)
153+
expect(csv.size).to eq(3) # header row + 2 submissions
154+
expect(csv).to eq(
142155
[
143-
["Reference", "Submitted at", "What is the meaning of life?", "What is your name? - First name", "What is your name? - Last name", "Upload a file", "Language"],
144-
[submission_reference, "2022-09-14T08:00:00+01:00", text_question.text, name_question.first_name, name_question.last_name, "test_#{submission_reference}.txt", "en"],
145-
[submission_reference_2, "2022-09-14T09:00:00+01:00", text_question.text, name_question.first_name, name_question.last_name, "test_#{submission_reference_2}.txt", "cy"],
156+
["Reference", "Submitted at", "What is the meaning of life?", "What is your name? - First name", "What is your name? - Last name", "Upload a file"],
157+
[submission_reference, "2022-09-14T08:00:00+01:00", text_answer, first_name_answer, last_name_answer, "file_#{submission_reference}.txt"],
158+
[submission_reference_2, "2022-09-14T09:00:00+01:00", text_answer, first_name_answer, last_name_answer, "file_#{submission_reference_2}.txt"],
146159
],
147160
)
148161
end
162+
163+
context "when the form has multiple available languages" do
164+
let(:available_languages) { %w[en cy] }
165+
166+
it "generates a CSV with a language column" do
167+
expect(CSV.parse(csv_list.first)).to eq(
168+
[
169+
["Reference", "Submitted at", "What is the meaning of life?", "What is your name? - First name", "What is your name? - Last name", "Upload a file", "Language"],
170+
[submission_reference, "2022-09-14T08:00:00+01:00", text_answer, first_name_answer, last_name_answer, "file_#{submission_reference}.txt", "en"],
171+
[submission_reference_2, "2022-09-14T09:00:00+01:00", text_answer, first_name_answer, last_name_answer, "file_#{submission_reference_2}.txt", "cy"],
172+
],
173+
)
174+
end
175+
end
176+
177+
context "when the CSV is being sent to an S3 bucket" do
178+
let(:is_s3_submission) { true }
179+
180+
it "generates a CSV without including the submission reference in the filename for the file upload question" do
181+
expect(CSV.parse(csv_list.first)).to eq(
182+
[
183+
["Reference", "Submitted at", "What is the meaning of life?", "What is your name? - First name", "What is your name? - Last name", "Upload a file"],
184+
[submission_reference, "2022-09-14T08:00:00+01:00", text_answer, first_name_answer, last_name_answer, file_upload_answer],
185+
[submission_reference_2, "2022-09-14T09:00:00+01:00", text_answer, first_name_answer, last_name_answer, file_upload_answer],
186+
],
187+
)
188+
end
189+
end
149190
end
150191

151-
context "when the CSV is being sent to an S3 bucket" do
152-
let(:is_s3_submission) { true }
192+
context "when all submissions result in different CSV headers" do
193+
let(:form_document) do
194+
build(
195+
:v2_form_document,
196+
steps: [text_step, name_step, file_upload_step],
197+
start_page: text_step[:id],
198+
updated_at: Time.utc(2022, 9, 14, 7, 0, 0).iso8601(3),
199+
)
200+
end
153201

154-
it "generates a CSV without including the submission reference in the filename for the file upload question" do
155-
expect(CSV.parse(csv)).to eq(
156-
[
157-
["Reference", "Submitted at", "What is the meaning of life?", "What is your name? - First name", "What is your name? - Last name", "Upload a file"],
158-
[submission_reference, "2022-09-14T08:00:00+01:00", text_question.text, name_question.first_name, name_question.last_name, file_question.original_filename],
159-
[submission_reference_2, "2022-09-14T09:00:00+01:00", text_question.text, name_question.first_name, name_question.last_name, file_question.original_filename],
160-
],
202+
let(:form_document_same_steps) do
203+
build(
204+
:v2_form_document,
205+
steps: [text_step, name_step, file_upload_step],
206+
start_page: text_step[:id],
207+
updated_at: Time.utc(2022, 9, 15, 7, 0, 0).iso8601(3),
161208
)
162209
end
210+
211+
let(:form_document_different_steps) do
212+
build(
213+
:v2_form_document,
214+
steps: [name_step, file_upload_step],
215+
start_page: name_step[:id],
216+
updated_at: Time.utc(2022, 9, 16, 7, 0, 0).iso8601(3),
217+
)
218+
end
219+
220+
before do
221+
create_list(:submission, 2, form_document:, answers:)
222+
create_list(:submission, 3, form_document: form_document_same_steps, answers:)
223+
create_list(:submission, 4, form_document: form_document_different_steps, answers:)
224+
end
225+
226+
it "creates a CSV for each incompatible set of form versions" do
227+
expect(csv_list.count).to eq(2)
228+
expect(CSV.parse(csv_list[0]).count).to eq(6)
229+
expect(CSV.parse(csv_list[1]).count).to eq(5)
230+
end
163231
end
164232
end
165233
end

0 commit comments

Comments
 (0)