Skip to content

Commit ce3a82e

Browse files
committed
Support adding ndjson and csv docs in batches
1 parent 9123ebb commit ce3a82e

File tree

2 files changed

+103
-42
lines changed

2 files changed

+103
-42
lines changed

lib/meilisearch/index.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,20 @@ def add_documents_in_batches(documents, batch_size = 1000, primary_key = nil)
161161
end
162162
end
163163

164+
# Uploads NDJSON documents in several requests of at most +batch_size+
# documents each, delegating every batch to #add_documents_ndjson.
#
# Each non-blank line of +documents+ is treated as one document. Blank or
# whitespace-only lines carry no document, so they are skipped: they must not
# count toward +batch_size+ or produce a batch with an empty payload.
#
# @param documents [String] NDJSON payload, one JSON document per line
# @param batch_size [Integer] maximum number of documents per request
# @param primary_key [String, nil] forwarded unchanged to #add_documents_ndjson
# @return [Array] one #add_documents_ndjson result per batch, in input order;
#   empty when +documents+ contains no documents
# @raise [ArgumentError] from Enumerable#each_slice when +batch_size+ is not positive
def add_documents_ndjson_in_batches(documents, batch_size = 1000, primary_key = nil)
  document_lines = documents.each_line.reject { |line| line.strip.empty? }

  document_lines.each_slice(batch_size).map do |batch|
    # Lines keep their own terminators, so a plain join rebuilds valid NDJSON.
    add_documents_ndjson(batch.join, primary_key)
  end
end
169+
170+
# Uploads CSV documents in several requests of at most +batch_size+ rows each,
# delegating every batch to #add_documents_csv. The header record is repeated
# at the top of every batch so each request is a self-contained CSV payload.
#
# Physical lines are first grouped into logical CSV records: a quoted field
# may contain line breaks (RFC 4180), so a line is appended to the previous
# record while that record still has an odd number of double quotes (i.e. a
# quoted field is still open). This keeps a multi-line record from being split
# across two batches. Blank records are dropped so they are neither mistaken
# for the header nor counted as rows.
#
# @param documents [String] CSV payload whose first non-blank record is the header
# @param batch_size [Integer] maximum number of data rows per request
# @param primary_key [String, nil] forwarded unchanged to #add_documents_csv
# @param delimiter [String, nil] forwarded unchanged to #add_documents_csv
# @return [Array] one #add_documents_csv result per batch, in input order;
#   empty when there is no data row after the header
# @raise [ArgumentError] from Enumerable#each_slice when +batch_size+ is not positive
def add_documents_csv_in_batches(documents, batch_size = 1000, primary_key = nil, delimiter = nil)
  records = documents.each_line.each_with_object([]) do |line, acc|
    open_record = acc.last
    if open_record&.count('"')&.odd?
      # Still inside a quoted field: this line belongs to the previous record.
      open_record << line
    else
      acc << line.dup
    end
  end

  heading, *rows = records.reject { |record| record.strip.empty? }

  rows.each_slice(batch_size).map do |batch|
    add_documents_csv(heading + batch.join, primary_key, delimiter)
  end
end
177+
164178
def add_documents_in_batches!(documents, batch_size = 1000, primary_key = nil)
165179
Utils.soft_deprecate(
166180
'Index#add_documents_in_batches!',

spec/meilisearch/index/documents_spec.rb

Lines changed: 89 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -49,48 +49,6 @@
4949
expect(index.documents['results'].count).to eq(5)
5050
end
5151

52-
it 'adds NDJSON documents' do
53-
documents = <<~NDJSON
54-
{ "objectRef": 123, "title": "Pride and Prejudice", "comment": "A great book" }
55-
{ "objectRef": 456, "title": "Le Petit Prince", "comment": "A french book" }
56-
{ "objectRef": 1, "title": "Alice In Wonderland", "comment": "A weird book" }
57-
{ "objectRef": 4, "title": "Harry Potter and the Half-Blood Prince", "comment": "The best book" }
58-
NDJSON
59-
index.add_documents_ndjson(documents, 'objectRef').await
60-
61-
expect(index.documents['results'].count).to eq(4)
62-
end
63-
64-
it 'adds CSV documents' do
65-
documents = <<~CSV
66-
"objectRef:number","title:string","comment:string"
67-
"1239","Pride and Prejudice","A great book"
68-
"4569","Le Petit Prince","A french book"
69-
"49","Harry Potter and the Half-Blood Prince","The best book"
70-
CSV
71-
index.add_documents_csv(documents, 'objectRef').await
72-
73-
expect(index.documents['results'].count).to eq(3)
74-
end
75-
76-
it 'adds CSV documents with different separator' do
77-
documents = <<~CSV
78-
"objectRef:number"|"title:string"|"comment:string"
79-
"1239"|"Pride and Prejudice"|"A great book"
80-
"4569"|"Le Petit Prince"|"A french book"
81-
"49"|"Harry Potter and the Half-Blood Prince"|"The best book"
82-
CSV
83-
84-
index.add_documents_csv(documents, 'objectRef', '|').await
85-
86-
expect(index.documents['results'].count).to eq(3)
87-
expect(index.documents['results'][1]).to match(
88-
'objectRef' => 4569,
89-
'title' => 'Le Petit Prince',
90-
'comment' => 'A french book'
91-
)
92-
end
93-
9452
it 'infers order of fields' do
9553
index.add_documents(documents).await
9654
task = index.document(1)
@@ -127,6 +85,7 @@
12785
expect(index.documents['results']).to contain_exactly(*documents_with_string_keys)
12886
end
12987

88+
13089
context 'given a single document' do
13190
it 'adds only one document to index (as an hash of one document)' do
13291
new_doc = { objectId: 30, title: 'Hamlet' }
@@ -162,6 +121,94 @@
162121
end
163122
end
164123

124+
describe 'ndjson and csv methods' do
125+
let(:ndjson_docs) do
126+
<<~NDJSON
127+
{ "objectRef": 123, "title": "Pride and Prejudice", "comment": "A great book" }
128+
{ "objectRef": 456, "title": "Le Petit Prince", "comment": "A french book" }
129+
{ "objectRef": 4, "title": "Harry Potter and the Half-Blood Prince", "comment": "The best book" }
130+
{ "objectRef": 55, "title": "The Three Body Problem", "comment": "An interesting book" }
131+
{ "objectRef": 200, "title": "Project Hail Mary", "comment": "A lonely book" }
132+
NDJSON
133+
end
134+
135+
let(:csv_docs) do
136+
<<~CSV
137+
"objectRef:number","title:string","comment:string"
138+
"1239","Pride and Prejudice","A great book"
139+
"456","Le Petit Prince","A french book"
140+
"49","Harry Potter and the Half-Blood Prince","The best book"
141+
"55","The Three Body Problem","An interesting book"
142+
"200","Project Hail Mary","A lonely book"
143+
CSV
144+
end
145+
146+
let(:csv_docs_custom_delim) do
147+
<<~CSV
148+
"objectRef:number"|"title:string"|"comment:string"
149+
"1239"|"Pride and Prejudice"|"A great book"
150+
"456"|"Le Petit Prince"|"A french book"
151+
"49"|"Harry Potter and the Half-Blood Prince"|"The best book"
152+
"55"|"The Three Body Problem"|"An interesting book"
153+
"200"|"Project Hail Mary"|"A lonely book"
154+
CSV
155+
end
156+
157+
let(:batch1_doc) do
158+
{
159+
'objectRef' => 456,
160+
'title' => 'Le Petit Prince',
161+
'comment' => 'A french book'
162+
}
163+
end
164+
165+
let(:batch2_doc) do
166+
{
167+
'objectRef' => 200,
168+
'title' => 'Project Hail Mary',
169+
'comment' => 'A lonely book'
170+
}
171+
end
172+
173+
it '#add_documents_ndjson' do
174+
index.add_documents_ndjson(ndjson_docs, 'objectRef').await
175+
176+
expect(index.documents['results'].count).to eq(5)
177+
expect(index.documents['results']).to include(batch1_doc, batch2_doc)
178+
end
179+
180+
it '#add_documents_csv' do
181+
index.add_documents_csv(csv_docs, 'objectRef').await
182+
expect(index.documents['results'].count).to eq(5)
183+
end
184+
185+
it '#add_documents_csv with a custom delimiter' do
186+
index.add_documents_csv(csv_docs_custom_delim, 'objectRef', '|').await
187+
188+
expect(index.documents['results'].count).to eq(5)
189+
expect(index.documents['results']).to include(batch1_doc, batch2_doc)
190+
end
191+
192+
it '#add_documents_ndjson_in_batches' do
193+
tasks = index.add_documents_ndjson_in_batches(ndjson_docs, 4, 'objectRef')
194+
expect(tasks).to contain_exactly(a_kind_of(Meilisearch::Models::Task),
195+
a_kind_of(Meilisearch::Models::Task))
196+
tasks.each(&:await)
197+
expect(index.documents['results']).to include(batch1_doc, batch2_doc)
198+
end
199+
200+
it '#add_documents_csv_in_batches' do
201+
tasks = index.add_documents_csv_in_batches(
202+
csv_docs_custom_delim, 4, 'objectRef', '|'
203+
)
204+
expect(tasks).to contain_exactly(a_kind_of(Meilisearch::Models::Task),
205+
a_kind_of(Meilisearch::Models::Task))
206+
tasks.each(&:await)
207+
208+
expect(index.documents['results']).to include(batch1_doc, batch2_doc)
209+
end
210+
end
211+
165212
describe '#add_documents!' do
166213
before { allow(Meilisearch::Utils).to receive(:soft_deprecate).and_return(nil) }
167214

0 commit comments

Comments
 (0)