@@ -11,88 +11,219 @@ defmodule Mongo.BulkWrite do
import Mongo.Utils
alias Mongo.UnorderedBulk
+ alias Mongo.OrderedBulk

@doc """
Unordered bulk write operations:
- Executes first insert commands, then all update commands and after that all delete commands are executed. If a group (inserts, updates or deletes) exceeds the limit
+ Executes all insert commands first, then all update commands, and finally all delete commands. If a group (inserts, updates or deletes) exceeds the limit
maxWriteBatchSize it will be split into chunks. Everything is done in memory, so this use case is limited by memory. A better approach seems to be streaming bulk writes.
"""
- def bulk_write(topology_pid, %UnorderedBulk{} = bulk, opts \\ []) do
-
-   write_concern = %{
-     w: Keyword.get(opts, :w),
-     j: Keyword.get(opts, :j),
-     wtimeout: Keyword.get(opts, :wtimeout)
-   } |> filter_nils()
-
-   with {:ok, conn, _, _} <- Mongo.select_server(topology_pid, :write, opts),
-        inserts <- conn |> run_commands(get_insert_cmds(bulk, write_concern), opts) |> collect(:inserts),
-        updates <- conn |> run_commands(get_update_cmds(bulk, write_concern, opts), opts) |> collect(:updates),
-        deletes <- conn |> run_commands(get_delete_cmds(bulk, write_concern, opts), opts) |> collect(:deletes) do
-     inserts ++ updates ++ deletes
+ def bulk_write(topology_pid, %UnorderedBulk{} = bulk, opts) do
+
+   write_concern = write_concern(opts)
+   with {:ok, conn, _, _} <- Mongo.select_server(topology_pid, :write, opts) do
+     one_bulk_write(conn, bulk, write_concern, opts)
  end
end
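+
+ # A minimal usage sketch (an assumption based on the struct fields used in this module;
+ # the UnorderedBulk module may provide dedicated helpers for building the struct):
+ #
+ #     bulk = %Mongo.UnorderedBulk{coll: "users",
+ #                                 inserts: [%{name: "Greta"}, %{name: "Tom"}],
+ #                                 updates: [],
+ #                                 deletes: [{%{name: "Tom"}, [limit: 1]}]}
+ #     Mongo.BulkWrite.bulk_write(topology_pid, bulk, w: 1)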

- def collect(doc, :inserts) do
+ @doc """
+ Writes the OrderedBulk to the database. A small optimization is applied: consecutive runs of
+ the same operation are grouped together and sent as a single command.
+ """
+ def bulk_write(topology_pid, %OrderedBulk{coll: coll, ops: ops} = bulk, opts) do
+
+   write_concern = write_concern(opts)
+   with {:ok, conn, _, _} <- Mongo.select_server(topology_pid, :write, opts) do
+     get_op_sequence(coll, ops)
+     |> Enum.map(fn {cmd, docs} -> one_bulk_write_operation(conn, cmd, coll, docs, write_concern, opts) end)
+     |> Enum.each(fn {cmd, count} -> IO.puts "#{cmd}: #{count}" end)
+   end
+ end
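+
+ # A minimal usage sketch (an assumption: ops is taken to be a list of {kind, doc} tuples
+ # using the kinds :insert/:update/:delete handled by get_cmds/5; the update document
+ # format is not shown in this module, so only inserts and deletes are used here):
+ #
+ #     bulk = %Mongo.OrderedBulk{coll: "users",
+ #                               ops: [insert: %{name: "Greta"},
+ #                                     insert: %{name: "Tom"},
+ #                                     delete: {%{name: "Tom"}, [limit: 1]}]}
+ #     Mongo.BulkWrite.bulk_write(topology_pid, bulk, w: 1)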

+ ##
+ # Returns the write concern from `opts`.
+ #
+ defp write_concern(opts) do
+   %{
+     w: Keyword.get(opts, :w),
+     j: Keyword.get(opts, :j),
+     wtimeout: Keyword.get(opts, :wtimeout)
+   } |> filter_nils()
end
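+
+ # For example, assuming filter_nils/1 from Mongo.Utils drops nil values:
+ #
+ #     write_concern(w: 1, j: true)   #=> %{w: 1, j: true}
+ #     write_concern([])              #=> %{}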

- def collect(doc, :updates) do
+ @doc """
+ Executes one unordered bulk write. The execution order of the operation groups is:
+
+ * inserts
+ * updates
+ * deletes
+
+ The function returns a keyword list with the results of each operation group.
+ For the details see https://github.com/mongodb/specifications/blob/master/source/crud/crud.rst#results
+ """
+ def one_bulk_write(conn, %UnorderedBulk{coll: coll, inserts: inserts, updates: updates, deletes: deletes} = bulk, write_concern, opts) do
+
+   with {_, inserts} <- one_bulk_write_operation(conn, :insert, coll, inserts, write_concern, opts),
+        {_, updates} <- one_bulk_write_operation(conn, :update, coll, updates, write_concern, opts),
+        {_, deletes} <- one_bulk_write_operation(conn, :delete, coll, deletes, write_concern, opts) do
+     [
+       acknowledged: acknowledged(write_concern),
+       insertedCount: inserts,
+       matchedCount: updates,
+       deletedCount: deletes,
+       upsertedCount: 0,
+       upsertedIds: [],
+       insertedIds: []
+     ]
+   end
+ end
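+
+ # Illustrative result (the counts depend on the executed bulk, acknowledged on the write concern):
+ #
+ #     [acknowledged: true, insertedCount: 2, matchedCount: 1, deletedCount: 0,
+ #      upsertedCount: 0, upsertedIds: [], insertedIds: []]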
+
+ ###
+ # Executes the command `cmd` and collects the result.
+ #
+ def one_bulk_write_operation(conn, cmd, coll, docs, write_concern, opts) do
+   with result <- conn |> run_commands(get_cmds(cmd, coll, docs, write_concern, opts), opts) |> collect(cmd) do
+     {cmd, result}
+   end
end
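+
+ # For example (conn and docs are placeholders), an insert group of three documents
+ # comes back as a tagged count:
+ #
+ #     one_bulk_write_operation(conn, :insert, "users", docs, %{w: 1}, [])
+ #     #=> {:insert, 3}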

- def collect(doc, :deletes) do
+ ##
+ # Converts the list of operations into insert/update/delete commands.
+ #
+ defp get_cmds(:insert, coll, docs, write_concern, opts), do: get_insert_cmds(coll, docs, write_concern, opts)
+ defp get_cmds(:update, coll, docs, write_concern, opts), do: get_update_cmds(coll, docs, write_concern, opts)
+ defp get_cmds(:delete, coll, docs, write_concern, opts), do: get_delete_cmds(coll, docs, write_concern, opts)
+
+ defp acknowledged(%{w: w}) when w > 0, do: true
+ defp acknowledged(%{}), do: false
+
+ ###
+ # Converts the list of operations into a list of groups of the same operation.
+ #
+ # [inserts, inserts, updates] -> [[inserts, inserts], [updates]]
+ #
+ defp get_op_sequence(coll, ops) do
+   get_op_sequence(coll, ops, [])
+ end
+ defp get_op_sequence(_coll, [], acc), do: Enum.reverse(acc)
+ defp get_op_sequence(coll, ops, acc) do
+   [{kind, _doc} | _rest] = ops
+   {docs, rest} = find_max_sequence(kind, ops)
+   get_op_sequence(coll, rest, [{kind, docs} | acc])
+ end
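+
+ # For example, with a, b and c standing for operation documents:
+ #
+ #     get_op_sequence("users", [insert: a, insert: b, delete: c])
+ #     #=> [insert: [a, b], delete: [c]]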
+
+ ###
+ # Splits the sequence of operations into two parts:
+ # 1) the longest prefix of operations of kind `kind`
+ # 2) the rest of the operations
+ #
+ defp find_max_sequence(kind, rest) do
+   find_max_sequence(kind, rest, [])
+ end
+ defp find_max_sequence(_kind, [], acc) do
+   {Enum.reverse(acc), []}
+ end
+ defp find_max_sequence(kind, [{other, desc} | rest], acc) when kind == other do
+   find_max_sequence(kind, rest, [desc | acc])
+ end
+ defp find_max_sequence(_kind, rest, acc) do
+   {Enum.reverse(acc), rest}
+ end
+
+ # {
+ #   "acknowledged" : true,
+ #   "deletedCount" : 1,
+ #   "insertedCount" : 2,
+ #   "matchedCount" : 2,
+ #   "upsertedCount" : 0,
+ #   "insertedIds" : {
+ #     "0" : 4,
+ #     "1" : 5
+ #   },
+ #   "upsertedIds" : {
+ #   }
+ # }
+
+ def collect(docs, :insert) do
+   docs
+   |> Enum.map(fn
+        {:ok, %{"n" => n}} -> n
+        {:ok, _other} -> 0
+      end)
+   |> Enum.reduce(0, fn x, acc -> x + acc end)
+ end
+
+ def collect(docs, :update) do
+   docs
+   |> Enum.map(fn
+        {:ok, %{"n" => n}} -> n
+        {:ok, _other} -> 0
+      end)
+   |> Enum.reduce(0, fn x, acc -> x + acc end)
+ end
+
+ def collect(docs, :delete) do
+   docs
+   |> Enum.map(fn
+        {:ok, %{"n" => n}} -> n
+        {:ok, _other} -> 0
+      end)
+   |> Enum.reduce(0, fn x, acc -> x + acc end)
end
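+
+ # For example, summing the "n" fields of the per-chunk replies:
+ #
+ #     collect([{:ok, %{"n" => 2}}, {:ok, %{"n" => 3}}], :insert)   #=> 5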

defp run_commands(conn, cmds, opts) do

-   IO.puts "Running cmsd #{inspect cmds}"
+   IO.puts "Running cmds #{inspect cmds}"

  cmds
  |> Enum.map(fn cmd -> Mongo.direct_command(conn, cmd, opts) end)
  |> Enum.map(fn {:ok, doc} -> {:ok, doc} end)
end

- def get_insert_cmds(%UnorderedBulk{coll: coll, inserts: all_inserts}, write_concern) do
+ def get_insert_cmds(coll, docs, write_concern, _opts) do

  max_batch_size = 10 ## only for test maxWriteBatchSize

-   {_ids, all_inserts} = assign_ids(all_inserts)
+   {_ids, docs} = assign_ids(docs)

-   all_inserts
+   docs
  |> Enum.chunk_every(max_batch_size)
  |> Enum.map(fn inserts -> get_insert_cmd(coll, inserts, write_concern) end)

end

defp get_insert_cmd(coll, inserts, write_concern) do
-   filter_nils([insert: coll, documents: inserts, writeConcern: write_concern])
+   [insert: coll,
+    documents: inserts,
+    writeConcern: write_concern] |> filter_nils()
end
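+
+ # For example, a single chunk with one document and write concern %{w: 1} yields:
+ #
+ #     [insert: "users", documents: [%{name: "Greta"}], writeConcern: %{w: 1}]
+ #
+ # (after assign_ids/1 has been applied to the documents).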

- defp get_delete_cmds(%UnorderedBulk{coll: coll, deletes: all_deletes}, write_concern, opts) do
+ defp get_delete_cmds(coll, docs, write_concern, opts) do

  max_batch_size = 10 ## only for test maxWriteBatchSize
-   all_deletes
+   docs
  |> Enum.chunk_every(max_batch_size)
  |> Enum.map(fn deletes -> get_delete_cmd(coll, deletes, write_concern, opts) end)

end

defp get_delete_cmd(coll, deletes, write_concern, opts) do
-   filter_nils([delete: coll,
-                deletes: Enum.map(deletes, fn delete -> get_delete_doc(delete) end),
-                ordered: Keyword.get(opts, :ordered),
-                writeConcern: write_concern])
+   [delete: coll,
+    deletes: Enum.map(deletes, fn delete -> get_delete_doc(delete) end),
+    ordered: Keyword.get(opts, :ordered),
+    writeConcern: write_concern] |> filter_nils()
end
- defp get_delete_doc({filter, collation, limit}) do
-   %{q: filter, limit: limit, collation: collation} |> filter_nils()
+ defp get_delete_doc({filter, opts}) do
+   [q: filter,
+    limit: Keyword.get(opts, :limit),
+    collation: Keyword.get(opts, :collation)] |> filter_nils()
end
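+
+ # For example, options that are not given are dropped by filter_nils/1:
+ #
+ #     get_delete_doc({%{name: "Tom"}, [limit: 1]})
+ #     #=> [q: %{name: "Tom"}, limit: 1]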

- defp get_update_cmds(%UnorderedBulk{coll: coll, updates: all_updates}, write_concern, opts) do
+ defp get_update_cmds(coll, docs, write_concern, opts) do

  max_batch_size = 10 ## only for test maxWriteBatchSize
-   all_updates
+   docs
  |> Enum.chunk_every(max_batch_size)
  |> Enum.map(fn updates -> get_update_cmd(coll, updates, write_concern, opts) end)