@@ -18,8 +18,12 @@ use crate::{
18
18
fsverity:: Sha256HashValue ,
19
19
oci:: tar:: { get_entry, split_async} ,
20
20
repository:: Repository ,
21
- splitstream:: DigestMap ,
21
+ splitstream:: {
22
+ handle_external_object, DigestMap , EnsureObjectMessages , ResultChannelReceiver ,
23
+ ResultChannelSender , WriterMessages ,
24
+ } ,
22
25
util:: parse_sha256,
26
+ zstd_encoder,
23
27
} ;
24
28
25
29
pub fn import_layer (
@@ -83,6 +87,7 @@ impl<'repo> ImageOp<'repo> {
83
87
let proxy = containers_image_proxy:: ImageProxy :: new_with_config ( config) . await ?;
84
88
let img = proxy. open_image ( imgref) . await . context ( "Opening image" ) ?;
85
89
let progress = MultiProgress :: new ( ) ;
90
+
86
91
Ok ( ImageOp {
87
92
repo,
88
93
proxy,
@@ -95,47 +100,49 @@ impl<'repo> ImageOp<'repo> {
95
100
& self ,
96
101
layer_sha256 : & Sha256HashValue ,
97
102
descriptor : & Descriptor ,
98
- ) -> Result < Sha256HashValue > {
103
+ layer_num : usize ,
104
+ object_sender : crossbeam:: channel:: Sender < EnsureObjectMessages > ,
105
+ ) -> Result < ( ) > {
99
106
// We need to use the per_manifest descriptor to download the compressed layer but it gets
100
107
// stored in the repository via the per_config descriptor. Our return value is the
101
108
// fsverity digest for the corresponding splitstream.
102
109
103
- if let Some ( layer_id) = self . repo . check_stream ( layer_sha256) ? {
104
- self . progress
105
- . println ( format ! ( "Already have layer {}" , hex:: encode( layer_sha256) ) ) ?;
106
- Ok ( layer_id)
107
- } else {
108
- // Otherwise, we need to fetch it...
109
- let ( blob_reader, driver) = self . proxy . get_descriptor ( & self . img , descriptor) . await ?;
110
-
111
- // See https://github.com/containers/containers-image-proxy-rs/issues/71
112
- let blob_reader = blob_reader. take ( descriptor. size ( ) ) ;
113
-
114
- let bar = self . progress . add ( ProgressBar :: new ( descriptor. size ( ) ) ) ;
115
- bar. set_style ( ProgressStyle :: with_template ( "[eta {eta}] {bar:40.cyan/blue} {decimal_bytes:>7}/{decimal_total_bytes:7} {msg}" )
116
- . unwrap ( )
117
- . progress_chars ( "##-" ) ) ;
118
- let progress = bar. wrap_async_read ( blob_reader) ;
119
- self . progress
120
- . println ( format ! ( "Fetching layer {}" , hex:: encode( layer_sha256) ) ) ?;
110
+ // Otherwise, we need to fetch it...
111
+ let ( blob_reader, driver) = self . proxy . get_descriptor ( & self . img , descriptor) . await ?;
112
+
113
+ // See https://github.com/containers/containers-image-proxy-rs/issues/71
114
+ let blob_reader = blob_reader. take ( descriptor. size ( ) ) ;
115
+
116
+ let bar = self . progress . add ( ProgressBar :: new ( descriptor. size ( ) ) ) ;
117
+ bar. set_style (
118
+ ProgressStyle :: with_template (
119
+ "[eta {eta}] {bar:40.cyan/blue} {decimal_bytes:>7}/{decimal_total_bytes:7} {msg}" ,
120
+ )
121
+ . unwrap ( )
122
+ . progress_chars ( "##-" ) ,
123
+ ) ;
124
+ let progress = bar. wrap_async_read ( blob_reader) ;
125
+ self . progress
126
+ . println ( format ! ( "Fetching layer {}" , hex:: encode( layer_sha256) ) ) ?;
127
+
128
+ let mut splitstream =
129
+ self . repo
130
+ . create_stream ( Some ( * layer_sha256) , None , Some ( object_sender) ) ;
131
+ match descriptor. media_type ( ) {
132
+ MediaType :: ImageLayer => {
133
+ split_async ( progress, & mut splitstream, layer_num) . await ?;
134
+ }
135
+ MediaType :: ImageLayerGzip => {
136
+ split_async ( GzipDecoder :: new ( progress) , & mut splitstream, layer_num) . await ?;
137
+ }
138
+ MediaType :: ImageLayerZstd => {
139
+ split_async ( ZstdDecoder :: new ( progress) , & mut splitstream, layer_num) . await ?;
140
+ }
141
+ other => bail ! ( "Unsupported layer media type {:?}" , other) ,
142
+ } ;
143
+ driver. await ?;
121
144
122
- let mut splitstream = self . repo . create_stream ( Some ( * layer_sha256) , None ) ;
123
- match descriptor. media_type ( ) {
124
- MediaType :: ImageLayer => {
125
- split_async ( progress, & mut splitstream) . await ?;
126
- }
127
- MediaType :: ImageLayerGzip => {
128
- split_async ( GzipDecoder :: new ( progress) , & mut splitstream) . await ?;
129
- }
130
- MediaType :: ImageLayerZstd => {
131
- split_async ( ZstdDecoder :: new ( progress) , & mut splitstream) . await ?;
132
- }
133
- other => bail ! ( "Unsupported layer media type {:?}" , other) ,
134
- } ;
135
- let layer_id = self . repo . write_stream ( splitstream, None ) ?;
136
- driver. await ?;
137
- Ok ( layer_id)
138
- }
145
+ Ok ( ( ) )
139
146
}
140
147
141
148
pub async fn ensure_config (
@@ -154,7 +161,6 @@ impl<'repo> ImageOp<'repo> {
154
161
} else {
155
162
// We need to add the config to the repo. We need to parse the config and make sure we
156
163
// have all of the layers first.
157
- //
158
164
self . progress
159
165
. println ( format ! ( "Fetching config {}" , hex:: encode( config_sha256) ) ) ?;
160
166
@@ -169,26 +175,157 @@ impl<'repo> ImageOp<'repo> {
169
175
let raw_config = config?;
170
176
let config = ImageConfiguration :: from_reader ( & raw_config[ ..] ) ?;
171
177
178
+ let ( done_chan_sender, done_chan_recver, object_sender) = self . spawn_threads ( & config) ;
179
+
172
180
let mut config_maps = DigestMap :: new ( ) ;
173
- for ( mld, cld) in zip ( manifest_layers, config. rootfs ( ) . diff_ids ( ) ) {
181
+
182
+ for ( idx, ( mld, cld) ) in zip ( manifest_layers, config. rootfs ( ) . diff_ids ( ) ) . enumerate ( ) {
174
183
let layer_sha256 = sha256_from_digest ( cld) ?;
175
- let layer_id = self
176
- . ensure_layer ( & layer_sha256, mld)
177
- . await
178
- . with_context ( || format ! ( "Failed to fetch layer {cld} via {mld:?}" ) ) ?;
184
+
185
+ if let Some ( layer_id) = self . repo . check_stream ( & layer_sha256) ? {
186
+ self . progress
187
+ . println ( format ! ( "Already have layer {}" , hex:: encode( layer_sha256) ) ) ?;
188
+
189
+ config_maps. insert ( & layer_sha256, & layer_id) ;
190
+ } else {
191
+ self . ensure_layer ( & layer_sha256, mld, idx, object_sender. clone ( ) )
192
+ . await
193
+ . with_context ( || format ! ( "Failed to fetch layer {cld} via {mld:?}" ) ) ?;
194
+ }
195
+ }
196
+
197
+ drop ( done_chan_sender) ;
198
+
199
+ while let Ok ( res) = done_chan_recver. recv ( ) {
200
+ let ( layer_sha256, layer_id) = res?;
179
201
config_maps. insert ( & layer_sha256, & layer_id) ;
180
202
}
181
203
182
- let mut splitstream = self
183
- . repo
184
- . create_stream ( Some ( config_sha256) , Some ( config_maps) ) ;
204
+ let mut splitstream =
205
+ self . repo
206
+ . create_stream ( Some ( config_sha256) , Some ( config_maps) , None ) ;
185
207
splitstream. write_inline ( & raw_config) ;
186
208
let config_id = self . repo . write_stream ( splitstream, None ) ?;
187
209
188
210
Ok ( ( config_sha256, config_id) )
189
211
}
190
212
}
191
213
214
+ fn spawn_threads (
215
+ & self ,
216
+ config : & ImageConfiguration ,
217
+ ) -> (
218
+ ResultChannelSender ,
219
+ ResultChannelReceiver ,
220
+ crossbeam:: channel:: Sender < EnsureObjectMessages > ,
221
+ ) {
222
+ use crossbeam:: channel:: { unbounded, Receiver , Sender } ;
223
+
224
+ let encoder_threads = 2 ;
225
+ let external_object_writer_threads = 4 ;
226
+
227
+ let pool = rayon:: ThreadPoolBuilder :: new ( )
228
+ . num_threads ( encoder_threads + external_object_writer_threads)
229
+ . build ( )
230
+ . unwrap ( ) ;
231
+
232
+ // We need this as writers have internal state that can't be shared between threads
233
+ //
234
+ // We'll actually need as many writers (not writer threads, but writer instances) as there are layers.
235
+ let zstd_writer_channels: Vec < ( Sender < WriterMessages > , Receiver < WriterMessages > ) > =
236
+ ( 0 ..encoder_threads) . map ( |_| unbounded ( ) ) . collect ( ) ;
237
+
238
+ let ( object_sender, object_receiver) = unbounded :: < EnsureObjectMessages > ( ) ;
239
+
240
+ // (layer_sha256, layer_id)
241
+ let ( done_chan_sender, done_chan_recver) =
242
+ std:: sync:: mpsc:: channel :: < Result < ( Sha256HashValue , Sha256HashValue ) > > ( ) ;
243
+
244
+ let chunk_len = ( config. rootfs ( ) . diff_ids ( ) . len ( ) + encoder_threads - 1 ) / encoder_threads;
245
+
246
+ // Divide the layers into chunks of some specific size so each worker
247
+ // thread can work on multiple deterministic layers
248
+ let mut chunks: Vec < Vec < Sha256HashValue > > = config
249
+ . rootfs ( )
250
+ . diff_ids ( )
251
+ . iter ( )
252
+ . map ( |x| sha256_from_digest ( x) . unwrap ( ) )
253
+ . collect :: < Vec < Sha256HashValue > > ( )
254
+ . chunks ( chunk_len)
255
+ . map ( |x| x. to_vec ( ) )
256
+ . collect ( ) ;
257
+
258
+ // Mapping from layer_id -> index in writer_channels
259
+ // This is to make sure that all messages relating to a particular layer
260
+ // always reach the same writer
261
+ let layers_to_chunks = chunks
262
+ . iter ( )
263
+ . enumerate ( )
264
+ . map ( |( i, chunk) | std:: iter:: repeat ( i) . take ( chunk. len ( ) ) . collect :: < Vec < _ > > ( ) )
265
+ . flatten ( )
266
+ . collect :: < Vec < _ > > ( ) ;
267
+
268
+ let _ = ( 0 ..encoder_threads)
269
+ . map ( |i| {
270
+ let repository = self . repo . try_clone ( ) . unwrap ( ) ;
271
+ let object_sender = object_sender. clone ( ) ;
272
+ let done_chan_sender = done_chan_sender. clone ( ) ;
273
+ let chunk = std:: mem:: take ( & mut chunks[ i] ) ;
274
+ let receiver = zstd_writer_channels[ i] . 1 . clone ( ) ;
275
+
276
+ pool. spawn ( {
277
+ move || {
278
+ let start = i * ( chunk_len) ;
279
+ let end = start + chunk_len;
280
+
281
+ let enc = zstd_encoder:: MultipleZstdWriters :: new (
282
+ chunk,
283
+ repository,
284
+ object_sender,
285
+ done_chan_sender,
286
+ ) ;
287
+
288
+ if let Err ( e) = enc. recv_data ( receiver, start, end) {
289
+ eprintln ! ( "zstd_encoder returned with error: {}" , e. to_string( ) ) ;
290
+ return ;
291
+ }
292
+ }
293
+ } ) ;
294
+ } )
295
+ . collect :: < Vec < ( ) > > ( ) ;
296
+
297
+ let _ = ( 0 ..external_object_writer_threads)
298
+ . map ( |_| {
299
+ pool. spawn ( {
300
+ let repository = self . repo . try_clone ( ) . unwrap ( ) ;
301
+ let zstd_writer_channels = zstd_writer_channels
302
+ . iter ( )
303
+ . map ( |( s, _) | s. clone ( ) )
304
+ . collect :: < Vec < _ > > ( ) ;
305
+ let layers_to_chunks = layers_to_chunks. clone ( ) ;
306
+ let external_object_receiver = object_receiver. clone ( ) ;
307
+
308
+ move || {
309
+ if let Err ( e) = handle_external_object (
310
+ repository,
311
+ external_object_receiver,
312
+ zstd_writer_channels,
313
+ layers_to_chunks,
314
+ ) {
315
+ eprintln ! (
316
+ "handle_external_object returned with error: {}" ,
317
+ e. to_string( )
318
+ ) ;
319
+ return ;
320
+ }
321
+ }
322
+ } ) ;
323
+ } )
324
+ . collect :: < Vec < _ > > ( ) ;
325
+
326
+ return ( done_chan_sender, done_chan_recver, object_sender) ;
327
+ }
328
+
192
329
pub async fn pull ( & self ) -> Result < ( Sha256HashValue , Sha256HashValue ) > {
193
330
let ( _manifest_digest, raw_manifest) = self
194
331
. proxy
@@ -201,6 +338,7 @@ impl<'repo> ImageOp<'repo> {
201
338
let manifest = ImageManifest :: from_reader ( raw_manifest. as_slice ( ) ) ?;
202
339
let config_descriptor = manifest. config ( ) ;
203
340
let layers = manifest. layers ( ) ;
341
+
204
342
self . ensure_config ( layers, config_descriptor)
205
343
. await
206
344
. with_context ( || format ! ( "Failed to pull config {config_descriptor:?}" ) )
@@ -280,7 +418,7 @@ pub fn write_config(
280
418
let json = config. to_string ( ) ?;
281
419
let json_bytes = json. as_bytes ( ) ;
282
420
let sha256 = hash ( json_bytes) ;
283
- let mut stream = repo. create_stream ( Some ( sha256) , Some ( refs) ) ;
421
+ let mut stream = repo. create_stream ( Some ( sha256) , Some ( refs) , None ) ;
284
422
stream. write_inline ( json_bytes) ;
285
423
let id = repo. write_stream ( stream, None ) ?;
286
424
Ok ( ( sha256, id) )
0 commit comments