@@ -18,8 +18,12 @@ use crate::{
18
18
fsverity:: Sha256HashValue ,
19
19
oci:: tar:: { get_entry, split_async} ,
20
20
repository:: Repository ,
21
- splitstream:: DigestMap ,
21
+ splitstream:: {
22
+ handle_external_object, DigestMap , EnsureObjectMessages , ResultChannelReceiver ,
23
+ ResultChannelSender , WriterMessages ,
24
+ } ,
22
25
util:: parse_sha256,
26
+ zstd_encoder,
23
27
} ;
24
28
25
29
pub fn import_layer (
@@ -83,6 +87,7 @@ impl<'repo> ImageOp<'repo> {
83
87
let proxy = containers_image_proxy:: ImageProxy :: new_with_config ( config) . await ?;
84
88
let img = proxy. open_image ( imgref) . await . context ( "Opening image" ) ?;
85
89
let progress = MultiProgress :: new ( ) ;
90
+
86
91
Ok ( ImageOp {
87
92
repo,
88
93
proxy,
@@ -95,47 +100,49 @@ impl<'repo> ImageOp<'repo> {
95
100
& self ,
96
101
layer_sha256 : & Sha256HashValue ,
97
102
descriptor : & Descriptor ,
98
- ) -> Result < Sha256HashValue > {
103
+ layer_num : usize ,
104
+ object_sender : crossbeam:: channel:: Sender < EnsureObjectMessages > ,
105
+ ) -> Result < ( ) > {
99
106
// We need to use the per_manifest descriptor to download the compressed layer but it gets
100
107
// stored in the repository via the per_config descriptor. Our return value is the
101
108
// fsverity digest for the corresponding splitstream.
102
109
103
- if let Some ( layer_id) = self . repo . check_stream ( layer_sha256) ? {
104
- self . progress
105
- . println ( format ! ( "Already have layer {}" , hex:: encode( layer_sha256) ) ) ?;
106
- Ok ( layer_id)
107
- } else {
108
- // Otherwise, we need to fetch it...
109
- let ( blob_reader, driver) = self . proxy . get_descriptor ( & self . img , descriptor) . await ?;
110
-
111
- // See https://github.com/containers/containers-image-proxy-rs/issues/71
112
- let blob_reader = blob_reader. take ( descriptor. size ( ) ) ;
113
-
114
- let bar = self . progress . add ( ProgressBar :: new ( descriptor. size ( ) ) ) ;
115
- bar. set_style ( ProgressStyle :: with_template ( "[eta {eta}] {bar:40.cyan/blue} {decimal_bytes:>7}/{decimal_total_bytes:7} {msg}" )
116
- . unwrap ( )
117
- . progress_chars ( "##-" ) ) ;
118
- let progress = bar. wrap_async_read ( blob_reader) ;
119
- self . progress
120
- . println ( format ! ( "Fetching layer {}" , hex:: encode( layer_sha256) ) ) ?;
110
+ // Otherwise, we need to fetch it...
111
+ let ( blob_reader, driver) = self . proxy . get_descriptor ( & self . img , descriptor) . await ?;
112
+
113
+ // See https://github.com/containers/containers-image-proxy-rs/issues/71
114
+ let blob_reader = blob_reader. take ( descriptor. size ( ) ) ;
115
+
116
+ let bar = self . progress . add ( ProgressBar :: new ( descriptor. size ( ) ) ) ;
117
+ bar. set_style (
118
+ ProgressStyle :: with_template (
119
+ "[eta {eta}] {bar:40.cyan/blue} {decimal_bytes:>7}/{decimal_total_bytes:7} {msg}" ,
120
+ )
121
+ . unwrap ( )
122
+ . progress_chars ( "##-" ) ,
123
+ ) ;
124
+ let progress = bar. wrap_async_read ( blob_reader) ;
125
+ self . progress
126
+ . println ( format ! ( "Fetching layer {}" , hex:: encode( layer_sha256) ) ) ?;
127
+
128
+ let mut splitstream =
129
+ self . repo
130
+ . create_stream ( Some ( * layer_sha256) , None , Some ( object_sender) ) ;
131
+ match descriptor. media_type ( ) {
132
+ MediaType :: ImageLayer => {
133
+ split_async ( progress, & mut splitstream, layer_num) . await ?;
134
+ }
135
+ MediaType :: ImageLayerGzip => {
136
+ split_async ( GzipDecoder :: new ( progress) , & mut splitstream, layer_num) . await ?;
137
+ }
138
+ MediaType :: ImageLayerZstd => {
139
+ split_async ( ZstdDecoder :: new ( progress) , & mut splitstream, layer_num) . await ?;
140
+ }
141
+ other => bail ! ( "Unsupported layer media type {:?}" , other) ,
142
+ } ;
143
+ driver. await ?;
121
144
122
- let mut splitstream = self . repo . create_stream ( Some ( * layer_sha256) , None ) ;
123
- match descriptor. media_type ( ) {
124
- MediaType :: ImageLayer => {
125
- split_async ( progress, & mut splitstream) . await ?;
126
- }
127
- MediaType :: ImageLayerGzip => {
128
- split_async ( GzipDecoder :: new ( progress) , & mut splitstream) . await ?;
129
- }
130
- MediaType :: ImageLayerZstd => {
131
- split_async ( ZstdDecoder :: new ( progress) , & mut splitstream) . await ?;
132
- }
133
- other => bail ! ( "Unsupported layer media type {:?}" , other) ,
134
- } ;
135
- let layer_id = self . repo . write_stream ( splitstream, None ) ?;
136
- driver. await ?;
137
- Ok ( layer_id)
138
- }
145
+ Ok ( ( ) )
139
146
}
140
147
141
148
pub async fn ensure_config (
@@ -154,32 +161,162 @@ impl<'repo> ImageOp<'repo> {
154
161
} else {
155
162
// We need to add the config to the repo. We need to parse the config and make sure we
156
163
// have all of the layers first.
157
- //
158
164
self . progress
159
165
. println ( format ! ( "Fetching config {}" , hex:: encode( config_sha256) ) ) ?;
160
166
let raw_config = self . proxy . fetch_config_raw ( & self . img ) . await ?;
161
167
let config = ImageConfiguration :: from_reader ( raw_config. as_slice ( ) ) ?;
162
168
169
+ let ( done_chan_sender, done_chan_recver, object_sender) = self . spawn_threads ( & config) ;
170
+
163
171
let mut config_maps = DigestMap :: new ( ) ;
164
- for ( mld, cld) in zip ( manifest_layers, config. rootfs ( ) . diff_ids ( ) ) {
172
+
173
+ for ( idx, ( mld, cld) ) in zip ( manifest_layers, config. rootfs ( ) . diff_ids ( ) ) . enumerate ( ) {
165
174
let layer_sha256 = sha256_from_digest ( cld) ?;
166
- let layer_id = self
167
- . ensure_layer ( & layer_sha256, mld)
168
- . await
169
- . with_context ( || format ! ( "Failed to fetch layer {cld} via {mld:?}" ) ) ?;
175
+
176
+ if let Some ( layer_id) = self . repo . check_stream ( & layer_sha256) ? {
177
+ self . progress
178
+ . println ( format ! ( "Already have layer {}" , hex:: encode( layer_sha256) ) ) ?;
179
+
180
+ config_maps. insert ( & layer_sha256, & layer_id) ;
181
+ } else {
182
+ self . ensure_layer ( & layer_sha256, mld, idx, object_sender. clone ( ) )
183
+ . await
184
+ . with_context ( || format ! ( "Failed to fetch layer {cld} via {mld:?}" ) ) ?;
185
+ }
186
+ }
187
+
188
+ drop ( done_chan_sender) ;
189
+
190
+ while let Ok ( res) = done_chan_recver. recv ( ) {
191
+ let ( layer_sha256, layer_id) = res?;
170
192
config_maps. insert ( & layer_sha256, & layer_id) ;
171
193
}
172
194
173
- let mut splitstream = self
174
- . repo
175
- . create_stream ( Some ( config_sha256) , Some ( config_maps) ) ;
195
+ let mut splitstream =
196
+ self . repo
197
+ . create_stream ( Some ( config_sha256) , Some ( config_maps) , None ) ;
176
198
splitstream. write_inline ( & raw_config) ;
177
199
let config_id = self . repo . write_stream ( splitstream, None ) ?;
178
200
179
201
Ok ( ( config_sha256, config_id) )
180
202
}
181
203
}
182
204
205
+ fn spawn_threads (
206
+ & self ,
207
+ config : & ImageConfiguration ,
208
+ ) -> (
209
+ ResultChannelSender ,
210
+ ResultChannelReceiver ,
211
+ crossbeam:: channel:: Sender < EnsureObjectMessages > ,
212
+ ) {
213
+ use crossbeam:: channel:: { unbounded, Receiver , Sender } ;
214
+
215
+ let encoder_threads = 2 ;
216
+ let external_object_writer_threads = 4 ;
217
+
218
+ let pool = rayon:: ThreadPoolBuilder :: new ( )
219
+ . num_threads ( encoder_threads + external_object_writer_threads)
220
+ . build ( )
221
+ . unwrap ( ) ;
222
+
223
+ // We need this as writers have internal state that can't be shared between threads
224
+ //
225
+ // We'll actually need as many writers (not writer threads, but writer instances) as there are layers.
226
+ let zstd_writer_channels: Vec < ( Sender < WriterMessages > , Receiver < WriterMessages > ) > =
227
+ ( 0 ..encoder_threads) . map ( |_| unbounded ( ) ) . collect ( ) ;
228
+
229
+ let ( object_sender, object_receiver) = unbounded :: < EnsureObjectMessages > ( ) ;
230
+
231
+ // (layer_sha256, layer_id)
232
+ let ( done_chan_sender, done_chan_recver) =
233
+ std:: sync:: mpsc:: channel :: < Result < ( Sha256HashValue , Sha256HashValue ) > > ( ) ;
234
+
235
+ let chunk_len = ( config. rootfs ( ) . diff_ids ( ) . len ( ) + encoder_threads - 1 ) / encoder_threads;
236
+
237
+ // Divide the layers into chunks of some specific size so each worker
238
+ // thread can work on multiple deterministic layers
239
+ let mut chunks: Vec < Vec < Sha256HashValue > > = config
240
+ . rootfs ( )
241
+ . diff_ids ( )
242
+ . iter ( )
243
+ . map ( |x| sha256_from_digest ( x) . unwrap ( ) )
244
+ . collect :: < Vec < Sha256HashValue > > ( )
245
+ . chunks ( chunk_len)
246
+ . map ( |x| x. to_vec ( ) )
247
+ . collect ( ) ;
248
+
249
+ // Mapping from layer_id -> index in writer_channels
250
+ // This is to make sure that all messages relating to a particular layer
251
+ // always reach the same writer
252
+ let layers_to_chunks = chunks
253
+ . iter ( )
254
+ . enumerate ( )
255
+ . map ( |( i, chunk) | std:: iter:: repeat ( i) . take ( chunk. len ( ) ) . collect :: < Vec < _ > > ( ) )
256
+ . flatten ( )
257
+ . collect :: < Vec < _ > > ( ) ;
258
+
259
+ let _ = ( 0 ..encoder_threads)
260
+ . map ( |i| {
261
+ let repository = self . repo . try_clone ( ) . unwrap ( ) ;
262
+ let object_sender = object_sender. clone ( ) ;
263
+ let done_chan_sender = done_chan_sender. clone ( ) ;
264
+ let chunk = std:: mem:: take ( & mut chunks[ i] ) ;
265
+ let receiver = zstd_writer_channels[ i] . 1 . clone ( ) ;
266
+
267
+ pool. spawn ( {
268
+ move || {
269
+ let start = i * ( chunk_len) ;
270
+ let end = start + chunk_len;
271
+
272
+ let enc = zstd_encoder:: MultipleZstdWriters :: new (
273
+ chunk,
274
+ repository,
275
+ object_sender,
276
+ done_chan_sender,
277
+ ) ;
278
+
279
+ if let Err ( e) = enc. recv_data ( receiver, start, end) {
280
+ eprintln ! ( "zstd_encoder returned with error: {}" , e. to_string( ) ) ;
281
+ return ;
282
+ }
283
+ }
284
+ } ) ;
285
+ } )
286
+ . collect :: < Vec < ( ) > > ( ) ;
287
+
288
+ let _ = ( 0 ..external_object_writer_threads)
289
+ . map ( |_| {
290
+ pool. spawn ( {
291
+ let repository = self . repo . try_clone ( ) . unwrap ( ) ;
292
+ let zstd_writer_channels = zstd_writer_channels
293
+ . iter ( )
294
+ . map ( |( s, _) | s. clone ( ) )
295
+ . collect :: < Vec < _ > > ( ) ;
296
+ let layers_to_chunks = layers_to_chunks. clone ( ) ;
297
+ let external_object_receiver = object_receiver. clone ( ) ;
298
+
299
+ move || {
300
+ if let Err ( e) = handle_external_object (
301
+ repository,
302
+ external_object_receiver,
303
+ zstd_writer_channels,
304
+ layers_to_chunks,
305
+ ) {
306
+ eprintln ! (
307
+ "handle_external_object returned with error: {}" ,
308
+ e. to_string( )
309
+ ) ;
310
+ return ;
311
+ }
312
+ }
313
+ } ) ;
314
+ } )
315
+ . collect :: < Vec < _ > > ( ) ;
316
+
317
+ return ( done_chan_sender, done_chan_recver, object_sender) ;
318
+ }
319
+
183
320
pub async fn pull ( & self ) -> Result < ( Sha256HashValue , Sha256HashValue ) > {
184
321
let ( _manifest_digest, raw_manifest) = self
185
322
. proxy
@@ -192,6 +329,7 @@ impl<'repo> ImageOp<'repo> {
192
329
let manifest = ImageManifest :: from_reader ( raw_manifest. as_slice ( ) ) ?;
193
330
let config_descriptor = manifest. config ( ) ;
194
331
let layers = manifest. layers ( ) ;
332
+
195
333
self . ensure_config ( layers, config_descriptor)
196
334
. await
197
335
. with_context ( || format ! ( "Failed to pull config {config_descriptor:?}" ) )
@@ -271,7 +409,7 @@ pub fn write_config(
271
409
let json = config. to_string ( ) ?;
272
410
let json_bytes = json. as_bytes ( ) ;
273
411
let sha256 = hash ( json_bytes) ;
274
- let mut stream = repo. create_stream ( Some ( sha256) , Some ( refs) ) ;
412
+ let mut stream = repo. create_stream ( Some ( sha256) , Some ( refs) , None ) ;
275
413
stream. write_inline ( json_bytes) ;
276
414
let id = repo. write_stream ( stream, None ) ?;
277
415
Ok ( ( sha256, id) )
0 commit comments