+use std::fs::File;
+use std::num::NonZeroUsize;
+use std::ptr::NonNull;
+
 use crate::config::Config;
 use crate::crates::Crate;
 use crate::experiments::Experiment;
@@ -7,6 +11,52 @@ use crate::results::{EncodedLog, EncodingType, ReadResults};
 use flate2::{write::GzEncoder, Compression};
 use indexmap::IndexMap;
 use tar::{Builder as TarBuilder, Header as TarHeader};
+use tempfile::tempfile;
+
+#[cfg(unix)]
+struct TempfileBackedBuffer {
+    _file: File,
+    mmap: NonNull<[u8]>,
+}
+
+#[cfg(unix)]
+impl TempfileBackedBuffer {
+    fn new(file: File) -> Fallible<TempfileBackedBuffer> {
+        let len = file.metadata()?.len().try_into().unwrap();
+        unsafe {
+            let base = nix::sys::mman::mmap(
+                None,
+                NonZeroUsize::new(len).unwrap(),
+                nix::sys::mman::ProtFlags::PROT_READ,
+                nix::sys::mman::MapFlags::MAP_PRIVATE,
+                Some(&file),
+                0,
+            )?;
+            let Some(base) = NonNull::new(base as *mut u8) else {
+                panic!("Failed to map file");
+            };
+            Ok(TempfileBackedBuffer {
+                _file: file,
+                mmap: NonNull::slice_from_raw_parts(base, len),
+            })
+        }
+    }
+
+    fn buffer(&self) -> &[u8] {
+        unsafe { self.mmap.as_ref() }
+    }
+}
+
+#[cfg(unix)]
+impl Drop for TempfileBackedBuffer {
+    fn drop(&mut self) {
+        unsafe {
+            if let Err(e) = nix::sys::mman::munmap(self.mmap.as_ptr() as *mut _, self.mmap.len()) {
+                eprintln!("Failed to unmap temporary file: {:?}", e);
+            }
+        }
+    }
+}
 
 #[derive(Serialize)]
 pub struct Archive {
@@ -92,6 +142,7 @@ fn iterate<'a, DB: ReadResults + 'a>(
     })
 }
 
+#[allow(unused_mut)]
 fn write_all_archive<DB: ReadResults, W: ReportWriter>(
     db: &DB,
     ex: &Experiment,
@@ -100,18 +151,37 @@ fn write_all_archive<DB: ReadResults, W: ReportWriter>(
     config: &Config,
 ) -> Fallible<Archive> {
     for i in 1..=RETRIES {
-        let mut all = TarBuilder::new(GzEncoder::new(Vec::new(), Compression::default()));
+        // We write this large-ish tarball into a tempfile, which moves the I/O to disk operations
+        // rather than keeping it in memory. This avoids complicating the code by doing incremental
+        // writes to S3 (requiring buffer management etc) while avoiding keeping the blob entirely
+        // in memory.
+        let backing = tempfile()?;
+        let mut all = TarBuilder::new(GzEncoder::new(backing, Compression::default()));
         for entry in iterate(db, ex, crates, config) {
             let entry = entry?;
             let mut header = entry.header();
             all.append_data(&mut header, &entry.path, &entry.log_bytes[..])?;
         }
 
-        let data = all.into_inner()?.finish()?;
-        let len = data.len();
+        let mut data = all.into_inner()?.finish()?;
+        let mut buffer;
+        let view;
+        #[cfg(unix)]
+        {
+            buffer = TempfileBackedBuffer::new(data)?;
+            view = buffer.buffer();
+        }
+        #[cfg(not(unix))]
+        {
+            use std::io::{Read, Seek};
+            data.rewind()?;
+            buffer = Vec::new();
+            data.read_to_end(&mut buffer)?;
+            view = &buffer[..];
+        }
         match dest.write_bytes(
             "logs-archives/all.tar.gz",
-            data,
+            view,
             &"application/gzip".parse().unwrap(),
             EncodingType::Plain,
         ) {
@@ -123,7 +193,10 @@ fn write_all_archive<DB: ReadResults, W: ReportWriter>(
                 std::thread::sleep(std::time::Duration::from_secs(2));
                 warn!(
                     "retry ({}/{}) writing logs-archives/all.tar.gz ({} bytes) (error: {:?})",
-                    i, RETRIES, len, e,
+                    i,
+                    RETRIES,
+                    view.len(),
+                    e,
                 );
                 continue;
             }
@@ -164,7 +237,7 @@ pub fn write_logs_archives<DB: ReadResults, W: ReportWriter>(
         let data = archive.into_inner()?.finish()?;
         dest.write_bytes(
             format!("logs-archives/{comparison}.tar.gz"),
-            data,
+            &data,
             &"application/gzip".parse().unwrap(),
             EncodingType::Plain,
         )?;
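
For readers unfamiliar with the pattern the diff adopts, here is a minimal standalone sketch of spilling data to a tempfile and then mapping it back as a read-only byte slice. It assumes the same `tempfile` and `nix` crates as the diff (a `nix::sys::mman::mmap` that takes an `Option<&File>`), plus `anyhow` for error handling; none of these names beyond what the diff itself shows are part of crater.

```rust
use std::io::Write;
use std::num::NonZeroUsize;

fn main() -> anyhow::Result<()> {
    // Stage the payload on disk instead of holding it in memory.
    let file = {
        let mut f = tempfile::tempfile()?;
        f.write_all(b"example payload")?;
        f
    };

    // Map the tempfile read-only and borrow it as a byte slice,
    // mirroring what TempfileBackedBuffer::new() does in the diff.
    let len = file.metadata()?.len() as usize;
    let base = unsafe {
        nix::sys::mman::mmap(
            None,
            NonZeroUsize::new(len).unwrap(),
            nix::sys::mman::ProtFlags::PROT_READ,
            nix::sys::mman::MapFlags::MAP_PRIVATE,
            Some(&file),
            0,
        )?
    };
    let view = unsafe { std::slice::from_raw_parts(base as *const u8, len) };
    assert_eq!(view, b"example payload");

    // Unmap while the file handle is still alive.
    unsafe { nix::sys::mman::munmap(base, len)? };
    Ok(())
}
```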