use crate::error::Result;
use crate::storage::{compression::CompressionAlgorithm, FileRange};
-use anyhow::{bail, Context as _};
+use anyhow::Context as _;
use memmap2::MmapOptions;
use rusqlite::{Connection, OptionalExtension};
use serde::de::DeserializeSeed;
use serde::de::{IgnoredAny, MapAccess, Visitor};
use serde::{Deserialize, Deserializer, Serialize};
+use std::io::BufReader;
use std::{collections::HashMap, fmt, fs, fs::File, io, io::Read, path::Path};
+use tempfile::TempPath;

use super::sqlite_pool::SqliteConnectionPool;

static SQLITE_FILE_HEADER: &[u8] = b"SQLite format 3\0";

-#[derive(Deserialize, Serialize)]
+#[derive(Deserialize, Serialize, PartialEq, Eq, Debug)]
pub(crate) struct FileInfo {
    range: FileRange,
    compression: CompressionAlgorithm,
@@ -27,63 +29,87 @@ impl FileInfo {
    }
}

-#[derive(Serialize)]
+#[derive(Deserialize, Serialize)]
struct Index {
    files: HashMap<String, FileInfo>,
}

-/// create an archive index based on a zipfile.
-///
-/// Will delete the destination file if it already exists.
-pub(crate) fn create<R: io::Read + io::Seek, P: AsRef<Path>>(
-    zipfile: &mut R,
-    destination: P,
-) -> Result<()> {
-    if destination.as_ref().exists() {
-        fs::remove_file(&destination)?;
-    }
+impl Index {
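+    /// Write this index into a new SQLite file at `destination`.
+    ///
+    /// Any file already existing at that path is deleted first.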
+    pub(crate) fn write_sqlite<P: AsRef<Path>>(&self, destination: P) -> Result<()> {
+        let destination = destination.as_ref();
+        if destination.exists() {
+            fs::remove_file(destination)?;
+        }

-    let mut archive = zip::ZipArchive::new(zipfile)?;
+        let conn = rusqlite::Connection::open(destination)?;
+        conn.execute("PRAGMA synchronous = FULL", ())?;
+        conn.execute("BEGIN", ())?;
+        conn.execute(
+            "
+            CREATE TABLE files (
+                id INTEGER PRIMARY KEY,
+                path TEXT UNIQUE,
+                start INTEGER,
+                end INTEGER,
+                compression INTEGER
+            );
+            ",
+            (),
+        )?;

-    let conn = rusqlite::Connection::open(&destination)?;
-    conn.execute("PRAGMA synchronous = FULL", ())?;
-    conn.execute("BEGIN", ())?;
-    conn.execute(
-        "
-        CREATE TABLE files (
-            id INTEGER PRIMARY KEY,
-            path TEXT UNIQUE,
-            start INTEGER,
-            end INTEGER,
-            compression INTEGER
-        );
-        ",
-        (),
-    )?;
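+        // one row per archive member: its path, the byte range of its
+        // compressed data within the archive, and the compression algorithm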
+        for (name, info) in self.files.iter() {
+            conn.execute(
+                "INSERT INTO files (path, start, end, compression) VALUES (?, ?, ?, ?)",
+                (
+                    name,
+                    info.range.start(),
+                    info.range.end(),
+                    info.compression as i32,
+                ),
+            )?;
+        }

-    for i in 0..archive.len() {
-        let zf = archive.by_index(i)?;
+        conn.execute("CREATE INDEX idx_files_path ON files (path);", ())?;
+        conn.execute("END", ())?;
+        conn.execute("VACUUM", ())?;
+        Ok(())
+    }

-        let compression_bzip = CompressionAlgorithm::Bzip2 as i32;
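+    /// Build an index from a zip archive, recording the byte range of each
+    /// member's compressed data inside the file.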
+    pub(crate) fn from_zip<R: io::Read + io::Seek>(zipfile: &mut R) -> Result<Self> {
+        let mut archive = zip::ZipArchive::new(zipfile)?;

-        conn.execute(
-            "INSERT INTO files (path, start, end, compression) VALUES (?, ?, ?, ?)",
-            (
-                zf.name(),
-                zf.data_start(),
-                zf.data_start() + zf.compressed_size() - 1,
-                match zf.compression() {
-                    zip::CompressionMethod::Bzip2 => compression_bzip,
-                    c => bail!("unsupported compression algorithm {} in zip-file", c),
+        let mut index = Index {
+            files: HashMap::with_capacity(archive.len()),
+        };
+
+        for i in 0..archive.len() {
+            let zf = archive.by_index(i)?;
+
+            index.files.insert(
+                zf.name().to_owned(),
+                FileInfo {
+                    range: FileRange::new(
+                        zf.data_start(),
+                        zf.data_start() + zf.compressed_size() - 1,
+                    ),
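+                    // the archive is assumed to be built with bzip2
+                    // compression, so the algorithm is not read from the entry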
+                    compression: CompressionAlgorithm::Bzip2,
                },
-            ),
-        )?;
+            );
+        }
+        Ok(index)
    }
+}

-    conn.execute("CREATE INDEX idx_files_path ON files (path);", ())?;
-    conn.execute("END", ())?;
-    conn.execute("VACUUM", ())?;
-
+/// create an archive index based on a zipfile.
+///
+/// Will delete the destination file if it already exists.
+pub(crate) fn create<R: io::Read + io::Seek, P: AsRef<Path>>(
+    zipfile: &mut R,
+    destination: P,
+) -> Result<()> {
+    Index::from_zip(zipfile)?
+        .write_sqlite(&destination)
+        .context("error writing SQLite index")?;
    Ok(())
}

@@ -227,7 +253,7 @@ fn find_in_sqlite_index(conn: &Connection, search_for: &str) -> Result<Option<Fi
/// > OFFSET SIZE DESCRIPTION
/// > 0 16 Header string: "SQLite format 3\000"
/// > [...]
-fn is_sqlite_file<P: AsRef<Path>>(archive_index_path: P) -> Result<bool> {
+pub(crate) fn is_sqlite_file<P: AsRef<Path>>(archive_index_path: P) -> Result<bool> {
    let mut f = File::open(archive_index_path)?;

    let mut buffer = [0; 16];
@@ -259,6 +285,20 @@ pub(crate) fn find_in_file<P: AsRef<Path>>(
    }
}

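+/// Convert an existing CBOR index file into a SQLite index.
+///
+/// Returns the path of a temporary file containing the new index.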
+pub(crate) fn convert_to_sqlite_index<P: AsRef<Path>>(path: P) -> Result<TempPath> {
+    let path = path.as_ref();
+    let index: Index = { serde_cbor::from_reader(BufReader::new(File::open(path)?))? };
+
+    // write the new index into a temporary file so reads from ongoing requests
+    // can continue on the old index until the new one is fully written.
+    let tmp_path = tempfile::NamedTempFile::new()?.into_temp_path();
+    index
+        .write_sqlite(&tmp_path)
+        .context("error writing SQLite index")?;
+
+    Ok(tmp_path)
+}
+
#[cfg(test)]
mod tests {
    use super::*;
@@ -270,29 +310,7 @@ mod tests {
        zipfile: &mut R,
        writer: &mut W,
    ) -> Result<()> {
-        let mut archive = zip::ZipArchive::new(zipfile)?;
-
-        // get file locations
-        let mut files: HashMap<String, FileInfo> = HashMap::with_capacity(archive.len());
-        for i in 0..archive.len() {
-            let zf = archive.by_index(i)?;
-
-            files.insert(
-                zf.name().to_string(),
-                FileInfo {
-                    range: FileRange::new(
-                        zf.data_start(),
-                        zf.data_start() + zf.compressed_size() - 1,
-                    ),
-                    compression: match zf.compression() {
-                        zip::CompressionMethod::Bzip2 => CompressionAlgorithm::Bzip2,
-                        c => bail!("unsupported compression algorithm {} in zip-file", c),
-                    },
-                },
-            );
-        }
-
-        serde_cbor::to_writer(writer, &Index { files }).context("serialization error")
+        serde_cbor::to_writer(writer, &Index::from_zip(zipfile)?).context("serialization error")
    }

    fn create_test_archive() -> fs::File {
@@ -312,6 +330,38 @@ mod tests {
        tf
    }

+    #[test]
+    fn convert_to_sqlite() {
+        let mut tf = create_test_archive();
+        let mut cbor_buf = Vec::new();
+        create_cbor_index(&mut tf, &mut cbor_buf).unwrap();
+        let mut cbor_index_file = tempfile::NamedTempFile::new().unwrap();
+        io::copy(&mut &cbor_buf[..], &mut cbor_index_file).unwrap();
+
+        assert!(!is_sqlite_file(&cbor_index_file).unwrap());
+
+        let original_fi = find_in_file(
+            cbor_index_file.path(),
+            "testfile1",
+            &SqliteConnectionPool::default(),
+        )
+        .unwrap()
+        .unwrap();
+
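+        // converting to SQLite must preserve the lookup result for the same path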
+        let sqlite_index_file = convert_to_sqlite_index(cbor_index_file).unwrap();
+        assert!(is_sqlite_file(&sqlite_index_file).unwrap());
+
+        let migrated_fi = find_in_file(
+            sqlite_index_file,
+            "testfile1",
+            &SqliteConnectionPool::default(),
+        )
+        .unwrap()
+        .unwrap();
+
+        assert_eq!(migrated_fi, original_fi);
+    }
+
    #[test]
    fn index_create_save_load_cbor_direct() {
        let mut tf = create_test_archive();