Skip to content

Commit ccaa77e

Browse files
committed
irmin-pack: remember expected mapping size in upper control file
1 parent 794b7f9 commit ccaa77e

File tree

9 files changed

+99
-70
lines changed

9 files changed

+99
-70
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
- Unhandled exceptions in GC worker process are now reported as a failure
4444
(#2163, @metanivek)
4545
- Fix the silent mode for the integrity checks. (#2179, @icristescu)
46+
- Fix file descriptor leak caused by `mmap`. (#2232, @art-w)
4647

4748
## 3.6.1 (2023-03-15)
4849

src/irmin-pack/unix/control_file.ml

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ module Serde = struct
180180
generation = x.generation;
181181
latest_gc_target_offset = x.suffix_start_offset;
182182
suffix_dead_bytes = Int63.zero;
183+
mapping_end_poff = None;
183184
}
184185
| T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13
185186
| T14 | T15 ->
@@ -200,14 +201,33 @@ module Serde = struct
200201
volume_num = 0;
201202
}
202203

204+
let upgrade_status_from_v4 = function
205+
| Payload.Upper.V4.From_v1_v2_post_upgrade x ->
206+
Latest.From_v1_v2_post_upgrade x
207+
| No_gc_yet -> No_gc_yet
208+
| Used_non_minimal_indexing_strategy -> Used_non_minimal_indexing_strategy
209+
| Gced x ->
210+
Gced
211+
{
212+
suffix_start_offset = x.suffix_start_offset;
213+
generation = x.generation;
214+
latest_gc_target_offset = x.latest_gc_target_offset;
215+
suffix_dead_bytes = x.suffix_dead_bytes;
216+
mapping_end_poff = None;
217+
}
218+
| T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13 | T14
219+
| T15 ->
220+
(* Unreachable *)
221+
assert false
222+
203223
let upgrade_from_v4 (pl : Payload.Upper.V4.t) : payload =
204224
{
205225
dict_end_poff = pl.dict_end_poff;
206226
appendable_chunk_poff = pl.appendable_chunk_poff;
207227
checksum = Int63.zero;
208228
chunk_start_idx = pl.chunk_start_idx;
209229
chunk_num = pl.chunk_num;
210-
status = pl.status;
230+
status = upgrade_status_from_v4 pl.status;
211231
upgraded_from = Some (Version.to_int `V4);
212232
volume_num = 0;
213233
}

src/irmin-pack/unix/control_file_intf.ml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,15 +193,16 @@ module Payload = struct
193193
end
194194

195195
module V5 = struct
196-
type gced = V4.gced = {
196+
type gced = {
197197
suffix_start_offset : int63;
198198
generation : int;
199199
latest_gc_target_offset : int63;
200200
suffix_dead_bytes : int63;
201+
mapping_end_poff : int63 option;
201202
}
202203
[@@deriving irmin]
203204

204-
type status = V4.status =
205+
type status =
205206
| From_v1_v2_post_upgrade of V3.from_v1_v2_post_upgrade
206207
| No_gc_yet
207208
| Used_non_minimal_indexing_strategy
@@ -239,6 +240,8 @@ module Payload = struct
239240
New fields
240241
241242
- [volume_num] stores the number of volumes in the lower layer.
243+
- [mapping_end_poff] stores the mapping file size (optional if missing
244+
or unknown after a migration from V4).
242245
243246
Changed fields
244247

src/irmin-pack/unix/file_manager.ml

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,13 @@ struct
7777
let register_suffix_consumer t ~after_flush =
7878
t.suffix_consumers <- { after_flush } :: t.suffix_consumers
7979

80-
let generation = function
81-
| Payload.From_v1_v2_post_upgrade _ | Used_non_minimal_indexing_strategy
82-
| No_gc_yet ->
83-
0
84-
| T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13 | T14
85-
| T15 ->
86-
(* Unreachable *)
87-
assert false
88-
| Gced x -> x.generation
80+
let get_gced = function Payload.Gced x -> Some x | _ -> None
81+
82+
let generation payload =
83+
match get_gced payload with Some x -> x.generation | None -> 0
84+
85+
let mapping_size payload =
86+
match get_gced payload with Some x -> x.mapping_end_poff | None -> None
8987

9088
let notify_reload_consumers consumers =
9189
List.fold_left
@@ -215,20 +213,24 @@ struct
215213

216214
module Layout = Irmin_pack.Layout.V5
217215

218-
let open_prefix ~root ~generation =
216+
let open_prefix ~root ~generation ~mapping_size =
219217
let open Result_syntax in
220218
if generation = 0 then Ok None
221219
else
222220
let mapping = Layout.mapping ~generation ~root in
223221
let data = Layout.prefix ~root ~generation in
224-
let* mapping_size = Io.size_of_path mapping in
222+
let* mapping_size =
223+
match mapping_size with
224+
| Some size -> Ok size
225+
| None -> Io.size_of_path mapping
226+
in
225227
let mapping_size = Int63.to_int mapping_size in
226228
let+ prefix = Sparse.open_ro ~mapping_size ~mapping ~data in
227229
Some prefix
228230

229-
let reopen_prefix t ~generation =
231+
let reopen_prefix t ~generation ~mapping_size =
230232
let open Result_syntax in
231-
let* some_prefix = open_prefix ~root:t.root ~generation in
233+
let* some_prefix = open_prefix ~root:t.root ~generation ~mapping_size in
232234
match some_prefix with
233235
| None -> Ok ()
234236
| Some _ ->
@@ -312,12 +314,12 @@ struct
312314
let use_fsync = Irmin_pack.Conf.use_fsync config in
313315
let indexing_strategy = Conf.indexing_strategy config in
314316
let pl : Payload.t = Control.payload control in
315-
let generation =
317+
let generation, mapping_size =
316318
match pl.status with
317319
| From_v1_v2_post_upgrade _ | No_gc_yet
318320
| Used_non_minimal_indexing_strategy ->
319-
0
320-
| Gced x -> x.generation
321+
(0, None)
322+
| Gced x -> (x.generation, x.mapping_end_poff)
321323
| T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13 | T14
322324
| T15 ->
323325
assert false
@@ -350,7 +352,7 @@ struct
350352
let cb _ = suffix_requires_a_flush_exn (get_instance ()) in
351353
make_suffix ~auto_flush_threshold ~auto_flush_procedure:(`External cb)
352354
in
353-
let* prefix = open_prefix ~root ~generation in
355+
let* prefix = open_prefix ~root ~generation ~mapping_size in
354356
let* dict =
355357
let path = Layout.dict ~root in
356358
let auto_flush_threshold =
@@ -437,7 +439,10 @@ struct
437439
in
438440
(* Step 3.2. Potentially reload prefix *)
439441
let* () =
440-
if gen0 = gen1 then Ok () else reopen_prefix t ~generation:gen1
442+
if gen0 = gen1 then Ok ()
443+
else
444+
reopen_prefix t ~generation:gen1
445+
~mapping_size:(mapping_size pl1.status)
441446
in
442447
(* Step 3.3. Potentially reload lower *)
443448
if gen0 = gen1 && pl0.volume_num = pl1.volume_num then Ok ()
@@ -577,6 +582,7 @@ struct
577582
generation;
578583
latest_gc_target_offset = Int63.zero;
579584
suffix_dead_bytes = Int63.zero;
585+
mapping_end_poff = Some Int63.zero;
580586
};
581587
}
582588
in
@@ -768,7 +774,9 @@ struct
768774
Suffix.open_ro ~root ~appendable_chunk_poff ~start_idx ~chunk_num
769775
~dead_header_size
770776
in
771-
let* prefix = open_prefix ~root ~generation in
777+
let* prefix =
778+
open_prefix ~root ~generation ~mapping_size:(mapping_size status)
779+
in
772780
let* dict =
773781
let path = Layout.dict ~root in
774782
Dict.open_ro ~path ~end_poff:dict_end_poff ~dead_header_size
@@ -833,8 +841,8 @@ struct
833841
| `Unknown_major_pack_version _ ) as e ->
834842
e)
835843

836-
let swap t ~generation ~suffix_start_offset ~chunk_start_idx ~chunk_num
837-
~suffix_dead_bytes ~latest_gc_target_offset ~volume =
844+
let swap t ~generation ~mapping_size ~suffix_start_offset ~chunk_start_idx
845+
~chunk_num ~suffix_dead_bytes ~latest_gc_target_offset ~volume =
838846
let open Result_syntax in
839847
[%log.debug
840848
"Gc in main: swap gen %d; suffix start %a; chunk start idx %d; chunk num \
@@ -845,7 +853,8 @@ struct
845853
let pl = Control.payload t.control in
846854

847855
(* Step 1. Reopen files *)
848-
let* () = reopen_prefix t ~generation in
856+
let mapping_size = Some mapping_size in
857+
let* () = reopen_prefix t ~generation ~mapping_size in
849858
let* () =
850859
reopen_suffix t ~chunk_start_idx ~chunk_num
851860
~appendable_chunk_poff:pl.appendable_chunk_poff
@@ -870,6 +879,7 @@ struct
870879
generation;
871880
latest_gc_target_offset;
872881
suffix_dead_bytes;
882+
mapping_end_poff = mapping_size;
873883
}
874884
in
875885

@@ -975,8 +985,7 @@ struct
975985
let lower = t.lower in
976986
cleanup ~root ~generation ~chunk_start_idx ~chunk_num ~lower
977987

978-
let create_one_commit_store t config ~generation ~latest_gc_target_offset
979-
~suffix_start_offset commit_key =
988+
let create_one_commit_store t config gced commit_key =
980989
let open Result_syntax in
981990
let src_root = t.root in
982991
let dst_root = Irmin_pack.Conf.root config in
@@ -992,15 +1001,7 @@ struct
9921001
in
9931002
let* () = Suffix.close suffix in
9941003
(* Step 3. Create the control file and close it. *)
995-
let status =
996-
Payload.Gced
997-
{
998-
suffix_start_offset;
999-
generation;
1000-
latest_gc_target_offset;
1001-
suffix_dead_bytes = Int63.zero;
1002-
}
1003-
in
1004+
let status = Payload.Gced gced in
10041005
let dict_end_poff = Io.size_of_path dst_dict |> Errs.raise_if_error in
10051006
let pl =
10061007
{

src/irmin-pack/unix/file_manager_intf.ml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ module type S = sig
263263
val swap :
264264
t ->
265265
generation:int ->
266+
mapping_size:int63 ->
266267
suffix_start_offset:int63 ->
267268
chunk_start_idx:int ->
268269
chunk_num:int ->
@@ -292,9 +293,7 @@ module type S = sig
292293
val create_one_commit_store :
293294
t ->
294295
Irmin.Backend.Conf.t ->
295-
generation:int ->
296-
latest_gc_target_offset:int63 ->
297-
suffix_start_offset:int63 ->
296+
Control_file.Payload.Upper.Latest.gced ->
298297
Index.key Pack_key.t ->
299298
(unit, [> open_rw_error | close_error ]) result
300299
(** [create_one_commit_store t conf generation new_store_root key] is called

src/irmin-pack/unix/gc.ml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -282,9 +282,14 @@ module Make (Args : Gc_args.S) = struct
282282
match (status, gc_output) with
283283
| `Success, Ok gc_results ->
284284
Lwt.return
285-
( t.latest_gc_target_offset,
286-
t.new_suffix_start_offset,
287-
gc_results.mapping_size )
285+
{
286+
Control_file_intf.Payload.Upper.Latest.generation =
287+
Fm.generation t.fm + 1;
288+
latest_gc_target_offset = t.latest_gc_target_offset;
289+
suffix_start_offset = t.new_suffix_start_offset;
290+
suffix_dead_bytes = Int63.zero;
291+
mapping_end_poff = Some gc_results.mapping_size;
292+
}
288293
| _ ->
289294
let r = gc_errors status gc_output |> Errs.raise_if_error in
290295
Lwt.return r

src/irmin-pack/unix/gc.mli

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,10 @@ module Make (Args : Gc_args.S) : sig
5454

5555
val cancel : t -> bool
5656

57-
val finalise_without_swap : t -> (int63 * int63) Lwt.t
57+
val finalise_without_swap :
58+
t -> Control_file_intf.Payload.Upper.Latest.gced Lwt.t
5859
(** Waits for the current gc to finish and returns immediately without
5960
swapping the files and doing the other finalisation steps from [finalise].
60-
61-
It returns the [latest_gc_target_offset] and the
62-
[new_suffix_start_offset]. *)
61+
Returns the [gced] status to create a fresh control file for the snapshot. *)
6362
end
6463
with module Args = Args

src/irmin-pack/unix/gc_worker.ml

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ module Make (Args : Gc_args.S) = struct
282282
Live.to_list live_entries
283283
in
284284

285-
let () =
285+
let mapping_size =
286286
(* Step 4. Create the new prefix. *)
287287
stats := Gc_stats.Worker.finish_current_step !stats "prefix: start";
288288
let mapping =
@@ -294,9 +294,9 @@ module Make (Args : Gc_args.S) = struct
294294
(* Step 5. Transfer to the new prefix, flush and close. *)
295295
[%log.debug "GC: transfering to the new prefix"];
296296
stats := Gc_stats.Worker.finish_current_step !stats "prefix: transfer";
297-
Errors.finalise_exn (fun outcome ->
297+
Errors.finalise_exn (fun _ ->
298298
Sparse.Ao.flush prefix
299-
>>= (fun _ -> Sparse.Ao.close prefix >>= fun _ -> Ok outcome)
299+
>>= (fun _ -> Sparse.Ao.close prefix)
300300
|> Errs.log_if_error "GC: Close prefix after data copy")
301301
@@ fun () ->
302302
(* Step 5.1. Transfer all. *)
@@ -308,24 +308,27 @@ module Make (Args : Gc_args.S) = struct
308308
live_entries;
309309
Int63.to_int (Sparse.Ao.mapping_size prefix)
310310
in
311-
(* Step 5.2. Update the parent commits to be dangling: reopen the new
312-
prefix, this time in write-only as we have to
313-
modify data inside the file. *)
314-
stats :=
315-
Gc_stats.Worker.finish_current_step !stats
316-
"prefix: rewrite commit parents";
317-
let prefix =
318-
Sparse.Wo.open_wo ~mapping_size ~mapping ~data |> Errs.raise_if_error
311+
let () =
312+
(* Step 5.2. Update the parent commits to be dangling: reopen the new
313+
prefix, this time in write-only as we have to
314+
modify data inside the file. *)
315+
stats :=
316+
Gc_stats.Worker.finish_current_step !stats
317+
"prefix: rewrite commit parents";
318+
let prefix =
319+
Sparse.Wo.open_wo ~mapping_size ~mapping ~data |> Errs.raise_if_error
320+
in
321+
Errors.finalise_exn (fun _outcome ->
322+
Sparse.Wo.fsync prefix
323+
>>= (fun _ -> Sparse.Wo.close prefix)
324+
|> Errs.log_if_error "GC: Close prefix after parent rewrite")
325+
@@ fun () ->
326+
let write_exn = Sparse.Wo.write_exn prefix in
327+
List.iter
328+
(fun key -> transfer_parent_commit_exn ~write_exn key)
329+
(Commit_value.parents commit)
319330
in
320-
Errors.finalise_exn (fun _outcome ->
321-
Sparse.Wo.fsync prefix
322-
>>= (fun _ -> Sparse.Wo.close prefix)
323-
|> Errs.log_if_error "GC: Close prefix after parent rewrite")
324-
@@ fun () ->
325-
let write_exn = Sparse.Wo.write_exn prefix in
326-
List.iter
327-
(fun key -> transfer_parent_commit_exn ~write_exn key)
328-
(Commit_value.parents commit)
331+
Int63.of_int mapping_size
329332
in
330333
let () = report_new_file_sizes ~root ~generation stats |> ignore in
331334

src/irmin-pack/unix/store.ml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -357,16 +357,14 @@ module Maker (Config : Conf.S) = struct
357357
let () =
358358
if not launched then Errs.raise_error `Forbidden_during_gc
359359
in
360-
let* latest_gc_target_offset, suffix_start_offset =
360+
let* gced =
361361
match t.running_gc with
362362
| None -> assert false
363363
| Some { gc; _ } -> Gc.finalise_without_swap gc
364364
in
365-
let generation = File_manager.generation t.fm + 1 in
366365
let config = Irmin.Backend.Conf.add t.config Conf.Key.root path in
367366
let () =
368-
File_manager.create_one_commit_store t.fm config ~generation
369-
~latest_gc_target_offset ~suffix_start_offset commit_key
367+
File_manager.create_one_commit_store t.fm config gced commit_key
370368
|> Errs.raise_if_error
371369
in
372370
let branch_path = Irmin_pack.Layout.V4.branch ~root:path in

0 commit comments

Comments
 (0)