Skip to content

Commit 03ad27f

Browse files
authored
Merge pull request #2232 from art-w/no-mmap
2 parents 20048d1 + ccaa77e commit 03ad27f

File tree

14 files changed

+227
-174
lines changed

14 files changed

+227
-174
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
- Unhandled exceptions in GC worker process are now reported as a failure
4444
(#2163, @metanivek)
4545
- Fix the silent mode for the integrity checks. (#2179, @icristescu)
46+
- Fix file descriptor leak caused by `mmap`. (#2232, @art-w)
4647

4748
## 3.6.1 (2023-03-15)
4849

src/irmin-pack/unix/control_file.ml

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ module Serde = struct
180180
generation = x.generation;
181181
latest_gc_target_offset = x.suffix_start_offset;
182182
suffix_dead_bytes = Int63.zero;
183+
mapping_end_poff = None;
183184
}
184185
| T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13
185186
| T14 | T15 ->
@@ -200,14 +201,33 @@ module Serde = struct
200201
volume_num = 0;
201202
}
202203

204+
let upgrade_status_from_v4 = function
205+
| Payload.Upper.V4.From_v1_v2_post_upgrade x ->
206+
Latest.From_v1_v2_post_upgrade x
207+
| No_gc_yet -> No_gc_yet
208+
| Used_non_minimal_indexing_strategy -> Used_non_minimal_indexing_strategy
209+
| Gced x ->
210+
Gced
211+
{
212+
suffix_start_offset = x.suffix_start_offset;
213+
generation = x.generation;
214+
latest_gc_target_offset = x.latest_gc_target_offset;
215+
suffix_dead_bytes = x.suffix_dead_bytes;
216+
mapping_end_poff = None;
217+
}
218+
| T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13 | T14
219+
| T15 ->
220+
(* Unreachable *)
221+
assert false
222+
203223
let upgrade_from_v4 (pl : Payload.Upper.V4.t) : payload =
204224
{
205225
dict_end_poff = pl.dict_end_poff;
206226
appendable_chunk_poff = pl.appendable_chunk_poff;
207227
checksum = Int63.zero;
208228
chunk_start_idx = pl.chunk_start_idx;
209229
chunk_num = pl.chunk_num;
210-
status = pl.status;
230+
status = upgrade_status_from_v4 pl.status;
211231
upgraded_from = Some (Version.to_int `V4);
212232
volume_num = 0;
213233
}

src/irmin-pack/unix/control_file_intf.ml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,15 +193,16 @@ module Payload = struct
193193
end
194194

195195
module V5 = struct
196-
type gced = V4.gced = {
196+
type gced = {
197197
suffix_start_offset : int63;
198198
generation : int;
199199
latest_gc_target_offset : int63;
200200
suffix_dead_bytes : int63;
201+
mapping_end_poff : int63 option;
201202
}
202203
[@@deriving irmin]
203204

204-
type status = V4.status =
205+
type status =
205206
| From_v1_v2_post_upgrade of V3.from_v1_v2_post_upgrade
206207
| No_gc_yet
207208
| Used_non_minimal_indexing_strategy
@@ -239,6 +240,8 @@ module Payload = struct
239240
New fields
240241
241242
- [volume_num] stores the number of volumes in the lower layer.
243+
- [mapping_end_poff] stores the mapping file size (optional if missing
244+
or unknown after a migration from V4).
242245
243246
Changed fields
244247

src/irmin-pack/unix/file_manager.ml

Lines changed: 36 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,13 @@ struct
7777
let register_suffix_consumer t ~after_flush =
7878
t.suffix_consumers <- { after_flush } :: t.suffix_consumers
7979

80-
let generation = function
81-
| Payload.From_v1_v2_post_upgrade _ | Used_non_minimal_indexing_strategy
82-
| No_gc_yet ->
83-
0
84-
| T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13 | T14
85-
| T15 ->
86-
(* Unreachable *)
87-
assert false
88-
| Gced x -> x.generation
80+
let get_gced = function Payload.Gced x -> Some x | _ -> None
81+
82+
let generation payload =
83+
match get_gced payload with Some x -> x.generation | None -> 0
84+
85+
let mapping_size payload =
86+
match get_gced payload with Some x -> x.mapping_end_poff | None -> None
8987

9088
let notify_reload_consumers consumers =
9189
List.fold_left
@@ -215,18 +213,24 @@ struct
215213

216214
module Layout = Irmin_pack.Layout.V5
217215

218-
let open_prefix ~root ~generation =
216+
let open_prefix ~root ~generation ~mapping_size =
219217
let open Result_syntax in
220218
if generation = 0 then Ok None
221219
else
222220
let mapping = Layout.mapping ~generation ~root in
223221
let data = Layout.prefix ~root ~generation in
224-
let+ prefix = Sparse.open_ro ~mapping ~data in
222+
let* mapping_size =
223+
match mapping_size with
224+
| Some size -> Ok size
225+
| None -> Io.size_of_path mapping
226+
in
227+
let mapping_size = Int63.to_int mapping_size in
228+
let+ prefix = Sparse.open_ro ~mapping_size ~mapping ~data in
225229
Some prefix
226230

227-
let reopen_prefix t ~generation =
231+
let reopen_prefix t ~generation ~mapping_size =
228232
let open Result_syntax in
229-
let* some_prefix = open_prefix ~root:t.root ~generation in
233+
let* some_prefix = open_prefix ~root:t.root ~generation ~mapping_size in
230234
match some_prefix with
231235
| None -> Ok ()
232236
| Some _ ->
@@ -310,12 +314,12 @@ struct
310314
let use_fsync = Irmin_pack.Conf.use_fsync config in
311315
let indexing_strategy = Conf.indexing_strategy config in
312316
let pl : Payload.t = Control.payload control in
313-
let generation =
317+
let generation, mapping_size =
314318
match pl.status with
315319
| From_v1_v2_post_upgrade _ | No_gc_yet
316320
| Used_non_minimal_indexing_strategy ->
317-
0
318-
| Gced x -> x.generation
321+
(0, None)
322+
| Gced x -> (x.generation, x.mapping_end_poff)
319323
| T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13 | T14
320324
| T15 ->
321325
assert false
@@ -348,7 +352,7 @@ struct
348352
let cb _ = suffix_requires_a_flush_exn (get_instance ()) in
349353
make_suffix ~auto_flush_threshold ~auto_flush_procedure:(`External cb)
350354
in
351-
let* prefix = open_prefix ~root ~generation in
355+
let* prefix = open_prefix ~root ~generation ~mapping_size in
352356
let* dict =
353357
let path = Layout.dict ~root in
354358
let auto_flush_threshold =
@@ -435,7 +439,10 @@ struct
435439
in
436440
(* Step 3.2. Potentially reload prefix *)
437441
let* () =
438-
if gen0 = gen1 then Ok () else reopen_prefix t ~generation:gen1
442+
if gen0 = gen1 then Ok ()
443+
else
444+
reopen_prefix t ~generation:gen1
445+
~mapping_size:(mapping_size pl1.status)
439446
in
440447
(* Step 3.3. Potentially reload lower *)
441448
if gen0 = gen1 && pl0.volume_num = pl1.volume_num then Ok ()
@@ -575,6 +582,7 @@ struct
575582
generation;
576583
latest_gc_target_offset = Int63.zero;
577584
suffix_dead_bytes = Int63.zero;
585+
mapping_end_poff = Some Int63.zero;
578586
};
579587
}
580588
in
@@ -766,7 +774,9 @@ struct
766774
Suffix.open_ro ~root ~appendable_chunk_poff ~start_idx ~chunk_num
767775
~dead_header_size
768776
in
769-
let* prefix = open_prefix ~root ~generation in
777+
let* prefix =
778+
open_prefix ~root ~generation ~mapping_size:(mapping_size status)
779+
in
770780
let* dict =
771781
let path = Layout.dict ~root in
772782
Dict.open_ro ~path ~end_poff:dict_end_poff ~dead_header_size
@@ -831,8 +841,8 @@ struct
831841
| `Unknown_major_pack_version _ ) as e ->
832842
e)
833843

834-
let swap t ~generation ~suffix_start_offset ~chunk_start_idx ~chunk_num
835-
~suffix_dead_bytes ~latest_gc_target_offset ~volume =
844+
let swap t ~generation ~mapping_size ~suffix_start_offset ~chunk_start_idx
845+
~chunk_num ~suffix_dead_bytes ~latest_gc_target_offset ~volume =
836846
let open Result_syntax in
837847
[%log.debug
838848
"Gc in main: swap gen %d; suffix start %a; chunk start idx %d; chunk num \
@@ -843,7 +853,8 @@ struct
843853
let pl = Control.payload t.control in
844854

845855
(* Step 1. Reopen files *)
846-
let* () = reopen_prefix t ~generation in
856+
let mapping_size = Some mapping_size in
857+
let* () = reopen_prefix t ~generation ~mapping_size in
847858
let* () =
848859
reopen_suffix t ~chunk_start_idx ~chunk_num
849860
~appendable_chunk_poff:pl.appendable_chunk_poff
@@ -868,6 +879,7 @@ struct
868879
generation;
869880
latest_gc_target_offset;
870881
suffix_dead_bytes;
882+
mapping_end_poff = mapping_size;
871883
}
872884
in
873885

@@ -973,8 +985,7 @@ struct
973985
let lower = t.lower in
974986
cleanup ~root ~generation ~chunk_start_idx ~chunk_num ~lower
975987

976-
let create_one_commit_store t config ~generation ~latest_gc_target_offset
977-
~suffix_start_offset commit_key =
988+
let create_one_commit_store t config gced commit_key =
978989
let open Result_syntax in
979990
let src_root = t.root in
980991
let dst_root = Irmin_pack.Conf.root config in
@@ -990,15 +1001,7 @@ struct
9901001
in
9911002
let* () = Suffix.close suffix in
9921003
(* Step 3. Create the control file and close it. *)
993-
let status =
994-
Payload.Gced
995-
{
996-
suffix_start_offset;
997-
generation;
998-
latest_gc_target_offset;
999-
suffix_dead_bytes = Int63.zero;
1000-
}
1001-
in
1004+
let status = Payload.Gced gced in
10021005
let dict_end_poff = Io.size_of_path dst_dict |> Errs.raise_if_error in
10031006
let pl =
10041007
{

src/irmin-pack/unix/file_manager_intf.ml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ module type S = sig
263263
val swap :
264264
t ->
265265
generation:int ->
266+
mapping_size:int63 ->
266267
suffix_start_offset:int63 ->
267268
chunk_start_idx:int ->
268269
chunk_num:int ->
@@ -292,9 +293,7 @@ module type S = sig
292293
val create_one_commit_store :
293294
t ->
294295
Irmin.Backend.Conf.t ->
295-
generation:int ->
296-
latest_gc_target_offset:int63 ->
297-
suffix_start_offset:int63 ->
296+
Control_file.Payload.Upper.Latest.gced ->
298297
Index.key Pack_key.t ->
299298
(unit, [> open_rw_error | close_error ]) result
300299
(** [create_one_commit_store t conf generation new_store_root key] is called

src/irmin-pack/unix/gc.ml

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -107,15 +107,16 @@ module Make (Args : Gc_args.S) = struct
107107
latest_gc_target_offset;
108108
}
109109

110-
let swap_and_purge t removable_chunk_num modified_volume suffix_params =
110+
let swap_and_purge t (gc_results : Worker.gc_results) =
111+
let removable_chunk_num = List.length gc_results.removable_chunk_idxs in
111112
let { generation; latest_gc_target_offset; _ } = t in
112113
let Worker.
113114
{
114115
start_offset = suffix_start_offset;
115116
chunk_start_idx;
116117
dead_bytes = suffix_dead_bytes;
117118
} =
118-
suffix_params
119+
gc_results.suffix_params
119120
in
120121
(* Calculate chunk num in main process since more chunks could have been
121122
added while GC was running. GC process only tells us how many chunks are
@@ -126,8 +127,9 @@ module Make (Args : Gc_args.S) = struct
126127
is guaranteed by the GC process. *)
127128
assert (chunk_num >= 1);
128129

129-
Fm.swap t.fm ~generation ~suffix_start_offset ~chunk_start_idx ~chunk_num
130-
~suffix_dead_bytes ~latest_gc_target_offset ~volume:modified_volume
130+
Fm.swap t.fm ~generation ~mapping_size:gc_results.mapping_size
131+
~suffix_start_offset ~chunk_start_idx ~chunk_num ~suffix_dead_bytes
132+
~latest_gc_target_offset ~volume:gc_results.modified_volume
131133

132134
let unlink_all { root; generation; _ } removable_chunk_idxs =
133135
(* Unlink suffix chunks *)
@@ -231,33 +233,22 @@ module Make (Args : Gc_args.S) = struct
231233
let result =
232234
let open Result_syntax in
233235
match (status, gc_output) with
234-
| ( `Success,
235-
Ok
236-
{
237-
suffix_params;
238-
removable_chunk_idxs;
239-
stats = worker_stats;
240-
modified_volume;
241-
} ) ->
236+
| `Success, Ok gc_results ->
242237
let partial_stats =
243238
Gc_stats.Main.finish_current_step partial_stats
244239
"swap and purge"
245240
in
246-
let* () =
247-
swap_and_purge t
248-
(List.length removable_chunk_idxs)
249-
modified_volume suffix_params
250-
in
241+
let* () = swap_and_purge t gc_results in
251242
let partial_stats =
252243
Gc_stats.Main.finish_current_step partial_stats "unlink"
253244
in
254-
if t.unlink then unlink_all t removable_chunk_idxs;
245+
if t.unlink then unlink_all t gc_results.removable_chunk_idxs;
255246

256247
let stats =
257248
let after_suffix_end_offset =
258249
Dispatcher.end_offset t.dispatcher
259250
in
260-
Gc_stats.Main.finalise partial_stats worker_stats
251+
Gc_stats.Main.finalise partial_stats gc_results.stats
261252
~after_suffix_end_offset
262253
in
263254
Stats.report_latest_gc stats;
@@ -289,8 +280,16 @@ module Make (Args : Gc_args.S) = struct
289280
let* status = Async.await t.task in
290281
let gc_output = read_gc_output ~root:t.root ~generation:t.generation in
291282
match (status, gc_output) with
292-
| `Success, Ok _ ->
293-
Lwt.return (t.latest_gc_target_offset, t.new_suffix_start_offset)
283+
| `Success, Ok gc_results ->
284+
Lwt.return
285+
{
286+
Control_file_intf.Payload.Upper.Latest.generation =
287+
Fm.generation t.fm + 1;
288+
latest_gc_target_offset = t.latest_gc_target_offset;
289+
suffix_start_offset = t.new_suffix_start_offset;
290+
suffix_dead_bytes = Int63.zero;
291+
mapping_end_poff = Some gc_results.mapping_size;
292+
}
294293
| _ ->
295294
let r = gc_errors status gc_output |> Errs.raise_if_error in
296295
Lwt.return r

src/irmin-pack/unix/gc.mli

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,10 @@ module Make (Args : Gc_args.S) : sig
5454

5555
val cancel : t -> bool
5656

57-
val finalise_without_swap : t -> (int63 * int63) Lwt.t
57+
val finalise_without_swap :
58+
t -> Control_file_intf.Payload.Upper.Latest.gced Lwt.t
5859
(** Waits for the current gc to finish and returns immediately without
5960
swapping the files and doing the other finalisation steps from [finalise].
60-
61-
It returns the [latest_gc_target_offset] and the
62-
[new_suffix_start_offset]. *)
61+
Returns the [gced] status to create a fresh control file for the snapshot. *)
6362
end
6463
with module Args = Args

0 commit comments

Comments
 (0)