Skip to content

Commit 17c0197

Browse files
committed
Implement storage with custom filename key
Allows to serialize tags without overhead and fragility of storing filenames, using semantically-meaningful representation instead.
1 parent 3796cf3 commit 17c0197

File tree

6 files changed

+259
-190
lines changed

6 files changed

+259
-190
lines changed

src/lib/multi-key-file-storage/dune

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@
44
(libraries core_kernel bin_prot mina_stdlib)
55
(preprocess
66
(pps ppx_jane ppx_version))
7+
(modules_without_implementation intf)
78
(instrumentation
89
(backend bisect_ppx)))
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
module type S = sig
2+
(** Tag representing the location and metadata of a stored value *)
3+
type 'a tag
4+
5+
(** Writer object used to write values to the single-file database *)
6+
type writer_t
7+
8+
(** Type that represents the key used to identify the file *)
9+
type filename_key
10+
11+
(** Write a value to the database.
12+
13+
[write_value writer bin_prot_module value] serializes [value] using the
14+
provided bin_prot serializer and returns a [tag] that can be used to read the value later.
15+
16+
Example (assuming the default implementation with [type filename_key = string]):
17+
{[
18+
write_values_exn "my.db" ~f:(fun writer ->
19+
let tag1 = write_value writer (module Int) 42 in
20+
let tag2 = write_value writer (module String) "hello" in
21+
(* ... store tags for later use ... *)
22+
)
23+
]}
24+
*)
25+
val write_value :
26+
writer_t -> (module Bin_prot.Binable.S with type t = 'a) -> 'a -> 'a tag
27+
28+
(** Write multiple keys to a database file.
29+
30+
The [filename] parameter specifies the target file.
31+
The file will be overwritten if exists (note, it is not appending).
32+
33+
The [f] parameter is a callback that receives a [write_value] function which can be
34+
called multiple times to write different key-value pairs to the database.
35+
36+
Each call to [write_value bin_prot_module value] serializes [value] using the
37+
provided bin_prot serializer and returns a [tag] that can be used to read the value later.
38+
39+
Example (assuming the default implementation with [type filename_key = string]):
40+
{[
41+
write_values_exn "my.db" ~f:(fun writer ->
42+
let tag1 = write_value writer (module Int) 42 in
43+
let tag2 = write_value writer (module String) "hello" in
44+
(* ... store tags for later use ... *)
45+
)
46+
]}
47+
*)
48+
val write_values_exn : f:(writer_t -> 'a) -> filename_key -> 'a
49+
50+
(** Read a value from the database using a tag.
51+
52+
[read m tag] takes a [tag] (obtained from a previous [write] operation)
53+
and a bin_prot module [m] to deserialize the stored bytes back into a typed value.
54+
55+
Returns [Ok value] on success, or [Error msg] if reading or deserialization fails.
56+
57+
Example:
58+
{[
59+
match read (module Int) tag1 with
60+
| Ok value -> Printf.printf "Read value: %d\n" value
61+
| Error msg -> Printf.eprintf "Error: %s\n" msg
62+
]}
63+
*)
64+
val read :
65+
(module Bin_prot.Binable.S with type t = 'a)
66+
-> 'a tag
67+
-> 'a Core_kernel.Or_error.t
68+
end
Lines changed: 116 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -1,106 +1,128 @@
11
open Core_kernel
2-
module Filename = Mina_stdlib.Bounded_types.String
2+
3+
(** Buffer size for writing: 128 KB *)
4+
let buffer_size = 131072
5+
6+
module type S = Intf.S
37

48
module Tag = struct
59
[%%versioned
610
module Stable = struct
711
module V1 = struct
8-
type 'a t =
9-
{ filename : Filename.Stable.V1.t; offset : int64; size : int }
12+
type ('filename_key, 'a) t =
13+
{ filename_key : 'filename_key; offset : int64; size : int }
1014
end
1115
end]
1216
end
1317

14-
type 'a tag = 'a Tag.t
18+
module Make_custom (Inputs : sig
19+
type filename_key
20+
21+
val filename : filename_key -> string
22+
end) :
23+
S
24+
with type 'a tag = (Inputs.filename_key, 'a) Tag.t
25+
and type filename_key = Inputs.filename_key = struct
26+
type 'a tag = (Inputs.filename_key, 'a) Tag.t
27+
28+
type filename_key = Inputs.filename_key
29+
30+
type writer_t =
31+
{ f : 'a. (module Bin_prot.Binable.S with type t = 'a) -> 'a -> 'a tag }
32+
33+
let write_value { f } = f
34+
35+
(* Flush buffer to file when it exceeds threshold *)
36+
let flush_buffer oc buffer =
37+
Out_channel.output_string oc (Buffer.contents buffer)
38+
39+
(* Write key function provided to the callback *)
40+
let make_writer ~oc ~filename_key ~buffer : writer_t =
41+
{ f =
42+
(fun (type a) (module B : Bin_prot.Binable.S with type t = a)
43+
(value : a) ->
44+
(* Serialize the value to a bigstring *)
45+
let serialized_size = B.bin_size_t value in
46+
let buf = Bigstring.create serialized_size in
47+
let written = B.bin_write_t buf ~pos:0 value in
48+
assert (written = serialized_size) ;
49+
50+
(* Convert bigstring to string for writing *)
51+
let data = Bigstring.to_string buf in
52+
53+
(* Create tag before writing *)
54+
let tag =
55+
{ Tag.filename_key
56+
; offset = Int64.of_int @@ Buffer.length buffer
57+
; size = serialized_size
58+
}
59+
in
60+
61+
(* Add to buffer *)
62+
Buffer.add_string buffer data ;
63+
64+
(* Flush if buffer is large enough *)
65+
if Buffer.length buffer >= buffer_size then (
66+
flush_buffer oc buffer ; Buffer.clear buffer ) ;
67+
68+
tag )
69+
}
70+
71+
(** Write multiple keys to a database file with buffered I/O *)
72+
let write_values_exn ~f filename_key =
73+
let do_writing oc =
74+
(* Buffer for accumulating writes *)
75+
let buffer = Buffer.create buffer_size in
76+
let writer = make_writer ~oc ~filename_key ~buffer in
77+
78+
(* Call user function with write_value *)
79+
let result = f writer in
80+
81+
(* Flush any remaining data *)
82+
if Buffer.length buffer > 0 then flush_buffer oc buffer ;
83+
84+
result
85+
in
86+
Out_channel.with_file
87+
(Inputs.filename filename_key)
88+
~binary:true ~f:do_writing
89+
90+
(** Read a value from the database using a tag *)
91+
let read :
92+
type a.
93+
(module Bin_prot.Binable.S with type t = a) -> a tag -> a Or_error.t =
94+
fun (module B : Bin_prot.Binable.S with type t = a) tag ->
95+
let do_reading ic =
96+
(* Seek to the specified offset *)
97+
In_channel.seek ic tag.offset ;
98+
99+
(* Read the exact number of bytes *)
100+
let buffer = Bytes.create tag.size in
101+
In_channel.really_input_exn ic ~buf:buffer ~pos:0 ~len:tag.size ;
102+
103+
(* Deserialize using bin_prot *)
104+
let bigstring = Bigstring.of_bytes buffer in
105+
let pos_ref = ref 0 in
106+
let%bind.Or_error value =
107+
Or_error.try_with ~backtrace:true
108+
@@ fun () -> B.bin_read_t bigstring ~pos_ref
109+
in
110+
if !pos_ref <> tag.size then
111+
Or_error.error_string
112+
(sprintf "Size mismatch: expected %d bytes, read %d bytes" tag.size
113+
!pos_ref )
114+
else Ok value
115+
in
116+
Or_error.tag ~tag:(Inputs.filename tag.filename_key)
117+
@@ Or_error.try_with_join ~backtrace:true
118+
@@ fun () ->
119+
In_channel.with_file
120+
(Inputs.filename tag.filename_key)
121+
~binary:true ~f:do_reading
122+
end
15123

16-
(** Buffer size for writing: 128 KB *)
17-
let buffer_size = 131072
124+
include Make_custom (struct
125+
type filename_key = string
18126

19-
(* Flush buffer to file when it exceeds threshold *)
20-
let flush_buffer oc buffer =
21-
Out_channel.output_string oc (Buffer.contents buffer)
22-
23-
type writer_t =
24-
{ f : 'a. (module Bin_prot.Binable.S with type t = 'a) -> 'a -> 'a tag }
25-
26-
let write_value :
27-
writer_t -> (module Bin_prot.Binable.S with type t = 'a) -> 'a -> 'a tag =
28-
fun { f } -> f
29-
30-
(* Write key function provided to the callback *)
31-
let make_writer ~oc ~filename ~buffer : writer_t =
32-
{ f =
33-
(fun (type a) (module B : Bin_prot.Binable.S with type t = a) (value : a) ->
34-
(* Serialize the value to a bigstring *)
35-
let serialized_size = B.bin_size_t value in
36-
let buf = Bigstring.create serialized_size in
37-
let written = B.bin_write_t buf ~pos:0 value in
38-
assert (written = serialized_size) ;
39-
40-
(* Convert bigstring to string for writing *)
41-
let data = Bigstring.to_string buf in
42-
43-
(* Create tag before writing *)
44-
let tag =
45-
{ Tag.filename
46-
; offset = Int64.of_int @@ Buffer.length buffer
47-
; size = serialized_size
48-
}
49-
in
50-
51-
(* Add to buffer *)
52-
Buffer.add_string buffer data ;
53-
54-
(* Flush if buffer is large enough *)
55-
if Buffer.length buffer >= buffer_size then (
56-
flush_buffer oc buffer ; Buffer.clear buffer ) ;
57-
58-
tag )
59-
}
60-
61-
(** Write multiple keys to a database file with buffered I/O *)
62-
let write_values_exn ~f filename =
63-
let do_writing oc =
64-
(* Buffer for accumulating writes *)
65-
let buffer = Buffer.create buffer_size in
66-
let writer = make_writer ~oc ~filename ~buffer in
67-
68-
(* Call user function with write_value *)
69-
let result = f writer in
70-
71-
(* Flush any remaining data *)
72-
if Buffer.length buffer > 0 then flush_buffer oc buffer ;
73-
74-
result
75-
in
76-
Out_channel.with_file filename ~binary:true ~f:do_writing
77-
78-
(** Read a value from the database using a tag *)
79-
let read :
80-
type a. (module Bin_prot.Binable.S with type t = a) -> a tag -> a Or_error.t
81-
=
82-
fun (module B : Bin_prot.Binable.S with type t = a) tag ->
83-
let do_reading ic =
84-
(* Seek to the specified offset *)
85-
In_channel.seek ic tag.offset ;
86-
87-
(* Read the exact number of bytes *)
88-
let buffer = Bytes.create tag.size in
89-
In_channel.really_input_exn ic ~buf:buffer ~pos:0 ~len:tag.size ;
90-
91-
(* Deserialize using bin_prot *)
92-
let bigstring = Bigstring.of_bytes buffer in
93-
let pos_ref = ref 0 in
94-
let%bind.Or_error value =
95-
Or_error.try_with ~backtrace:true
96-
@@ fun () -> B.bin_read_t bigstring ~pos_ref
97-
in
98-
if !pos_ref <> tag.size then
99-
Or_error.error_string
100-
(sprintf "Size mismatch: expected %d bytes, read %d bytes" tag.size
101-
!pos_ref )
102-
else Ok value
103-
in
104-
Or_error.tag ~tag:tag.filename
105-
@@ Or_error.try_with_join ~backtrace:true
106-
@@ fun () -> In_channel.with_file tag.filename ~binary:true ~f:do_reading
127+
let filename = ident
128+
end)

src/lib/multi-key-file-storage/multi_key_file_storage.mli

Lines changed: 7 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -4,71 +4,17 @@ module Tag : sig
44
[%%versioned:
55
module Stable : sig
66
module V1 : sig
7-
type 'a t
7+
type ('filename_key, 'a) t
88
end
99
end]
1010
end
1111

12-
(** Tag representing the location and metadata of a stored value *)
13-
type 'a tag = 'a Tag.t
12+
module type S = Intf.S
1413

15-
(** Writer object used to write values to the single-file database *)
16-
type writer_t
14+
include S with type 'a tag = (string, 'a) Tag.t and type filename_key = string
1715

18-
(** Write a value to the database.
19-
20-
[write_value writer bin_prot_module value] serializes [value] using the
21-
provided bin_prot serializer and returns a [tag] that can be used to read the value later.
22-
23-
Example:
24-
{[
25-
write_values_exn "my.db" ~f:(fun writer ->
26-
let tag1 = write_value writer (module Int) 42 in
27-
let tag2 = write_value writer (module String) "hello" in
28-
(* ... store tags for later use ... *)
29-
)
30-
]}
31-
*)
32-
val write_value :
33-
writer_t -> (module Bin_prot.Binable.S with type t = 'a) -> 'a -> 'a tag
16+
module Make_custom (Inputs : sig
17+
type filename_key
3418

35-
(** Write multiple keys to a database file.
36-
37-
The [filename] parameter specifies the target file.
38-
The file will be overwritten if exists (note, it is not appending).
39-
40-
The [f] parameter is a callback that receives a [write_value] function which can be
41-
called multiple times to write different key-value pairs to the database.
42-
43-
Each call to [write_value bin_prot_module value] serializes [value] using the
44-
provided bin_prot serializer and returns a [tag] that can be used to read the value later.
45-
46-
Example:
47-
{[
48-
write_values_exn "my.db" ~f:(fun writer ->
49-
let tag1 = write_value writer (module Int) 42 in
50-
let tag2 = write_value writer (module String) "hello" in
51-
(* ... store tags for later use ... *)
52-
)
53-
]}
54-
*)
55-
val write_values_exn : f:(writer_t -> 'a) -> string -> 'a
56-
57-
(** Read a value from the database using a tag.
58-
59-
[read m tag] takes a [tag] (obtained from a previous [write] operation)
60-
and a bin_prot module [m] to deserialize the stored bytes back into a typed value.
61-
62-
Returns [Ok value] on success, or [Error msg] if reading or deserialization fails.
63-
64-
Example:
65-
{[
66-
match read (module Int) tag1 with
67-
| Ok value -> Printf.printf "Read value: %d\n" value
68-
| Error msg -> Printf.eprintf "Error: %s\n" msg
69-
]}
70-
*)
71-
val read :
72-
(module Bin_prot.Binable.S with type t = 'a)
73-
-> 'a tag
74-
-> 'a Core_kernel.Or_error.t
19+
val filename : filename_key -> string
20+
end) : S with type filename_key = Inputs.filename_key

src/lib/multi-key-file-storage/tests/dune

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
core
77
core_kernel
88
bin_prot
9-
qcheck-alcotest
109
;; local libraries
11-
multi_key_file_storage)
10+
multi_key_file_storage
11+
data_hash_lib)
1212
(instrumentation
1313
(backend bisect_ppx))
1414
(preprocess

0 commit comments

Comments
 (0)