Skip to content

Commit cbfcf72

Browse files
committed
Improved README.md for a 0.1.0
1 parent 9e3b542 commit cbfcf72

File tree

4 files changed

+120
-6
lines changed

4 files changed

+120
-6
lines changed

README.md

Lines changed: 91 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,99 @@
11
# PersistentCollections.jl
22

3-
Julia AbstractDict and AbstractSet data structures persisted (ACID) to disk.
3+
Julia `Dict` and `Set` data structures safely persisted to disk.
4+
5+
All collections are backed by [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database) - a super fast B-Tree based embedded KV database with ACID guarantees.
6+
As with other B-Tree based databases, reads are generally faster than writes. LMDB is no exception, although write performance is relatively good too (expect 1k-10k TPS).
7+
8+
Care was taken to make the data structures thread-safe. LMDB handles most of the locking well; we just have to serialise the writes to an LMDB Environment in Julia so that
9+
multiple threads do not attempt to write at once (otherwise a deadlock will occur).
410

511
## Quick Start
612

7-
TODO
13+
1. Install this package:
14+
```julia
15+
import Pkg
16+
Pkg.add(url="https://github.com/blenessy/PersistentCollections.jl.git")
17+
```
18+
1. Create an `LMDB.Environment` in a directory called `data` (in your current working directory):
19+
```julia
20+
using PersistentCollections
21+
env = LMDB.Environment("data")
22+
```
23+
1. Create an `AbstractDict` in your LMDB environment:
24+
```julia
25+
dict = PersistentDict{String,String}(env)
26+
```
27+
1. Use it as any other dict:
28+
```julia
29+
dict["foo"] = "bar"
30+
@assert dict["foo"] == "bar"
31+
@assert collect(keys(dict)) == ["foo"]
32+
@assert collect(values(dict)) == ["bar"]
33+
```
34+
1. (Optional) Note the asymmetric performance characteristics of an LMDB (B-Tree) based database:
35+
```julia
36+
@time dict["bar"] = "baz"; # Writes to LMDB (B-Tree) are relatively slow
37+
@time dict["bar"]; # Reads are very fast though :)
38+
```
39+
40+
## User Guide
41+
42+
### Dynamic types
43+
44+
It is possible to create a persistent collection of `Any` type, although some methods will not be able to convert the value to the correct type because no type metadata is stored in the DB.
45+
Most notably the `getindex` method (e.g. `dict["foo"]`) will not return a converted value. To mitigate this limitation, use the `get` method, which includes a default value.
46+
The type of the default value (if other than `nothing`) will be used to convert the value to the desired type.
47+
48+
```julia
49+
env = LMDB.Environment("data")
50+
dict = PersistentDict{Any,Any}(env)
51+
dict["foo"] = "bar"
52+
dict["foo"] # PersistentCollections.LMDB.MDBValue{Nothing}(0x0000000000000003, Ptr{Nothing} @0x000000012c806ffd, nothing)
53+
get(dict, "foo", "") # "bar"
54+
convert(String, dict["foo"]) # "bar"
55+
```
56+
57+
### Multiple persistent collections in the same LMDB Environment
58+
59+
This is possible, and useful if you need transactional consistency between multiple persistent collections:
60+
61+
1. Create your `LMDB.Environment` with "named database" support by specifying the number of persistent collections you want with the `maxdbs` keyword argument:
62+
```julia
63+
env = LMDB.Environment("data", maxdbs=2)
64+
```
65+
2. Instantiate your persistent collections with a unique (within LMDB env.) id:
66+
```julia
67+
dict1 = PersistentDict{String,String}(env, id="mydict1")
68+
dict2 = PersistentDict{String,Int}(env, id="mydict2")
69+
```
70+
71+
### Danger Zone: Manual sync writes to disc
72+
73+
Yes, you can expect a significant increase in write throughput if you are willing to risk losing your last written transactions.
74+
Please note that database integrity (risk of corruption) is not in danger here.
75+
76+
```julia
77+
unsafe_env = LMDB.Environment("data", flags=LMDB.MDB_NOSYNC)
78+
unsafe_dict = PersistentDict{String,String}(unsafe_env)
79+
flush(unsafe_env) do
80+
unsafe_dict["foo"] = "bar"
81+
unsafe_dict["foo"] = "baz"
82+
end # <== data is flushed to disk here
83+
```
84+
85+
This is equivalent to:
86+
87+
```julia
88+
unsafe_env = LMDB.Environment("data", flags=LMDB.MDB_NOSYNC)
89+
unsafe_dict = PersistentDict{String,String}(unsafe_env)
90+
try
91+
unsafe_dict["foo"] = "bar"
92+
unsafe_dict["foo"] = "baz"
93+
finally
94+
flush(unsafe_env)
95+
end
96+
```
897

998
## Running Tests
1099

src/PersistentCollections.jl

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,24 @@
11
module PersistentCollections
2+
export LMDB, PersistentDict
3+
24
module LMDB
35
include(joinpath(@__DIR__, "lmdb.jl"))
46
end
57

6-
abstract type PersistentAbstractDict{K,V} <: AbstractDict{K,V} end
7-
struct PersistentDict{K,V} <: PersistentAbstractDict{K,V}
8+
struct PersistentDict{K,V} <: AbstractDict{K,V}
89
env::LMDB.Environment
910
id::String
1011
PersistentDict{K,V}(env; id="") where {K,V} = new{K,V}(env, id)
1112
end
1213

14+
# Compact display: the concrete type followed by the quoted `id` in parentheses (empty parentheses when `id` is empty).
Base.show(io::IO, d::PersistentDict) = print(io, typeof(d), "(", isempty(d.id) ? "" : repr(d.id), ")")
15+
1316
function Base.get(d::PersistentDict{K,V}, key::K, default::D) where {K,V,D}
1417
isopen(d.env) || error("Environment is closed")
1518
txn = d.env.rotxn[Threads.threadid()]
1619
LMDB.mdb_txn_renew(txn)
1720
try
1821
dbi = LMDB.mdb_dbi_open(txn, d.id, zero(Cuint))
19-
@assert txn != C_NULL && !iszero(dbi) "txn and/or dbi handles are not initialized"
2022
mdbkey, mdbval = convert(LMDB.MDBValue, key), LMDB.MDBValue()
2123
found = GC.@preserve mdbkey LMDB.mdb_get!(txn, dbi, pointer(mdbkey), pointer(mdbval))
2224
found || return default
@@ -154,4 +156,16 @@ module PersistentCollections
154156
Base.keys(d::PersistentDict{K,V}) where {K,V} = MDBKeyCursor{K,V}(create_atomic_cursor(d))
155157
Base.values(d::PersistentDict{K,V}) where {K,V} = MDBValCursor{K,V}(create_atomic_cursor(d))
156158

159+
function Base.length(d::PersistentDict)
160+
isopen(d.env) || error("Environment is closed")
161+
txn = d.env.rotxn[Threads.threadid()]
162+
LMDB.mdb_txn_renew(txn)
163+
try
164+
dbi = LMDB.mdb_dbi_open(txn, d.id, zero(Cuint))
165+
return convert(Int, LMDB.mdb_stat(txn, dbi).ms_entries)
166+
finally
167+
LMDB.mdb_txn_reset(txn)
168+
end
169+
end
170+
157171
end # module

src/lmdb.jl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,12 @@ function mdb_env_stat(env::Ptr{Cvoid})
225225
return statref[]
226226
end
227227

228+
function mdb_stat(txn::Ptr{Cvoid}, dbi::Cuint)
229+
statref = Ref(MDBStat())
230+
@chkres ccall((:mdb_stat, liblmdb), Cint, (Ptr{Cvoid}, Cuint, Ptr{MDBStat}), txn, dbi, statref)
231+
return statref[]
232+
end
233+
228234
mutable struct Environment
229235
handle::Ptr{Cvoid}
230236
path::String
@@ -237,6 +243,8 @@ const OPENED_ENVS = Dict{String,Environment}()
237243
function Environment(path::String; flags::Cuint=zero(Cuint), mode::Cmode_t=0o755, maxdbs=0, mapsize=10485760, maxreaders=126, rotxnflags::Cuint=DEFAULT_ROTXN_FLAGS)
238244
env = get(OPENED_ENVS, path, nothing)
239245
if isnothing(env)
246+
# create all directories needed to host the data
247+
mkpath(iszero(flags & MDB_NOSUBDIR) ? path : dirname(path), mode=mode)
240248
rotxn = [C_NULL for i in 1:Threads.nthreads()]
241249
env = Environment(mdb_env_create(), path, rotxn, ReentrantLock())
242250
mdb_env_set_maxdbs(env.handle, convert(Cuint, maxdbs))
@@ -260,6 +268,8 @@ function Environment(path::String; flags::Cuint=zero(Cuint), mode::Cmode_t=0o755
260268
return env
261269
end
262270

271+
Base.show(io::IO, e::Environment) = print(io, typeof(e), "(", repr(e.path), ")")
272+
263273
Base.isopen(env::Environment) = env.handle != C_NULL
264274

265275
function Base.close(env::Environment)
@@ -276,6 +286,7 @@ function Base.close(env::Environment)
276286
return false
277287
end
278288

289+
Base.flush(env::Environment) = mdb_env_sync(env.handle, one(Cint))
279290
function Base.flush(func::Function, env::Environment)
280291
try
281292
func()

test/runtests.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
using Test
22
using BenchmarkTools
33

4-
using PersistentCollections: LMDB, PersistentDict
4+
using PersistentCollections
55

66
const ENV_DIR = "env.lmdb"
77
const UNSAFE_ENV_DIR = "unsafe_env.lmdb"

0 commit comments

Comments
 (0)