Skip to content

Commit 2bc13c1

Browse files
authored
Merge pull request #39 from rmalmain/fast_block_snapshot
Block Device fast snapshot implementation.
2 parents c92d7c2 + 587303b commit 2bc13c1

File tree

11 files changed

+570
-210
lines changed

11 files changed

+570
-210
lines changed

block/block-backend.c

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "qemu/option.h"
2828
#include "trace.h"
2929
#include "migration/misc.h"
30+
#include "libafl_extras/syx-snapshot/syx-snapshot.h"
3031

3132
/* Number of coroutines to reserve per attached device model */
3233
#define COROUTINE_POOL_RESERVATION 64
@@ -42,6 +43,9 @@ typedef struct BlockBackendAioNotifier {
4243

4344
struct BlockBackend {
4445
char *name;
46+
//// --- Begin LibAFL code ---
47+
guint name_hash;
48+
//// --- End LibAFL code ---
4549
int refcnt;
4650
BdrvChild *root;
4751
AioContext *ctx;
@@ -714,6 +718,10 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
714718
error_setg(errp, "Device with id '%s' already exists", name);
715719
return false;
716720
}
721+
if (blk_by_name_hash(g_str_hash(name))) {
722+
error_setg(errp, "Device with name hash '%x' already exists", g_str_hash(name));
723+
return false;
724+
}
717725
if (bdrv_find_node(name)) {
718726
error_setg(errp,
719727
"Device name '%s' conflicts with an existing node name",
@@ -722,6 +730,11 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
722730
}
723731

724732
blk->name = g_strdup(name);
733+
//// --- Begin LibAFL code ---
734+
735+
blk->name_hash = g_str_hash(blk->name);
736+
737+
//// --- End LibAFL code ---
725738
QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
726739
return true;
727740
}
@@ -753,6 +766,12 @@ const char *blk_name(const BlockBackend *blk)
753766
return blk->name ?: "";
754767
}
755768

769+
guint blk_name_hash(const BlockBackend* blk)
770+
{
771+
IO_CODE();
772+
return blk->name_hash;
773+
}
774+
756775
/*
757776
* Return the BlockBackend with name @name if it exists, else null.
758777
* @name must not be null.
@@ -771,6 +790,22 @@ BlockBackend *blk_by_name(const char *name)
771790
return NULL;
772791
}
773792

793+
/*
794+
* Return the BlockBackend with name hash @name_hash if it exists, else null.
795+
*/
796+
BlockBackend *blk_by_name_hash(guint name_hash)
797+
{
798+
BlockBackend *blk = NULL;
799+
800+
GLOBAL_STATE_CODE();
801+
while ((blk = blk_next(blk)) != NULL) {
802+
if (name_hash == blk->name_hash) {
803+
return blk;
804+
}
805+
}
806+
return NULL;
807+
}
808+
774809
/*
775810
* Return the BlockDriverState attached to @blk if any, else null.
776811
*/
@@ -1624,8 +1659,14 @@ static void coroutine_fn blk_aio_read_entry(void *opaque)
16241659
QEMUIOVector *qiov = rwco->iobuf;
16251660

16261661
assert(qiov->size == acb->bytes);
1627-
rwco->ret = blk_co_do_preadv_part(rwco->blk, rwco->offset, acb->bytes, qiov,
1662+
1663+
if (!syx_snapshot_cow_cache_read_entry(rwco->blk, rwco->offset, acb->bytes, qiov, 0, rwco->flags)) {
1664+
rwco->ret = blk_co_do_preadv_part(rwco->blk, rwco->offset, acb->bytes, qiov,
16281665
0, rwco->flags);
1666+
} else {
1667+
rwco->ret = 0;
1668+
}
1669+
16291670
blk_aio_complete(acb);
16301671
}
16311672

@@ -1636,8 +1677,14 @@ static void coroutine_fn blk_aio_write_entry(void *opaque)
16361677
QEMUIOVector *qiov = rwco->iobuf;
16371678

16381679
assert(!qiov || qiov->size == acb->bytes);
1639-
rwco->ret = blk_co_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
1680+
1681+
if (!syx_snapshot_cow_cache_write_entry(rwco->blk, rwco->offset, acb->bytes, qiov, 0, rwco->flags)) {
1682+
rwco->ret = blk_co_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
16401683
qiov, 0, rwco->flags);
1684+
} else {
1685+
rwco->ret = 0;
1686+
}
1687+
16411688
blk_aio_complete(acb);
16421689
}
16431690

include/qemu/osdep.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,7 @@ void QEMU_ERROR("code path is reachable")
470470

471471
/*
472472
* Round number down to multiple. Requires that d be a power of 2 (see
473-
* QEMU_ALIGN_UP for a safer but slower version on arbitrary
473+
* QEMU_ALIGN_DOWN for a safer but slower version on arbitrary
474474
* numbers); works even if d is a smaller type than n.
475475
*/
476476
#ifndef ROUND_DOWN

include/sysemu/block-backend-global-state.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk);
4848

4949
void blk_remove_all_bs(void);
5050
BlockBackend *blk_by_name(const char *name);
51+
BlockBackend *blk_by_name_hash(guint name_hash);
5152
BlockBackend *blk_next(BlockBackend *blk);
5253
BlockBackend *blk_all_next(BlockBackend *blk);
5354
bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp);

include/sysemu/block-backend-io.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
*/
2525

2626
const char *blk_name(const BlockBackend *blk);
27+
guint blk_name_hash(const BlockBackend* blk);
2728

2829
BlockDriverState *blk_bs(BlockBackend *blk);
2930

libafl_extras/exit.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ void libafl_sync_exit_cpu(void)
6666
last_exit_reason.next_pc = 0;
6767
}
6868

69-
bool libafl_exit_asap(void) {
69+
/*
 * Query whether an expected VM exit has been requested, so the caller
 * can leave the CPU loop as soon as possible.
 *
 * NOTE(review): reads the file-scope `expected_exit` flag, which is set
 * elsewhere in this file (not visible in this hunk).
 */
bool libafl_exit_asap(void)
{
    return expected_exit;
}
7273

libafl_extras/exit.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ int libafl_qemu_remove_breakpoint(target_ulong pc);
2020

2121
enum libafl_exit_reason_kind {
2222
BREAKPOINT = 0,
23-
SYNC_BACKDOOR = 1
23+
SYNC_BACKDOOR = 1,
2424
};
2525

2626
struct libafl_exit_reason_breakpoint {

libafl_extras/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: [files(
22
'syx-snapshot/device-save.c',
33
'syx-snapshot/syx-snapshot.c',
4+
'syx-snapshot/syx-cow-cache.c',
45
'syx-snapshot/channel-buffer-writeback.c',
56
)])
67
specific_ss.add(files('exit.c', 'hook.c', 'jit.c'))
Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
#include "syx-cow-cache.h"
2+
3+
#include "sysemu/block-backend.h"
4+
5+
#define IS_POWER_OF_TWO(x) ((x != 0) && ((x & (x - 1)) == 0))
6+
7+
SyxCowCache* syx_cow_cache_new(void)
8+
{
9+
SyxCowCache* cache = g_new0(SyxCowCache, 2);
10+
11+
QTAILQ_INIT(&cache->layers);
12+
13+
return cache;
14+
}
15+
16+
/* Return a pointer to the @position-th element of @array (bounds-checked). */
static gchar* g_array_element_ptr(GArray* array, guint position)
{
    guint elem_size = g_array_get_element_size(array);

    assert(position < array->len);
    return &array->data[position * elem_size];
}
21+
22+
/*
 * Push a new, empty cache layer on top of @scc.
 *
 * @chunk_size: access granularity in bytes; must be a power of two.
 * @max_size:   maximum amount of data the layer may hold, in bytes;
 *              must be a multiple of @chunk_size.
 */
void syx_cow_cache_push_layer(SyxCowCache* scc, uint64_t chunk_size, uint64_t max_size)
{
    SyxCowCacheLayer* new_layer;

    /* Validate parameters before allocating anything. */
    assert(IS_POWER_OF_TWO(chunk_size));
    assert(!(max_size % chunk_size));

    new_layer = g_new0(SyxCowCacheLayer, 1);
    new_layer->cow_cache_devices = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);
    new_layer->chunk_size = chunk_size;
    /* Bug fix: this field counts chunks (it is compared against
     * GArray lengths in write_to_cache_layer()), but the original stored
     * the byte size, inflating the capacity limit by a factor of
     * chunk_size. Store the chunk count instead. */
    new_layer->max_nb_chunks = max_size / chunk_size;

    QTAILQ_INSERT_HEAD(&scc->layers, new_layer, next);
}
35+
36+
/*
 * Remove the most recently pushed layer from @scc.
 *
 * Not implemented yet — currently a no-op (see TODO).
 */
void syx_cow_cache_pop_layer(SyxCowCache* scc)
{
    // TODO
}
40+
41+
/*
 * GHFunc callback: drop every cached chunk of one device.
 * Keeps the device entry itself (hash table and GArray) allocated for reuse.
 */
static void flush_device_layer(gpointer _blk_name_hash, gpointer cache_device, gpointer _user_data)
{
    SyxCowCacheDevice* dev = cache_device;

    g_hash_table_remove_all(dev->positions);
    g_array_set_size(dev->data, 0);
}
48+
49+
/*
 * Drop all cached data held by the top-most layer of @scc, keeping the
 * layer itself (and any layers below it) in place.
 */
void syx_cow_cache_flush_highest_layer(SyxCowCache* scc)
{
    SyxCowCacheLayer* highest_layer = QTAILQ_FIRST(&scc->layers);

    /* Bug fix: guard against an empty layer stack; the original
     * dereferenced NULL when no layer had been pushed yet. */
    if (!highest_layer) {
        return;
    }

    g_hash_table_foreach(highest_layer->cow_cache_devices, flush_device_layer, NULL);
}
56+
57+
/*
 * Steal the layer stack of *@rhs into @lhs, then free *@rhs and NULL it
 * out so the caller cannot use it again.
 *
 * NOTE(review): the QTAILQ head is copied by struct assignment, so the
 * first element's back-link still points into the freed *rhs head; this
 * looks safe only for forward traversal — confirm no insert/remove is
 * done on @lhs->layers afterwards.
 * NOTE(review): any layers already held by @lhs are overwritten (leaked).
 */
void syx_cow_cache_move(SyxCowCache* lhs, SyxCowCache** rhs)
{
    SyxCowCache* src = *rhs;

    lhs->layers = src->layers;
    *rhs = NULL;
    g_free(src);
}
63+
64+
/*
 * Try to serve one chunk-sized read at @blk_offset from this device's
 * cache. On a hit, copy the cached chunk into @qiov at @qiov_offset and
 * return true; return false on a miss (caller falls through to lower
 * layers / the real device data).
 *
 * NOTE(review): @blk_offset is narrowed to guint by GUINT_TO_POINTER, so
 * offsets beyond 4 GiB would collide on 32-bit pointers — confirm the
 * supported device sizes.
 */
static bool read_chunk_from_cache_layer_device(SyxCowCacheDevice* sccd, QEMUIOVector* qiov, size_t qiov_offset, uint64_t blk_offset)
{
    gpointer data_position = NULL;
    bool found = g_hash_table_lookup_extended(sccd->positions, GUINT_TO_POINTER(blk_offset), NULL, &data_position);

    // cache hit
    if (found) {
        guint chunk_size = g_array_get_element_size(sccd->data);
        void* data_position_ptr = g_array_element_ptr(sccd->data, GPOINTER_TO_UINT(data_position));
        /* Bug fix: the copy used to live inside assert(); under NDEBUG
         * the read would silently vanish. Keep the side effect
         * unconditional and assert only on the result. */
        size_t copied = qemu_iovec_from_buf(qiov, qiov_offset, data_position_ptr, chunk_size);
        assert(copied == chunk_size);
    }

    return found;
}
77+
78+
// len must be smaller than nb bytes to next aligned to chunk of blk_offset.
79+
// static void write_to_cache_layer_device_unaligned(SyxCowCacheDevice* sccd, QEMUIOVector* qiov, size_t qiov_offset, uint64_t blk_offset, uint64_t len)
80+
// {
81+
// const uint64_t chunk_size = g_array_get_element_size(sccd->data);
82+
//
83+
// assert(ROUND_UP(blk_offset, chunk_size) - blk_offset <= len);
84+
// assert(IS_POWER_OF_TWO(chunk_size));
85+
//
86+
// uint64_t blk_offset_aligned = ROUND_DOWN(blk_offset, chunk_size);
87+
//
88+
// gpointer data_position = NULL;
89+
// bool found = g_hash_table_lookup_extended(sccd->positions, GUINT_TO_POINTER(blk_offset_aligned), NULL, &data_position);
90+
//
91+
// if (!found) {
92+
// data_position = GUINT_TO_POINTER(sccd->data->len);
93+
// sccd->data = g_array_set_size(sccd->data, sccd->data->len + 1);
94+
// g_hash_table_insert(sccd->positions, GUINT_TO_POINTER(blk_offset), data_position);
95+
// }
96+
//
97+
// void* data_position_ptr = g_array_element_ptr(sccd->data, GPOINTER_TO_UINT(data_position));
98+
//
99+
// assert(qemu_iovec_to_buf(qiov, qiov_offset, data_position_ptr, g_array_get_element_size(sccd->data)) ==
100+
// g_array_get_element_size(sccd->data));
101+
// }
102+
103+
// cache layer is allocated and all the basic checks are already done.
104+
static void write_chunk_to_cache_layer_device(SyxCowCacheDevice* sccd, QEMUIOVector* qiov, size_t qiov_offset, uint64_t blk_offset)
105+
{
106+
const uint64_t chunk_size = g_array_get_element_size(sccd->data);
107+
108+
gpointer data_position = NULL;
109+
bool found = g_hash_table_lookup_extended(sccd->positions, GUINT_TO_POINTER(blk_offset), NULL, &data_position);
110+
111+
if (!found) {
112+
data_position = GUINT_TO_POINTER(sccd->data->len);
113+
sccd->data = g_array_set_size(sccd->data, sccd->data->len + 1);
114+
g_hash_table_insert(sccd->positions, GUINT_TO_POINTER(blk_offset), data_position);
115+
}
116+
117+
void* data_position_ptr = g_array_element_ptr(sccd->data, GPOINTER_TO_UINT(data_position));
118+
119+
assert(qemu_iovec_to_buf(qiov, qiov_offset, data_position_ptr, chunk_size) ==
120+
chunk_size);
121+
}
122+
123+
/*
 * Look up @blk's per-device cache in layer @sccl and try to serve one
 * chunk at @blk_offset into @qiov. Returns true on a cache hit, false
 * if the device has no cache in this layer or the chunk is not cached.
 */
static bool read_chunk_from_cache_layer(SyxCowCacheLayer* sccl, BlockBackend* blk, QEMUIOVector* qiov, size_t qiov_offset, uint64_t blk_offset)
{
    SyxCowCacheDevice* dev;

    assert(!(qiov->size % sccl->chunk_size));

    dev = g_hash_table_lookup(sccl->cow_cache_devices, GINT_TO_POINTER(blk_name_hash(blk)));

    /* Nothing registered for this device in this layer. */
    if (!dev) {
        return false;
    }

    assert(dev->data);

    return read_chunk_from_cache_layer_device(dev, qiov, qiov_offset, blk_offset);
}
139+
140+
// Returns false if could not write to current layer.
141+
static bool write_to_cache_layer(SyxCowCacheLayer* sccl, BlockBackend* blk, int64_t offset, int64_t bytes, QEMUIOVector* qiov)
142+
{
143+
if (qiov->size % sccl->chunk_size) {
144+
// todo: determine if it is worth developing an unaligned access version.
145+
printf("error: 0x%zx %% 0x%lx == 0x%lx\n", qiov->size, sccl->chunk_size, qiov->size % sccl->chunk_size);
146+
exit(1);
147+
}
148+
149+
SyxCowCacheDevice* cache_entry = g_hash_table_lookup(sccl->cow_cache_devices, GINT_TO_POINTER(blk_name_hash(blk)));
150+
151+
if (unlikely(!cache_entry)) {
152+
cache_entry = g_new0(SyxCowCacheDevice, 1);
153+
cache_entry->data = g_array_sized_new(false, false, sccl->chunk_size, INITIAL_NB_CHUNKS_PER_DEVICE);
154+
cache_entry->positions = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);
155+
g_hash_table_insert(sccl->cow_cache_devices, GINT_TO_POINTER(blk_name_hash(blk)), cache_entry);
156+
}
157+
158+
assert(cache_entry && cache_entry->data);
159+
160+
if (cache_entry->data->len + (qiov->size / sccl->chunk_size) > sccl->max_nb_chunks) {
161+
return false;
162+
}
163+
164+
// write cached page
165+
uint64_t blk_offset = offset;
166+
size_t qiov_offset = 0;
167+
for (; qiov_offset < qiov->size; blk_offset += sccl->chunk_size, qiov_offset += sccl->chunk_size) {
168+
write_chunk_to_cache_layer_device(cache_entry, qiov, qiov_offset, blk_offset);
169+
}
170+
171+
return true;
172+
}
173+
174+
void syx_cow_cache_read_entry(SyxCowCache* scc, BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t _qiov_offset,
175+
BdrvRequestFlags flags)
176+
{
177+
SyxCowCacheLayer* layer;
178+
uint64_t blk_offset = offset;
179+
size_t qiov_offset = 0;
180+
uint64_t chunk_size = 0;
181+
182+
// printf("[%s] Read 0x%zx bytes @addr %lx\n", blk_name(blk), qiov->size, offset);
183+
184+
// First read the backing block device normally.
185+
assert(blk_co_preadv(blk, offset, bytes, qiov, flags) >= 0);
186+
187+
// Then fix the chunks that have been read from before.
188+
if (!QTAILQ_EMPTY(&scc->layers)) {
189+
for (;qiov_offset < qiov->size; blk_offset += chunk_size, qiov_offset += chunk_size) {
190+
QTAILQ_FOREACH(layer, &scc->layers, next) {
191+
chunk_size = layer->chunk_size;
192+
if (read_chunk_from_cache_layer(layer, blk, qiov, qiov_offset, blk_offset)) {
193+
break;
194+
}
195+
}
196+
}
197+
}
198+
}
199+
200+
bool syx_cow_cache_write_entry(SyxCowCache* scc, BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
201+
BdrvRequestFlags flags)
202+
{
203+
SyxCowCacheLayer* layer;
204+
205+
// printf("[%s] Write 0x%zx bytes @addr %lx\n", blk_name(blk), qiov->size, offset);
206+
207+
layer = QTAILQ_FIRST(&scc->layers);
208+
if (layer) {
209+
assert(write_to_cache_layer(layer, blk, offset, bytes, qiov));
210+
return true;
211+
} else {
212+
return false;
213+
}
214+
}

0 commit comments

Comments
 (0)