Skip to content

Commit 78cf581

Browse files
author
Dwight Guth
authored
Implement serialization/deserialization of new algorithm (#1051)
This PR implements a new version of the proof hint, version 9, which uses the newly created serialization and deserialization functions to serialize terms during the proof hint generation. Not only does this new serialization format take up less memory, it also is roughly 3x faster to serialize. Note: there is one known issue with this serializer/deserializer: it incorrectly specifies the second sort parameter of the `inj` symbol when deserializing. We choose to ignore this known issue due to the fact that this deserializer is being consumed solely by code which transforms the resulting terms into an order-sorted representation by removing the injections entirely.
1 parent 38d8a5a commit 78cf581

File tree

18 files changed

+671
-158
lines changed

18 files changed

+671
-158
lines changed

bindings/python/ast.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -439,12 +439,16 @@ void bind_proof_trace(py::module_ &m) {
439439
.def_property_readonly("trace", &llvm_rewrite_trace::get_trace)
440440
.def_static(
441441
"parse",
442-
[](py::bytes const &bytes) {
443-
proof_trace_parser parser(false, false);
442+
[](py::bytes const &bytes, kore_header const &header) {
443+
proof_trace_parser parser(false, false, header);
444444
auto str = std::string(bytes);
445445
return parser.parse_proof_trace(str);
446446
},
447-
py::arg("bytes"));
447+
py::arg("bytes"), py::arg("header"));
448+
449+
py::class_<kore_header, std::shared_ptr<kore_header>>(
450+
proof_trace, "kore_header")
451+
.def(py::init(&kore_header::create), py::arg("path"));
448452
}
449453

450454
PYBIND11_MODULE(_kllvm, m) {

include/kllvm/binary/ProofTraceParser.h

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ class proof_trace_parser {
265265
private:
266266
bool verbose_;
267267
bool expand_terms_;
268+
[[maybe_unused]] kore_header const &header_;
268269

269270
// Caller needs to check that there are at least 8 bytes remaining in the
270271
// stream before peeking
@@ -337,30 +338,19 @@ class proof_trace_parser {
337338

338339
template <typename It>
339340
sptr<kore_pattern> parse_kore_term(It &ptr, It end, uint64_t &pattern_len) {
340-
if (std::distance(ptr, end) < 11U) {
341+
if (std::distance(ptr, end) < 9U) {
341342
return nullptr;
342343
}
344+
It old_ptr = ptr;
343345
if (detail::read<char>(ptr, end) != '\x7F'
344346
|| detail::read<char>(ptr, end) != 'K'
345-
|| detail::read<char>(ptr, end) != 'O'
346347
|| detail::read<char>(ptr, end) != 'R'
347-
|| detail::read<char>(ptr, end) != 'E') {
348+
|| detail::read<char>(ptr, end) != '2') {
348349
return nullptr;
349350
}
350-
auto version = detail::read_version(ptr, end);
351-
352-
if (!read_uint64(ptr, end, pattern_len)) {
353-
return nullptr;
354-
}
355-
356-
if (std::distance(ptr, end) < pattern_len) {
357-
return nullptr;
358-
}
359-
if (pattern_len > 0 && std::distance(ptr, end) > pattern_len) {
360-
end = std::next(ptr, pattern_len);
361-
}
362-
363-
return detail::read(ptr, end, version);
351+
auto result = detail::read_v2(ptr, end, header_);
352+
pattern_len = ptr - old_ptr;
353+
return result;
364354
}
365355

366356
template <typename It>
@@ -736,7 +726,8 @@ class proof_trace_parser {
736726
}
737727

738728
public:
739-
proof_trace_parser(bool verbose, bool expand_terms);
729+
proof_trace_parser(
730+
bool verbose, bool expand_terms, kore_header const &header);
740731

741732
std::optional<llvm_rewrite_trace>
742733
parse_proof_trace_from_file(std::string const &filename);

include/kllvm/binary/deserializer.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,36 @@
66
#include <kllvm/binary/version.h>
77

88
#include <cstddef>
9+
#include <cstdio>
910
#include <cstring>
1011
#include <vector>
1112

1213
#include <iostream>
1314

1415
namespace kllvm {
1516

17+
class kore_header {
18+
private:
19+
std::vector<uint8_t> arities_;
20+
std::vector<ptr<kore_symbol>> symbols_;
21+
22+
public:
23+
kore_header(FILE *in);
24+
static std::unique_ptr<kore_header> create(std::string const &path) {
25+
FILE *f = fopen(path.c_str(), "rb");
26+
auto *result = new kore_header(f);
27+
fclose(f);
28+
return std::unique_ptr<kore_header>(result);
29+
}
30+
31+
[[nodiscard]] uint8_t get_arity(uint32_t offset) const {
32+
return arities_[offset];
33+
};
34+
[[nodiscard]] kore_symbol *get_symbol(uint32_t offset) const {
35+
return symbols_[offset].get();
36+
};
37+
};
38+
1639
namespace detail {
1740

1841
template <typename It>
@@ -249,6 +272,35 @@ sptr<kore_pattern> read(It &ptr, It end, binary_version version) {
249272
return term_stack[0];
250273
}
251274

275+
template <typename It>
276+
sptr<kore_pattern> read_v2(It &ptr, It end, kore_header const &header) {
277+
switch (peek(ptr)) {
278+
case 0: {
279+
++ptr;
280+
auto len = detail::read<uint64_t>(ptr, end);
281+
auto str = std::string((char *)&*ptr, (char *)(&*ptr + len));
282+
ptr += len + 1;
283+
return kore_string_pattern::create(str);
284+
}
285+
case 1: {
286+
++ptr;
287+
auto offset = detail::read<uint32_t>(ptr, end);
288+
auto arity = header.get_arity(offset);
289+
// TODO: we need to check if this PR is an `inj` symbol and adjust the
290+
// second sort parameter of the symbol to be equal to the sort of the
291+
// current pattern.
292+
auto symbol = header.get_symbol(offset);
293+
auto new_pattern = kore_composite_pattern::create(symbol);
294+
for (auto i = 0; i < arity; ++i) {
295+
auto child = read_v2(ptr, end, header);
296+
new_pattern->add_argument(child);
297+
}
298+
return new_pattern;
299+
}
300+
default: throw std::runtime_error("Bad term " + std::to_string(*ptr));
301+
}
302+
}
303+
252304
} // namespace detail
253305

254306
std::string file_contents(std::string const &fn, int max_bytes = -1);

include/kllvm/codegen/ProofEvent.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,6 @@ class proof_event {
5252
kore_composite_sort &sort, llvm::Value *output_file, llvm::Value *term,
5353
llvm::BasicBlock *insert_at_end);
5454

55-
/*
56-
* Emit a call that will serialize `config` to the specified `outputFile` as
57-
* binary KORE. This function does not require a sort, but the configuration
58-
* passed must be a top-level configuration.
59-
*/
60-
llvm::CallInst *emit_serialize_configuration(
61-
llvm::Value *output_file, llvm::Value *config,
62-
llvm::BasicBlock *insert_at_end);
63-
6455
/*
6556
* Emit a call that will serialize `value` to the specified `outputFile`.
6657
*/

include/runtime/header.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,13 +331,16 @@ void serialize_configurations(
331331
void serialize_configuration(
332332
block *subject, char const *sort, char **data_out, size_t *size_out,
333333
bool emit_size, bool use_intern);
334+
void serialize_configuration_v2(FILE *file, block *subject, uint32_t sort);
334335
void serialize_configuration_to_file(
335336
FILE *file, block *subject, bool emit_size, bool use_intern);
337+
void serialize_configuration_to_file_v2(FILE *file, block *subject);
336338
void write_uint64_to_file(FILE *file, uint64_t i);
337339
void write_bool_to_file(FILE *file, bool b);
338340
void serialize_term_to_file(
339341
FILE *file, void *subject, char const *sort, bool use_intern,
340342
bool k_item_inj = false);
343+
void serialize_term_to_file_v2(FILE *file, void *subject, uint64_t, bool);
341344
void serialize_raw_term_to_file(
342345
FILE *file, void *subject, char const *sort, bool use_intern);
343346
void print_variable_to_file(FILE *file, char const *varname);
@@ -360,6 +363,7 @@ bool hook_STRING_eq(SortString, SortString);
360363
char const *get_symbol_name_for_tag(uint32_t tag);
361364
char const *get_return_sort_for_tag(uint32_t tag);
362365
char const **get_argument_sorts_for_tag(uint32_t tag);
366+
uint32_t *get_argument_sorts_for_tag_v2(uint32_t tag);
363367
char const *top_sort(void);
364368

365369
bool symbol_is_instantiation(uint32_t tag);
@@ -382,6 +386,19 @@ using visitor = struct {
382386
writer *, rangemap *, char const *, char const *, char const *, void *);
383387
};
384388

389+
using serialize_visitor = struct {
390+
void (*visit_config)(writer *, block *, uint32_t, bool);
391+
void (*visit_map)(writer *, map *, uint32_t, uint32_t, uint32_t);
392+
void (*visit_list)(writer *, list *, uint32_t, uint32_t, uint32_t);
393+
void (*visit_set)(writer *, set *, uint32_t, uint32_t, uint32_t);
394+
void (*visit_int)(writer *, mpz_t, uint32_t);
395+
void (*visit_float)(writer *, floating *, uint32_t);
396+
void (*visit_bool)(writer *, bool, uint32_t);
397+
void (*visit_string_buffer)(writer *, stringbuffer *, uint32_t);
398+
void (*visit_m_int)(writer *, size_t *, size_t, uint32_t);
399+
void (*visit_range_map)(writer *, rangemap *, uint32_t, uint32_t, uint32_t);
400+
};
401+
385402
void print_map(
386403
writer *, map *, char const *, char const *, char const *, void *);
387404
void print_range_map(
@@ -392,6 +409,8 @@ void print_list(
392409
writer *, list *, char const *, char const *, char const *, void *);
393410
void visit_children(
394411
block *subject, writer *file, visitor *printer, void *state);
412+
void visit_children_for_serialize(
413+
block *subject, writer *file, serialize_visitor *printer);
395414

396415
stringbuffer *hook_BUFFER_empty(void);
397416
stringbuffer *hook_BUFFER_concat(stringbuffer *buf, string *s);
@@ -442,4 +461,16 @@ void sfprintf(writer *file, char const *fmt, Args &&...args) {
442461
}
443462
}
444463

464+
template <typename... Args>
465+
void sfwrite(void const *ptr, size_t size, size_t nmemb, writer *file) {
466+
if (file->file) {
467+
fwrite(ptr, size, nmemb, file->file);
468+
} else {
469+
std::string output;
470+
output.resize(size * nmemb);
471+
memcpy(output.data(), ptr, size * nmemb);
472+
hook_BUFFER_concat_raw(file->buffer, output.data(), output.size());
473+
}
474+
}
475+
445476
#endif // RUNTIME_HEADER_H

lib/ast/definition.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,19 @@ std::string get_raw_symbol_name(sort_category cat) {
7676

7777
void kore_definition::insert_reserved_symbols() {
7878
auto mod = kore_module::create("K-RAW-TERM");
79+
// syntax KItem ::= rawTerm(KItem)
7980
auto decl = kore_symbol_declaration::create("rawTerm", true);
81+
// syntax KItem ::= rawKTerm(K)
82+
auto k_decl = kore_symbol_declaration::create("rawKTerm", true);
8083
auto kitem = kore_composite_sort::create("SortKItem");
84+
auto k = kore_composite_sort::create("SortK");
8185

8286
decl->get_symbol()->add_sort(kitem);
8387
decl->get_symbol()->add_argument(kitem);
88+
k_decl->get_symbol()->add_sort(kitem);
89+
k_decl->get_symbol()->add_argument(k);
8490
mod->add_declaration(std::move(decl));
91+
mod->add_declaration(std::move(k_decl));
8592

8693
for (auto const &cat : hooked_sorts_) {
8794
switch (cat.first.cat) {

lib/binary/ProofTraceParser.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,11 @@ void llvm_rewrite_trace::print(
125125
}
126126
}
127127

128-
proof_trace_parser::proof_trace_parser(bool verbose, bool expand_terms)
128+
proof_trace_parser::proof_trace_parser(
129+
bool verbose, bool expand_terms, kore_header const &header)
129130
: verbose_(verbose)
130-
, expand_terms_(expand_terms) { }
131+
, expand_terms_(expand_terms)
132+
, header_(header) { }
131133

132134
std::optional<llvm_rewrite_trace>
133135
proof_trace_parser::parse_proof_trace(std::string const &data) {

lib/binary/deserializer.cpp

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,85 @@ sptr<kore_pattern> deserialize_pattern(std::string const &filename) {
3737
return deserialize_pattern(data.begin(), data.end());
3838
}
3939

40+
// NOLINTNEXTLINE(*-cognitive-complexity)
41+
kore_header::kore_header(FILE *in) {
42+
// NOLINTNEXTLINE(misc-redundant-expression)
43+
if (fgetc(in) != 0x7f || fgetc(in) != 'K' || fgetc(in) != 'R'
44+
|| fgetc(in) != '2') {
45+
throw std::runtime_error("invalid magic");
46+
}
47+
std::array<uint32_t, 4> num_entries{};
48+
if (fread(num_entries.data(), sizeof(uint32_t), 4, in) != 4) {
49+
throw std::runtime_error("invalid table header");
50+
}
51+
uint32_t version = num_entries[0];
52+
uint32_t nstrings = num_entries[1];
53+
uint32_t nsorts = num_entries[2];
54+
uint32_t nsymbols = num_entries[3];
55+
56+
if (version != 1) {
57+
throw std::runtime_error("invalid binary version");
58+
}
59+
60+
std::vector<std::string> strings;
61+
strings.reserve(nstrings);
62+
63+
for (uint32_t i = 0; i < nstrings; ++i) {
64+
uint32_t len = 0;
65+
if (fread(&len, sizeof(uint32_t), 1, in) != 1) {
66+
throw std::runtime_error("invalid string table length");
67+
}
68+
std::string str;
69+
str.resize(len);
70+
if (fread(str.data(), 1, len, in) != len) {
71+
throw std::runtime_error("invalid string table entry");
72+
}
73+
fgetc(in);
74+
strings.push_back(str);
75+
}
76+
77+
std::vector<sptr<kore_sort>> sorts;
78+
sorts.reserve(nsorts);
79+
80+
for (uint32_t i = 0; i < nsorts; ++i) {
81+
uint32_t offset = 0;
82+
if (fread(&offset, sizeof(uint32_t), 1, in) != 1) {
83+
throw std::runtime_error("invalid string table offset in sort table");
84+
}
85+
uint8_t nparams = fgetc(in);
86+
auto sort = kore_composite_sort::create(strings[offset]);
87+
for (uint8_t j = 0; j < nparams; j++) {
88+
uint32_t param_offset = 0;
89+
if (fread(&param_offset, sizeof(uint32_t), 1, in) != 1
90+
|| param_offset >= i) {
91+
throw std::runtime_error("invalid sort table offset in sort table");
92+
}
93+
sort->add_argument(sorts[param_offset]);
94+
}
95+
sorts.push_back(sort);
96+
}
97+
98+
arities_.reserve(nsymbols);
99+
symbols_.reserve(nsymbols);
100+
101+
for (uint32_t i = 0; i < nsymbols; ++i) {
102+
uint32_t offset = 0;
103+
if (fread(&offset, sizeof(uint32_t), 1, in) != 1) {
104+
throw std::runtime_error("invalid string table offset in symbol table");
105+
}
106+
uint8_t nparams = fgetc(in);
107+
uint8_t arity = fgetc(in);
108+
auto symbol = kore_symbol::create(strings[offset]);
109+
for (uint8_t j = 0; j < nparams; j++) {
110+
uint32_t param_offset = 0;
111+
if (fread(&param_offset, sizeof(uint32_t), 1, in) != 1) {
112+
throw std::runtime_error("invalid sort table offset in symbol table");
113+
}
114+
symbol->add_formal_argument(sorts[param_offset]);
115+
}
116+
symbols_.push_back(std::move(symbol));
117+
arities_.push_back(arity);
118+
}
119+
}
120+
40121
} // namespace kllvm

0 commit comments

Comments
 (0)