Skip to content

Commit f682037

Browse files
committed
MB-39292: 3/n Change from JSON to flatbuffers+crc32c
Change the stored manifest from a JSON format to a flatbuffers-defined structure. When the manifest is reloaded, the flatbuffers Verify step checks that the loaded data appears to be a valid 'Manifest'. A CRC (using crc32c) is also added to the stored data so that we can detect changes which Verify may not detect (e.g. a string value "dog" changing to "cat"). Detected issues are logged at CRITICAL; the bucket is still allowed to continue warm-up, and the in-memory Bucket::Manifest is initialised as the 'epoch' Manifest. The bucket will then pick up the current manifest from ns_server, which is always acceptable because any Manifest is a successor of epoch. Change-Id: I319c3e76ef5c07da0680e602f3e342d3d8affa58 Reviewed-on: http://review.couchbase.org/c/kv_engine/+/137162 Reviewed-by: Dave Rigby <[email protected]> Tested-by: Build Bot <[email protected]>
1 parent 11fefec commit f682037

File tree

10 files changed

+346
-45
lines changed

10 files changed

+346
-45
lines changed

engines/ep/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,14 @@ ADD_CUSTOM_COMMAND(OUTPUT
190190
src/collections/events.fbs
191191
COMMENT "Generating flatbuffers - collection events")
192192

193+
# Run flatc over the collection manifest schema to produce the C++ header
# manifest_generated.h in the build tree. Re-runs whenever manifest.fbs changes.
ADD_CUSTOM_COMMAND(
    OUTPUT
        ${CMAKE_CURRENT_BINARY_DIR}/src/collections/manifest_generated.h
    COMMAND
        ${FLATC}
        -o ${CMAKE_CURRENT_BINARY_DIR}/src/collections/
        --cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/collections/manifest.fbs
    DEPENDS
        src/collections/manifest.fbs
    COMMENT "Generating flatbuffers - collection manifest")
200+
193201
# Custom build target to generate source files that we normally generate as part
194202
# of a build.
195203
add_custom_target(generated_source_files
@@ -202,6 +210,7 @@ add_custom_target(generated_source_files
202210
${CMAKE_CURRENT_BINARY_DIR}/src/collections/kvstore_generated.h
203211
${CMAKE_CURRENT_BINARY_DIR}/src/collections/kvstore_flatbuffers_schema.cc
204212
${CMAKE_CURRENT_BINARY_DIR}/src/collections/events_generated.h
213+
${CMAKE_CURRENT_BINARY_DIR}/src/collections/manifest_generated.h
205214
generate_audit_descriptors)
206215

207216
# Collections library for things required outside of ep-engine

engines/ep/src/collections/manager.cc

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -315,14 +315,18 @@ void Collections::Manager::addScopeStats(KVBucket& bucket,
315315

316316
void Collections::Manager::warmupLoadManifest(const std::string& dbpath) {
317317
auto rv = Collections::PersistManifestTask::tryAndLoad(dbpath);
318-
if (rv) {
318+
if (rv.has_value()) {
319319
EP_LOG_INFO(
320-
"Collections::Manager::warmupLoadManifest: loaded uid:{:#x}",
321-
rv->getUid());
322-
*currentManifest.wlock() = std::move(*rv);
323-
} else {
324-
EP_LOG_INFO("Collections::Manager::warmupLoadManifest: nothing loaded");
325-
}
320+
"Collections::Manager::warmupLoadManifest: starting at "
321+
"uid:{:#x}",
322+
rv.value().getUid());
323+
*currentManifest.wlock() = rv.value();
324+
}
325+
// else tryAndLoad detected (and logged) some kind of corruption issue.
326+
// If this corruption occurred at the same time as some issue in the
327+
// forward flow of the Manifest, KV can't validate that any change to the
328+
// manifest is a legal successor (Manifest::isSuccessor) - we will just have
329+
// to accept what we're given and carry on.
326330
}
327331

328332
/**

engines/ep/src/collections/manager.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,8 @@ class Manager {
324324

325325
friend std::ostream& operator<<(std::ostream& os, const Manager& manager);
326326

327-
/// Store the most recent (current) manifest received
327+
/// Store the most recent (current) manifest received - this default
328+
/// constructs as the 'epoch' Manifest
328329
folly::Synchronized<Manifest> currentManifest;
329330

330331
/// Serialise updates to the manifest (set_collections core)

engines/ep/src/collections/manifest.cc

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "bucket_logger.h"
2020
#include "collections/collections_constants.h"
2121
#include "collections/collections_types.h"
22+
#include "collections/manifest_generated.h"
2223
#include "ep_engine.h"
2324
#include "kv_bucket.h"
2425
#include "statistics/collector.h"
@@ -301,6 +302,85 @@ nlohmann::json Manifest::toJson(
301302
return manifest;
302303
}
303304

305+
// Serialise this Manifest as a Collections::Persist::Manifest flatbuffer:
// the uid plus, for every scope, its id, name and the id/maxTtl/name of each
// collection it owns.
//
// @return the finished buffer, released from the builder
flatbuffers::DetachedBuffer Manifest::toFlatbuffer() const {
    namespace Persist = Collections::Persist;

    flatbuffers::FlatBufferBuilder builder;
    std::vector<flatbuffers::Offset<Persist::Scope>> scopeOffsets;

    for (const auto& [scopeId, scopeData] : scopes) {
        std::vector<flatbuffers::Offset<Persist::Collection>>
                collectionOffsets;

        for (const auto& entry : scopeData.collections) {
            // The scope's entry only carries id and maxTtl; the name is held
            // in the manifest-wide collections map.
            const auto& collectionName = collections.at(entry.id).name;
            const auto fbName = builder.CreateString(collectionName.data(),
                                                     collectionName.size());

            // A collection without a maxTtl is written as ttlValid=false
            // with a placeholder maxTtl of 0.
            collectionOffsets.push_back(Persist::CreateCollection(
                    builder,
                    static_cast<uint32_t>(entry.id),
                    entry.maxTtl.has_value(),
                    entry.maxTtl.value_or(std::chrono::seconds(0)).count(),
                    fbName));
        }

        const auto fbCollections = builder.CreateVector(collectionOffsets);
        const auto fbScopeName = builder.CreateString(scopeData.name.data(),
                                                      scopeData.name.size());
        scopeOffsets.push_back(Persist::CreateScope(
                builder, scopeId, fbScopeName, fbCollections));
    }

    const auto fbScopes = builder.CreateVector(scopeOffsets);
    builder.Finish(Persist::CreateManifest(builder, uid, fbScopes));
    return builder.Release();
}
340+
341+
// sibling of toFlatbuffer, construct a Manifest from a flatbuffer format
342+
Manifest::Manifest(std::string_view flatbufferData, Manifest::FlatBuffers tag)
343+
: defaultCollectionExists(false), scopes(), collections(), uid(0) {
344+
flatbuffers::Verifier v(
345+
reinterpret_cast<const uint8_t*>(flatbufferData.data()),
346+
flatbufferData.size());
347+
if (!v.VerifyBuffer<Collections::Persist::Manifest>(nullptr)) {
348+
std::stringstream ss;
349+
ss << "Collections::Manifest::Manifest(FlatBuffers): flatbufferData "
350+
"invalid, ptr:"
351+
<< reinterpret_cast<const void*>(flatbufferData.data())
352+
<< ", size:" << flatbufferData.size();
353+
354+
throw std::invalid_argument(ss.str());
355+
}
356+
357+
auto manifest = flatbuffers::GetRoot<Collections::Persist::Manifest>(
358+
reinterpret_cast<const uint8_t*>(flatbufferData.data()));
359+
360+
uid = manifest->uid();
361+
for (const Collections::Persist::Scope* scope : *manifest->scopes()) {
362+
std::vector<CollectionEntry> scopeCollections;
363+
364+
for (const Collections::Persist::Collection* collection :
365+
*scope->collections()) {
366+
cb::ExpiryLimit maxTtl;
367+
CollectionID cid(collection->collectionId());
368+
if (collection->ttlValid()) {
369+
maxTtl = std::chrono::seconds(collection->maxTtl());
370+
}
371+
this->collections.emplace(std::make_pair(
372+
cid,
373+
Collection{scope->scopeId(), collection->name()->str()}));
374+
enableDefaultCollection(cid);
375+
scopeCollections.push_back({cid, maxTtl});
376+
}
377+
378+
this->scopes.emplace(
379+
scope->scopeId(),
380+
Scope{scope->name()->str(), std::move(scopeCollections)});
381+
}
382+
}
383+
304384
void Manifest::addCollectionStats(KVBucket& bucket,
305385
const void* cookie,
306386
const AddStatFn& add_stat) const {
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
//
2+
// Copyright 2020 Couchbase, Inc
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
//
16+
17+
//
18+
// Flatbuffer types used in the serialisation of the Collections::Manifest
19+
// object. These are used to store as a file a copy of the current manifest
20+
// and use it in warm-up.
21+
//
22+
23+
namespace Collections.Persist;
24+
25+
table Collection {
26+
collectionId:uint;
27+
ttlValid:bool;
28+
maxTtl:uint;
29+
name:string;
30+
}
31+
32+
table Scope {
33+
scopeId:uint;
34+
name:string;
35+
collections:[Collection];
36+
}
37+
38+
table Manifest {
39+
uid:ulong;
40+
scopes:[Scope];
41+
}
42+
43+
table ManifestWithCrc {
44+
crc:uint;
45+
manifest:[ubyte];
46+
}
47+
48+
// This is the main type representing a Collections::Manifest
49+
root_type Manifest;
50+
// This wraps the flatbuffer Manifest with a CRC for integrity checking
51+
root_type ManifestWithCrc;

engines/ep/src/collections/manifest.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@
2929

3030
class KVBucket;
3131

32+
namespace flatbuffers {
33+
class DetachedBuffer;
34+
}
35+
3236
namespace Collections {
3337

3438
static const size_t MaxScopeOrCollectionNameSize = 251;
@@ -69,6 +73,9 @@ class Manifest {
6973
*/
7074
explicit Manifest(std::string_view json);
7175

76+
struct FlatBuffers {};
77+
explicit Manifest(std::string_view flatbufferData, FlatBuffers tag);
78+
7279
bool doesDefaultCollectionExist() const {
7380
return defaultCollectionExists;
7481
}
@@ -229,6 +236,11 @@ class Manifest {
229236
nlohmann::json toJson(
230237
const Collections::IsVisibleFunction& isVisible) const;
231238

239+
/**
240+
* @return flatbuffer representation of this object
241+
*/
242+
flatbuffers::DetachedBuffer toFlatbuffer() const;
243+
232244
/**
233245
* Add stats for collection. Each collection is tested for
234246
* Privilege::SimpleStats and 'added' if the user has the privilege.

0 commit comments

Comments
 (0)