Skip to content

Commit 6375447

Browse files
committed
impl(bigtable): dedup map when creating Value type
1 parent 0f0a7cf commit 6375447

File tree

3 files changed

+94
-11
lines changed

3 files changed

+94
-11
lines changed

google/cloud/bigtable/value.cc

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "google/cloud/bigtable/value.h"
1616
#include "google/cloud/bigtable/timestamp.h"
1717
#include "google/cloud/internal/throw_delegate.h"
18+
#include "absl/container/flat_hash_set.h"
1819
#include "absl/strings/cord.h"
1920
#include <google/bigtable/v2/types.pb.h>
2021
#include <google/protobuf/descriptor.h>
@@ -591,6 +592,33 @@ StatusOr<absl::CivilDay> Value::GetValue(absl::CivilDay const&,
591592

592593
bool Value::is_null() const { return IsNullValue(value_); }
593594

595+
// We're calling this function from a constructor which we prefer to always
596+
// complete. Any errors encountered will be deferred, and we will report them on
597+
// attempts at accessing the value.
598+
// Any duplicates keys found in the map will be deduped to use the last value
599+
// specified for the key, per:
600+
// https://github.com/googleapis/googleapis/blob/0eeb1be5b78a9c7e006ee57cde95349834ae9f3b/google/bigtable/v2/types.proto#L357
601+
void Value::DedupProtoMap() {
602+
google::bigtable::v2::Value dedup_value;
603+
absl::flat_hash_set<std::string> keys;
604+
for (int i = value_.array_value().values_size() - 1; i >= 0; --i) {
605+
auto map_value_proto = GetProtoValueArrayElement(value_, i);
606+
if (!map_value_proto.has_array_value() ||
607+
map_value_proto.array_value().values_size() != 2) {
608+
continue;
609+
}
610+
auto const& key = map_value_proto.array_value().values(0);
611+
// TODO(#15766): Find a better way to hash these protos.
612+
if (keys.insert(key.SerializeAsString()).second) {
613+
*dedup_value.mutable_array_value()->add_values() =
614+
std::move(map_value_proto);
615+
}
616+
}
617+
std::reverse(dedup_value.mutable_array_value()->mutable_values()->begin(),
618+
dedup_value.mutable_array_value()->mutable_values()->end());
619+
value_ = std::move(dedup_value);
620+
}
621+
594622
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
595623
} // namespace bigtable
596624
} // namespace cloud

google/cloud/bigtable/value.h

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -629,13 +629,6 @@ class Value {
629629
pt.map_type().value_type());
630630
if (!key) return std::move(key).status();
631631
if (!value) return std::move(value).status();
632-
633-
// the documented behavior indicates that the last value will take
634-
// precedence for a given key.
635-
auto const& pos = m.find(key.value());
636-
if (pos != m.end()) {
637-
m.erase(pos);
638-
}
639632
m.insert(std::make_pair(*std::move(key), *std::move(value)));
640633
}
641634
return m;
@@ -692,6 +685,8 @@ class Value {
692685
return std::move(*pv.mutable_array_value()->mutable_values(pos));
693686
}
694687

688+
void DedupProtoMap();
689+
695690
// A private templated constructor that is called by all the public
696691
// constructors to set the type_ and value_ members. The `PrivateConstructor`
697692
// type is used so that this overload is never chosen for
@@ -701,10 +696,20 @@ class Value {
701696
struct PrivateConstructor {};
702697
template <typename T>
703698
Value(PrivateConstructor, T&& t)
704-
: type_(MakeTypeProto(t)), value_(MakeValueProto(std::forward<T>(t))) {}
699+
: type_(MakeTypeProto(t)), value_(MakeValueProto(std::forward<T>(t))) {
700+
if (type_.has_map_type() && value_.has_array_value() &&
701+
!value_.array_value().values().empty()) {
702+
DedupProtoMap();
703+
}
704+
}
705705

706706
Value(google::bigtable::v2::Type t, google::bigtable::v2::Value v)
707-
: type_(std::move(t)), value_(std::move(v)) {}
707+
: type_(std::move(t)), value_(std::move(v)) {
708+
if (type_.has_map_type() && value_.has_array_value() &&
709+
!value_.array_value().values().empty()) {
710+
DedupProtoMap();
711+
}
712+
}
708713

709714
friend struct bigtable_internal::ValueInternals;
710715
friend class Parameter;

google/cloud/bigtable/value_test.cc

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "google/cloud/testing_util/is_proto_equal.h"
1919
#include "google/cloud/testing_util/status_matchers.h"
2020
#include "absl/strings/cord.h"
21+
#include <google/protobuf/text_format.h>
2122
#include <google/type/date.pb.h>
2223
#include <gmock/gmock.h>
2324
#include <cmath>
@@ -39,6 +40,7 @@ namespace {
3940
using ::google::cloud::testing_util::IsOk;
4041
using ::google::cloud::testing_util::IsOkAndHolds;
4142
using ::google::cloud::testing_util::IsProtoEqual;
43+
using ::google::protobuf::TextFormat;
4244
using ::testing::Not;
4345

4446
absl::Time MakeTime(std::int64_t sec, int nanos) {
@@ -169,8 +171,6 @@ TEST(Value, BasicSemantics) {
169171
for (auto ts : TestTimes()) {
170172
SCOPED_TRACE("Testing: google::cloud::bigtable::Timestamp " +
171173
bigtable_internal::TimestampToRFC3339(ts));
172-
std::cout << "Testing: google::cloud::bigtable::Timestamp "
173-
<< bigtable_internal::TimestampToRFC3339(ts) << std::endl;
174174
TestBasicSemantics(ts);
175175
std::vector<Timestamp> v(5, ts);
176176
TestBasicSemantics(v);
@@ -1004,6 +1004,56 @@ TEST(Value, ProtoConversionMap) {
10041004
EXPECT_TRUE(p.first.has_map_type());
10051005
}
10061006

1007+
TEST(Value, ProtoMapWithDuplicateKeys) {
1008+
auto constexpr kTypeProto = R"""(
1009+
map_type {
1010+
key_type {
1011+
bytes_type {
1012+
}
1013+
}
1014+
value_type {
1015+
string_type {
1016+
}
1017+
}
1018+
}
1019+
)""";
1020+
google::bigtable::v2::Type type_proto;
1021+
ASSERT_TRUE(TextFormat::ParseFromString(kTypeProto, &type_proto));
1022+
1023+
auto constexpr kValueProto = R"""(
1024+
array_value {
1025+
values {
1026+
array_value {
1027+
values {
1028+
bytes_value: "foo"
1029+
}
1030+
values {
1031+
string_value: "foo"
1032+
}
1033+
}
1034+
}
1035+
values {
1036+
array_value {
1037+
values {
1038+
bytes_value: "foo"
1039+
}
1040+
values {
1041+
string_value: "bar"
1042+
}
1043+
}
1044+
}
1045+
}
1046+
)""";
1047+
google::bigtable::v2::Value value_proto;
1048+
ASSERT_TRUE(TextFormat::ParseFromString(kValueProto, &value_proto));
1049+
1050+
auto value = bigtable_internal::FromProto(type_proto, value_proto);
1051+
auto map = value.get<std::unordered_map<Bytes, std::string>>();
1052+
ASSERT_STATUS_OK(map);
1053+
EXPECT_THAT(*map, ::testing::UnorderedElementsAre(
1054+
::testing::Pair(Bytes("foo"), "bar")));
1055+
}
1056+
10071057
void SetNullProtoKind(Value& v) {
10081058
auto p = bigtable_internal::ToProto(v);
10091059
p.second.clear_kind();

0 commit comments

Comments
 (0)