Skip to content

Commit f1d1657

Browse files
martijnvelscopybara-github
authored andcommitted
Add memory sanitizer to absl::Cord
PiperOrigin-RevId: 504555535 Change-Id: Id40484e9f52c87e9d67def2735ee60481ca50526
1 parent b0a2b10 commit f1d1657

File tree

4 files changed

+229
-30
lines changed

4 files changed

+229
-30
lines changed

absl/strings/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,7 @@ cc_library(
322322
"//absl/base:raw_logging_internal",
323323
"//absl/base:throw_delegate",
324324
"//absl/container:compressed_tuple",
325+
"//absl/container:container_memory",
325326
"//absl/container:inlined_vector",
326327
"//absl/container:layout",
327328
"//absl/crc:crc_cord_state",

absl/strings/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,7 @@ absl_cc_library(
604604
absl::base_internal
605605
absl::compressed_tuple
606606
absl::config
607+
absl::container_memory
607608
absl::core_headers
608609
absl::crc_cord_state
609610
absl::endian

absl/strings/cord_test.cc

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3057,3 +3057,69 @@ TEST_P(CordTest, ChecksummedEmptyCord) {
30573057
EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::Cord()));
30583058
EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::string_view()));
30593059
}
3060+
3061+
#if defined(GTEST_HAS_DEATH_TEST) && defined(ABSL_INTERNAL_CORD_HAVE_SANITIZER)
3062+
3063+
// Returns an expected poison / uninitialized death message expression.
3064+
const char* MASanDeathExpr() {
3065+
return "(use-after-poison|use-of-uninitialized-value)";
3066+
}
3067+
3068+
TEST(CordSanitizerTest, SanitizesEmptyCord) {
3069+
absl::Cord cord;
3070+
const char* data = cord.Flatten().data();
3071+
EXPECT_DEATH(EXPECT_EQ(data[0], 0), MASanDeathExpr());
3072+
}
3073+
3074+
TEST(CordSanitizerTest, SanitizesSmallCord) {
3075+
absl::Cord cord("Hello");
3076+
const char* data = cord.Flatten().data();
3077+
EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
3078+
}
3079+
3080+
TEST(CordSanitizerTest, SanitizesCordOnSetSSOValue) {
3081+
absl::Cord cord("String that is too big to be an SSO value");
3082+
cord = "Hello";
3083+
const char* data = cord.Flatten().data();
3084+
EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
3085+
}
3086+
3087+
TEST(CordSanitizerTest, SanitizesCordOnCopyCtor) {
3088+
absl::Cord src("hello");
3089+
absl::Cord dst(src);
3090+
const char* data = dst.Flatten().data();
3091+
EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
3092+
}
3093+
3094+
TEST(CordSanitizerTest, SanitizesCordOnMoveCtor) {
3095+
absl::Cord src("hello");
3096+
absl::Cord dst(std::move(src));
3097+
const char* data = dst.Flatten().data();
3098+
EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
3099+
}
3100+
3101+
TEST(CordSanitizerTest, SanitizesCordOnAssign) {
3102+
absl::Cord src("hello");
3103+
absl::Cord dst;
3104+
dst = src;
3105+
const char* data = dst.Flatten().data();
3106+
EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
3107+
}
3108+
3109+
TEST(CordSanitizerTest, SanitizesCordOnMoveAssign) {
3110+
absl::Cord src("hello");
3111+
absl::Cord dst;
3112+
dst = std::move(src);
3113+
const char* data = dst.Flatten().data();
3114+
EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
3115+
}
3116+
3117+
TEST(CordSanitizerTest, SanitizesCordOnSsoAssign) {
3118+
absl::Cord src("hello");
3119+
absl::Cord dst("String that is too big to be an SSO value");
3120+
dst = src;
3121+
const char* data = dst.Flatten().data();
3122+
EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
3123+
}
3124+
3125+
#endif // GTEST_HAS_DEATH_TEST && ABSL_INTERNAL_CORD_HAVE_SANITIZER

absl/strings/internal/cord_internal.h

Lines changed: 161 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,20 @@
2727
#include "absl/base/internal/invoke.h"
2828
#include "absl/base/optimization.h"
2929
#include "absl/container/internal/compressed_tuple.h"
30+
#include "absl/container/internal/container_memory.h"
3031
#include "absl/meta/type_traits.h"
3132
#include "absl/strings/string_view.h"
3233

34+
// We can only add poisoning if we can detect consteval executions.
35+
#if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \
36+
(defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
37+
defined(ABSL_HAVE_MEMORY_SANITIZER))
38+
#define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1
39+
#endif
40+
41+
#define ABSL_CORD_INTERNAL_NO_SANITIZE \
42+
ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY
43+
3344
namespace absl {
3445
ABSL_NAMESPACE_BEGIN
3546
namespace cord_internal {
@@ -267,7 +278,7 @@ struct CordRep {
267278
// The following three fields have to be less than 32 bytes since
268279
// that is the smallest supported flat node size. Some code optimizations rely
269280
// on the specific layout of these fields. Notably: the non-trivial field
270-
// `refcount` being preceeded by `length`, and being tailed by POD data
281+
// `refcount` being preceded by `length`, and being tailed by POD data
271282
// members only.
272283
// # LINT.IfChange
273284
size_t length;
@@ -506,26 +517,57 @@ class InlineData {
506517
// is actively inspected and used by gdb pretty printing code.
507518
static constexpr size_t kTagOffset = 0;
508519

509-
constexpr InlineData() = default;
510-
explicit InlineData(DefaultInitType) : rep_(kDefaultInit) {}
511-
explicit InlineData(CordRep* rep) : rep_(rep) {}
520+
// Implement `~InlineData()` conditionally: we only need this destructor to
521+
// unpoison poisoned instances under *SAN, and it will only compile correctly
522+
// if the current compiler supports `absl::is_constant_evaluated()`.
523+
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
524+
~InlineData() noexcept { unpoison(); }
525+
#endif
526+
527+
constexpr InlineData() noexcept { poison_this(); }
528+
529+
explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
530+
poison_this();
531+
}
532+
533+
explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
534+
ABSL_ASSERT(rep != nullptr);
535+
}
512536

513537
// Explicit constexpr constructor to create a constexpr InlineData
514538
// value. Creates an inlined SSO value if `rep` is null, otherwise
515539
// creates a tree instance value.
516-
constexpr InlineData(absl::string_view sv, CordRep* rep)
517-
: rep_(rep ? Rep(rep) : Rep(sv)) {}
540+
constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
541+
: rep_(rep ? Rep(rep) : Rep(sv)) {
542+
poison();
543+
}
518544

519-
constexpr InlineData(const InlineData& rhs) = default;
520-
InlineData& operator=(const InlineData& rhs) = default;
545+
constexpr InlineData(const InlineData& rhs) noexcept;
546+
InlineData& operator=(const InlineData& rhs) noexcept;
521547

522548
friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
549+
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
550+
const Rep l = lhs.rep_.SanitizerSafeCopy();
551+
const Rep r = rhs.rep_.SanitizerSafeCopy();
552+
return memcmp(&l, &r, sizeof(l)) == 0;
553+
#else
523554
return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
555+
#endif
524556
}
525557
friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
526558
return !operator==(lhs, rhs);
527559
}
528560

561+
// Poisons the unused inlined SSO data if the current instance
562+
// is inlined, else un-poisons the entire instance.
563+
constexpr void poison();
564+
565+
// Un-poisons this instance.
566+
constexpr void unpoison();
567+
568+
// Poisons the current instance. This is used on default initialization.
569+
constexpr void poison_this();
570+
529571
// Returns true if the current instance is empty.
530572
// The 'empty value' is an inlined data value of zero length.
531573
bool is_empty() const { return rep_.tag() == 0; }
@@ -610,18 +652,23 @@ class InlineData {
610652

611653
void set_inline_data(const char* data, size_t n) {
612654
ABSL_ASSERT(n <= kMaxInline);
655+
unpoison();
613656
rep_.set_tag(static_cast<int8_t>(n << 1));
614657
SmallMemmove<true>(rep_.as_chars(), data, n);
658+
poison();
615659
}
616660

617661
void copy_max_inline_to(char* dst) const {
618662
assert(!is_tree());
619-
memcpy(dst, as_chars(), kMaxInline);
663+
memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
620664
}
621665

622666
// Initialize this instance to holding the tree value `rep`,
623667
// initializing the cordz_info to null, i.e.: 'not profiled'.
624-
void make_tree(CordRep* rep) { rep_.make_tree(rep); }
668+
void make_tree(CordRep* rep) {
669+
unpoison();
670+
rep_.make_tree(rep);
671+
}
625672

626673
// Set the tree value of this instance to 'rep`.
627674
// Requires the current instance to already hold a tree value.
@@ -633,17 +680,15 @@ class InlineData {
633680

634681
// Returns the size of the inlined character data inside this instance.
635682
// Requires the current instance to hold inline data.
636-
size_t inline_size() const {
637-
assert(!is_tree());
638-
return static_cast<size_t>(rep_.tag()) >> 1;
639-
}
683+
size_t inline_size() const { return rep_.inline_size(); }
640684

641685
// Sets the size of the inlined character data inside this instance.
642686
// Requires `size` to be <= kMaxInline.
643687
// See the documentation on 'as_chars()' for more information and examples.
644688
void set_inline_size(size_t size) {
645-
ABSL_ASSERT(size <= kMaxInline);
646-
rep_.set_tag(static_cast<int8_t>(size << 1));
689+
unpoison();
690+
rep_.set_inline_size(size);
691+
poison();
647692
}
648693

649694
// Compares 'this' inlined data with rhs. The comparison is a straightforward
@@ -653,20 +698,7 @@ class InlineData {
653698
// 0 the InlineData instances are equal
654699
// 1 'this' InlineData instance larger
655700
int Compare(const InlineData& rhs) const {
656-
uint64_t x, y;
657-
memcpy(&x, as_chars(), sizeof(x));
658-
memcpy(&y, rhs.as_chars(), sizeof(y));
659-
if (x == y) {
660-
memcpy(&x, as_chars() + 7, sizeof(x));
661-
memcpy(&y, rhs.as_chars() + 7, sizeof(y));
662-
if (x == y) {
663-
if (inline_size() == rhs.inline_size()) return 0;
664-
return inline_size() < rhs.inline_size() ? -1 : 1;
665-
}
666-
}
667-
x = absl::big_endian::FromHost64(x);
668-
y = absl::big_endian::FromHost64(y);
669-
return x < y ? -1 : 1;
701+
return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
670702
}
671703

672704
private:
@@ -704,12 +736,26 @@ class InlineData {
704736
GetOrNull(chars, 13),
705737
GetOrNull(chars, 14)} {}
706738

739+
// Disable sanitizer as we must always be able to read `tag`.
740+
ABSL_CORD_INTERNAL_NO_SANITIZE
707741
int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
708742
void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(this)[0] = rhs; }
709743

710744
char* as_chars() { return data + 1; }
711745
const char* as_chars() const { return data + 1; }
712746

747+
bool is_tree() const { return (tag() & 1) != 0; }
748+
749+
size_t inline_size() const {
750+
ABSL_ASSERT(!is_tree());
751+
return static_cast<size_t>(tag()) >> 1;
752+
}
753+
754+
void set_inline_size(size_t size) {
755+
ABSL_ASSERT(size <= kMaxInline);
756+
set_tag(static_cast<int8_t>(size << 1));
757+
}
758+
713759
CordRep* tree() const { return as_tree.rep; }
714760
void set_tree(CordRep* rhs) { as_tree.rep = rhs; }
715761

@@ -721,6 +767,21 @@ class InlineData {
721767
as_tree.cordz_info = kNullCordzInfo;
722768
}
723769

770+
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
771+
Rep SanitizerSafeCopy() const {
772+
Rep res;
773+
if (is_tree()) {
774+
res = *this;
775+
} else {
776+
res.set_tag(tag());
777+
memcpy(res.as_chars(), as_chars(), inline_size());
778+
}
779+
return res;
780+
}
781+
#else
782+
const Rep& SanitizerSafeCopy() const { return *this; }
783+
#endif
784+
724785
// If the data has length <= kMaxInline, we store it in `data`, and
725786
// store the size in the first char of `data` shifted left + 1.
726787
// Else we store it in a tree and store a pointer to that tree in
@@ -731,11 +792,81 @@ class InlineData {
731792
};
732793
};
733794

795+
// Private implementation of `Compare()`
796+
static inline int Compare(const Rep& lhs, const Rep& rhs) {
797+
uint64_t x, y;
798+
memcpy(&x, lhs.as_chars(), sizeof(x));
799+
memcpy(&y, rhs.as_chars(), sizeof(y));
800+
if (x == y) {
801+
memcpy(&x, lhs.as_chars() + 7, sizeof(x));
802+
memcpy(&y, rhs.as_chars() + 7, sizeof(y));
803+
if (x == y) {
804+
if (lhs.inline_size() == rhs.inline_size()) return 0;
805+
return lhs.inline_size() < rhs.inline_size() ? -1 : 1;
806+
}
807+
}
808+
x = absl::big_endian::FromHost64(x);
809+
y = absl::big_endian::FromHost64(y);
810+
return x < y ? -1 : 1;
811+
}
812+
734813
Rep rep_;
735814
};
736815

737816
static_assert(sizeof(InlineData) == kMaxInline + 1, "");
738817

818+
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
819+
820+
constexpr InlineData::InlineData(const InlineData& rhs) noexcept
821+
: rep_(rhs.rep_.SanitizerSafeCopy()) {
822+
poison();
823+
}
824+
825+
inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
826+
unpoison();
827+
rep_ = rhs.rep_.SanitizerSafeCopy();
828+
poison();
829+
return *this;
830+
}
831+
832+
constexpr void InlineData::poison_this() {
833+
if (!absl::is_constant_evaluated()) {
834+
container_internal::SanitizerPoisonObject(this);
835+
}
836+
}
837+
838+
constexpr void InlineData::unpoison() {
839+
if (!absl::is_constant_evaluated()) {
840+
container_internal::SanitizerUnpoisonObject(this);
841+
}
842+
}
843+
844+
constexpr void InlineData::poison() {
845+
if (!absl::is_constant_evaluated()) {
846+
if (is_tree()) {
847+
container_internal::SanitizerUnpoisonObject(this);
848+
} else if (const size_t size = inline_size()) {
849+
if (size < kMaxInline) {
850+
const char* end = rep_.as_chars() + size;
851+
container_internal::SanitizerPoisonMemoryRegion(end, kMaxInline - size);
852+
}
853+
} else {
854+
container_internal::SanitizerPoisonObject(this);
855+
}
856+
}
857+
}
858+
859+
#else // ABSL_INTERNAL_CORD_HAVE_SANITIZER
860+
861+
constexpr InlineData::InlineData(const InlineData&) noexcept = default;
862+
inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;
863+
864+
constexpr void InlineData::poison_this() {}
865+
constexpr void InlineData::unpoison() {}
866+
constexpr void InlineData::poison() {}
867+
868+
#endif // ABSL_INTERNAL_CORD_HAVE_SANITIZER
869+
739870
inline CordRepSubstring* CordRep::substring() {
740871
assert(IsSubstring());
741872
return static_cast<CordRepSubstring*>(this);

0 commit comments

Comments
 (0)