Skip to content

Conversation

@llvmbot
Copy link
Member

@llvmbot llvmbot commented Oct 19, 2024

Backport 7619699

Requested by: @mgorny

@llvmbot llvmbot added this to the LLVM 19.X Release milestone Oct 19, 2024
@llvmbot
Copy link
Member Author

llvmbot commented Oct 19, 2024

@zygoloid What do you think about merging this PR to the release branch?

@llvmbot llvmbot requested a review from zygoloid October 19, 2024 17:03
@llvmbot llvmbot added clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" labels Oct 19, 2024
@llvmbot
Copy link
Member Author

llvmbot commented Oct 19, 2024

@llvm/pr-subscribers-clang

Author: None (llvmbot)

Changes

Backport 7619699

Requested by: @mgorny


Full diff: https://github.com/llvm/llvm-project/pull/113052.diff

1 Files Affected:

  • (modified) clang/include/clang/AST/ExternalASTSource.h (+35-13)
diff --git a/clang/include/clang/AST/ExternalASTSource.h b/clang/include/clang/AST/ExternalASTSource.h
index 385c32edbae0fd..582ed7c65f58ca 100644
--- a/clang/include/clang/AST/ExternalASTSource.h
+++ b/clang/include/clang/AST/ExternalASTSource.h
@@ -25,10 +25,12 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
+#include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
 #include <iterator>
+#include <new>
 #include <optional>
 #include <utility>
 
@@ -326,29 +328,49 @@ struct LazyOffsetPtr {
   ///
   /// If the low bit is clear, a pointer to the AST node. If the low
   /// bit is set, the upper 63 bits are the offset.
-  mutable uint64_t Ptr = 0;
+  static constexpr size_t DataSize = std::max(sizeof(uint64_t), sizeof(T *));
+  alignas(uint64_t) alignas(T *) mutable unsigned char Data[DataSize] = {};
+
+  unsigned char GetLSB() const {
+    return Data[llvm::sys::IsBigEndianHost ? DataSize - 1 : 0];
+  }
+
+  template <typename U> U &As(bool New) const {
+    unsigned char *Obj =
+        Data + (llvm::sys::IsBigEndianHost ? DataSize - sizeof(U) : 0);
+    if (New)
+      return *new (Obj) U;
+    return *std::launder(reinterpret_cast<U *>(Obj));
+  }
+
+  T *&GetPtr() const { return As<T *>(false); }
+  uint64_t &GetU64() const { return As<uint64_t>(false); }
+  void SetPtr(T *Ptr) const { As<T *>(true) = Ptr; }
+  void SetU64(uint64_t U64) const { As<uint64_t>(true) = U64; }
 
 public:
   LazyOffsetPtr() = default;
-  explicit LazyOffsetPtr(T *Ptr) : Ptr(reinterpret_cast<uint64_t>(Ptr)) {}
+  explicit LazyOffsetPtr(T *Ptr) : Data() { SetPtr(Ptr); }
 
-  explicit LazyOffsetPtr(uint64_t Offset) : Ptr((Offset << 1) | 0x01) {
+  explicit LazyOffsetPtr(uint64_t Offset) : Data() {
     assert((Offset << 1 >> 1) == Offset && "Offsets must require < 63 bits");
     if (Offset == 0)
-      Ptr = 0;
+      SetPtr(nullptr);
+    else
+      SetU64((Offset << 1) | 0x01);
   }
 
   LazyOffsetPtr &operator=(T *Ptr) {
-    this->Ptr = reinterpret_cast<uint64_t>(Ptr);
+    SetPtr(Ptr);
     return *this;
   }
 
   LazyOffsetPtr &operator=(uint64_t Offset) {
     assert((Offset << 1 >> 1) == Offset && "Offsets must require < 63 bits");
     if (Offset == 0)
-      Ptr = 0;
+      SetPtr(nullptr);
     else
-      Ptr = (Offset << 1) | 0x01;
+      SetU64((Offset << 1) | 0x01);
 
     return *this;
   }
@@ -356,15 +378,15 @@ struct LazyOffsetPtr {
   /// Whether this pointer is non-NULL.
   ///
   /// This operation does not require the AST node to be deserialized.
-  explicit operator bool() const { return Ptr != 0; }
+  explicit operator bool() const { return isOffset() || GetPtr() != nullptr; }
 
   /// Whether this pointer is non-NULL.
   ///
   /// This operation does not require the AST node to be deserialized.
-  bool isValid() const { return Ptr != 0; }
+  bool isValid() const { return isOffset() || GetPtr() != nullptr; }
 
   /// Whether this pointer is currently stored as an offset.
-  bool isOffset() const { return Ptr & 0x01; }
+  bool isOffset() const { return GetLSB() & 0x01; }
 
   /// Retrieve the pointer to the AST node that this lazy pointer points to.
   ///
@@ -375,9 +397,9 @@ struct LazyOffsetPtr {
     if (isOffset()) {
       assert(Source &&
              "Cannot deserialize a lazy pointer without an AST source");
-      Ptr = reinterpret_cast<uint64_t>((Source->*Get)(OffsT(Ptr >> 1)));
+      SetPtr((Source->*Get)(OffsT(GetU64() >> 1)));
     }
-    return reinterpret_cast<T*>(Ptr);
+    return GetPtr();
   }
 
   /// Retrieve the address of the AST node pointer. Deserializes the pointee if
@@ -385,7 +407,7 @@ struct LazyOffsetPtr {
   T **getAddressOfPointer(ExternalASTSource *Source) const {
     // Ensure the integer is in pointer form.
     (void)get(Source);
-    return reinterpret_cast<T**>(&Ptr);
+    return &GetPtr();
   }
 };
 

@tru
Copy link
Collaborator

tru commented Oct 28, 2024

@jrtc27 @zygoloid how about merging this to the release branch?

@jrtc27
Copy link
Collaborator

jrtc27 commented Oct 28, 2024

As the author of the patch that seems sensible to me, and I’ve not been aware of any regressions from it in main.

LazyOffsetPtr currently relies on uint64_t being able to store a pointer
and, unless sizeof(uint64_t) == sizeof(void *), little endianness, since
getAddressOfPointer reinterprets the memory as a pointer. This also
doesn't properly respect the C++ object model.

As removing getAddressOfPointer would have wide-reaching implications,
improve the implementation to account for these problems by using
placement new and a suitably sized-and-aligned buffer, "right"-aligning
the objects on big-endian platforms so the LSBs are in the same place
for use as the discriminator.

Fixes: bc73ef0
Fixes: llvm#111993
(cherry picked from commit 7619699)
@tru tru merged commit e541aa5 into llvm:release/19.x Oct 29, 2024
7 of 8 checks passed
@github-actions
Copy link

@mgorny (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category

Projects

Development

Successfully merging this pull request may close these issues.

3 participants