-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[clang][bytecode] Implement __builtin_{memchr,strchr,char_memchr} #130420
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
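@llvm/pr-subscribers-clang Author: Timm Baeder (tbaederr) Changes: llvm has recently started to use __builtin_memchr at compile time, so implement this. Still needs some work but the basics are done. Full diff: https://github.com/llvm/llvm-project/pull/130420.diff 2 Files Affected: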
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 00f99745862ee..b8c4ef2f48a79 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -1960,13 +1960,103 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC,
// However, if we read all the available bytes but were instructed to read
// even more, diagnose this as a "read of dereferenced one-past-the-end
- // pointer". This is what would happen if we called CheckRead() on every array
+ // pointer". This is what would happen if we called CheckLoad() on every array
// element.
S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_access_past_end)
<< AK_Read << S.Current->getRange(OpPC);
return false;
}
+/// Constant-evaluates the memchr/strchr family of builtins handled here
+/// (memchr, strchr, __builtin_char_memchr and the __builtin_ spellings).
+///
+/// Starting at the element that argument 0 points to, elements are loaded one
+/// at a time and compared against the desired value (argument 1) truncated to
+/// the target's char width. For the three-argument forms at most MaxLength
+/// (argument 2) elements are examined; for strchr the scan additionally ends
+/// at the first zero element.
+///
+/// On success a Pointer is pushed onto the stack: the matching element, or a
+/// default-constructed (null) Pointer if nothing matched.
+///
+/// \returns true if a result was pushed, false if evaluation failed (dummy or
+/// null source pointer, unsupported element type for memchr, or an element
+/// that cannot be loaded).
+static bool interp__builtin_memchr(InterpState &S, CodePtr OpPC,
+                                   const InterpFrame *Frame,
+                                   const Function *Func, const CallExpr *Call) {
+  unsigned ID = Func->getBuiltinID();
+  // The plain library spellings are reported as non-constexpr builtins; the
+  // __builtin_ spellings are not.
+  if (ID == Builtin::BImemchr || ID == Builtin::BIwcschr ||
+      ID == Builtin::BIstrchr || ID == Builtin::BIwmemchr)
+    diagnoseNonConstexprBuiltin(S, OpPC, ID);
+
+  const Pointer &Ptr = getParam<Pointer>(Frame, 0);
+  APSInt Desired;
+  std::optional<APSInt> MaxLength;
+  if (Call->getNumArgs() == 3) {
+    // The length is peeked at offset 0; the desired value is peeked at an
+    // offset made of the aligned sizes of both arguments.
+    MaxLength =
+        peekToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2)), 0);
+    Desired = peekToAPSInt(
+        S.Stk, *S.getContext().classify(Call->getArg(1)),
+        align(primSize(*S.getContext().classify(Call->getArg(2)))) +
+            align(primSize(*S.getContext().classify(Call->getArg(1)))));
+  } else {
+    Desired = peekToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(1)));
+  }
+
+  // A zero-length search never reads anything, so it yields a null result
+  // before the source pointer is inspected at all.
+  if (MaxLength && MaxLength->isZero()) {
+    S.Stk.push<Pointer>();
+    return true;
+  }
+
+  if (Ptr.isDummy())
+    return false;
+
+  // Null is only okay if the given size is 0.
+  if (Ptr.isZero()) {
+    S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_access_null)
+        << AK_Read;
+    return false;
+  }
+
+  QualType ElemTy = Ptr.getFieldDesc()->isArray()
+                        ? Ptr.getFieldDesc()->getElemQualType()
+                        : Ptr.getFieldDesc()->getType();
+  bool IsRawByte = ID == Builtin::BImemchr || ID == Builtin::BI__builtin_memchr;
+
+  // Give up on byte-oriented matching against multibyte elements.
+  if (IsRawByte && !isOneByteCharacterType(ElemTy)) {
+    S.FFDiag(S.Current->getSource(OpPC),
+             diag::note_constexpr_memchr_unsupported)
+        << S.getASTContext().BuiltinInfo.getQuotedName(ID) << ElemTy;
+    return false;
+  }
+
+  if (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr) {
+    // strchr compares directly to the passed integer, and therefore
+    // always fails if given an int that is not a char.
+    if (Desired !=
+        Desired.trunc(S.getASTContext().getCharWidth()).getSExtValue()) {
+      S.Stk.push<Pointer>();
+      return true;
+    }
+  }
+
+  uint64_t DesiredVal =
+      Desired.trunc(S.getASTContext().getCharWidth()).getZExtValue();
+  bool StopAtZero =
+      (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr);
+
+  // The number of elements examined so far (Step) is tracked separately from
+  // the array index: the length limit counts elements from where Ptr points,
+  // not from the start of the underlying array. Comparing the absolute index
+  // against MaxLength would miscount whenever Ptr has a non-zero index.
+  const size_t StartIndex = Ptr.getIndex();
+  size_t Step = 0;
+  for (;;) {
+    const size_t ElemIndex = StartIndex + Step;
+    const Pointer &ElemPtr = ElemIndex > 0 ? Ptr.atIndex(ElemIndex) : Ptr;
+
+    // Fails (with a diagnostic from CheckLoad) when the element cannot be
+    // read, e.g. when the scan runs past the end of the array.
+    if (!CheckLoad(S, OpPC, ElemPtr))
+      return false;
+
+    // Elements are read as char and compared as unsigned bytes.
+    unsigned char V = static_cast<unsigned char>(ElemPtr.deref<char>());
+    if (V == DesiredVal) {
+      S.Stk.push<Pointer>(ElemPtr);
+      return true;
+    }
+
+    if (StopAtZero && V == 0)
+      break;
+
+    ++Step;
+    if (MaxLength && Step == MaxLength->getZExtValue())
+      break;
+  }
+
+  // No match within the searched range.
+  S.Stk.push<Pointer>();
+  return true;
+}
+
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
const CallExpr *Call, uint32_t BuiltinID) {
const InterpFrame *Frame = S.Current;
@@ -2445,6 +2535,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
return false;
break;
+ case Builtin::BImemchr:
+ case Builtin::BI__builtin_memchr:
+ case Builtin::BIstrchr:
+ case Builtin::BI__builtin_strchr:
+#if 0
+ case Builtin::BIwcschr:
+ case Builtin::BI__builtin_wcschr:
+ case Builtin::BImemchr:
+ case Builtin::BI__builtin_wmemchr:
+#endif
+ case Builtin::BI__builtin_char_memchr:
+ if (!interp__builtin_memchr(S, OpPC, Frame, F, Call))
+ return false;
+ break;
+
default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp
index 75380f99901a2..dbff9164a91c1 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -18,6 +18,8 @@
extern "C" {
typedef decltype(sizeof(int)) size_t;
extern size_t wcslen(const wchar_t *p);
+ extern void *memchr(const void *s, int c, size_t n);
+ extern char *strchr(const char *s, int c);
}
namespace strcmp {
@@ -1351,3 +1353,119 @@ namespace Memcmp {
static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 6) == -1);
static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 5) == 0);
}
+
+// Constant-evaluation tests for __builtin_memchr and __builtin_char_memchr,
+// plus a check that the plain library memchr is rejected in a constant
+// expression. The trailing comments on some lines are -verify diagnostic
+// annotations and must stay attached to their static_assert.
+namespace Memchr {
+ // kStr holds a 0xff byte at index 4 and an embedded NUL at index 5.
+ constexpr const char *kStr = "abca\xff\0d";
+ // kFoo has exactly three elements and no terminating NUL.
+ constexpr char kFoo[] = {'f', 'o', 'o'};
+
+ // Basic searches, including a zero length, values that only match after
+ // reduction to one byte ('\xff' +/- 256), reading past the end of kFoo, and
+ // a null source pointer.
+ static_assert(__builtin_memchr(kStr, 'a', 0) == nullptr);
+ static_assert(__builtin_memchr(kStr, 'a', 1) == kStr);
+ static_assert(__builtin_memchr(kStr, '\0', 5) == nullptr);
+ static_assert(__builtin_memchr(kStr, '\0', 6) == kStr + 5);
+ static_assert(__builtin_memchr(kStr, '\xff', 8) == kStr + 4);
+ static_assert(__builtin_memchr(kStr, '\xff' + 256, 8) == kStr + 4);
+ static_assert(__builtin_memchr(kStr, '\xff' - 256, 8) == kStr + 4);
+ static_assert(__builtin_memchr(kFoo, 'x', 3) == nullptr);
+ static_assert(__builtin_memchr(kFoo, 'x', 4) == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced one-past-the-end}}
+ static_assert(__builtin_memchr(nullptr, 'x', 3) == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced null}}
+ static_assert(__builtin_memchr(nullptr, 'x', 0) == nullptr);
+
+
+ // The same checks on char8_t data; only compiled when CHAR8_T is defined.
+#if defined(CHAR8_T)
+ constexpr const char8_t *kU8Str = u8"abca\xff\0d";
+ constexpr char8_t kU8Foo[] = {u8'f', u8'o', u8'o'};
+ static_assert(__builtin_memchr(kU8Str, u8'a', 0) == nullptr);
+ static_assert(__builtin_memchr(kU8Str, u8'a', 1) == kU8Str);
+ static_assert(__builtin_memchr(kU8Str, u8'\0', 5) == nullptr);
+ static_assert(__builtin_memchr(kU8Str, u8'\0', 6) == kU8Str + 5);
+ static_assert(__builtin_memchr(kU8Str, u8'\xff', 8) == kU8Str + 4);
+ static_assert(__builtin_memchr(kU8Str, u8'\xff' + 256, 8) == kU8Str + 4);
+ static_assert(__builtin_memchr(kU8Str, u8'\xff' - 256, 8) == kU8Str + 4);
+ static_assert(__builtin_memchr(kU8Foo, u8'x', 3) == nullptr);
+ static_assert(__builtin_memchr(kU8Foo, u8'x', 4) == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced one-past-the-end}}
+ static_assert(__builtin_memchr(nullptr, u8'x', 3) == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced null}}
+ static_assert(__builtin_memchr(nullptr, u8'x', 0) == nullptr);
+#endif
+
+ // Incomplete type: a zero-length search is accepted, a non-zero length is
+ // expected to fail constant evaluation.
+ extern struct Incomplete incomplete;
+ static_assert(__builtin_memchr(&incomplete, 0, 0u) == nullptr);
+ static_assert(__builtin_memchr(&incomplete, 0, 1u) == nullptr); // both-error {{not an integral constant}} \
+ // ref-note {{read of incomplete type 'struct Incomplete'}}
+
+ // Signedness: -(0x0f + 1) is expected to match the byte 0xf0, and 0x80 is
+ // expected to match a signed char holding -128.
+ const unsigned char &u1 = 0xf0;
+ auto &&i1 = (const signed char []){-128};
+ static_assert(__builtin_memchr(&u1, -(0x0f + 1), 1) == &u1);
+ static_assert(__builtin_memchr(i1, 0x80, 1) == i1);
+
+ // One-byte elements that are not character types (an enum with unsigned
+ // char underlying type, bool) are expected to be diagnosed as not supported.
+ enum class E : unsigned char {};
+ struct EPair { E e, f; };
+ constexpr EPair ee{E{240}};
+ static_assert(__builtin_memchr(&ee.e, 240, 1) == &ee.e); // both-error {{constant}} \
+ // both-note {{not supported}}
+
+ constexpr bool kBool[] = {false, true, false};
+ constexpr const bool *const kBoolPastTheEndPtr = kBool + 3;
+ static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr - 3, 1, 99) == kBool + 1); // both-error {{constant}} \
+ // both-note {{not supported}}
+ static_assert(sizeof(bool) != 1u || __builtin_memchr(kBool + 1, 0, 99) == kBoolPastTheEndPtr - 1); // both-error {{constant}} \
+ // both-note {{not supported}}
+ static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr - 3, -1, 3) == nullptr); // both-error {{constant}} \
+ // both-note {{not supported}}
+ static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr, 0, 1) == nullptr); // both-error {{constant}} \
+ // both-note {{not supported}}
+
+ // __builtin_char_memchr: the same basic searches as for __builtin_memchr.
+ static_assert(__builtin_char_memchr(kStr, 'a', 0) == nullptr);
+ static_assert(__builtin_char_memchr(kStr, 'a', 1) == kStr);
+ static_assert(__builtin_char_memchr(kStr, '\0', 5) == nullptr);
+ static_assert(__builtin_char_memchr(kStr, '\0', 6) == kStr + 5);
+ static_assert(__builtin_char_memchr(kStr, '\xff', 8) == kStr + 4);
+ static_assert(__builtin_char_memchr(kStr, '\xff' + 256, 8) == kStr + 4);
+ static_assert(__builtin_char_memchr(kStr, '\xff' - 256, 8) == kStr + 4);
+ static_assert(__builtin_char_memchr(kFoo, 'x', 3) == nullptr);
+ static_assert(__builtin_char_memchr(kFoo, 'x', 4) == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced one-past-the-end}}
+ static_assert(__builtin_char_memchr(nullptr, 'x', 3) == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced null}}
+ static_assert(__builtin_char_memchr(nullptr, 'x', 0) == nullptr);
+
+ // The returned pointer can be dereferenced and, for a mutable buffer,
+ // written through: "mutable" is rewritten into "durable".
+ static_assert(*__builtin_char_memchr(kStr, '\xff', 8) == '\xff');
+ constexpr bool char_memchr_mutable() {
+ char buffer[] = "mutable";
+ *__builtin_char_memchr(buffer, 't', 8) = 'r';
+ *__builtin_char_memchr(buffer, 'm', 8) = 'd';
+ return __builtin_strcmp(buffer, "durable") == 0;
+ }
+ static_assert(char_memchr_mutable());
+
+ // The plain library function is not usable in a constant expression.
+ constexpr bool b = !memchr("hello", 'h', 3); // both-error {{constant expression}} \
+ // both-note {{non-constexpr function 'memchr' cannot be used in a constant expression}}
+
+}
+
+// Constant-evaluation tests for __builtin_strchr, plus a check that the plain
+// library strchr is rejected in a constant expression. The trailing comments
+// on some lines are -verify diagnostic annotations and must stay attached to
+// their statement.
+namespace Strchr {
+ // kStr holds a 0xff byte at index 4 and an embedded NUL at index 5, so 'd'
+ // (index 6) lies beyond the end of the string as strchr sees it.
+ constexpr const char *kStr = "abca\xff\0d";
+ // kFoo has exactly three elements and no terminating NUL.
+ constexpr char kFoo[] = {'f', 'o', 'o'};
+ static_assert(__builtin_strchr(kStr, 'a') == kStr);
+ static_assert(__builtin_strchr(kStr, 'b') == kStr + 1);
+ static_assert(__builtin_strchr(kStr, 'c') == kStr + 2);
+ static_assert(__builtin_strchr(kStr, 'd') == nullptr);
+ static_assert(__builtin_strchr(kStr, 'e') == nullptr);
+ static_assert(__builtin_strchr(kStr, '\0') == kStr + 5);
+ // Unlike the memchr tests, values shifted by 256 are expected not to match.
+ static_assert(__builtin_strchr(kStr, 'a' + 256) == nullptr);
+ static_assert(__builtin_strchr(kStr, 'a' - 256) == nullptr);
+ static_assert(__builtin_strchr(kStr, '\xff') == kStr + 4);
+ static_assert(__builtin_strchr(kStr, '\xff' + 256) == nullptr);
+ static_assert(__builtin_strchr(kStr, '\xff' - 256) == nullptr);
+ // Searching the unterminated kFoo succeeds when a match exists and is
+ // expected to fail with a past-the-end read when it does not.
+ static_assert(__builtin_strchr(kFoo, 'o') == kFoo + 1);
+ static_assert(__builtin_strchr(kFoo, 'x') == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced one-past-the-end}}
+ static_assert(__builtin_strchr(nullptr, 'x') == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced null}}
+
+ // The plain library function is not usable in a constant expression.
+ constexpr bool a = !strchr("hello", 'h'); // both-error {{constant expression}} \
+ // both-note {{non-constexpr function 'strchr' cannot be used in a constant expression}}
+}
|
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/4/builds/5499 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/193/builds/6155 Here is the relevant piece of the build log for the reference |
| // strchr compares directly to the passed integer, and therefore | ||
| // always fails if given an int that is not a char. | ||
| if (Desired != | ||
| Desired.trunc(S.getASTContext().getCharWidth()).getSExtValue()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you sure about this? cppreference seems to disagree with you: https://en.cppreference.com/w/cpp/string/byte/strchr
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is from ExprConstant.cpp:
llvm-project/clang/lib/AST/ExprConstant.cpp
Lines 10017 to 10023 in 0f73248
| // strchr compares directly to the passed integer, and therefore | |
| // always fails if given an int that is not a char. | |
| if (!APSInt::isSameValue(HandleIntToIntCast(Info, E, CharTy, | |
| E->getArg(1)->getType(), | |
| Desired), | |
| Desired)) | |
| return ZeroInitialization(E); |
llvm has recently started to use
__builtin_memchr at compile time, so implement this. Still needs some work, but the basics are done.