@@ -335,6 +335,13 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
335335 return Addr;
336336}
337337
// Selects how the helpers declared below fill bytes they synthesize:
// IsPattern::Yes asks for a pattern fill, IsPattern::No asks for zeroes.
338+ enum class IsPattern { No, Yes };
339+
// Forward declarations of file-local helpers so that code earlier in the file
// (e.g. CodeGenFunction::AddInitializerToStaticVarDecl) can call them.
// NOTE(review): the definitions are expected further down in this file; only
// part of constWithPadding() is visible in this excerpt.
340+ static llvm::Constant *replaceUndef (CodeGenModule &CGM, IsPattern isPattern,
341+ llvm::Constant *constant);
342+ static llvm::Constant *constWithPadding (CodeGenModule &CGM, IsPattern isPattern,
343+ llvm::Constant *constant);
344+
338345// / AddInitializerToStaticVarDecl - Add the initializer for 'D' to the
339346// / global variable that has already been created for it. If the initializer
340347// / has a different type than GV does, this may free GV and return a different
@@ -361,6 +368,51 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D,
361368 }
362369 return GV;
363370 }
371+ if (!getLangOpts ().CPlusPlus ) {
372+ // In C23 (N3096) $6.7.10:
373+ // """
374+ // If any object is initialized with an empty initializer, then it is subject to default
375+ // initialization:
376+ // - if it is an aggregate, every member is initialized (recursively) according to these rules,
377+ // and any padding is initialized to zero bits;
378+ // - if it is a union, the first named member is initialized (recursively) according to these
379+ // rules, and any padding is initialized to zero bits.
380+ //
381+ // If the aggregate or union contains elements or members that are aggregates or unions, these
382+ // rules apply recursively to the subaggregates or contained unions.
383+ //
384+ // If there are fewer initializers in a brace-enclosed list than there are elements or members
385+ // of an aggregate, or fewer characters in a string literal used to initialize an array of
386+ // known size than there are elements in the array, the remainder of the aggregate is subject
387+ // to default initialization.
388+ // """
389+ //
390+ // From my understanding, the standard is ambiguous in the following two areas:
391+ // 1. For a union type with empty initializer, if the first named member is not the largest
392+ // member, then the bytes that come after the first named member but before padding are left
393+ // unspecified. An example is:
394+ // union U { int a; long long b;};
395+ // union U u = {}; // The first 4 bytes are 0, but 4-8 bytes are left unspecified.
396+ //
397+ // 2. It only mentions padding for empty initializer, but doesn't mention padding for a
398+ // non-empty initialization list. And if the aggregate or union contains elements or members
399+ // that are aggregates or unions, and some have non-empty initializers while others have empty
400+ // initializers, the padding initialization is unclear. An example is:
401+ // struct S1 { int a; long long b; };
402+ // struct S2 { char c; struct S1 s1; };
403+ // // The values for paddings between s2.c and s2.s1.a, between s2.s1.a and s2.s1.b are
404+ // // unclear.
405+ // struct S2 s2 = { 'c' };
406+ //
407+ // Here we choose to zero-initialize the remaining bytes of a union type, because projects like the
408+ // Linux kernel rely on this behavior. If we don't explicitly zero-initialize them, the
409+ // undef values can be optimized to return garbage data.
410+ // We also choose to zero-initialize padding for aggregates and unions, regardless of whether they are
411+ // initialized by empty or non-empty initializers. This provides consistent
412+ // behavior that projects like the Linux kernel can rely on.
413+ Init = constWithPadding (CGM, IsPattern::No,
414+ replaceUndef (CGM, IsPattern::No, Init));
415+ }
364416
365417#ifndef NDEBUG
366418 CharUnits VarSize = CGM.getContext ().getTypeSizeInChars (D.getType ()) +
@@ -1038,8 +1090,6 @@ static bool shouldSplitConstantStore(CodeGenModule &CGM,
10381090 return false ;
10391091}
10401092
1041- enum class IsPattern { No, Yes };
1042-
10431093// / Generate a constant filled with either a pattern or zeroes.
10441094static llvm::Constant *patternOrZeroFor (CodeGenModule &CGM, IsPattern isPattern,
10451095 llvm::Type *Ty) {
@@ -1049,9 +1099,6 @@ static llvm::Constant *patternOrZeroFor(CodeGenModule &CGM, IsPattern isPattern,
10491099 return llvm::Constant::getNullValue (Ty);
10501100}
10511101
1052- static llvm::Constant *constWithPadding (CodeGenModule &CGM, IsPattern isPattern,
1053- llvm::Constant *constant);
1054-
10551102// / Helper function for constWithPadding() to deal with padding in structures.
10561103static llvm::Constant *constStructWithPadding (CodeGenModule &CGM,
10571104 IsPattern isPattern,
@@ -1109,6 +1156,9 @@ static llvm::Constant *constWithPadding(CodeGenModule &CGM, IsPattern isPattern,
11091156 if (ZeroInitializer) {
11101157 OpValue = llvm::Constant::getNullValue (ElemTy);
11111158 PaddedOp = constWithPadding (CGM, isPattern, OpValue);
1159+ // Avoid iterating large arrays with zero initializer when possible.
1160+ if (PaddedOp->getType () == ElemTy)
1161+ return constant;
11121162 }
11131163 for (unsigned Op = 0 ; Op != Size; ++Op) {
11141164 if (!ZeroInitializer) {
@@ -1954,21 +2004,22 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
19542004 D.mightBeUsableInConstantExpressions (getContext ())) {
19552005 assert (!capturedByInit && " constant init contains a capturing block?" );
19562006 constant = ConstantEmitter (*this ).tryEmitAbstractForInitializer (D);
1957- if (constant && !constant->isZeroValue () &&
1958- (trivialAutoVarInit !=
1959- LangOptions::TrivialAutoVarInitKind::Uninitialized)) {
1960- IsPattern isPattern =
1961- (trivialAutoVarInit == LangOptions::TrivialAutoVarInitKind::Pattern)
1962- ? IsPattern::Yes
1963- : IsPattern::No;
1964- // C guarantees that brace-init with fewer initializers than members in
1965- // the aggregate will initialize the rest of the aggregate as-if it were
1966- // static initialization. In turn static initialization guarantees that
1967- // padding is initialized to zero bits. We could instead pattern-init if D
1968- // has any ImplicitValueInitExpr, but that seems to be unintuitive
1969- // behavior.
1970- constant = constWithPadding (CGM, IsPattern::No,
1971- replaceUndef (CGM, isPattern, constant));
2007+ if (constant && !constant->isZeroValue ()) {
2008+ if (!getLangOpts ().CPlusPlus ) {
2009+ // See comment in CodeGenFunction::AddInitializerToStaticVarDecl().
2010+ constant = constWithPadding (CGM, IsPattern::No,
2011+ replaceUndef (CGM, IsPattern::No, constant));
2012+ } else if (trivialAutoVarInit !=
2013+ LangOptions::TrivialAutoVarInitKind::Uninitialized) {
2014+ IsPattern isPattern =
2015+ (trivialAutoVarInit == LangOptions::TrivialAutoVarInitKind::Pattern)
2016+ ? IsPattern::Yes
2017+ : IsPattern::No;
2018+ // We could instead pattern-init padding if D has any
2019+ // ImplicitValueInitExpr, but that seems to be unintuitive behavior.
2020+ constant = constWithPadding (CGM, IsPattern::No,
2021+ replaceUndef (CGM, isPattern, constant));
2022+ }
19722023 }
19732024
19742025 if (D.getType ()->isBitIntType () &&
@@ -2861,3 +2912,9 @@ CodeGenModule::getOMPAllocateAlignment(const VarDecl *VD) {
28612912 }
28622913 return std::nullopt ;
28632914}
2915+
// Returns the constant obtained by passing `Init` first through replaceUndef()
// and then through constWithPadding(), both invoked with IsPattern::No (i.e.
// not requesting a pattern fill). This is the same pair of calls that
// CodeGenFunction::AddInitializerToStaticVarDecl applies to C initializers.
//
// \param Init  the initializer constant to transform.
// \returns     the result of constWithPadding(); whether it is a new constant
//              or `Init` itself depends on those helpers.
2916+ llvm::Constant *
2917+ CodeGenModule::zeroInitGlobalVarInitializer (llvm::Constant *Init) {
2918+ return constWithPadding (*this , IsPattern::No,
2919+ replaceUndef (*this , IsPattern::No, Init));
2920+ }
0 commit comments