@@ -92,6 +92,8 @@ STATISTIC(NumInternalFunc, "Number of internal functions");
9292STATISTIC (NumColdCC, " Number of functions marked coldcc" );
9393STATISTIC (NumIFuncsResolved, " Number of statically resolved IFuncs" );
9494STATISTIC (NumIFuncsDeleted, " Number of IFuncs removed" );
95+ STATISTIC (NumGlobalArraysPadded,
96+ " Number of global arrays padded to alignment boundary" );
9597
9698static cl::opt<bool >
9799 EnableColdCCStressTest (" enable-coldcc-stress-test" ,
@@ -2029,6 +2031,165 @@ OptimizeFunctions(Module &M,
20292031 return Changed;
20302032}
20312033
2034+ static bool callInstIsMemcpy (CallInst *CI) {
2035+ if (!CI)
2036+ return false ;
2037+
2038+ Function *F = CI->getCalledFunction ();
2039+ if (!F || !F->isIntrinsic () || F->getIntrinsicID () != Intrinsic::memcpy)
2040+ return false ;
2041+
2042+ return true ;
2043+ }
2044+
2045+ static bool destArrayCanBeWidened (CallInst *CI) {
2046+ auto *IsVolatile = dyn_cast<ConstantInt>(CI->getArgOperand (3 ));
2047+ auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand (0 ));
2048+
2049+ if (!Alloca || !IsVolatile || IsVolatile->isOne ())
2050+ return false ;
2051+
2052+ if (!Alloca->isStaticAlloca ())
2053+ return false ;
2054+
2055+ if (!Alloca->getAllocatedType ()->isArrayTy ())
2056+ return false ;
2057+
2058+ return true ;
2059+ }
2060+
2061+ static GlobalVariable *widenGlobalVariable (GlobalVariable *OldVar, Function *F,
2062+ unsigned NumBytesToPad,
2063+ unsigned NumBytesToCopy) {
2064+ if (!OldVar->hasInitializer ())
2065+ return nullptr ;
2066+
2067+ ConstantDataArray *DataArray =
2068+ dyn_cast<ConstantDataArray>(OldVar->getInitializer ());
2069+ if (!DataArray)
2070+ return nullptr ;
2071+
2072+ // Update to be word aligned (memcpy(...,X,...))
2073+ // create replacement with padded null bytes.
2074+ StringRef Data = DataArray->getRawDataValues ();
2075+ std::vector<uint8_t > StrData (Data.begin (), Data.end ());
2076+ for (unsigned int p = 0 ; p < NumBytesToPad; p++)
2077+ StrData.push_back (' \0 ' );
2078+ auto Arr = ArrayRef (StrData.data (), NumBytesToCopy + NumBytesToPad);
2079+ // Create new padded version of global variable.
2080+ Constant *SourceReplace = ConstantDataArray::get (F->getContext (), Arr);
2081+ GlobalVariable *NewGV = new GlobalVariable (
2082+ *(F->getParent ()), SourceReplace->getType (), true , OldVar->getLinkage (),
2083+ SourceReplace, SourceReplace->getName ());
2084+ // Copy any other attributes from original global variable
2085+ // e.g. unamed_addr
2086+ NewGV->copyAttributesFrom (OldVar);
2087+ NewGV->takeName (OldVar);
2088+ return NewGV;
2089+ }
2090+
2091+ static void widenDestArray (CallInst *CI, const unsigned NumBytesToPad,
2092+ const unsigned NumBytesToCopy,
2093+ ConstantDataArray *SourceDataArray) {
2094+
2095+ auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand (0 ));
2096+ if (Alloca) {
2097+ unsigned ElementByteWidth = SourceDataArray->getElementByteSize ();
2098+ unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad;
2099+ unsigned NumElementsToCopy = divideCeil (TotalBytes, ElementByteWidth);
2100+ // Update destination array to be word aligned (memcpy(X,...,...))
2101+ IRBuilder<> BuildAlloca (Alloca);
2102+ AllocaInst *NewAlloca = BuildAlloca.CreateAlloca (ArrayType::get (
2103+ Alloca->getAllocatedType ()->getArrayElementType (), NumElementsToCopy));
2104+ NewAlloca->takeName (Alloca);
2105+ NewAlloca->setAlignment (Alloca->getAlign ());
2106+ Alloca->replaceAllUsesWith (NewAlloca);
2107+ Alloca->eraseFromParent ();
2108+ }
2109+ }
2110+
2111+ static bool tryWidenGlobalArrayAndDests (Function *F, GlobalVariable *SourceVar,
2112+ const unsigned NumBytesToPad,
2113+ const unsigned NumBytesToCopy,
2114+ ConstantInt *BytesToCopyOp,
2115+ ConstantDataArray *SourceDataArray) {
2116+ auto *NewSourceGV =
2117+ widenGlobalVariable (SourceVar, F, NumBytesToPad, NumBytesToCopy);
2118+ if (!NewSourceGV)
2119+ return false ;
2120+
2121+ // Update arguments of remaining uses that
2122+ // are memcpys.
2123+ for (auto *User : SourceVar->users ()) {
2124+ auto *CI = dyn_cast<CallInst>(User);
2125+ if (!callInstIsMemcpy (CI) || !destArrayCanBeWidened (CI))
2126+ continue ;
2127+
2128+ if (CI->getArgOperand (1 ) != SourceVar)
2129+ continue ;
2130+
2131+ widenDestArray (CI, NumBytesToPad, NumBytesToCopy, SourceDataArray);
2132+
2133+ CI->setArgOperand (2 , ConstantInt::get (BytesToCopyOp->getType (),
2134+ NumBytesToCopy + NumBytesToPad));
2135+ }
2136+ SourceVar->replaceAllUsesWith (NewSourceGV);
2137+
2138+ NumGlobalArraysPadded++;
2139+ return true ;
2140+ }
2141+
2142+ static bool tryWidenGlobalArraysUsedByMemcpy (
2143+ GlobalVariable *GV,
2144+ function_ref<TargetTransformInfo &(Function &)> GetTTI) {
2145+
2146+ if (!GV->hasInitializer () || !GV->isConstant () || !GV->hasLocalLinkage () ||
2147+ !GV->hasGlobalUnnamedAddr ())
2148+ return false ;
2149+
2150+ for (auto *User : GV->users ()) {
2151+ CallInst *CI = dyn_cast<CallInst>(User);
2152+ if (!callInstIsMemcpy (CI) || !destArrayCanBeWidened (CI))
2153+ continue ;
2154+
2155+ Function *F = CI->getCalledFunction ();
2156+
2157+ auto *BytesToCopyOp = dyn_cast<ConstantInt>(CI->getArgOperand (2 ));
2158+ if (!BytesToCopyOp)
2159+ continue ;
2160+
2161+ ConstantDataArray *SourceDataArray =
2162+ dyn_cast<ConstantDataArray>(GV->getInitializer ());
2163+ if (!SourceDataArray)
2164+ continue ;
2165+
2166+ unsigned NumBytesToCopy = BytesToCopyOp->getZExtValue ();
2167+
2168+ auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand (0 ));
2169+ uint64_t DZSize = Alloca->getAllocatedType ()->getArrayNumElements ();
2170+ uint64_t SZSize = SourceDataArray->getType ()->getNumElements ();
2171+ unsigned ElementByteWidth = SourceDataArray->getElementByteSize ();
2172+ // Calculate the number of elements to copy while avoiding floored
2173+ // division of integers returning wrong values i.e. copying one byte
2174+ // from an array of i16 would yield 0 elements to copy as supposed to 1.
2175+ unsigned NumElementsToCopy = divideCeil (NumBytesToCopy, ElementByteWidth);
2176+
2177+ // For safety purposes lets add a constraint and only pad when
2178+ // NumElementsToCopy == destination array size ==
2179+ // source which is a constant
2180+ if (NumElementsToCopy != DZSize || DZSize != SZSize)
2181+ continue ;
2182+
2183+ unsigned NumBytesToPad = GetTTI (*F).getNumBytesToPadGlobalArray (
2184+ NumBytesToCopy, SourceDataArray->getType ());
2185+ if (NumBytesToPad) {
2186+ return tryWidenGlobalArrayAndDests (F, GV, NumBytesToPad, NumBytesToCopy,
2187+ BytesToCopyOp, SourceDataArray);
2188+ }
2189+ }
2190+ return false ;
2191+ }
2192+
20322193static bool
20332194OptimizeGlobalVars (Module &M,
20342195 function_ref<TargetTransformInfo &(Function &)> GetTTI,
@@ -2058,6 +2219,10 @@ OptimizeGlobalVars(Module &M,
20582219 continue ;
20592220 }
20602221
2222+ // For global variable arrays called in a memcpy
2223+ // we try to pad to nearest valid alignment boundary
2224+ Changed |= tryWidenGlobalArraysUsedByMemcpy (&GV, GetTTI);
2225+
20612226 Changed |= processGlobal (GV, GetTTI, GetTLI, LookupDomTree);
20622227 }
20632228 return Changed;
0 commit comments