66
77#include " XCLBinGen.h"
88
9+ #include < charconv>
910#include < filesystem>
1011#include < fstream>
1112#include < functional>
@@ -77,6 +78,107 @@ FailureOr<std::vector<std::string>> flagStringToVector(
7778 std::istream_iterator<std::string>{}};
7879}
7980
// Extract an integer from the start of a string, if possible.
//
// Leading whitespace is skipped. Parsing stops at the first character that
// cannot be part of a base-10 integer, so trailing text is ignored: "3_5"
// yields 3. Returns std::nullopt if the string is empty or all whitespace, if
// it does not start with an integer, or if the value does not fit in an int.
std::optional<int> safeStoi(std::string_view intString) {
  // std::from_chars does not skip leading whitespace, so do it by hand.
  const size_t start = intString.find_first_not_of(" \t\n\r\f\v");
  if (start == std::string_view::npos) return std::nullopt;

  int value = 0;
  const char *first = intString.data() + start;
  const char *last = intString.data() + intString.size();
  const std::from_chars_result result = std::from_chars(first, last, value);
  if (result.ec != std::errc()) return std::nullopt;
  return value;
}
92+
93+ // We assume that input string is of the form:
94+ //
95+ // ```
96+ // Stack Sizes:
97+ // Size Functions
98+ // 32 some_func
99+ // 64 some_other_func
100+ // 288 core_3_5
101+ // 288 core_2_5
102+ // 288 core_1_5
103+ // 288 core_0_5
104+ // 288 core_3_4
105+ // 288 core_3_3
106+ // 288 core_2_3
107+ // 288 core_3_2
108+ // 288 core_1_2
109+ // 288 core_0_2
110+ // ```
111+ //
112+ // In terms of how we estimate stack sizes, we assume that function call
113+ // structure is as follows: functions with names core_0_0, core_0_1, core_0_2,
114+ // et cetera are the entry point functions. These functions call into the
115+ // other functions like some_func and some_other_func, but never in a
116+ // nested manner. With these assumptions, an upper bound on the total stack size
117+ // of a core is the maximum sum of it's stack size, and another function's stack
118+ // size.
119+ FailureOr<llvm::DenseMap<std::pair<uint32_t , uint32_t >, uint32_t >>
120+ getUpperBoundStackSizes (const std::string &readElfOutput) {
121+ llvm::DenseMap<std::pair<uint32_t , uint32_t >, uint32_t > coreStackSizes;
122+
123+ // Split input on whitespace. For the example above, tokens becomes
124+ // ['Functions', '32', 'some_func', '64', 'some_other', 288, 'core_3_5', ...]
125+ SmallVector<std::string> tokens;
126+ size_t index0 = readElfOutput.find (" Functions" );
127+ std::istringstream stackSizesStream (readElfOutput.substr (index0));
128+ std::copy (std::istream_iterator<std::string>(stackSizesStream),
129+ std::istream_iterator<std::string>(), std::back_inserter (tokens));
130+
131+ uint32_t maxNonCoreStackSize = 0 ;
132+ for (uint32_t i = 1 ; i < tokens.size (); i += 2 ) {
133+ std::string_view stackSizeStr = tokens[i];
134+ std::string_view functionName = tokens[i + 1 ];
135+
136+ std::optional<int > maybeSize = safeStoi (stackSizeStr);
137+ if (!maybeSize) {
138+ llvm::errs () << " Failed to convert stack size (" << stackSizeStr
139+ << " ) to integer.\n " ;
140+ return failure ();
141+ }
142+ uint32_t size = maybeSize.value ();
143+ size_t coreIndex = functionName.find (" core_" );
144+
145+ // If the function is not a core function, in the example above either
146+ // 'some_func' or 'some_other_func', then we track the maximum stack size
147+ // for these.
148+ if (coreIndex == std::string::npos) {
149+ maxNonCoreStackSize = std::max<uint32_t >(maxNonCoreStackSize, size);
150+ continue ;
151+ }
152+
153+ // The case where the function is a core function.
154+ size_t colIndex = functionName.find (" _" , coreIndex) + 1 ;
155+ std::optional<int > col = safeStoi (functionName.substr (colIndex));
156+ if (!col.has_value ()) {
157+ llvm::errs () << " Failed to extract column from " << functionName << " \n " ;
158+ return failure ();
159+ }
160+
161+ size_t rowIndex = functionName.find (" _" , colIndex) + 1 ;
162+ std::optional<int > row = safeStoi (functionName.substr (rowIndex));
163+ if (!row.has_value ()) {
164+ llvm::errs () << " Failed to extract row from " << functionName << " \n " ;
165+ return failure ();
166+ }
167+
168+ coreStackSizes.insert ({{col.value (), row.value ()}, size});
169+ }
170+
171+ // Add the maximum non-core stack size to all core stack sizes. The
172+ // logic here is that each core calls into all the non-core functions
173+ // (without nesting calls), and so the maximum stack for the core is
174+ // the maximum non-core stack size plus the core stack.
175+ for (auto &[_, size] : coreStackSizes) {
176+ size += maxNonCoreStackSize;
177+ }
178+
179+ return coreStackSizes;
180+ }
181+
80182// Peano's `opt` program optimizes llvm-ir (.ll files). We run it with a system
81183// call. This functions constructs the flags to pass to `opt`. There are some
82184// default flags, most of which are copied from llvm-aie. See
@@ -400,11 +502,11 @@ bool hasEnding(std::string const &fullString, std::string const &ending) {
400502}
401503
402504LogicalResult runTool (
403- const std::string &program_, const std::vector <std::string> & args,
404- bool verbose, std::optional<std::vector<std::string>> env = std::nullopt ) {
405- std::string program = program_;
505+ std::string program, ArrayRef <std::string> args, bool verbose ,
506+ std::optional<std::vector<std::string>> env = std::nullopt ,
507+ std::optional<std:: string> userProvidedLogFilename = std:: nullopt ) {
406508#if defined(_WIN32)
407- if (!hasEnding (program_ , " .exe" )) program = program_ + " .exe" ;
509+ if (!hasEnding (program , " .exe" )) program = program + " .exe" ;
408510#endif // _WIN32
409511 if (verbose) {
410512 llvm::outs () << " \n Run: " ;
@@ -421,16 +523,23 @@ LogicalResult runTool(
421523 return failure ();
422524 }
423525
424- // Run the program, piping any output to a temporary file (we only want to
425- // print to terminal if verbose is true).
526+ // Run the program, piping any output to a file.
426527 SmallVector<StringRef, 8 > pArgs = {program};
427528 pArgs.append (args.begin (), args.end ());
428- SmallVector<char > temporaryPath;
429- {
529+ SmallVector<char > logPath;
530+ if (userProvidedLogFilename.has_value ()) {
531+ std::string lfn = userProvidedLogFilename.value ();
532+ logPath.append (lfn.begin (), lfn.end ());
533+ if (!std::filesystem::exists (lfn)) {
534+ std::ofstream ofs (lfn);
535+ ofs.close ();
536+ }
537+
538+ } else {
430539 std::string prefix{" tmpRunTool" };
431540 std::string suffix{" Logging" };
432541 auto errorCode =
433- llvm::sys::fs::createTemporaryFile (prefix, suffix, temporaryPath );
542+ llvm::sys::fs::createTemporaryFile (prefix, suffix, logPath );
434543 if (errorCode) {
435544 llvm::errs () << " Failed to create temporary file: " << errorCode.message ()
436545 << " \n " ;
@@ -444,12 +553,11 @@ LogicalResult runTool(
444553 // Explicit type but this never actually constructs an ArrayRef
445554 std::optional<ArrayRef<StringRef>> envSmallVec = std::nullopt ;
446555#else
447- std::string temporaryPathStr =
448- std::string (temporaryPath.begin (), temporaryPath.size ());
449- StringRef temporaryPathRef (temporaryPathStr);
556+ std::string logPathStr = std::string (logPath.begin (), logPath.size ());
557+ StringRef logPathRef (logPathStr);
450558 llvm::SmallVector<llvm::StringRef> envSmallVec;
451559 if (env) envSmallVec.append (env->begin (), env->end ());
452- auto tp = std::optional<StringRef>(temporaryPathRef );
560+ auto tp = std::optional<StringRef>(logPathRef );
453561 redirects = {tp, tp, tp};
454562#endif
455563
@@ -464,7 +572,7 @@ LogicalResult runTool(
464572
465573#ifndef _WIN32
466574 auto maybeOutputFromFile = [&]() -> std::optional<std::string> {
467- std::ifstream t (temporaryPathRef .str ());
575+ std::ifstream t (logPathRef .str ());
468576 std::stringstream buffer;
469577 if (t.is_open () && t.good ()) {
470578 buffer << t.rdbuf ();
@@ -474,7 +582,7 @@ LogicalResult runTool(
474582 }();
475583
476584 if (!maybeOutputFromFile) {
477- llvm::errs () << " Failed to open temporary file " << temporaryPathRef .str ()
585+ llvm::errs () << " Failed to open temporary file " << logPathRef .str ()
478586 << " \n " ;
479587 }
480588 const std::string &outputFromFile = maybeOutputFromFile.value ();
@@ -501,7 +609,6 @@ LogicalResult runTool(
501609#endif
502610 return failure ();
503611 }
504-
505612 return success ();
506613}
507614
@@ -746,7 +853,14 @@ LogicalResult generateCoreElfFiles(AIE::DeviceOp deviceOp,
746853 }
747854 flags.emplace_back (" --target=" + targetLower + " -none-unknown-elf" );
748855 flags.emplace_back (" -Wl,--gc-sections" );
749- flags.emplace_back (" -Wl,--orphan-handling=error" );
856+
857+ // Decision to use 'warn' for orphan sections: currently if the preceding
858+ // call to llc has the flag --stack-size-section, an orphan section
859+ // is created containing the stack sizes. The linker needs to know how to
860+ // handle this: options are 'place' or 'warn' or 'error'. 'place' would
861+ // result in larger binaries. The flag '--exclude-section' should work
862+ // but doesn't appear to be supported with peano.
863+ flags.emplace_back (" -Wl,--orphan-handling=warn" );
750864 flags.emplace_back (" -Wl,-T," + ldscriptPath.string ());
751865 flags.emplace_back (" -o" );
752866 flags.emplace_back (elfFile.string ());
@@ -1078,6 +1192,55 @@ void addLowerToLLVMPasses(OpPassManager &pm) {
10781192 pm.addPass (createCSEPass ());
10791193}
10801194
1195+ LogicalResult checkStackSize (const std::string &outputFile, bool verbose,
1196+ Path peanoReadElfBin, AIE::DeviceOp deviceOp) {
1197+ std::string stackSizesFile = outputFile + " .stacksizes" ;
1198+ std::vector<std::string> args{outputFile, " --stack-sizes" };
1199+ if (failed (runTool (peanoReadElfBin.string (), args, verbose, std::nullopt ,
1200+ stackSizesFile))) {
1201+ llvm::errs () << " Failed to get stack sizes with peano\n " ;
1202+ return failure ();
1203+ }
1204+
1205+ // Read the contents of the file stackSizesFile.
1206+ std::ifstream stackSizesFileStream (stackSizesFile);
1207+ std::stringstream stackSizesBuffer;
1208+ stackSizesBuffer << stackSizesFileStream.rdbuf ();
1209+ std::string stackSizes = stackSizesBuffer.str ();
1210+ FailureOr<llvm::DenseMap<std::pair<uint32_t , uint32_t >, uint32_t >>
1211+ maybeUpperBounds =
1212+ mlir::iree_compiler::AMDAIE::detail::getUpperBoundStackSizes (
1213+ stackSizes);
1214+ if (failed (maybeUpperBounds)) {
1215+ llvm::errs () << " Failed to get upper bounds of stack sizes\n " ;
1216+ return failure ();
1217+ }
1218+ llvm::DenseMap<std::pair<uint32_t , uint32_t >, uint32_t > upperBounds =
1219+ std::move (maybeUpperBounds.value ());
1220+
1221+ SmallVector<AIE::CoreOp> coreOps;
1222+ deviceOp->walk ([&](AIE::CoreOp coreOp) { coreOps.push_back (coreOp); });
1223+ for (auto coreOp : coreOps) {
1224+ int col = coreOp.getTileOp ().getCol ();
1225+ int row = coreOp.getTileOp ().getRow ();
1226+ auto iter = upperBounds.find ({col, row});
1227+ if (iter == upperBounds.end ()) {
1228+ llvm::errs () << " The stack size for core (" << col << " , " << row
1229+ << " ) has no upper bound. " ;
1230+ return failure ();
1231+ }
1232+ auto stackSize = coreOp.getStackSize ();
1233+ if (stackSize < iter->second ) {
1234+ llvm::errs () << " An upper bound of the stack size, inferred from "
1235+ " dumper stack size file, is"
1236+ << iter->second << " bytes. The assigned stack size is "
1237+ << stackSize << " bytes, which is insufficient. " ;
1238+ return failure ();
1239+ }
1240+ }
1241+ return success ();
1242+ }
1243+
10811244LogicalResult generateUnifiedObject (
10821245 MLIRContext *context, AIE::DeviceOp deviceOp, const std::string &outputFile,
10831246 bool printIRBeforeAll, bool printIRAfterAll, bool printIRModuleScope,
@@ -1150,6 +1313,7 @@ LogicalResult generateUnifiedObject(
11501313 }
11511314 Path peanoOptBin = peanoDir / " bin" / " opt" ;
11521315 Path peanoLLCBin = peanoDir / " bin" / " llc" ;
1316+ Path peanoReadElfBin = peanoDir / " bin" / " llvm-readelf" ;
11531317
11541318 std::string OptLLVMIRFile = (tempDir / " input.opt.ll" ).string ();
11551319
@@ -1183,15 +1347,28 @@ LogicalResult generateUnifiedObject(
11831347 return failure ();
11841348 }
11851349
1186- if (failed (runTool (
1187- peanoLLCBin.string (),
1188- {OptLLVMIRFile, " -O2" , " --march=" + StringRef (targetArch).lower (),
1189- " --function-sections" , " --filetype=obj" , " -o" ,
1190- std::string (outputFile)},
1191- verbose))) {
1350+ std::vector<std::string> llcArgs{OptLLVMIRFile,
1351+ " -O2" ,
1352+ " --march=" + StringRef (targetArch).lower (),
1353+ " --function-sections" ,
1354+ " --filetype=obj" ,
1355+ " -o" ,
1356+ outputFile,
1357+ " --stack-size-section" };
1358+
1359+ if (failed (runTool (peanoLLCBin.string (), llcArgs, verbose))) {
11921360 llvm::errs () << " Failed to assemble ll with peano\n " ;
11931361 return failure ();
11941362 }
1363+
1364+ // If this is not Windows, we can do this check. On Windows runTool
1365+ // doesn't pipe logging in the way that's needed for this to work.
1366+ #ifndef _WIN32
1367+ if (failed (
1368+ checkStackSize (outputFile, verbose, peanoReadElfBin, deviceOp))) {
1369+ return failure ();
1370+ }
1371+ #endif
11951372 }
11961373
11971374 moduleOpCopy->erase ();
0 commit comments