Skip to content

Commit dafe2ba

Browse files
Merge pull request #354 from Infinoid/schedule-help
Schedule help
2 parents 557ad5f + 023f1c1 commit dafe2ba

File tree

1 file changed

+79
-2
lines changed

1 file changed

+79
-2
lines changed

tools/taco.cpp

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,8 @@ static void printUsageInfo() {
118118
cout << endl;
119119
printFlag("s=\"<command>(<params>)\"",
120120
"Specify a scheduling command to apply to the generated code. "
121-
"Parameters take the form of a comma-delimited list. "
121+
"Parameters take the form of a comma-delimited list. See "
122+
"-help=scheduling for a list of scheduling commands. "
122123
"Examples: split(i,i0,i1,16), precompute(A(i,j)*x(j),i,i).");
123124
cout << endl;
124125
printFlag("c",
@@ -193,6 +194,74 @@ static void printUsageInfo() {
193194
printFlag("nthreads", "Specify number of threads for parallel execution");
194195
cout << endl;
195196
printFlag("prefix", "Specify a prefix for generated function names");
197+
cout << endl;
198+
printFlag("help", "Print this usage information.");
199+
cout << endl;
200+
printFlag("help=scheduling",
201+
"Print information on the scheduling directives that can be passed "
202+
"to '-s'.");
203+
}
204+
205+
// Prints the help text shown for `-help=scheduling`: a short overview of how
// scheduling commands are passed via '-s', two example invocations, a link to
// the online scheduling documentation, and one entry per scheduling command.
//
// Each command entry is emitted through printFlag(), the same helper the
// general usage text uses, so the two help screens are formatted the same way.
// The parameter names in each signature (e.g. `i`, `factor`) are the names the
// accompanying description refers to, so keep them in sync when editing.
static void printSchedulingHelp() {
  // Overview of the '-s' parameter.
  cout << "Scheduling commands modify the execution of the index expression." << endl;
  cout << "The '-s' parameter specifies one or more scheduling commands." << endl;
  cout << "Schedules are additive; more commands can be passed by separating" << endl;
  cout << "them with commas, or passing multiple '-s' parameters." << endl;
  cout << endl;

  // Example invocations and a pointer to the full documentation.
  cout << "Examples:" << endl;
  cout << " -s=\"precompute(A(i,j)*x(j),i,i)\"" << endl;
  cout << " -s=\"split(i,i0,i1,32),parallelize(i0,CPUThread,NoRaces)\"" << endl;
  cout << endl;
  cout << "See http://tensor-compiler.org/docs/scheduling/index.html for more examples." << endl;
  cout << endl;

  // One entry per scheduling command, separated by blank lines.
  cout << "Commands:" << endl;
  printFlag("s=pos(i, ipos, tensor)", "Takes in an index variable `i` "
            "that iterates over the coordinate space of `tensor` and replaces "
            "it with a derived index variable `ipos` that iterates over the "
            "same iteration range, but with respect to the position space. "
            "The `pos` transformation is not valid for dense level formats.");
  cout << endl;
  printFlag("s=fuse(i, j, f)", "Takes in two index variables `i` and `j`, where "
            "`j` is directly nested under `i`, and collapses them into a fused "
            "index variable `f` that iterates over the product of the "
            "coordinates `i` and `j`.");
  cout << endl;
  printFlag("s=split(i, i0, i1, factor)", "Splits (strip-mines) an index "
            "variable `i` into two nested index variables `i0` and `i1`. The "
            "size of the inner index variable `i1` is then held constant at "
            "`factor`, which must be a positive integer.");
  cout << endl;
  printFlag("s=precompute(expr, i, iw)", "Leverages scratchpad memories and "
            "reorders computations to increase locality. Given a subexpression "
            "`expr` to precompute, an index variable `i` to precompute over, "
            "and an index variable `iw` (which can be the same or different as "
            "`i`) to precompute with, the precomputed results are stored in a "
            "temporary tensor variable.");
  cout << endl;
  printFlag("s=reorder(i1, i2, ...)", "Takes in a new ordering for a "
            "set of index variables in the expression that are directly nested "
            "in the iteration order. The indexes are ordered from outermost "
            "to innermost.");
  cout << endl;
  printFlag("s=bound(i, ib, b, type)", "Replaces an index variable `i` "
            "with an index variable `ib` that obeys a compile-time constraint "
            "on its iteration space, incorporating knowledge about the size or "
            "structured sparsity pattern of the corresponding input. The "
            "meaning of `b` depends on the `type`. Possible bound types are: "
            "MinExact, MinConstraint, MaxExact, MaxConstraint.");
  cout << endl;
  // The signature uses `i` (not `index`) so it matches the name used in the
  // description text and in every other command entry.
  printFlag("s=unroll(i, factor)", "Unrolls the loop corresponding to an "
            "index variable `i` by `factor` number of iterations, where "
            "`factor` is a positive integer.");
  cout << endl;
  printFlag("s=parallelize(i, u, strat)", "Tags an index variable `i` for "
            "parallel execution on hardware type `u`. Data races are handled by "
            "an output race strategy `strat`. Since the other transformations "
            "expect serial code, parallelize must come last in a series of "
            "transformations. Possible parallel hardware units are: "
            "NotParallel, GPUBlock, GPUWarp, GPUThread, CPUThread, CPUVector. "
            "Possible output race strategies are: "
            "IgnoreRaces, NoRaces, Atomics, Temporary, ParallelReduction.");
}
197266

198267
static int reportError(string errorMessage, int errorCode) {
@@ -536,7 +605,15 @@ int main(int argc, char* argv[]) {
536605
if (argparts.size() == 2)
537606
argValue = argparts[1];
538607

539-
if ("-f" == argName) {
608+
if ("-help" == argName) {
609+
if(argValue == "scheduling") {
610+
printSchedulingHelp();
611+
} else {
612+
printUsageInfo();
613+
}
614+
return 0;
615+
}
616+
else if ("-f" == argName) {
540617
vector<string> descriptor = util::split(argValue, ":");
541618
if (descriptor.size() < 2 || descriptor.size() > 4) {
542619
return reportError("Incorrect format descriptor", 4);

0 commit comments

Comments
 (0)