Skip to content

Commit d181432

Browse files
authored
Cleanup ldmatrix/stmatrix indexing (#5906)
Previously, when ldmatrix and stmatrix were first introduced with Ampere, their indexing were only implemented in the legacy indexer, so there's a fall-back path for them. As we dropped support of the Ampere-style scheduling and now assume they are supplied with the alternate loop domains, that fall-back path is no longer necessary, so this PR simplifies a conditional path. Some of the tests use the old style ldmatrix/stmatrix. I just deleted or deleted them.
1 parent 27c6334 commit d181432

File tree

3 files changed

+13
-1196
lines changed

3 files changed

+13
-1196
lines changed

csrc/index_compute.cpp

Lines changed: 7 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1935,50 +1935,13 @@ bool shouldUseTensorIndexer(
19351935
GpuLower::current()->tmemInfo().hasTMemTensor();
19361936
};
19371937

1938-
// Check if TensorIndexer is supported.
1939-
auto is_tensor_indexer_supported = [&](bool assert) -> bool {
1940-
bool is_producer_ldmatrix_op = producer->definition() != nullptr &&
1941-
producer->definition()->isA<LoadStoreOp>() &&
1942-
producer->definition()->as<LoadStoreOp>()->opType() ==
1943-
LoadStoreOpType::LdMatrix;
1944-
bool is_producer_stmatrix_op_with_no_alloc_domain =
1945-
producer->definition() != nullptr &&
1946-
producer->definition()->isA<LoadStoreOp>() &&
1947-
producer->definition()->as<LoadStoreOp>()->opType() ==
1948-
LoadStoreOpType::StMatrix &&
1949-
!producer->hasAllocation();
1950-
1951-
if (assert) {
1952-
NVF_ERROR(
1953-
!is_producer_ldmatrix_op,
1954-
"TensorIndexer required but not supported as the producer is "
1955-
"produced by ldmatrix: ",
1956-
producer->definition()->toString());
1957-
NVF_ERROR(
1958-
!is_producer_stmatrix_op_with_no_alloc_domain,
1959-
"TensorIndexer required but not supported as the producer is "
1960-
"produced by stmatrix and it does not have allocation domain: ",
1961-
producer->definition()->toString());
1962-
}
1963-
1964-
return !is_producer_ldmatrix_op &&
1965-
!is_producer_stmatrix_op_with_no_alloc_domain;
1966-
};
1967-
1968-
// TensorIndexer is always used if it's required
1969-
if (is_tensor_indexer_required()) {
1970-
// Make sure it's supported
1971-
is_tensor_indexer_supported(/*assert=*/true);
1972-
return true;
1973-
}
1974-
1975-
// If opted in, TensorIndexer is used as long as it's supported
1976-
if (GpuLower::current()->idModelOptions().isTensorIndexerEnabled() &&
1977-
is_tensor_indexer_supported(/*assert=*/false)) {
1978-
return true;
1979-
}
1980-
1981-
return false;
1938+
// TensorIndexer is always used when required or if not disabled.
1939+
// Note: Previously, ldmatrix and stmatrix were first introduced
1940+
// with Ampere, their indexing were only implemented in the legacy
1941+
// indexer in a rather manual way. The current implementation uses
1942+
// the alternate loop domain to enable TensorIndexer-based indexing.
1943+
return is_tensor_indexer_required() ||
1944+
GpuLower::current()->idModelOptions().isTensorIndexerEnabled();
19821945
}
19831946

19841947
} // namespace

0 commit comments

Comments
 (0)