Skip to content

Commit e630bca — "fixed batched extension" (1 parent: 372817e)

Browse files

File tree

2 files changed

+51
-31
lines changed

2 files changed

+51
-31
lines changed

projects/hipdnn/test_sdk/include/hipdnn_test_sdk/utilities/DynamicTolerances.hpp

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <vector>
1212

1313
#include <hipdnn_data_sdk/types.hpp>
14+
#include <hipdnn_data_sdk/utilities/ShapeUtilities.hpp>
1415
#include <hipdnn_data_sdk/utilities/Tensor.hpp>
1516
#include <hipdnn_data_sdk/utilities/TensorView.hpp>
1617

@@ -541,7 +542,11 @@ using hipdnn_data_sdk::utilities::TensorView;
541542
* This is the appropriate subordinate matrix norm for element-wise error bounds
542543
* via Higham's analysis: max_ij |error_ij| <= gamma_k * ||A||_inf * ||B||_inf.
543544
*
544-
* Uses strides to correctly handle both packed and non-packed tensor layouts.
545+
* For batched tensors (>2D), the infinity-norm is computed across all batches,
546+
* returning the maximum row sum over all rows in all batches.
547+
*
548+
* Uses iterateAlongDimensions + ConstTensorView to correctly handle padded
549+
* and non-packed tensor layouts via stride-aware indexing.
545550
*
546551
* @tparam T The data type of the tensor elements.
547552
* @param tensor The input tensor (must have at least 2 dimensions).
@@ -550,37 +555,33 @@ using hipdnn_data_sdk::utilities::TensorView;
550555
template <typename T>
551556
double computeMatrixInfNorm(ITensor& tensor)
552557
{
553-
const auto& dims = tensor.dims();
554-
const auto& strides = tensor.strides();
558+
using hipdnn_data_sdk::utilities::iterateAlongDimensions;
555559

556-
auto rows = dims[dims.size() - 2];
557-
auto cols = dims[dims.size() - 1];
558-
559-
auto rowStride = strides[strides.size() - 2];
560-
auto colStride = strides[strides.size() - 1];
560+
const auto& dims = tensor.dims();
561+
TensorView<T> view(tensor);
561562

562-
const auto* data = static_cast<const T*>(tensor.rawHostData());
563+
auto cols = dims.back();
563564

564-
// For tensors with batch dimensions, compute across all batches
565-
auto batchCount = static_cast<int64_t>(tensor.elementCount()) / (rows * cols);
565+
// outerDims = [batch..., rows] — everything except the last dim (cols)
566+
auto outerDims = std::vector<int64_t>(dims.begin(), dims.end() - 1);
566567

567568
double maxRowSum = 0.0;
568569

569-
for(int64_t batch = 0; batch < batchCount; ++batch)
570-
{
571-
auto batchOffset = batch * rows * rowStride;
570+
iterateAlongDimensions(outerDims, [&](const std::vector<int64_t>& outerIndices) {
571+
double rowSum = 0.0;
572+
573+
auto fullIndices = outerIndices;
574+
fullIndices.push_back(0);
572575

573-
for(int64_t i = 0; i < rows; ++i)
576+
for(int64_t j = 0; j < cols; ++j)
574577
{
575-
double rowSum = 0.0;
576-
for(int64_t j = 0; j < cols; ++j)
577-
{
578-
auto idx = batchOffset + i * rowStride + j * colStride;
579-
rowSum += static_cast<double>(hipdnn_data_sdk::types::fabs(data[idx]));
580-
}
581-
maxRowSum = std::max(maxRowSum, rowSum);
578+
fullIndices.back() = j;
579+
rowSum += static_cast<double>(
580+
hipdnn_data_sdk::types::fabs(view.getHostValue(fullIndices)));
582581
}
583-
}
582+
583+
maxRowSum = std::max(maxRowSum, rowSum);
584+
});
584585

585586
return maxRowSum;
586587
}

projects/hipdnn/test_sdk/tests/utilities/TestDynamicTolerances.cpp

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <cmath>
55
#include <gtest/gtest.h>
66
#include <hipdnn_data_sdk/types.hpp>
7+
#include <hipdnn_data_sdk/utilities/ShapeUtilities.hpp>
78
#include <hipdnn_data_sdk/utilities/Tensor.hpp>
89
#include <hipdnn_test_sdk/utilities/CpuFpReferenceValidation.hpp>
910
#include <hipdnn_test_sdk/utilities/DynamicTolerances.hpp>
@@ -1176,24 +1177,32 @@ struct MatmulToleranceTestCase
11761177
// rowValues[i] specifies the constant value for all elements in row i.
11771178
// This ensures different rows have different sums, exercising the max-row-sum logic
11781179
// in computeMatrixInfNorm.
1180+
// Supports batched (>2D) tensors via iterateAlongDimensions.
11791181
template <typename T>
11801182
hipdnn_data_sdk::utilities::Tensor<T>
11811183
createTensorFromRowValues(const std::vector<int64_t>& dims,
11821184
const std::vector<double>& rowValues)
11831185
{
1186+
using hipdnn_data_sdk::utilities::iterateAlongDimensions;
1187+
11841188
hipdnn_data_sdk::utilities::Tensor<T> tensor(dims);
1185-
auto* data = static_cast<T*>(tensor.memory().hostData());
11861189

1187-
auto rows = dims[dims.size() - 2];
1188-
auto cols = dims[dims.size() - 1];
1190+
auto cols = dims.back();
1191+
1192+
// outerDims = [batch..., rows] — everything except the last dim (cols)
1193+
auto outerDims = std::vector<int64_t>(dims.begin(), dims.end() - 1);
1194+
1195+
iterateAlongDimensions(outerDims, [&](const std::vector<int64_t>& outerIndices) {
1196+
auto row = outerIndices.back();
1197+
auto fullIndices = outerIndices;
1198+
fullIndices.push_back(0);
11891199

1190-
for(int64_t i = 0; i < rows; ++i)
1191-
{
11921200
for(int64_t j = 0; j < cols; ++j)
11931201
{
1194-
data[i * cols + j] = static_cast<T>(rowValues[static_cast<size_t>(i)]);
1202+
fullIndices.back() = j;
1203+
tensor.setHostValue(static_cast<T>(rowValues[static_cast<size_t>(row)]), fullIndices);
11951204
}
1196-
}
1205+
});
11971206

11981207
return tensor;
11991208
}
@@ -1224,7 +1233,17 @@ std::vector<MatmulToleranceTestCase> getMatmulToleranceTestCases<TypeTriple<floa
12241233
{1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0},
12251234
computeGamma(10, u) * 20.0 * 10.0},
12261235
// K=100: A=2x100, rows={1,2}. ||A||_inf=200, B=100x2, all 1.0. ||B||_inf=2
1227-
{{2, 100}, {100, 2}, {1.0, 2.0}, bRowValues100, computeGamma(100, u) * 200.0 * 2.0}};
1236+
{{2, 100}, {100, 2}, {1.0, 2.0}, bRowValues100, computeGamma(100, u) * 200.0 * 2.0},
1237+
// Batched K=3: A={2,2,3}, B={2,3,4}. Same row values as 2D K=3 case.
1238+
// Batch dim doesn't change max row sum: ||A||_inf=6, ||B||_inf=12
1239+
{{2, 2, 3}, {2, 3, 4}, {1.0, 2.0}, {1.0, 3.0, 0.5}, computeGamma(3, u) * 6.0 * 12.0},
1240+
// Batched K=10: A={3,2,10}, B={3,10,2}. 3 batches.
1241+
// ||A||_inf=20, ||B||_inf=10 (same as 2D K=10)
1242+
{{3, 2, 10},
1243+
{3, 10, 2},
1244+
{1.0, 2.0},
1245+
{1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0},
1246+
computeGamma(10, u) * 20.0 * 10.0}};
12281247
}
12291248

12301249
// Float / Double / Float (Input casting error)

0 commit comments

Comments (0)