Skip to content

Commit 8b4d3cb

Browse files
authored
[hipDNN] Expand TensorAttribute validate and refactor where it's called (#3474)
## Motivation Run `TensorAttributes::validate()` on node inputs in graph validation so nodes can assume their inputs have been validated, and expanded the checks covered in that function. ## Technical Details - Expanded tensor validation to include - Dims and strides must be set - Dims must be positive - Tensor cannot be virtual and pass by value - Dims and strides must have the same cardinality - Moved tensor validation checks from nodes into validate function. In graph::validate(), all graph tensor inputs that are not also graph outputs will be validated. After that, each node will validate their outputs in post_validate_node(), ensuring that they've had an opportunity to infer tensor output attributes before checking validation. - Removed checks that force strides to be positive (valid tensors can exist without positive strides, this should be checked at the plugin level) - [BREAKING CHANGE] removed `validate_dims_set_and_positive()` and `validate_dims_and_strides_set_and_positive()` - Fixed suspected UB from having two different FakeNode classes defined in separate source files
1 parent 5104576 commit 8b4d3cb

16 files changed

+394
-839
lines changed

projects/hipdnn/frontend/include/hipdnn_frontend/Error.hpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,20 @@ enum class ErrorCode
2626
ATTRIBUTE_NOT_SET
2727
};
2828

29+
// NOLINTNEXTLINE(readability-identifier-naming)
30+
inline std::string to_string(ErrorCode code)
31+
{
32+
static std::vector<std::string> s_errorCodes{
33+
"OK", "INVALID_VALUE", "HIPDNN_BACKEND_ERROR", "ATTRIBUTE_NOT_SET"};
34+
35+
return s_errorCodes[static_cast<size_t>(code)];
36+
}
37+
38+
inline std::ostream& operator<<(std::ostream& os, const ErrorCode& error)
39+
{
40+
return os << to_string(error);
41+
}
42+
2943
typedef ErrorCode error_code_t; // NOLINT(readability-identifier-naming)
3044

3145
struct Error
@@ -81,6 +95,11 @@ struct Error
8195
}
8296
};
8397

98+
inline std::ostream& operator<<(std::ostream& os, const Error& error)
99+
{
100+
return os << "{" << error.code << ", " << error.get_message() << "}";
101+
}
102+
84103
typedef Error error_object; // NOLINT(readability-identifier-naming)
85104
typedef Error error_t; // NOLINT(readability-identifier-naming)
86105

projects/hipdnn/frontend/include/hipdnn_frontend/Graph.hpp

Lines changed: 36 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class Graph : public INode
4343
std::unique_ptr<ScopedHipdnnBackendDescriptor> _engineHeuristicDesc;
4444
std::unique_ptr<ScopedHipdnnBackendDescriptor> _engineConfigDesc;
4545
std::unique_ptr<ScopedHipdnnBackendDescriptor> _executionPlanDesc;
46+
4647
std::optional<int64_t> _preferredEngineId = std::nullopt;
4748

4849
static std::shared_ptr<TensorAttributes> outputTensor(const std::string& name)
@@ -335,6 +336,31 @@ class Graph : public INode
335336
return {ErrorCode::OK, ""};
336337
}
337338

339+
std::pair<std::unordered_set<std::shared_ptr<TensorAttributes>>,
340+
std::unordered_set<std::shared_ptr<TensorAttributes>>>
341+
getGraphInputTensorAttributesAndRemainder() const
342+
{
343+
std::unordered_set<std::shared_ptr<TensorAttributes>> allNodeOutputs;
344+
std::unordered_set<std::shared_ptr<TensorAttributes>> graphInputs;
345+
346+
auto collectNodeOutputs = [&](const INode& node) {
347+
auto nodeOutputs = node.getNodeOutputTensorAttributes();
348+
allNodeOutputs.insert(nodeOutputs.begin(), nodeOutputs.end());
349+
};
350+
auto collectGraphInputs = [&](const INode& node) {
351+
auto nodeInputs = node.getNodeInputTensorAttributes();
352+
std::copy_if(nodeInputs.begin(),
353+
nodeInputs.end(),
354+
std::inserter(graphInputs, graphInputs.end()),
355+
[&](const auto& nodePtr) { return allNodeOutputs.count(nodePtr) == 0; });
356+
};
357+
358+
visit(collectNodeOutputs);
359+
visit(collectGraphInputs);
360+
361+
return {graphInputs, allNodeOutputs};
362+
}
363+
338364
public:
339365
Graph()
340366
: INode(GraphAttributes{})
@@ -346,36 +372,23 @@ class Graph : public INode
346372
{
347373
HIPDNN_FE_LOG_INFO("Validating graph {}", graph_attributes.get_name());
348374

349-
std::unordered_set<std::shared_ptr<TensorAttributes>> allTensors;
350-
gatherHipdnnTensorsSubtree(allTensors);
375+
auto [inputTensors, remainingTensors] = getGraphInputTensorAttributesAndRemainder();
351376

352-
auto result = checkNoDuplicateTensorIdsImpl(allTensors);
353-
if(result.code != ErrorCode::OK)
354-
{
355-
return result;
356-
}
377+
std::unordered_set<std::shared_ptr<TensorAttributes>> allTensors = inputTensors;
378+
allTensors.insert(remainingTensors.begin(), remainingTensors.end());
357379

358-
result = topologicallySortGraph();
359-
if(result.code != ErrorCode::OK)
360-
{
361-
return result;
362-
}
380+
HIPDNN_CHECK_ERROR(checkNoDuplicateTensorIdsImpl(allTensors));
363381

364-
result = validateSubtree();
365-
if(result.code != ErrorCode::OK)
366-
{
367-
return result;
368-
}
382+
HIPDNN_CHECK_ERROR(topologicallySortGraph());
369383

370-
for(const auto& tensor : allTensors)
384+
for(const auto& tensor : inputTensors)
371385
{
372-
result = tensor->validate();
373-
if(result.code != ErrorCode::OK)
374-
{
375-
return result;
376-
}
386+
tensor->fill_from_context(graph_attributes);
387+
HIPDNN_CHECK_ERROR(tensor->validate());
377388
}
378389

390+
HIPDNN_CHECK_ERROR(validateSubtree());
391+
379392
return {ErrorCode::OK, ""};
380393
}
381394

projects/hipdnn/frontend/include/hipdnn_frontend/attributes/TensorAttributes.hpp

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -193,20 +193,24 @@ class TensorAttributes
193193
"Tensor " + _name + " does not have a data type set"};
194194
}
195195

196-
return {ErrorCode::OK, ""};
197-
}
196+
HIPDNN_RETURN_IF_TRUE(_isVirtual && get_pass_by_value(),
197+
ErrorCode::INVALID_VALUE,
198+
"Tensor " + _name + " cannot be virtual and pass by value");
199+
HIPDNN_RETURN_IF_NE(_dim.size(),
200+
_stride.size(),
201+
ErrorCode::INVALID_VALUE,
202+
"Tensor " + _name + " dims and strides have different sizes");
203+
204+
HIPDNN_RETURN_IF_TRUE(_dim.empty(),
205+
ErrorCode::ATTRIBUTE_NOT_SET,
206+
"Tensor " + _name + " dims must be non-empty");
198207

199-
bool validate_dims_set_and_positive() const // NOLINT(readability-identifier-naming
200-
{
201208
auto isPositive = [](int64_t value) constexpr { return value > 0; };
202-
return !_dim.empty() && std::all_of(_dim.begin(), _dim.end(), isPositive);
203-
}
209+
HIPDNN_RETURN_IF_FALSE(std::all_of(_dim.begin(), _dim.end(), isPositive),
210+
ErrorCode::INVALID_VALUE,
211+
"Tensor " + _name + " must have only positive dimensions");
204212

205-
bool validate_dims_and_strides_set_and_positive() const // NOLINT(readability-identifier-naming
206-
{
207-
auto isPositive = [](int64_t value) constexpr { return value > 0; };
208-
return validate_dims_set_and_positive() && _stride.size() == _dim.size()
209-
&& std::all_of(_stride.begin(), _stride.end(), isPositive);
213+
return {ErrorCode::OK, ""};
210214
}
211215

212216
flatbuffers::Offset<hipdnn_sdk::data_objects::TensorAttributes>

projects/hipdnn/frontend/include/hipdnn_frontend/node/BatchnormBackwardNode.hpp

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -90,18 +90,6 @@ class BatchnormBackwardNode : public BaseNode<BatchnormBackwardNode>
9090
HIPDNN_CHECK_ERROR(validateMinimumTensorDimensions(dy, 2, "Gradient input tensor (dy)"));
9191
HIPDNN_CHECK_ERROR(validateMinimumTensorDimensions(scale, 2, "Scale tensor"));
9292

93-
HIPDNN_RETURN_IF_FALSE(
94-
x->validate_dims_and_strides_set_and_positive(),
95-
ErrorCode::INVALID_VALUE,
96-
"BatchnormBackwardNode: Input tensor (x) dimensions and strides must be set and "
97-
"positive");
98-
99-
HIPDNN_RETURN_IF_FALSE(
100-
dy->validate_dims_and_strides_set_and_positive(),
101-
ErrorCode::INVALID_VALUE,
102-
"BatchnormBackwardNode: Gradient input tensor (dy) dimensions and strides must be "
103-
"set and positive");
104-
10593
// SECTION 3: Validate Tensor Shape Consistency
10694
// Why: Gradients flow through the same computational graph as forward pass.
10795
// - dy has same shape as y (and therefore x) from forward pass

projects/hipdnn/frontend/include/hipdnn_frontend/node/BatchnormInferenceNode.hpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,6 @@ class BatchnormInferenceNode : public BaseNode<BatchnormInferenceNode>
8484
HIPDNN_CHECK_ERROR(validateMinimumTensorDimensions(mean, 2, "Mean tensor"));
8585
HIPDNN_CHECK_ERROR(validateMinimumTensorDimensions(invVar, 2, "Inverse variance tensor"));
8686

87-
HIPDNN_RETURN_IF_FALSE(
88-
x->validate_dims_and_strides_set_and_positive(),
89-
ErrorCode::INVALID_VALUE,
90-
"BatchnormInferenceNode: Input tensor (x) dimensions and strides must be set and "
91-
"positive");
92-
9387
// SECTION 3: Validate Output Tensor Shape Consistency
9488
// Why: BN preserves tensor shape during inference just as in training.
9589
// Output y[n,c,h,w] has same shape as input x[n,c,h,w].

projects/hipdnn/frontend/include/hipdnn_frontend/node/BatchnormNode.hpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,6 @@ class BatchnormNode : public BaseNode<BatchnormNode>
8383
// Without ε, division by zero occurs when var ≈ 0. Must be a scalar.
8484
HIPDNN_CHECK_ERROR(validateScalarParameter(epsilon, "Epsilon"));
8585

86-
HIPDNN_RETURN_IF_FALSE(
87-
x->validate_dims_and_strides_set_and_positive(),
88-
ErrorCode::INVALID_VALUE,
89-
"BatchnormNode: Input tensor (x) dimensions and strides must be set and positive");
90-
9186
// SECTION 3: Validate Output Tensor Shape Consistency
9287
// Why: BN preserves tensor shape - it only transforms values, not dimensions.
9388
// Output y[n,c,h,w] has same shape as input x[n,c,h,w].

projects/hipdnn/frontend/include/hipdnn_frontend/node/ConvolutionDgradNode.hpp

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,6 @@ class ConvolutionDgradNode : public BaseNode<ConvolutionDgradNode>
6363

6464
auto& dyDims = dy->get_dim();
6565

66-
HIPDNN_RETURN_IF_FALSE(
67-
dy->validate_dims_and_strides_set_and_positive(),
68-
ErrorCode::INVALID_VALUE,
69-
"ConvolutionDgradNode: dy tensor dimensions and strides must be set and positive");
70-
7166
HIPDNN_RETURN_IF_LT(
7267
dyDims.size(),
7368
3,
@@ -76,11 +71,6 @@ class ConvolutionDgradNode : public BaseNode<ConvolutionDgradNode>
7671

7772
auto& wDims = w->get_dim();
7873

79-
HIPDNN_RETURN_IF_FALSE(
80-
w->validate_dims_and_strides_set_and_positive(),
81-
ErrorCode::INVALID_VALUE,
82-
"ConvolutionDgradNode: Weight tensor dimensions and strides must be set and positive");
83-
8474
HIPDNN_RETURN_IF_NE(
8575
wDims.size(),
8676
dyDims.size(),
@@ -96,7 +86,6 @@ class ConvolutionDgradNode : public BaseNode<ConvolutionDgradNode>
9686
"ConvolutionDgradNode: dy tensor channels must match weight tensor output channels");
9787

9888
auto& dxDims = dx->get_dim();
99-
auto& dxStrides = dx->get_stride();
10089

10190
if(!dxDims.empty())
10291
{
@@ -131,19 +120,6 @@ class ConvolutionDgradNode : public BaseNode<ConvolutionDgradNode>
131120
ErrorCode::INVALID_VALUE,
132121
"ConvolutionDgradNode: Weight tensor output channels must be divisible by "
133122
"the number of groups");
134-
135-
HIPDNN_RETURN_IF_FALSE(
136-
dx->validate_dims_set_and_positive(),
137-
ErrorCode::INVALID_VALUE,
138-
"ConvolutionDgradNode: dx tensor dimensions must be set and positive");
139-
}
140-
141-
if(!dxStrides.empty())
142-
{
143-
HIPDNN_RETURN_IF_FALSE(dx->validate_dims_and_strides_set_and_positive(),
144-
ErrorCode::INVALID_VALUE,
145-
"ConvolutionDgradNode: dx tensor dimensions and strides "
146-
"must be set and positive");
147123
}
148124

149125
// Validate spatial parameter counts match spatial dimensions

projects/hipdnn/frontend/include/hipdnn_frontend/node/ConvolutionFpropNode.hpp

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,6 @@ class ConvolutionFpropNode : public BaseNode<ConvolutionFpropNode>
6363
// Validate input tensor dimensions and strides
6464
auto& xDims = x->get_dim();
6565

66-
HIPDNN_RETURN_IF_FALSE(
67-
x->validate_dims_and_strides_set_and_positive(),
68-
ErrorCode::INVALID_VALUE,
69-
"ConvolutionFpropNode: Input tensor dimensions and strides must be set and positive");
70-
7166
HIPDNN_RETURN_IF_LT(
7267
xDims.size(),
7368
3,
@@ -77,11 +72,6 @@ class ConvolutionFpropNode : public BaseNode<ConvolutionFpropNode>
7772
// Validate weight tensor dimensions and strides
7873
auto& wDims = w->get_dim();
7974

80-
HIPDNN_RETURN_IF_FALSE(
81-
w->validate_dims_and_strides_set_and_positive(),
82-
ErrorCode::INVALID_VALUE,
83-
"ConvolutionFpropNode: Weight tensor dimensions and strides must be set and positive");
84-
8575
HIPDNN_RETURN_IF_NE(
8676
wDims.size(),
8777
xDims.size(),
@@ -111,7 +101,6 @@ class ConvolutionFpropNode : public BaseNode<ConvolutionFpropNode>
111101

112102
// Validate output tensor dimensions and strides if they are set
113103
auto& yDims = y->get_dim();
114-
auto& yStrides = y->get_stride();
115104

116105
if(!yDims.empty())
117106
{
@@ -135,19 +124,6 @@ class ConvolutionFpropNode : public BaseNode<ConvolutionFpropNode>
135124
ErrorCode::INVALID_VALUE,
136125
"ConvolutionFpropNode: Output tensor channels must match weight "
137126
"tensor output channels");
138-
139-
HIPDNN_RETURN_IF_FALSE(
140-
y->validate_dims_set_and_positive(),
141-
ErrorCode::INVALID_VALUE,
142-
"ConvolutionFpropNode: Output tensor dimensions must be set and positive");
143-
}
144-
145-
if(!yStrides.empty())
146-
{
147-
HIPDNN_RETURN_IF_FALSE(y->validate_dims_and_strides_set_and_positive(),
148-
ErrorCode::INVALID_VALUE,
149-
"ConvolutionFpropNode: Output tensor dimensions and strides "
150-
"must be set and positive");
151127
}
152128

153129
// Validate spatial parameter counts match spatial dimensions

projects/hipdnn/frontend/include/hipdnn_frontend/node/ConvolutionWgradNode.hpp

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ class ConvolutionWgradNode : public BaseNode<ConvolutionWgradNode>
4848
auto& xDims = x->get_dim();
4949
auto& dyDims = dy->get_dim();
5050
auto& dwDims = dw->get_dim();
51-
auto& dwStrides = dw->get_stride();
5251

5352
auto spatialDims = dyDims.size() - 2; // N & C dimensions aren't spatial
5453
auto& prePadding = attributes.get_pre_padding();
@@ -72,23 +71,13 @@ class ConvolutionWgradNode : public BaseNode<ConvolutionWgradNode>
7271
ErrorCode::ATTRIBUTE_NOT_SET,
7372
"ConvolutionWgradNode missing dilation for pre-validation");
7473

75-
HIPDNN_RETURN_IF_FALSE(
76-
x->validate_dims_and_strides_set_and_positive(),
77-
ErrorCode::INVALID_VALUE,
78-
"ConvolutionWgradNode: x tensor dimensions and strides must be set and positive");
79-
8074
// dy implicitly checked here too since they must be equal
8175
HIPDNN_RETURN_IF_LT(
8276
xDims.size(),
8377
3,
8478
ErrorCode::INVALID_VALUE,
8579
"ConvolutionWgradNode: x tensor must have at least 3 dimensions (N, C, spatial)");
8680

87-
HIPDNN_RETURN_IF_FALSE(
88-
dy->validate_dims_and_strides_set_and_positive(),
89-
ErrorCode::INVALID_VALUE,
90-
"ConvolutionWgradNode: dy tensor dimensions and strides must be set and positive");
91-
9281
HIPDNN_RETURN_IF_NE(dyDims.size(),
9382
xDims.size(),
9483
ErrorCode::INVALID_VALUE,
@@ -132,11 +121,6 @@ class ConvolutionWgradNode : public BaseNode<ConvolutionWgradNode>
132121
"ConvolutionWgradNode: dw tensor output channels must be divisible by "
133122
"the number of groups");
134123

135-
HIPDNN_RETURN_IF_FALSE(
136-
dw->validate_dims_set_and_positive(),
137-
ErrorCode::INVALID_VALUE,
138-
"ConvolutionWgradNode: dw tensor dimensions must be set and positive");
139-
140124
// Verifies that spatial dimensions are compatible
141125
for(size_t i = 0; i < spatialDims; ++i)
142126
{
@@ -171,14 +155,6 @@ class ConvolutionWgradNode : public BaseNode<ConvolutionWgradNode>
171155
}
172156
}
173157

174-
if(!dwStrides.empty())
175-
{
176-
HIPDNN_RETURN_IF_FALSE(dw->validate_dims_and_strides_set_and_positive(),
177-
ErrorCode::INVALID_VALUE,
178-
"ConvolutionWgradNode: dw tensor dimensions and strides "
179-
"must be set and positive");
180-
}
181-
182158
HIPDNN_RETURN_IF_NE(
183159
prePadding.size(),
184160
spatialDims,

projects/hipdnn/frontend/include/hipdnn_frontend/node/Node.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,12 @@ class BaseNode : public INode
164164
return {ErrorCode::ATTRIBUTE_NOT_SET,
165165
"Node " + self().attributes.name + " does not have a compute_data_type set"};
166166
}
167+
168+
for(const auto& tensorAttr : getNodeOutputTensorAttributes())
169+
{
170+
HIPDNN_CHECK_ERROR(tensorAttr->validate());
171+
}
172+
167173
return {ErrorCode::OK, ""};
168174
}
169175

0 commit comments

Comments
 (0)