Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion backends/cadence/hifi/kernels/kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,19 @@ memcpy(void* dst, const void* src, size_t num_bytes) {
}

// Requests `size` bytes of temporary memory from the kernel runtime context.
//
// Logs the attempt, then either the resulting pointer (Info) or the error
// code (Error). Returns the pointer produced by `ctx.allocate_temp(size)` on
// success and nullptr on failure, so callers must check the result before
// dereferencing it.
void* allocate_temp_memory(KernelRuntimeContext& ctx, size_t size) {
  ET_LOG(Info, "Attempting to allocate %zu bytes of temp memory", size);
  Result<void*> temp_mem_res = ctx.allocate_temp(size);

  // Failure path first: report the error code and hand back nullptr.
  if (!temp_mem_res.ok()) {
    ET_LOG(
        Error,
        "Failed to allocate temp memory, error: 0x%x",
        static_cast<uint32_t>(temp_mem_res.error()));
    return nullptr;
  }

  void* ptr = temp_mem_res.get();
  ET_LOG(Info, "Successfully allocated temp memory at %p", ptr);
  return ptr;
}

// Quantize a fp32 value to an int8_t/uint8_t value
Expand Down
14 changes: 12 additions & 2 deletions backends/cadence/hifi/operators/op_mm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ Tensor& mm_out(
(WORD32* __restrict__)kernels::allocate_temp_memory(
ctx, (n * p) * sizeof(WORD32));

// Allocate zero-initialized bias for matmul function (it doesn't accept
// NULL)
FLOAT32* __restrict__ p_bias_zero =
(FLOAT32* __restrict__)kernels::allocate_temp_memory(
ctx, m * sizeof(FLOAT32));

// Initialize bias to zero since mm operation has no bias
memset(p_bias_zero, 0, m * sizeof(FLOAT32));

WORD32 p_inp_shape[2];
p_inp_shape[0] = n;
p_inp_shape[1] = p;
Expand Down Expand Up @@ -109,19 +118,20 @@ Tensor& mm_out(

const FLOAT32* __restrict__ p_vec = (const FLOAT32* __restrict__)p_o;

// mm will always be converted to addmm and to linear, and move transpose to
// graph
WORD32 val = xa_nn_matmul_f32xf32_f32(
p_out,
p_mat1,
p_vec,
NULL,
p_bias_zero,
rows,
cols1,
row_stride1,
vec_count,
vec_offset,
out_offset,
out_stride);

return out;
}

Expand Down
155 changes: 138 additions & 17 deletions backends/cadence/utils/facto_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from functools import lru_cache
from typing import List, OrderedDict, Tuple

import facto.specdb.function as fn

import torch
from facto.inputgen.argtuple.gen import ArgumentTupleGenerator
from facto.inputgen.specs.model import ConstraintProducer as cp
Expand All @@ -22,21 +24,28 @@

def apply_tensor_contraints(op_name: str, index: int) -> list[object]:
tensor_constraints = [
cp.Dtype.In(lambda deps: [torch.int, torch.float]),
cp.Dtype.NotIn(lambda deps: [torch.int64, torch.float64]),
cp.Dtype.In(
lambda deps: [
torch.int8,
torch.int16,
torch.uint8,
torch.uint16,
torch.float32,
]
),
cp.Value.Ge(lambda deps, dtype, struct: -(2**4)),
cp.Value.Le(lambda deps, dtype, struct: 2**4),
cp.Rank.Ge(lambda deps: 1),
cp.Size.Ge(lambda deps, r, d: 1),
cp.Size.Le(lambda deps, r, d: 2**9),
cp.Rank.Le(lambda deps: 2**3),
]

match op_name:
case "where.self":
if index == 0: # condition
tensor_constraints = [
cp.Dtype.In(lambda deps: [torch.bool]),
cp.Dtype.NotIn(lambda deps: [torch.int64, torch.float64]),
cp.Value.Ge(lambda deps, dtype, struct: -(2**4)),
cp.Value.Le(lambda deps, dtype, struct: 2**4),
cp.Rank.Ge(lambda deps: 1),
Expand All @@ -45,28 +54,43 @@ def apply_tensor_contraints(op_name: str, index: int) -> list[object]:
]
else:
tensor_constraints = [
cp.Dtype.In(lambda deps: [torch.float, torch.int]),
cp.Dtype.NotIn(lambda deps: [torch.int64, torch.float64]),
cp.Dtype.In(
lambda deps: [
torch.int8,
torch.int16,
torch.uint8,
torch.uint16,
torch.float32,
]
),
cp.Value.Ge(lambda deps, dtype, struct: -(2**4)),
cp.Value.Le(lambda deps, dtype, struct: 2**4),
cp.Rank.Ge(lambda deps: 1),
cp.Size.Ge(lambda deps, r, d: 1),
cp.Size.Le(lambda deps, r, d: 2**9),
]
case "embedding.default":
tensor_constraints = [
cp.Dtype.In(lambda deps: [torch.float, torch.int]),
cp.Dtype.NotIn(lambda deps: [torch.int64, torch.float64]),
cp.Value.Ge(lambda deps, dtype, struct: -(2**4)),
cp.Value.Le(lambda deps, dtype, struct: 2**4),
cp.Rank.Ge(lambda deps: 1),
cp.Size.Ge(lambda deps, r, d: 1),
cp.Size.Le(lambda deps, r, d: 2**9),
]
case "sigmoid.default":
tensor_constraints.extend(
[
cp.Dtype.In(lambda deps: [torch.float]),
cp.Rank.Le(lambda deps: 2**2),
cp.Dtype.In(lambda deps: [torch.float32]),
cp.Value.Ge(lambda deps, dtype, struct: -2),
cp.Value.Le(lambda deps, dtype, struct: 2),
]
)
case "rsqrt.default":
tensor_constraints.extend(
[
cp.Dtype.In(lambda deps: [torch.float]),
cp.Rank.Le(lambda deps: 2**2),
cp.Dtype.In(lambda deps: [torch.float32]),
cp.Value.Gt(
lambda deps, dtype, struct: 0
), # only generate real numbers
Expand All @@ -76,14 +100,12 @@ def apply_tensor_contraints(op_name: str, index: int) -> list[object]:
case "mean.dim":
tensor_constraints.extend(
[
cp.Dtype.In(lambda deps: [torch.float]),
cp.Rank.Le(lambda deps: 2**2),
cp.Dtype.In(lambda deps: [torch.float32]),
]
)
case "exp.default":
tensor_constraints.extend(
[
cp.Rank.Le(lambda deps: 2**3),
cp.Value.Ge(lambda deps, dtype, struct: -(2**2)),
cp.Value.Le(lambda deps, dtype, struct: 2**2),
]
Expand All @@ -96,26 +118,96 @@ def apply_tensor_contraints(op_name: str, index: int) -> list[object]:
cp.Value.Le(lambda deps, dtype, struct: 2),
]
)
case _:
case "constant_pad_nd.default":
tensor_constraints.extend(
[
cp.Rank.Le(lambda deps: 2**2),
cp.Dtype.In(lambda deps: [torch.float32]),
cp.Size.Le(lambda deps, r, d: 2**2),
]
)
case "avg_pool2d.default":
tensor_constraints.extend(
[
cp.Rank.Eq(lambda deps: 4),
]
)
case "bmm.default" | "addmm.default" | "mm.default":
tensor_constraints.extend(
[
cp.Dtype.Eq(lambda deps: torch.float),
cp.Size.Le(lambda deps, r, d: 2**2),
cp.Value.Le(lambda deps, dtype, struct: 2**4),
]
)
case "div.Tensor":
tensor_constraints.extend(
[
cp.Value.Ne(lambda deps, dtype, struct: 0),
]
)
case "div.Tensor_mode" | "minimum.default":
if index == 0:
tensor_constraints = [
cp.Dtype.In(lambda deps: [torch.int64, torch.int32, torch.float32]),
cp.Value.Ge(lambda deps, dtype, struct: -(2**4)),
cp.Value.Le(lambda deps, dtype, struct: 2**4),
cp.Rank.Ge(lambda deps: 1),
cp.Size.Ge(lambda deps, r, d: 1),
cp.Size.Le(lambda deps, r, d: 2**2),
]
else:
tensor_constraints = [
cp.Dtype.In(lambda deps: [torch.int64, torch.int32, torch.float32]),
cp.Value.Ge(lambda deps, dtype, struct: -(2**4)),
cp.Value.Le(lambda deps, dtype, struct: 2**4),
cp.Rank.Ge(lambda deps: 1),
cp.Rank.Eq(lambda deps: deps[0].dim()),
cp.Size.Eq(lambda deps, r, d: fn.safe_size(deps[0], d)),
]
case "_native_batch_norm_legit_no_training.default":
tensor_constraints.extend(
[
cp.Rank.Le(lambda deps: 3),
],
)
case "reciprocal.default":
tensor_constraints = [
cp.Value.Ge(lambda deps, dtype, struct: -(2**2)),
cp.Value.Le(lambda deps, dtype, struct: 2**2),
cp.Size.Le(lambda deps, r, d: 2**3),
]
case "_softmax.default":
tensor_constraints.extend(
[
cp.Dtype.Eq(lambda deps: torch.float32),
cp.Size.Le(lambda deps, r, d: 2**2),
]
)
case _:
pass
return tensor_constraints


def apply_scalar_contraints(op_name: str) -> list[ScalarDtype]:
match op_name:
case "add.Scalar" | "sub.Scalar" | "mul.Scalar" | "div.Scalar":
case (
"add.Scalar"
| "sub.Scalar"
| "mul.Scalar"
| "div.Scalar"
| "constant_pad_nd.default"
):
return [ScalarDtype.int]
case "full.default":
return [ScalarDtype.int]

case _:
return [ScalarDtype.float, ScalarDtype.int]


@lru_cache(maxsize=None)
def facto_testcase_gen(op_name: str) -> List[Tuple[List[str], OrderedDict[str, str]]]:
def facto_testcase_gen( # noqa: C901
op_name: str,
) -> List[Tuple[List[str], OrderedDict[str, str]]]:
# minimal example to test add.Tensor using FACTO
spec = SpecDictDB[op_name]

Expand Down Expand Up @@ -149,6 +241,12 @@ def facto_testcase_gen(op_name: str) -> List[Tuple[List[str], OrderedDict[str, s
cp.Dtype.In(lambda deps: apply_scalar_contraints(op_name)),
]
)
if in_spec.name == "dtype": # full.default
spec.inspec[index].constraints.extend(
[
cp.Dtype.In(lambda deps: [torch.long, torch.float]),
]
)
elif in_spec.type.is_tensor():
spec.inspec[index].constraints.extend(
apply_tensor_contraints(op_name, index)
Expand All @@ -166,6 +264,29 @@ def facto_testcase_gen(op_name: str) -> List[Tuple[List[str], OrderedDict[str, s
cp.Dtype.In(lambda deps: [torch.bool]),
]
)
elif in_spec.type.is_length_list():
spec.inspec[index].constraints.extend(
[
cp.Value.Ge(lambda deps, dtype, struct: 0),
]
)
if op_name == "avg_pool2d.default":
spec.inspec[index].constraints.extend(
[
cp.Length.Eq(lambda deps: 2),
]
)
elif in_spec.type.is_shape():
spec.inspec[index].constraints.extend(
[
cp.Rank.Ge(lambda deps: 1),
cp.Rank.Le(lambda deps: 2**2),
cp.Value.Gt(lambda deps, dtype, struct: 0),
cp.Value.Le(lambda deps, dtype, struct: 2**2),
cp.Size.Ge(lambda deps, r, d: 1),
cp.Size.Le(lambda deps, r, d: 2**2),
]
)

return [
(posargs, inkwargs)
Expand Down
Loading