Review notes. Text ahead of the first "diff --git" header is skipped when the
patch is applied. Changes relative to the patch as originally posted:

  * loopy/kernel/data.py, filter_iname_tags_by_type: the error raised when
    fewer than min_num tags are found now reports min_num (it interpolated
    max_num) and reads "at least" instead of "more than".
  * loopy/kernel/data.py, TagT documentation: "lower bound" -> "upper bound";
    Python type variables only have upper bounds.
  * loopy/target/ispc.py, emit_assignment: type(ary).__name -> __name__. The
    old spelling raised AttributeError instead of the intended LoopyError.
  * loopy/target/ispc.py, emit_assignment: the l.0 coefficient check no longer
    depends on the l.0 iname being present as a key in the collected
    coefficients, and it raises LoopyError like the neighbouring checks.
  * loopy/target/ispc.py, IsVaryingMapper.map_variable: a local axis other
    than 0 raises LoopyError instead of tripping an assert (asserts are
    stripped under "python -O"). This adds one line, so the new-file start
    offsets of the later ispc.py hunks are shifted by one.

Line breaks and indentation were restored from a whitespace-collapsed copy of
the patch; blank context lines were placed so that every hunk matches the
line counts in its header.

diff --git a/doc/ref_internals.rst b/doc/ref_internals.rst
index 86e2edad2..0ad56a39b 100644
--- a/doc/ref_internals.rst
+++ b/doc/ref_internals.rst
@@ -24,6 +24,11 @@ the codegen pipeline user-provided types are converted to
 
 .. automodule:: loopy.types
 
+Type inference
+^^^^^^^^^^^^^^
+
+.. automodule:: loopy.type_inference
+
 Codegen
 -------
 
diff --git a/loopy/check.py b/loopy/check.py
index 1a63c90bc..0a901e3a3 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -50,6 +50,7 @@
     AddressSpace,
     ArrayArg,
     ArrayDimImplementationTag,
+    AxisTag,
     InameImplementationTag,
     TemporaryVariable,
     auto,
@@ -1426,6 +1427,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(
                         iname, AutoLocalInameTagBase, max_num=1)
 
                 if ltags:
+                    tag: AxisTag
                     tag, = ltags
                     local_axes_used.add(tag.axis)
                 elif gtags:
diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index f487078c7..af783bab6 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -38,6 +38,7 @@
 from sys import intern
 from typing import (
     TYPE_CHECKING,
+    AbstractSet,
     Any,
     Callable,
     ClassVar,
@@ -59,7 +60,7 @@
     memoize_method,
     natsorted,
 )
-from pytools.tag import Tag, Taggable
+from pytools.tag import Tag, Taggable, TagT
 
 import loopy.codegen
 import loopy.kernel.data  # to help out Sphinx
@@ -539,14 +540,16 @@ def _get_inames_domain_backend(self, inames):
     def iname_tags(self, iname):
         return self.inames[iname].tags
 
-    def iname_tags_of_type(self, iname, tag_type_or_types,
-            max_num=None, min_num=None):
+    def iname_tags_of_type(
+                self, iname: str,
+                tag_type_or_types: type[TagT] | tuple[type[TagT], ...],
+                max_num: int | None = None,
+                min_num: int | None = None
+            ) -> AbstractSet[TagT]:
         """Return a subset of *tags* that matches type *tag_type*. Raises exception
         if the number of tags found were greater than *max_num* or less than
         *min_num*.
 
-        :arg tags: An iterable of tags.
-        :arg tag_type_or_types: a subclass of :class:`loopy.kernel.data.InameTag`.
         :arg max_num: the maximum number of tags expected to be found.
         :arg min_num: the minimum number of tags expected to be found.
         """
diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index 9895685fb..5844143f4 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -1217,10 +1217,11 @@ def _apply_offset(sub: Expression, ary: ArrayBase) -> Expression:
 
 
 def get_access_info(kernel: LoopKernel,
-        ary: ArrayArg | TemporaryVariable,
-        index: Expression | tuple[Expression, ...],
-        eval_expr: Callable[[Expression], int],
-        vectorization_info: VectorizationInfo) -> AccessInfo:
+            ary: ArrayArg | TemporaryVariable,
+            index: Expression | tuple[Expression, ...],
+            eval_expr: Callable[[Expression], int],
+            vectorization_info: VectorizationInfo | None
+        ) -> AccessInfo:
     """
     :arg ary: an object of type :class:`ArrayBase`
     :arg index: a tuple of indices representing a subscript into ary
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index 3dd1cf82b..474f444ed 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -43,7 +43,7 @@
 import numpy as np
 
 from pytools import ImmutableRecord
-from pytools.tag import Tag, Taggable, UniqueTag as UniqueTagBase
+from pytools.tag import Tag, Taggable, TagT, UniqueTag as UniqueTagBase
 
 from loopy.diagnostic import LoopyError
 from loopy.kernel.array import ArrayBase, ArrayDimImplementationTag
@@ -64,7 +64,7 @@
 
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
+    from collections.abc import Iterable, Mapping
 
     from pymbolic import ArithmeticExpression, Variable
 
@@ -98,6 +98,10 @@
 .. class:: ToLoopyTypeConvertible
 
     See :class:`loopy.ToLoopyTypeConvertible`.
+
+.. class:: TagT
+
+    A type variable with an upper bound of :class:`pytools.tag.Tag`.
 """
 
 # This docstring is included in ref_internals. Do not include parts of the public
@@ -143,7 +147,12 @@ def _names_from_dim_tags(
 
 # {{{ iname tags
 
-def filter_iname_tags_by_type(tags, tag_type, max_num=None, min_num=None):
+def filter_iname_tags_by_type(
+            tags: Iterable[Tag],
+            tag_type: type[TagT] | tuple[type[TagT], ...],
+            max_num: int | None = None,
+            min_num: int | None = None,
+        ) -> set[TagT]:
     """Return a subset of *tags* that matches type *tag_type*. Raises exception
     if the number of tags found were greater than *max_num* or less than
     *min_num*.
@@ -154,7 +163,9 @@ def filter_iname_tags_by_type(tags, tag_type, max_num=None, min_num=None):
     :arg min_num: the minimum number of tags expected to be found.
     """
 
-    result = {tag for tag in tags if isinstance(tag, tag_type)}
+    result: set[TagT] = cast(
+            "set[TagT]",
+            {tag for tag in tags if isinstance(tag, tag_type)})
 
     def strify_tag_type():
         if isinstance(tag_type, tuple):
@@ -170,6 +181,7 @@ def strify_tag_type():
         if len(result) < min_num:
-            raise LoopyError("must have more than {} tags "
-                    "of type(s): {}".format(max_num, strify_tag_type()))
+            raise LoopyError("must have at least {} tags "
+                    "of type(s): {}".format(min_num, strify_tag_type()))
+
     return result
 
 
diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py
index b1723e9d6..83c13dfe5 100644
--- a/loopy/target/c/codegen/expression.py
+++ b/loopy/target/c/codegen/expression.py
@@ -48,12 +48,13 @@
 from loopy.diagnostic import LoopyError
 from loopy.expression import dtype_to_type_context
 from loopy.target.c import CExpression
-from loopy.type_inference import TypeReader
+from loopy.type_inference import TypeInferenceMapper, TypeReader
 from loopy.types import LoopyType
 from loopy.typing import Expression, is_integer
 
 
 if TYPE_CHECKING:
+    from loopy.codegen import CodeGenerationState
     from loopy.symbolic import TypeCast
 
 
@@ -79,7 +80,11 @@ class ExpressionToCExpressionMapper(IdentityMapper):
     expected type for untyped expressions such as python scalars. The type of the
     expressions takes precedence over *type_context*.
     """
-    def __init__(self, codegen_state, fortran_abi=False, type_inf_mapper=None):
+    def __init__(self,
+                codegen_state: CodeGenerationState,
+                fortran_abi: bool = False,
+                type_inf_mapper: TypeInferenceMapper | None = None
+            ) -> None:
         self.kernel = codegen_state.kernel
         self.codegen_state = codegen_state
 
diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py
index 096cb2cd6..34a88328c 100644
--- a/loopy/target/ispc.py
+++ b/loopy/target/ispc.py
@@ -29,16 +29,26 @@
 from typing import TYPE_CHECKING, Iterable, Sequence, cast
 
 import numpy as np
+from typing_extensions import Never
 
 import pymbolic.primitives as p
 from cgen import Collection, Const, Declarator, Generable
 from pymbolic import var
 from pymbolic.mapper.stringifier import PREC_NONE
+from pymbolic.mapper.substitutor import make_subst_func
 from pytools import memoize_method
 
 from loopy.diagnostic import LoopyError
-from loopy.kernel.data import AddressSpace, ArrayArg, TemporaryVariable
-from loopy.symbolic import CombineMapper, Literal
+from loopy.kernel.data import AddressSpace, ArrayArg, LocalInameTag, TemporaryVariable
+from loopy.symbolic import (
+    CoefficientCollector,
+    CombineMapper,
+    GroupHardwareAxisIndex,
+    Literal,
+    LocalHardwareAxisIndex,
+    SubstitutionMapper,
+    flatten,
+)
 from loopy.target.c import CFamilyASTBuilder, CFamilyTarget
 from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper
 
@@ -46,28 +56,46 @@
 if TYPE_CHECKING:
     from loopy.codegen import CodeGenerationState
     from loopy.codegen.result import CodeGenerationResult
+    from loopy.kernel import LoopKernel
+    from loopy.kernel.instruction import Assignment
     from loopy.schedule import CallKernel
     from loopy.types import LoopyType
     from loopy.typing import Expression
 
 
 class IsVaryingMapper(CombineMapper[bool, []]):
+    # FIXME: Update this if/when ispc reduction support is added.
+
+    def __init__(self, kernel: LoopKernel) -> None:
+        self.kernel = kernel
+        super().__init__()
+
     def combine(self, values: Iterable[bool]) -> bool:
         return reduce(operator.or_, values, False)
 
     def map_constant(self, expr):
         return False
 
-    def map_group_hw_index(self, expr):
-        return False
-
-    def map_local_hw_index(self, expr):
-        if expr.axis == 0:
-            return True
-        else:
-            raise LoopyError("ISPC only supports one local axis")
+    def map_group_hw_index(self, expr: GroupHardwareAxisIndex) -> Never:
+        # These only exist for a brief blip in time inside the expr-to-cexpr
+        # mapper. We should never see them.
+        raise AssertionError()
+
+    def map_local_hw_index(self, expr: LocalHardwareAxisIndex) -> Never:
+        # These only exist for a brief blip in time inside the expr-to-cexpr
+        # mapper. We should never see them.
+        raise AssertionError()
+
+    def map_variable(self, expr: p.Variable) -> bool:
+        iname = self.kernel.inames.get(expr.name)
+        if iname is not None:
+            ltags = iname.tags_of_type(LocalInameTag)
+            if ltags:
+                ltag, = ltags
+                if ltag.axis != 0:
+                    raise LoopyError("ISPC only supports one local axis")
+                return True
 
-    def map_variable(self, expr):
         return False
 
 
@@ -127,8 +155,7 @@ def map_variable(self, expr, type_context):
                 return expr
 
         else:
-            return super().map_variable(
-                    expr, type_context)
+            return super().map_variable(expr, type_context)
 
     def map_subscript(self, expr, type_context):
         from loopy.kernel.data import TemporaryVariable
@@ -175,8 +202,8 @@ def rec(self, expr, type_context=None, needed_type: LoopyType | None = None):  #
         else:
             actual_type = self.infer_type(expr)
             if actual_type != needed_type:
-                # FIXME: problematic: quadratic complexity
-                is_varying = IsVaryingMapper()(expr)
+                # FIXME: problematic: potential quadratic complexity
+                is_varying = IsVaryingMapper(self.kernel)(expr)
                 registry = self.codegen_state.ast_builder.target.get_dtype_registry()
                 cast = var("("
                            f"{'varying' if is_varying else 'uniform'} "
@@ -409,7 +436,12 @@ def get_temporary_var_declarator(self,
     # }}}
 
     # {{{ emit_...
-    def emit_assignment(self, codegen_state, insn):
+
+    def emit_assignment(
+                self,
+                codegen_state: CodeGenerationState,
+                insn: Assignment
+            ):
         kernel = codegen_state.kernel
         ecm = codegen_state.expression_to_code_mapper
 
@@ -442,83 +474,61 @@ def emit_assignment(self, codegen_state, insn):
             from loopy.kernel.array import get_access_info
             from loopy.symbolic import simplify_using_aff
 
-            index_tuple = tuple(
-                    simplify_using_aff(kernel, idx) for idx in lhs.index_tuple)
-            access_info = get_access_info(kernel, ary, index_tuple,
-                    lambda expr: evaluate(expr, codegen_state.var_subst_map),
-                    codegen_state.vectorization_info)
+            if not isinstance(lhs, p.Subscript):
+                raise LoopyError("streaming store must have a subscript as argument")
 
             from loopy.kernel.data import ArrayArg, TemporaryVariable
-
             if not isinstance(ary, (ArrayArg, TemporaryVariable)):
                 raise LoopyError("array type not supported in ISPC: %s"
-                        % type(ary).__name)
+                        % type(ary).__name__)
 
+            index_tuple = tuple(
+                    simplify_using_aff(kernel, idx) for idx in lhs.index_tuple)
+
+            access_info = get_access_info(kernel, ary, index_tuple,
+                    lambda expr: cast("int",
+                        evaluate(expr, codegen_state.var_subst_map)),
+                    codegen_state.vectorization_info)
+
+            l0_inames = {
+                iname for iname in insn.within_inames
+                if kernel.inames[iname].tags_of_type(LocalInameTag)}
+
             if len(access_info.subscripts) != 1:
                 raise LoopyError("streaming stores must have a subscript")
             subscript, = access_info.subscripts
 
-            from pymbolic.primitives import Sum, Variable, flattened_sum
-            if isinstance(subscript, Sum):
-                terms = subscript.children
-            else:
-                terms = (subscript.children,)
-
-            new_terms = []
-
-            from loopy.kernel.data import LocalInameTag, filter_iname_tags_by_type
-            from loopy.symbolic import get_dependencies
-
-            saw_l0 = False
-            for term in terms:
-                if (isinstance(term, Variable)
-                        and kernel.iname_tags_of_type(term.name, LocalInameTag)):
-                    tag, = kernel.iname_tags_of_type(
-                            term.name, LocalInameTag, min_num=1, max_num=1)
-                    if tag.axis == 0:
-                        if saw_l0:
-                            raise LoopyError(
-                                    "streaming store must have stride 1 in "
-                                    "local index, got: %s" % subscript)
-                        saw_l0 = True
-                        continue
-                else:
-                    for dep in get_dependencies(term):
-                        if dep in kernel.all_inames() and (
-                                filter_iname_tags_by_type(kernel.inames[dep].tags,
-                                    LocalInameTag)):
-                            tag, = filter_iname_tags_by_type(
-                                    kernel.inames[dep].tags, LocalInameTag, 1)
-                            if tag.axis == 0:
-                                raise LoopyError(
-                                        "streaming store must have stride 1 in "
-                                        "local index, got: %s" % subscript)
-
-                new_terms.append(term)
-
-            if not saw_l0:
-                raise LoopyError("streaming store must have stride 1 in "
-                        "local index, got: %s" % subscript)
+            if l0_inames:
+                l0_iname, = l0_inames
+                coeffs = CoefficientCollector([l0_iname])(subscript)
+                if coeffs.get(p.Variable(l0_iname), 0) != 1:
+                    raise LoopyError("coefficient of streaming store index "
+                            "in l.0 variable must be 1")
+
+                subscript = flatten(
+                    SubstitutionMapper(make_subst_func({l0_iname: 0}))(subscript))
+                del l0_iname
 
             if access_info.vector_index is not None:
                 raise LoopyError("streaming store may not use a short-vector "
                         "data type")
 
-            rhs_has_programindex = any(
-                    isinstance(tag, LocalInameTag) and tag.axis == 0
-                    for tag in kernel.iname_tags(dep)
-                    for dep in get_dependencies(insn.expression))
-
-            if not rhs_has_programindex:
-                rhs_code = "broadcast(%s, 0)" % rhs_code
+            if (l0_inames
+                    and not IsVaryingMapper(codegen_state.kernel)(insn.expression)):
+                # rhs is uniform, must be cast to varying in order for streaming_store
+                # to perform a vector store.
+                registry = codegen_state.ast_builder.target.get_dtype_registry()
+                rhs_code = var("(varying "
+                    f"{registry.dtype_to_ctype(lhs_dtype)}"
+                    f") ({rhs_code})")
 
             from cgen import Statement
 
             return Statement(
                     "streaming_store(%s + %s, %s)"
                     % (
                         access_info.array_name,
-                        ecm(flattened_sum(new_terms), PREC_NONE, "i"),
+                        ecm(subscript, PREC_NONE, "i"),
                         rhs_code))
 
         # }}}
diff --git a/loopy/type_inference.py b/loopy/type_inference.py
index b3c6ffddd..8894af573 100644
--- a/loopy/type_inference.py
+++ b/loopy/type_inference.py
@@ -1,3 +1,7 @@
+"""
+.. autoclass:: TypeInferenceMapper
+"""
+
 from __future__ import annotations
 
 
diff --git a/pyproject.toml b/pyproject.toml
index f5fcb6025..6d0d765b8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -57,6 +57,11 @@ fortran = [
     "f2py @ git+https://github.com/pearu/f2py.git",
     "ply>=3.6",
 ]
+dev = [
+    "mypy",
+    "types-colorama",
+    "types-Pygments",
+]
 
 [tool.hatch.metadata]
 allow-direct-references = true