-
Notifications
You must be signed in to change notification settings - Fork 150
Add dace::float32sr type to DaCe #2148
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
c2de579
fd83b4b
d0ee5c6
3c33f58
ac7aa7a
31ac0c8
6fd0397
6178ed9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -113,6 +113,36 @@ def _is_op_boolean(op: str): | |
| return False | ||
|
|
||
|
|
||
| def _handle_casting_for_stochastically_rounded_types(input_datatypes, restype, cast_types): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. docstring? data type hints?? |
||
| float_to_sr = { | ||
| dace.float32: dace.float32sr, | ||
| } | ||
|
|
||
| for i, dtype in enumerate(input_datatypes): | ||
| if hasattr(dtype, "stochastically_rounded"): | ||
| if cast_types[i] and dtype.type == eval(cast_types[i]).type: | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. random eval, not good!!! |
||
| cast_types[i] = None | ||
|
|
||
| # check if stoc rounded inputs | ||
| stochastically_rounded = True | ||
| for i, dtype in enumerate(input_datatypes): | ||
| if not hasattr(dtype, "stochastically_rounded"): | ||
| stochastically_rounded = False | ||
| break | ||
|
|
||
| if stochastically_rounded: | ||
| # make the result SR | ||
| if restype in float_to_sr: | ||
| restype = float_to_sr[restype] | ||
|
|
||
| # cast the intermediate types | ||
| for i, dtype in enumerate(cast_types): | ||
| if dtype in float_to_sr: | ||
| cast_types[i] = float_to_sr[dtype] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is cast_types not also a return value here, just to clarify that it gets mutated? |
||
|
|
||
| return restype | ||
|
|
||
|
|
||
| def result_type(arguments: Sequence[Union[str, Number, symbolic.symbol, sp.Basic]], | ||
| operator: str = None) -> Tuple[Union[List[dtypes.typeclass], dtypes.typeclass, str], ...]: | ||
|
|
||
|
|
@@ -144,12 +174,16 @@ def result_type(arguments: Sequence[Union[str, Number, symbolic.symbol, sp.Basic | |
| raise TypeError("Type {t} of argument {a} is not supported".format(t=type(arg), a=arg)) | ||
|
|
||
| complex_types = {dtypes.complex64, dtypes.complex128, np.complex64, np.complex128} | ||
| float_types = {dtypes.float16, dtypes.float32, dtypes.float64, np.float16, np.float32, np.float64} | ||
| float_types = {dace.float16, dace.float32, dace.float32sr, dace.float64, np.float16, np.float32, np.float64} | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no, you should use the module if it exists. |
||
| signed_types = {dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64, np.int8, np.int16, np.int32, np.int64} | ||
| # unsigned_types = {np.uint8, np.uint16, np.uint32, np.uint64} | ||
|
|
||
| coarse_types = [] | ||
| for dtype in datatypes: | ||
| for dt in datatypes: | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why the rename? |
||
| dtype = dt | ||
| if hasattr(dt, "srtype"): # unwrap stochastically rounded vars | ||
| dtype = dt.srtype | ||
|
|
||
| if dtype in complex_types: | ||
| coarse_types.append(3) # complex | ||
| elif dtype in float_types: | ||
|
|
@@ -336,18 +370,20 @@ def result_type(arguments: Sequence[Union[str, Number, symbolic.symbol, sp.Basic | |
| else: # Operators with 3 or more arguments | ||
| restype = np_result_type(dtypes_for_result) | ||
| coarse_result_type = None | ||
| if result_type in complex_types: | ||
| if restype in complex_types: | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is/was
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a bug introduced by this refactor: c909f8b#diff-b20441227a628465d1c6ca8819915b77cf036f8fc1a8e26fbceb8a930fde9d1dR353 The original code, formerly in the replacements.py file, looked like I believe this var was renamed to restype to not conflict when the "result_type" function |
||
| coarse_result_type = 3 # complex | ||
| elif result_type in float_types: | ||
| elif restype in float_types: | ||
| coarse_result_type = 2 # float | ||
| elif result_type in signed_types: | ||
| elif restype in signed_types: | ||
| coarse_result_type = 1 # signed integer, bool | ||
| else: | ||
| coarse_result_type = 0 # unsigned integer | ||
| for i, t in enumerate(coarse_types): | ||
| if t != coarse_result_type: | ||
| casting[i] = cast_str(restype) | ||
|
|
||
| restype = _handle_casting_for_stochastically_rounded_types(datatypes, restype, casting) | ||
|
|
||
| return restype, casting | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -70,6 +70,7 @@ def expansion(node, parent_state, parent_sdfg, n=None, **kwargs): | |
| (desc_x, stride_x), (desc_y, stride_y), desc_res, sz = node.validate(parent_sdfg, parent_state) | ||
| dtype = desc_x.dtype.base_type | ||
| veclen = desc_x.dtype.veclen | ||
| cast = "(float *)" if dtype == dace.float32sr else "" | ||
|
|
||
| try: | ||
| func, _, _ = blas_helpers.cublas_type_metadata(dtype) | ||
|
|
@@ -82,7 +83,8 @@ def expansion(node, parent_state, parent_sdfg, n=None, **kwargs): | |
| n = n or node.n or sz | ||
| if veclen != 1: | ||
| n /= veclen | ||
| code = f"_result = cblas_{func}({n}, _x, {stride_x}, _y, {stride_y});" | ||
|
|
||
| code = f"_result = cblas_{func}({n}, {cast} _x, {stride_x}, {cast} _y, {stride_y});" | ||
| # The return type is scalar in cblas_?dot signature | ||
| tasklet = dace.sdfg.nodes.Tasklet(node.name, | ||
| node.in_connectors, {'_result': dtype}, | ||
|
|
@@ -204,7 +206,11 @@ def validate(self, sdfg, state): | |
| if desc_x.dtype != desc_y.dtype: | ||
| raise TypeError(f"Data types of input operands must be equal: {desc_x.dtype}, {desc_y.dtype}") | ||
| if desc_x.dtype.base_type != desc_res.dtype.base_type: | ||
| raise TypeError(f"Data types of input and output must be equal: {desc_x.dtype}, {desc_res.dtype}") | ||
| input_types = (desc_x.dtype.base_type, desc_res.dtype.base_type) | ||
| if dace.float32sr in input_types and dace.float32sr in input_types: | ||
| pass # ignore mismatch if it is stochastically rounded | ||
|
||
| else: | ||
| raise TypeError(f"Data types of input and output must be equal: {desc_x.dtype}, {desc_res.dtype}") | ||
|
|
||
| # Squeeze input memlets | ||
| squeezed1 = copy.deepcopy(in_memlets[0].subset) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -232,6 +232,8 @@ def expansion(node: 'Gemv', state, sdfg, m=None, n=None, **kwargs): | |
| name_out="_y") | ||
| dtype_a = outer_array_a.dtype.type | ||
| dtype = outer_array_x.dtype.base_type | ||
| cast = "(float *)" if dtype == dace.float32sr else "" | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that C++ can handle those casts if necessary |
||
|
|
||
| veclen = outer_array_x.dtype.veclen | ||
| alpha = f'{dtype.ctype}({node.alpha})' | ||
| beta = f'{dtype.ctype}({node.beta})' | ||
|
|
@@ -280,7 +282,7 @@ def expansion(node: 'Gemv', state, sdfg, m=None, n=None, **kwargs): | |
| alpha = '&__alpha' | ||
| beta = '&__beta' | ||
|
|
||
| code += f"""cblas_{func}({layout}, {trans}, {m}, {n}, {alpha}, _A, {lda}, | ||
| code += f"""cblas_{func}({layout}, {trans}, {m}, {n}, {alpha}, {cast} _A, {lda}, | ||
| _x, {strides_x[0]}, {beta}, _y, {strides_y[0]});""" | ||
|
|
||
| tasklet = dace.sdfg.nodes.Tasklet(node.name, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,6 +16,7 @@ def _make_sdfg(node, parent_state, parent_sdfg, implementation): | |
|
|
||
| inp_desc, inp_shape, out_desc, out_shape = node.validate(parent_sdfg, parent_state) | ||
| dtype = inp_desc.dtype | ||
| cast = "(float *)" if dtype == dace.float32sr else "" | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. casting a scalar to a pointer!!! |
||
|
|
||
| sdfg = dace.SDFG("{l}_sdfg".format(l=node.label)) | ||
|
|
||
|
|
@@ -35,7 +36,7 @@ def _make_sdfg(node, parent_state, parent_sdfg, implementation): | |
| _, me, mx = state.add_mapped_tasklet('_uzero_', | ||
| dict(__i="0:%s" % out_shape[0], __j="0:%s" % out_shape[1]), | ||
| dict(_inp=Memlet.simple('_b', '__i, __j')), | ||
| '_out = (__i < __j) ? 0 : _inp;', | ||
| f'_out = (__i < __j) ? {cast}(0) : _inp;', | ||
| dict(_out=Memlet.simple('_b', '__i, __j')), | ||
| language=dace.dtypes.Language.CPP, | ||
| external_edges=True) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ | |
| #define __DACE_REDUCTION_H | ||
|
|
||
| #include <cstdint> | ||
| #include <dace/stocastic_rounding.h> | ||
|
|
||
| #include "types.h" | ||
| #include "vector.h" | ||
|
|
@@ -121,6 +122,40 @@ namespace dace { | |
| } | ||
| }; | ||
|
|
||
| template <> | ||
| struct wcr_custom<dace::float32sr> { | ||
| template <typename WCR> | ||
| static DACE_HDFI dace::float32sr reduce_atomic(WCR wcr, dace::float32sr *ptr, const dace::float32sr& value) { | ||
| #ifdef DACE_USE_GPU_ATOMICS | ||
| // Stochastic rounding version of atomic float reduction | ||
| int *iptr = reinterpret_cast<int *>(ptr); | ||
| int old = *iptr, assumed; | ||
| do { | ||
| assumed = old; | ||
| float old_val = __int_as_float(assumed); | ||
| float new_val = static_cast<float>(wcr(static_cast<dace::float32sr>(old_val), value)); | ||
| old = atomicCAS(iptr, assumed, __float_as_int(new_val)); | ||
| } while (assumed != old); | ||
| return static_cast<dace::float32sr>(__int_as_float(old)); | ||
| #else | ||
| dace::float32sr old; | ||
| #pragma omp critical | ||
| { | ||
| old = *ptr; | ||
| *ptr = wcr(old, value); | ||
| } | ||
| return old; | ||
| #endif | ||
| } | ||
|
|
||
| template <typename WCR> | ||
| static DACE_HDFI dace::float32sr reduce(WCR wcr, dace::float32sr *ptr, const dace::float32sr& value) { | ||
| dace::float32sr old = *ptr; | ||
| *ptr = wcr(old, value); | ||
| return old; | ||
| } | ||
| }; | ||
|
|
||
| template <> | ||
| struct wcr_custom<double> { | ||
| template <typename WCR> | ||
|
|
@@ -313,6 +348,31 @@ namespace dace { | |
| DACE_HDFI float operator()(const float &a, const float &b) const { return ::max(a, b); } | ||
| }; | ||
|
|
||
|
|
||
| template <> | ||
| struct _wcr_fixed<ReductionType::Min, dace::float32sr> { | ||
|
|
||
| static DACE_HDFI dace::float32sr reduce_atomic(dace::float32sr *ptr, const dace::float32sr& value) { | ||
| return wcr_custom<dace::float32sr>::reduce_atomic( | ||
| _wcr_fixed<ReductionType::Min, dace::float32sr>(), ptr, value); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shouldn't you just use the float32 atomicMin/Max here? |
||
| } | ||
|
|
||
|
|
||
| DACE_HDFI dace::float32sr operator()(const dace::float32sr &a, const dace::float32sr &b) const { return ::min(a, b); } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does that work? should they not be cast to float explicitly first? |
||
| }; | ||
|
|
||
| template <> | ||
| struct _wcr_fixed<ReductionType::Max, dace::float32sr> { | ||
|
|
||
| static DACE_HDFI dace::float32sr reduce_atomic(dace::float32sr *ptr, const dace::float32sr& value) { | ||
| return wcr_custom<dace::float32sr>::reduce_atomic( | ||
| _wcr_fixed<ReductionType::Max, dace::float32sr>(), ptr, value); | ||
| } | ||
|
|
||
| DACE_HDFI dace::float32sr operator()(const dace::float32sr &a, const dace::float32sr &b) const { return ::max(a, b); } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as above |
||
| }; | ||
|
|
||
|
|
||
| template <> | ||
| struct _wcr_fixed<ReductionType::Min, double> { | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No documentation?