Skip to content

Commit d61a644

Browse files
Merge pull request #2802 from devitocodes/fixup-cire
compiler: Improve alias scheduling
2 parents a00cca8 + a2f0eb2 commit d61a644

File tree

8 files changed

+67
-66
lines changed

8 files changed

+67
-66
lines changed

devito/core/cpu.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,6 @@ def _specialize_clusters(cls, clusters, **kwargs):
177177
# Reduce flops
178178
clusters = cire(clusters, 'sops', sregistry, options, platform)
179179
clusters = factorize(clusters, **kwargs)
180-
clusters = optimize_pows(clusters)
181180

182181
# The previous passes may have created fusion opportunities
183182
clusters = fuse(clusters)

devito/core/gpu.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,6 @@ def _specialize_clusters(cls, clusters, **kwargs):
218218
# Reduce flops
219219
clusters = cire(clusters, 'sops', sregistry, options, platform)
220220
clusters = factorize(clusters, **kwargs)
221-
clusters = optimize_pows(clusters)
222221

223222
# The previous passes may have created fusion opportunities
224223
clusters = fuse(clusters)

devito/operator/operator.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@
2929
from devito.parameters import configuration
3030
from devito.passes import (
3131
Graph, lower_index_derivatives, generate_implicit, generate_macros,
32-
minimize_symbols, unevaluate, error_mapper, is_on_device, lower_dtypes
32+
minimize_symbols, optimize_pows, unevaluate, error_mapper, is_on_device,
33+
lower_dtypes
3334
)
3435
from devito.symbolics import estimate_cost, subs_op_args
3536
from devito.tools import (DAG, OrderedSet, Signer, ReducerMap, as_mapper, as_tuple,
@@ -409,6 +410,10 @@ def _lower_clusters(cls, expressions, profiler=None, **kwargs):
409410
# Lower all remaining high order symbolic objects
410411
clusters = lower_index_derivatives(clusters, **kwargs)
411412

413+
# Turn pows into multiplications. This must happen as late as possible
414+
# in the compilation process to maximize the optimization potential
415+
clusters = optimize_pows(clusters)
416+
412417
# Make sure no reconstructions can unpick any of the symbolic
413418
# optimizations performed so far
414419
clusters = unevaluate(clusters)

devito/passes/clusters/aliases.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,7 @@ def collect(extracted, ispace, minstorage):
525525
k = group.dimensions_translated
526526
else:
527527
k = group.dimensions
528+
k = frozenset(d for d in k if not d.is_NonlinearDerived)
528529
mapper.setdefault(k, []).append(group)
529530

530531
aliases = AliasList()
@@ -912,7 +913,8 @@ def lower_schedule(schedule, meta, sregistry, opt_ftemps, opt_min_dtype,
912913
indices.append(i.dim - i.lower + s)
913914

914915
dtype = sympy_dtype(pivot, base=meta.dtype)
915-
obj = make(name=name, dimensions=dimensions, halo=halo, dtype=dtype)
916+
obj = make(name=name, dimensions=dimensions, halo=halo, dtype=dtype,
917+
shift=shift)
916918
expression = Eq(obj[indices], uxreplace(pivot, subs))
917919

918920
callback = lambda idx: obj[[i + s for i, s in zip(idx, shift)]]

devito/types/misc.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from devito.types import Array, CompositeObject, Indexed, Symbol, LocalObject
1212
from devito.types.basic import IndexedData
13-
from devito.tools import CustomDtype, Pickable, frozendict
13+
from devito.tools import CustomDtype, Pickable, as_tuple, frozendict
1414

1515
__all__ = ['Timer', 'Pointer', 'VolatileInt', 'FIndexed', 'Wildcard', 'Fence',
1616
'Global', 'Hyperplane', 'Indirection', 'Temp', 'TempArray', 'Jump',
@@ -235,12 +235,25 @@ class TempArray(Array):
235235

236236
is_autopaddable = True
237237

238+
__rkwargs__ = (Array.__rkwargs__ + ('shift',))
239+
240+
def __init_finalize__(self, *args, shift=None, **kwargs):
241+
super().__init_finalize__(*args, **kwargs)
242+
243+
# An integer for each Dimension representing the shift applied to the halo
244+
# for homogeneity reasons
245+
self._shift = as_tuple(shift)
246+
238247
def __padding_setup__(self, **kwargs):
239248
padding = kwargs.pop('padding', None)
240249
if padding is None:
241250
padding = self.__padding_setup_smart__(**kwargs)
242251
return super().__padding_setup__(padding=padding, **kwargs)
243252

253+
@property
254+
def shift(self):
255+
return self._shift
256+
244257

245258
class Fence:
246259

examples/performance/00_overview.ipynb

Lines changed: 18 additions & 54 deletions
Large diffs are not rendered by default.

tests/test_dse.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1745,7 +1745,7 @@ def g2_tilde(field, phi, theta):
17451745
assert len([i for i in FindSymbols().visit(bns['x0_blk0']) if i.is_Array]) == 7
17461746
assert len(FindNodes(VExpanded).visit(pbs['x0_blk0'])) == 3
17471747

1748-
@pytest.mark.parametrize('so_ops', [(4, 147), (8, 211)])
1748+
@pytest.mark.parametrize('so_ops', [(4, 146), (8, 210)])
17491749
@switchconfig(profiling='advanced')
17501750
def test_tti_J_akin_complete(self, so_ops):
17511751
grid = Grid(shape=(16, 16, 16))
@@ -2664,6 +2664,25 @@ def test_sparse_const(self):
26642664
op()
26652665
assert np.all(src.data == 8)
26662666

2667+
def test_space_and_time_invariant_together(self):
2668+
grid = Grid(shape=(34, 45, 50))
2669+
2670+
a = Function(name='a', grid=grid, space_order=8)
2671+
vx = TimeFunction(name='vx', grid=grid, space_order=8)
2672+
tzz = vx.func(name='tzz')
2673+
2674+
eqn = Eq(tzz.forward, a.dy.dz * (vx.dx.dy + vx.dx.dz) + tzz)
2675+
2676+
op = Operator(eqn, opt=('advanced', {'openmp': False}))
2677+
2678+
op.cfunction
2679+
2680+
assert_structure(
2681+
op,
2682+
['t,x0_blk0,y0_blk0,x,y,z', 't,x0_blk0,y0_blk0,x,y,z'],
2683+
'tx0_blk0y0_blk0xyzyz'
2684+
)
2685+
26672686

26682687
class TestIsoAcoustic:
26692688

@@ -2706,9 +2725,9 @@ def test_fullopt(self):
27062725
bns, _ = assert_blocking(op0, {})
27072726
bns, _ = assert_blocking(op1, {'x0_blk0'}) # due to loop blocking
27082727

2709-
assert summary0[('section0', None)].ops == 50
2728+
assert summary0[('section0', None)].ops == 55
27102729
assert summary0[('section1', None)].ops == 44
2711-
assert np.isclose(summary0[('section0', None)].oi, 2.851, atol=0.001)
2730+
assert np.isclose(summary0[('section0', None)].oi, 3.136, atol=0.001)
27122731

27132732
assert summary1[('section0', None)].ops == 31
27142733
assert summary1[('section1', None)].ops == 88
@@ -2760,7 +2779,7 @@ def tti_noopt(self):
27602779
# Make sure no opts were applied
27612780
op = wavesolver.op_fwd(False)
27622781
assert len(op._func_table) == 0
2763-
assert summary[('section0', None)].ops == 743
2782+
assert summary[('section0', None)].ops == 753
27642783

27652784
return v, rec
27662785

@@ -2846,7 +2865,7 @@ class TestTTIv2:
28462865

28472866
@switchconfig(profiling='advanced')
28482867
@pytest.mark.parametrize('space_order,expected', [
2849-
(4, 200), (12, 392)
2868+
(4, 190), (12, 382)
28502869
])
28512870
def test_opcounts(self, space_order, expected):
28522871
grid = Grid(shape=(3, 3, 3))

tests/test_unexpansion.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ def test_v4(self):
260260
'cire-mingain': 400}))
261261

262262
# Check code generation
263-
assert op._profiler._sections['section1'].sops == 1443
263+
assert op._profiler._sections['section1'].sops == 1442
264264
assert_structure(op, ['x,y,z',
265265
't,x0_blk0,y0_blk0,x,y,z',
266266
't,x0_blk0,y0_blk0,x,y,z,i1',
@@ -431,7 +431,7 @@ def test_v1(self):
431431
'openmp': False}))
432432

433433
# Check code generation
434-
assert op._profiler._sections['section1'].sops == 191
434+
assert op._profiler._sections['section1'].sops == 190
435435
assert_structure(op, ['x,y,z',
436436
't,x0_blk0,y0_blk0,x,y,z',
437437
't,x0_blk0,y0_blk0,x,y,z,i0',

0 commit comments

Comments
 (0)