Skip to content

Commit cec94ca

Browse files
authored
Merge pull request #13351 from dzhwinter/memory/stable
fix memory optimize
2 parents 65efebb + 5212b2a commit cec94ca

File tree

2 files changed

+80
-46
lines changed

2 files changed

+80
-46
lines changed

python/paddle/fluid/tests/unittests/test_memory_optimization_transpiler.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from __future__ import print_function
1616
import unittest
1717

18+
import paddle.fluid as fluid
1819
import paddle.fluid.layers as layers
1920
import paddle.fluid.optimizer as optimizer
2021
from paddle.fluid.framework import Program, program_guard
@@ -67,5 +68,34 @@ def test_inplace_ops(self):
6768
print(str(result_program))
6869

6970

71+
class TestMemoryTranspiler3(unittest.TestCase):
72+
def setUp(self):
73+
program = Program()
74+
with program_guard(program, startup_program=Program()):
75+
word = fluid.layers.data(name='word', shape=[1], dtype='int64')
76+
emb = [
77+
fluid.layers.embedding(
78+
word, size=[65536, 256], param_attr='emb') for _ in range(6)
79+
]
80+
81+
left = emb.pop(0)
82+
while len(emb) != 0:
83+
right = emb.pop(0)
84+
left = fluid.layers.concat([left, right])
85+
emb = fluid.layers.mean(left)
86+
fluid.backward.append_backward(emb)
87+
self.program = program
88+
89+
def test_cascade_reuse(self):
90+
block = self.program.block(0)
91+
# variable reuse in programdesc
92+
# TODO(dzhwinter): confirm cascade strategy. disable temporialy
93+
self.assertTrue("concat_4.tmp_0@GRAD" in block.vars)
94+
# self.assertTrue("concat_3.tmp_0@GRAD" not in block.vars)
95+
# self.assertTrue("concat_2.tmp_0@GRAD" not in block.vars)
96+
# self.assertTrue("concat_1.tmp_0@GRAD" not in block.vars)
97+
# self.assertTrue("concat_0.tmp_0@GRAD" not in block.vars)
98+
99+
70100
if __name__ == "__main__":
71101
unittest.main()

python/paddle/fluid/transpiler/memory_optimization_transpiler.py

100644100755
Lines changed: 50 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ def __init__(self, program, ops, forward_num, skip_opt):
5656
self._live_in = defaultdict(set)
5757
self._live_out = defaultdict(set)
5858
self._skip_opt = skip_opt
59+
self.pool = []
5960

6061
def _add_connections(self, connections):
6162
"""Populates _successors and _presuccessors for two neighbor nodes."""
@@ -77,6 +78,7 @@ def _build_graph(self):
7778
for i in range(self.op_size):
7879
self._uses[i].update(self._ops[i].input_arg_names())
7980
self._defs[i].update(self._ops[i].output_arg_names())
81+
self._live_in[i] = self._uses[i]
8082

8183
def _update_graph(self, old_name, new_name, begin_idx=0):
8284
for i in range(begin_idx, self.op_size):
@@ -88,39 +90,39 @@ def _update_graph(self, old_name, new_name, begin_idx=0):
8890
self._defs[i].add(new_name)
8991
if old_name in self._live_in[i]:
9092
self._live_in[i].remove(old_name)
91-
self._live_out[i].add(new_name)
93+
self._live_in[i].add(new_name)
9294
if old_name in self._live_out[i]:
9395
self._live_out[i].remove(old_name)
9496
self._live_out[i].add(new_name)
9597

96-
def _reach_fixed_point(self, live_in, live_out):
97-
"""Check if the liveness set has stablized."""
98-
if len(live_in) != len(self._live_in):
99-
return False
100-
if len(live_out) != len(self._live_out):
101-
return False
102-
for i in range(self.op_size):
103-
if (live_in[i] != self._live_in[i] or
104-
live_out[i] != self._live_out[i]):
105-
return False
106-
return True
107-
10898
def _dataflow_analyze(self):
10999
self._build_graph()
110100
live_in = defaultdict(set)
111-
live_out = defaultdict(set)
112-
# Repeatedly apply liveness updates until the algorithm stablize
113-
# on a complete set live input vars and live output vars.
114-
while True:
115-
for i in reversed(list(range(self.op_size))):
116-
live_in[i] = set(self._live_in[i])
117-
live_out[i] = set(self._live_out[i])
118-
for s in self._successors[i]:
119-
self._live_out[i] |= self._live_in[s]
120-
self._live_in[i] = self._uses[i] | (
121-
self._live_out[i] - self._defs[i])
122-
if self._reach_fixed_point(live_in, live_out):
123-
break
101+
worklist = list(range(len(self._ops) - 1, -1, -1))
102+
while worklist:
103+
i = worklist.pop(0)
104+
live_in[i] = set(self._live_in[i])
105+
for s in self._successors[i]:
106+
self._live_out[i] |= self._live_in[s]
107+
self._live_in[i] = self._uses[i] | (
108+
self._live_out[i] - self._defs[i])
109+
if live_in[i] != self._live_in[i]:
110+
for d in self._presuccessors[i]:
111+
worklist.append(d)
112+
113+
def _fill_pool(self, i, is_forward):
114+
block_desc = self._ops[i].block()
115+
in_diff, _ = self._get_diff(self._live_in[i], self._live_out[i])
116+
can_optimize = [
117+
x for x in in_diff
118+
if self._check_var_validity(block_desc, x, is_forward)
119+
]
120+
if can_optimize:
121+
for var_name in can_optimize:
122+
cache = (var_name, self._find_var(block_desc, var_name,
123+
is_forward).shape())
124+
if cache not in self.pool:
125+
self.pool.append(cache)
124126

125127
def _get_diff(self, a, b):
126128
u = a & b
@@ -211,7 +213,6 @@ def compare_shape(x_shape, cache_shape, opt_level):
211213
# update skip set to meet users' demand
212214
if skip_opt_set:
213215
self._skip_opt.update(skip_opt_set)
214-
self.pool = []
215216
for i in range(self.op_size):
216217
op = self._ops[i]
217218
if op.type() in SUB_BLOCK_OPS:
@@ -234,16 +235,24 @@ def compare_shape(x_shape, cache_shape, opt_level):
234235
for index, cache_pair in enumerate(self.pool):
235236
cache_var = cache_pair[0]
236237
cache_shape = cache_pair[1]
237-
if not compare_shape(x_shape, cache_shape, level):
238-
continue
239-
240238
if not self._has_var(block_desc, cache_var, is_forward):
239+
if PRINT_LOG:
240+
print("cache %s not exists!" %
241+
(cpt.to_text(cache_var)))
241242
continue
243+
if x == cache_var:
244+
if PRINT_LOG:
245+
print("x : ", cpt.to_text(x), " cache : ",
246+
cpt.to_text(cache_var), " is same var!")
247+
break
242248

243249
x_dtype = self._find_var(block_desc, x,
244250
is_forward).dtype()
245251
cache_dtype = self._find_var(block_desc, cache_var,
246252
is_forward).dtype()
253+
254+
if not compare_shape(x_shape, cache_shape, level):
255+
continue
247256
# TODO(qijun): actually, we should compare
248257
# dtype_to_size[x_dtype] and dtype_to_size[cache_dtype]
249258
if x_dtype != cache_dtype:
@@ -256,8 +265,6 @@ def compare_shape(x_shape, cache_shape, opt_level):
256265
"var shape is %s ") % (index, x, cache_var,
257266
str(cache_shape)))
258267
self.pool.pop(index)
259-
if x == cache_var:
260-
break
261268
# Rename the var to the cache var already with
262269
# memory allocated in order to reuse the memory.
263270
_rename_arg_(self._ops, x, cache_var, begin_idx=i)
@@ -266,16 +273,7 @@ def compare_shape(x_shape, cache_shape, opt_level):
266273
is_forward)
267274
self._update_graph(x, cache_var, begin_idx=i)
268275
break
269-
270-
in_diff, _ = self._get_diff(self._live_in[i], self._live_out[i])
271-
can_optimize = [
272-
x for x in in_diff
273-
if self._check_var_validity(block_desc, x, is_forward)
274-
]
275-
if can_optimize:
276-
for var_name in can_optimize:
277-
self.pool.append((var_name, self._find_var(
278-
block_desc, var_name, is_forward).shape()))
276+
self._fill_pool(i, is_forward)
279277

280278

281279
def _process_sub_block_pair(pdesc, sub_block_pair):
@@ -383,10 +381,13 @@ def memory_optimize(input_program, skip_opt_set=None, print_log=False, level=0):
383381
384382
Note: it doesn't not support subblock nested in subblock.
385383
386-
:param input_program: Input Program
387-
:param print_log: whether to print debug log.
388-
:param level: If level=0, reuse if the shape is completely equal, o
389-
:return:
384+
Args:
385+
input_program(str): Input Program
386+
skip_opt_set(set): vars wil be skipped in memory optimze
387+
print_log(bool): whether to print debug log.
388+
level(int): If level=0, reuse if the shape is completely equal, o
389+
Returns:
390+
None
390391
"""
391392
if level != 0 and level != 1:
392393
raise ValueError("only support opt_level 0 or 1.")
@@ -407,6 +408,9 @@ def release_memory(input_program, skip_opt_set=None):
407408
408409
Args:
409410
input_program(Program): The program will be inserted :code:`delete_op`.
411+
skip_opt_set(set): vars wil be skipped in memory optimze
412+
Returns:
413+
None
410414
"""
411415
cfgs = _get_cfgs(input_program)
412416
for cfg in cfgs:

0 commit comments

Comments
 (0)