Skip to content

Commit 6344bfb

Browse files
YqGe585 and maxiaolong001
authored and committed
support custom device empty_cache() (PaddlePaddle#74539)
1 parent 9a77e10 commit 6344bfb

File tree

2 files changed

+40
-0
lines changed

2 files changed

+40
-0
lines changed

paddle/fluid/pybind/pybind.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3340,6 +3340,17 @@ All parameter, weight, gradient are variables in Paddle.
33403340
},
33413341
py::return_value_policy::copy);
33423342

3343+
// Release idle cached allocator memory for every selected device of every
// registered custom device type (mirrors cuda_empty_cache for plugins).
// Iterating the (possibly empty) type list also removes the out-of-bounds
// dev_types[0] access the previous code performed when no custom device
// plugin was registered.
m.def("device_empty_cache", [] {
  for (const auto &dev_type : phi::DeviceManager::GetAllCustomDeviceTypes()) {
    // Release cached blocks on each device the user selected for this type.
    for (auto device : phi::DeviceManager::GetSelectedDeviceList(dev_type)) {
      memory::Release(phi::CustomPlace(dev_type, device));
    }
  }
});
3353+
33433354
py::class_<phi::DeviceProp>(m, "_customDeviceProperties", py::module_local())
33443355
.def_property_readonly(
33453356
"name", [](const phi::DeviceProp &prop) { return prop.name; })

python/paddle/device/__init__.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,35 @@ def extract_device_id(device: _CustomPlaceLike, op_name: str) -> int:
698698
return device_id
699699

700700

701+
def empty_cache() -> None:
    '''
    Releases idle cached memory held by the allocator so that it can be used
    by other GPU applications and becomes visible in `nvidia-smi`. In most
    cases you don't need to use this function: Paddle does not release memory
    back to the OS when you delete Tensors on the GPU, because it keeps GPU
    memory in a pool so that subsequent allocations are much faster.

    Raises:
        ValueError: If PaddlePaddle was compiled without CUDA and without any
            custom device support (CPU-only build).

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle
            >>> paddle.device.set_device('gpu')

            >>> tensor = paddle.randn([512, 512, 512], "float64")
            >>> del tensor
            >>> paddle.device.empty_cache()
    '''
    if core.is_compiled_with_cuda():
        core.cuda_empty_cache()
        return
    # Query custom devices only after the CUDA branch: on a CPU-only build
    # get_all_custom_device_type() yields an empty (or None) result, and the
    # previous unconditional custom_devices[0] indexing raised IndexError
    # instead of the intended ValueError below.
    custom_devices = paddle.device.get_all_custom_device_type()
    if custom_devices and core.is_compiled_with_custom_device(
        custom_devices[0]
    ):
        core.device_empty_cache()
        return
    raise ValueError(
        "The API paddle.device.empty_cache is not supported in CPU-only PaddlePaddle. Please reinstall PaddlePaddle with GPU or custom device support to call this API."
    )
728+
729+
701730
def max_memory_allocated(device: _CustomPlaceLike | None = None) -> int:
702731
'''
703732
Return the peak size of memory that is allocated to tensor of the given device. This

0 commit comments

Comments
 (0)