Commit 0d13911

Print the reminder for the illegal memory error in the AutoBatchSize under tf (#4283)
#3822 added a reminder for the illegal memory error. However, this reminder is only needed for tf, so this PR moves it from the base class AutoBatchSize to the subclass under tf.

## Summary by CodeRabbit

- **New Features**
  - Enhanced the `AutoBatchSize` class to initialize the batch size from an environment variable, improving user guidance on memory management with TensorFlow.
- **Bug Fixes**
  - Removed redundant logging during initialization to streamline the process when GPU resources are available.

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent cdad312 commit 0d13911

2 files changed: +16 −5 lines changed

deepmd/tf/utils/batch_size.py

Lines changed: 16 additions & 0 deletions
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import os
+
 from packaging.version import (
     Version,
 )
@@ -11,9 +13,23 @@
     OutOfMemoryError,
 )
 from deepmd.utils.batch_size import AutoBatchSize as AutoBatchSizeBase
+from deepmd.utils.batch_size import (
+    log,
+)
 
 
 class AutoBatchSize(AutoBatchSizeBase):
+    def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None:
+        super().__init__(initial_batch_size, factor)
+        DP_INFER_BATCH_SIZE = int(os.environ.get("DP_INFER_BATCH_SIZE", 0))
+        if not DP_INFER_BATCH_SIZE > 0:
+            if self.is_gpu_available():
+                log.info(
+                    "If you encounter the error 'an illegal memory access was encountered', this may be due to a TensorFlow issue. "
+                    "To avoid this, set the environment variable DP_INFER_BATCH_SIZE to a smaller value than the last adjusted batch size. "
+                    "The environment variable DP_INFER_BATCH_SIZE controls the inference batch size (nframes * natoms). "
+                )
+
     def is_gpu_available(self) -> bool:
         """Check if GPU is available.

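For context (not part of the commit), a minimal usage sketch of the behaviour shown in the diff above. It assumes only what is visible there: the import path, the constructor signature, and the DP_INFER_BATCH_SIZE check. Setting the variable to a positive value controls the inference batch size (nframes * natoms) and suppresses the reminder.

```python
import os

from deepmd.tf.utils.batch_size import AutoBatchSize

# Cap the inference batch size (nframes * natoms). With a positive value,
# the __init__ shown above skips the illegal-memory-access reminder even
# when a GPU is available.
os.environ["DP_INFER_BATCH_SIZE"] = "2048"

# Defaults match the signature in the diff above.
auto_batch_size = AutoBatchSize(initial_batch_size=1024, factor=2.0)
```

In practice the variable would more typically be exported in the shell before running inference, but the effect on the constructor is the same.
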
deepmd/utils/batch_size.py

Lines changed: 0 additions & 5 deletions
@@ -61,11 +61,6 @@ def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None:
             self.maximum_working_batch_size = initial_batch_size
             if self.is_gpu_available():
                 self.minimal_not_working_batch_size = 2**31
-                log.info(
-                    "If you encounter the error 'an illegal memory access was encountered', this may be due to a TensorFlow issue. "
-                    "To avoid this, set the environment variable DP_INFER_BATCH_SIZE to a smaller value than the last adjusted batch size. "
-                    "The environment variable DP_INFER_BATCH_SIZE controls the inference batch size (nframes * natoms). "
-                )
             else:
                 self.minimal_not_working_batch_size = (
                     self.maximum_working_batch_size + 1

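To illustrate why the reminder was removed here rather than kept in the base class, a hypothetical sketch (not code from the repository): a non-TensorFlow backend that subclasses the base AutoBatchSize no longer inherits the TensorFlow-specific message and only supplies its own hooks, such as the is_gpu_available override seen in the tf diff above.

```python
from deepmd.utils.batch_size import AutoBatchSize as AutoBatchSizeBase


class OtherBackendAutoBatchSize(AutoBatchSizeBase):
    """Hypothetical non-TensorFlow backend: no TF-specific reminder is logged."""

    def is_gpu_available(self) -> bool:
        # A real backend would query its own runtime here; hard-coded for the sketch.
        return True

    # Other backend-specific hooks (e.g. an out-of-memory check) are omitted here.
```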