Skip to content

Commit ab020eb

Browse files
authored
fix: Fix memory estimation not working on macOS (#1330)
### Description
- Use `pss` for memory estimation on Linux only.
- Use `rss` for memory estimation on all other platforms, including macOS.

### Issues
- Closes: #1329
1 parent a9e8b14 commit ab020eb

File tree

2 files changed

+24
-22
lines changed

2 files changed

+24
-22
lines changed

src/crawlee/_utils/system.py

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
from __future__ import annotations
22

33
import os
4+
import sys
45
from contextlib import suppress
56
from datetime import datetime, timezone
67
from logging import getLogger
7-
from typing import Annotated, Any
8+
from typing import Annotated
89

910
import psutil
1011
from pydantic import BaseModel, ConfigDict, Field, PlainSerializer, PlainValidator
@@ -13,6 +14,24 @@
1314

1415
logger = getLogger(__name__)
1516

17+
if sys.platform == 'linux':
18+
"""Get the most suitable available used memory metric.
19+
20+
`Proportional Set Size (PSS)`, is the amount of own memory and memory shared with other processes, accounted in a
21+
way that the shared amount is divided evenly between the processes that share it. Available on Linux. Suitable for
22+
avoiding overestimation by counting the same shared memory used by children processes multiple times.
23+
24+
`Resident Set Size (RSS)` is the non-swapped physical memory a process has used; it includes shared memory. It
25+
should be available everywhere.
26+
"""
27+
28+
def _get_used_memory(process: psutil.Process) -> int:
29+
return int(process.memory_full_info().pss)
30+
else:
31+
32+
def _get_used_memory(process: psutil.Process) -> int:
33+
return int(process.memory_info().rss)
34+
1635

1736
class CpuInfo(BaseModel):
1837
"""Information about the CPU usage."""
@@ -88,14 +107,14 @@ def get_memory_info() -> MemoryInfo:
88107
current_process = psutil.Process(os.getpid())
89108

90109
# Retrieve estimated memory usage of the current process.
91-
current_size_bytes = int(_get_used_memory(current_process.memory_full_info()))
110+
current_size_bytes = _get_used_memory(current_process)
92111

93112
# Sum memory usage by all children processes, try to exclude shared memory from the sum if allowed by OS.
94113
for child in current_process.children(recursive=True):
95114
# Ignore any NoSuchProcess exception that might occur if a child process ends before we retrieve
96115
# its memory usage.
97116
with suppress(psutil.NoSuchProcess):
98-
current_size_bytes += _get_used_memory(child.memory_full_info())
117+
current_size_bytes += _get_used_memory(child)
99118

100119
vm = psutil.virtual_memory()
101120

@@ -104,20 +123,3 @@ def get_memory_info() -> MemoryInfo:
104123
current_size=ByteSize(current_size_bytes),
105124
system_wide_used_size=ByteSize(vm.total - vm.available),
106125
)
107-
108-
109-
def _get_used_memory(memory_full_info: Any) -> int:
110-
"""Get the most suitable available used memory metric.
111-
112-
`Proportional Set Size (PSS)`, is the amount of own memory and memory shared with other processes, accounted in a
113-
way that the shared amount is divided evenly between the processes that share it. Available on Linux. Suitable for
114-
avoiding overestimation by counting the same shared memory used by children processes multiple times.
115-
116-
`Resident Set Size (RSS)` is the non-swapped physical memory a process has used; it includes shared memory. It
117-
should be available everywhere.
118-
"""
119-
try:
120-
# Linux
121-
return int(memory_full_info.pss)
122-
except AttributeError:
123-
return int(memory_full_info.rss)

tests/unit/_utils/test_system.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
import os
3+
import sys
44
from multiprocessing import Barrier, Process, Value, synchronize
55
from multiprocessing.shared_memory import SharedMemory
66
from typing import TYPE_CHECKING
@@ -26,7 +26,7 @@ def test_get_cpu_info_returns_valid_values() -> None:
2626
assert 0 <= cpu_info.used_ratio <= 1
2727

2828

29-
@pytest.mark.skipif(os.name == 'nt', reason='Improved estimation not available on Windows')
29+
@pytest.mark.skipif(sys.platform != 'linux', reason='Improved estimation available only on Linux')
3030
def test_memory_estimation_does_not_overestimate_due_to_shared_memory() -> None:
3131
"""Test that memory usage estimation is not overestimating memory usage by counting shared memory multiple times.
3232

0 commit comments

Comments
 (0)