|
30 | 30 | from collections import OrderedDict |
31 | 31 | from collections.abc import MutableMapping |
32 | 32 | from functools import lru_cache |
33 | | -from os import scandir |
34 | 33 | from pickle import PicklingError |
35 | 34 | from threading import Lock, RLock |
36 | 35 | from typing import Sequence, Mapping, Optional, Union, List, Tuple, Dict, Any |
@@ -270,9 +269,15 @@ def _getsize(store: BaseStore, path: Path = None) -> int: |
270 | 269 | # also include zarr.json? |
271 | 270 | # members += ['zarr.json'] |
272 | 271 | else: |
273 | | - members = listdir(store, path) |
274 | | - prefix = _path_to_prefix(path) |
275 | | - members = [prefix + k for k in members] |
| 272 | + to_visit = [path] |
| 273 | + members = [] |
| 274 | + while to_visit: |
| 275 | + print(to_visit) |
| 276 | + current_path = to_visit.pop() |
| 277 | + current_members = listdir(store, current_path) |
| 278 | + prefix = _path_to_prefix(current_path) |
| 279 | + members.extend([prefix + k for k in current_members]) |
| 280 | + to_visit.extend([prefix + k for k in current_members]) |
276 | 281 | for k in members: |
277 | 282 | try: |
278 | 283 | v = store[k] |
@@ -976,8 +981,12 @@ def getsize(self, path: Path = None): |
976 | 981 | elif isinstance(value, self.cls): |
977 | 982 | # total size for directory |
978 | 983 | size = 0 |
979 | | - for v in value.values(): |
980 | | - if not isinstance(v, self.cls): |
| 984 | + to_visit = list(value.values()) |
| 985 | + while to_visit: |
| 986 | + v = to_visit.pop() |
| 987 | + if isinstance(v, self.cls): |
| 988 | + to_visit.extend(v.values()) |
| 989 | + else: |
981 | 990 | size += buffer_size(v) |
982 | 991 | return size |
983 | 992 |
|
@@ -1274,9 +1283,13 @@ def getsize(self, path=None): |
1274 | 1283 | return os.path.getsize(fs_path) |
1275 | 1284 | elif os.path.isdir(fs_path): |
1276 | 1285 | size = 0 |
1277 | | - for child in scandir(fs_path): |
1278 | | - if child.is_file(): |
1279 | | - size += child.stat().st_size |
| 1286 | + for root, _, files in os.walk(fs_path): |
| 1287 | + # Include the size of the directory itself, as this can be substantial |
| 1288 | + # for directories with many files. |
| 1289 | + size += os.path.getsize(root) |
| 1290 | + for file in files: |
| 1291 | + file_path = os.path.join(root, file) |
| 1292 | + size += os.path.getsize(file_path) |
1280 | 1293 | return size |
1281 | 1294 | else: |
1282 | 1295 | return 0 |
@@ -1921,29 +1934,19 @@ def listdir(self, path=None): |
1921 | 1934 | def getsize(self, path=None): |
1922 | 1935 | path = normalize_storage_path(path) |
1923 | 1936 | with self.mutex: |
1924 | | - children = self.listdir(path) |
1925 | | - if children: |
1926 | | - size = 0 |
1927 | | - for child in children: |
1928 | | - if path: |
1929 | | - name = path + "/" + child |
1930 | | - else: |
1931 | | - name = child |
1932 | | - try: |
1933 | | - info = self.zf.getinfo(name) |
1934 | | - except KeyError: |
1935 | | - pass |
1936 | | - else: |
1937 | | - size += info.compress_size |
1938 | | - return size |
1939 | | - elif path: |
| 1937 | + to_visit = [path] if path else self.listdir(path) |
| 1938 | + total_size = 0 |
| 1939 | + while to_visit: |
| 1940 | + current_path = to_visit.pop() |
1940 | 1941 | try: |
1941 | | - info = self.zf.getinfo(path) |
1942 | | - return info.compress_size |
| 1942 | + info = self.zf.getinfo(current_path) |
| 1943 | + total_size += info.compress_size |
1943 | 1944 | except KeyError: |
1944 | | - return 0 |
1945 | | - else: |
1946 | | - return 0 |
| 1945 | + children = self.listdir(current_path) |
| 1946 | + for child in children: |
| 1947 | + full_path = current_path + "/" + child if current_path else child |
| 1948 | + to_visit.append(full_path) |
| 1949 | + return total_size |
1947 | 1950 |
|
1948 | 1951 | def clear(self): |
1949 | 1952 | if self.mode == "r": |
@@ -2527,6 +2530,8 @@ def listdir(self, path: Path = None): |
2527 | 2530 | return listing |
2528 | 2531 |
|
2529 | 2532 | def getsize(self, path=None) -> int: |
| 2533 | + print("WYF") |
| 2534 | + print(self._store, path) |
2530 | 2535 | return getsize(self._store, path=path) |
2531 | 2536 |
|
2532 | 2537 | def _pop_value(self): |
@@ -2795,10 +2800,9 @@ def getsize(self, path=None): |
2795 | 2800 | size = self.cursor.execute( |
2796 | 2801 | """ |
2797 | 2802 | SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr |
2798 | | - WHERE k LIKE (? || "%") AND |
2799 | | - 0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) + 1), "/"), "/") |
| 2803 | + WHERE k LIKE (? || "%") |
2800 | 2804 | """, |
2801 | | - (path, path), |
| 2805 | + (path,), |
2802 | 2806 | ) |
2803 | 2807 | for (s,) in size: |
2804 | 2808 | return s |
|
0 commit comments