Skip to content

Commit 2b777ad

Browse files
jet-logic
authored and committed
use findskel
1 parent 1830aee commit 2b777ad

File tree

8 files changed

+538
-110
lines changed

8 files changed

+538
-110
lines changed
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ jobs:
1414
strategy:
1515
matrix:
1616
os: [ubuntu-latest, macos-latest, windows-latest]
17-
python-version: ["3.8", "3.11", "3.12"]
17+
python-version: ["3.9", "3.11", "3.12"]
1818
runs-on: ${{ matrix.os }}
1919
steps:
2020
- uses: actions/checkout@v4

.github/workflows/python-package.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ jobs:
1111
strategy:
1212
fail-fast: false
1313
matrix:
14-
python-version: ["3.8", "3.11", "3.12"]
14+
python-version: ["3.8"]
1515

1616
steps:
1717
- uses: actions/checkout@v2

dupln/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -270,8 +270,8 @@ def iter_db(db: "dict[int, dict[int, dict[int, set[str]]]]"):
270270
yield (dev, size, ino, paths, size_map, ino_map)
271271

272272

273-
def list_uniques(db, tot):
274-
# type: (Dict[int, Dict[int, Dict[int, Set[str]]]], Any) -> None
273+
def list_uniques(db, tot, found):
274+
# type: (Dict[int, Dict[int, Dict[int, Set[str]]]], object, Union[Callable[[int], str], None]) -> None
275275
"""
276276
Print all unique files (no duplicates found).
277277
@@ -296,11 +296,11 @@ def list_uniques(db, tot):
296296
tot.size += n * size
297297
tot.disk_size += size
298298
path = paths.pop()
299-
print(path)
299+
found(path)
300300

301301

302302
def list_duplicates(db, tot, size_filter=None, filesizef=None, found=None):
303-
# type: (Dict[int, Dict[int, Dict[int, Set[str]]]], Any, Union[Callable[[int], bool], None], Union[Callable[[int], str], None], Any) -> None
303+
# type: (Dict[int, Dict[int, Dict[int, Set[str]]]], object, Union[Callable[[int], bool], None], Union[Callable[[int], str], None], Any) -> None
304304
"""
305305
Print all duplicate files with optional filtering.
306306

dupln/__main__.py

Lines changed: 80 additions & 103 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,12 @@
1-
from .main import Main, flag, arg
2-
from . import (
3-
add_file,
4-
get_linker,
5-
link_duplicates,
6-
list_uniques,
7-
scan_dir,
8-
)
9-
from typing import TYPE_CHECKING, Any, Union
1+
from typing import TYPE_CHECKING
2+
from logging import info
3+
from .findskel import FindSkel
4+
from .main import Main, flag
5+
from . import add_file, get_linker, link_duplicates, list_uniques
106

117
if TYPE_CHECKING:
128
from argparse import ArgumentParser
13-
from typing import Sequence
9+
from typing import Sequence, Union
1410

1511

1612
def filesizef(s):
@@ -24,33 +20,6 @@ def filesizef(s):
2420
return ("%.1f" % s).rstrip("0").rstrip(".") + x
2521

2622

27-
def filesizep(s: str):
28-
if s[0].isnumeric():
29-
q = s.lower().rstrip("b")
30-
for i, v in enumerate("kmgtpezy"):
31-
if q[-1].endswith(v):
32-
return float(q[0:-1]) * (2 ** (10 * (i + 1)))
33-
return float(q)
34-
return float(s)
35-
36-
37-
def sizerangep(s=""):
38-
f, d, t = s.partition("..")
39-
if d:
40-
a, b = [filesizep(f) if f else 0, filesizep(t) if t else float("inf")]
41-
return (a, b)
42-
elif f:
43-
c = filesizep(f)
44-
return (c, c)
45-
else:
46-
return (0, float("inf"))
47-
48-
49-
def size_range_check(s=""):
50-
a, b = sizerangep(s)
51-
return lambda n: n >= a and n <= b
52-
53-
5423
class Counter(object):
5524
def __getattr__(self, name):
5625
return self.__dict__.setdefault(name, 0)
@@ -76,16 +45,52 @@ def _format_entry(self, key, value):
7645
return str(key) + " " + self._format_value(value, key) + ";"
7746

7847
def _format_value(self, value, key):
79-
# type: (Any, str) -> str
48+
# type: (object, str) -> str
8049
if key in ("size", "disk_size"):
8150
return filesizef(value)
8251
return str(value)
8352

8453

85-
class Base(Main):
86-
paths: "list[str]" = arg("PATH", "search to", nargs="+")
54+
class Main2(Main):
55+
def parse_arguments(
56+
self, argp: "ArgumentParser", args: "Sequence[str]|None"
57+
) -> None:
58+
"""Parse command line arguments."""
59+
p = self._walk_subparsers(argp)
60+
61+
if p:
62+
self._arg_parent = None
63+
n = argp.parse_args(args)
64+
try:
65+
s = self._arg_final = n._arg_final
66+
except AttributeError:
67+
raise
68+
else:
69+
for k, v in n.__dict__.items():
70+
setattr(s, k, v)
71+
s.ready()
72+
s.start()
73+
s.done()
74+
else:
75+
argp.parse_args(args, self)
76+
self.ready()
77+
self.start()
78+
self.done()
79+
80+
81+
class Stat(Main2, FindSkel):
8782
carry_on: bool = flag("carry-on", "Continue on file errors", default=None)
88-
total = Counter()
83+
84+
def __init__(self) -> None:
85+
super().__init__()
86+
self._glob_includes = []
87+
self._glob_excludes = []
88+
self._dir_depth = ()
89+
self._file_sizes = []
90+
self._paths_from = []
91+
self._paths = []
92+
self.total = Counter()
93+
self.db = dict()
8994

9095
def ready(self):
9196
from logging import basicConfig
@@ -94,56 +99,34 @@ def ready(self):
9499
format = environ.get("LOG_FORMAT", "%(levelname)s: %(message)s")
95100
level = environ.get("LOG_LEVEL", "INFO")
96101
basicConfig(format=format, level=level)
97-
return super().ready()
98102

99-
def start(self):
100-
# print(self.__class__.__name__, self.__dict__)
101-
from logging import error, info
102-
from os import stat
103-
from stat import S_ISDIR
103+
#####
104+
def accept(e, **kwargs):
105+
# print(e.path)
106+
return e.is_file()
104107

105-
db = dict()
106-
tot = self.total = Counter()
107-
carry_on = self.carry_on
108+
self.on_check_accept(accept)
109+
#####
110+
return super().ready()
108111

109-
def statx(f):
110-
try:
111-
st = stat(f)
112-
except Exception:
113-
tot.file_err += 1
114-
if carry_on is False:
115-
raise
116-
from sys import exc_info
117-
118-
error(exc_info()[1])
119-
return 0, 0, 0, 0, 0
120-
121-
return st.st_mode, st.st_size, st.st_ino, st.st_dev, st.st_mtime
122-
123-
for x in self.paths:
124-
mode, size, ino, dev, mtime = statx(x)
125-
# print(x, S_ISDIR(mode))
126-
if S_ISDIR(mode):
127-
scan_dir(x, db, statx)
128-
else:
129-
add_file(db, x, size, ino, dev, mtime)
112+
def run(self, db: dict, total: object):
113+
link_duplicates(db, None, total, self.carry_on)
130114

131-
try:
132-
self.go(db)
115+
def init_argparse(self, argp: "ArgumentParser"):
116+
argp.description = r"Stats about linked files under given directory"
117+
return super().init_argparse(argp)
133118

119+
def start(self):
120+
self._walk_paths()
121+
try:
122+
self.run(self.db, self.total)
134123
finally:
135-
# print(len(db))
136124
self.total and info("Total {}".format(self.total))
137-
return self.total
138-
139125

140-
class Stat(Base):
141-
def go(self, db: dict):
142-
link_duplicates(db, None, self.total, self.carry_on)
143-
144-
def init_argparse(self, argp: "ArgumentParser"):
145-
argp.description = r"Stats about linked files under given directory"
146-
return super().init_argparse(argp)
126+
def process_entry(self, de):
127+
st = de.stat()
128+
# print(de.path)
129+
add_file(self.db, de.path, st.st_size, st.st_ino, st.st_dev, st.st_mtime)
147130

148131

149132
class Link(Stat):
@@ -153,35 +136,33 @@ class Link(Stat):
153136
default="os.link",
154137
)
155138

156-
def go(self, db: dict):
157-
link_duplicates(
158-
db,
159-
get_linker(self.linker),
160-
self.total,
161-
self.carry_on,
162-
)
139+
def run(self, db: dict, total: object):
140+
link_duplicates(db, get_linker(self.linker), total, self.carry_on)
163141

164142
def init_argparse(self, argp: "ArgumentParser"):
165143
argp.description = r"Link files under given directory"
166144
return super().init_argparse(argp)
167145

168146

169147
class Uniques(Stat):
170-
def go(self, db: dict):
171-
list_uniques(db, self.total)
172148

173149
def init_argparse(self, argp: "ArgumentParser"):
174150
argp.description = r"List unique files under given directory"
175151
return super().init_argparse(argp)
176152

153+
def run(self, db: dict, total: object):
154+
print("Uniques:run")
155+
list_uniques(db, total, print)
156+
177157

178158
class Duplicates(Stat):
179-
size_range = flag(
180-
"sizes", "size range from..to", default=None, parser=size_range_check
181-
)
182-
human_sizes: bool = flag("hrfs", "human readable file sizes", default=False)
159+
human_sizes: bool = flag("human", "human readable file sizes", default=False)
160+
161+
def init_argparse(self, argp: "ArgumentParser"):
162+
argp.description = r"List duplicates files under given directory"
163+
return super().init_argparse(argp)
183164

184-
def go(self, db: dict):
165+
def run(self, db: dict, total: object):
185166
from . import list_duplicates
186167

187168
if self.human_sizes:
@@ -195,14 +176,10 @@ def found(ino: int = 0, size: int = 0, dev: int = 0, paths: list[str] = []):
195176
for p in paths:
196177
print(f" - {p}")
197178

198-
list_duplicates(db, self.total, size_filter=self.size_range, found=found)
199-
200-
def init_argparse(self, argp: "ArgumentParser"):
201-
argp.description = r"List duplicates files under given directory"
202-
return super().init_argparse(argp)
179+
list_duplicates(db, total, found=found)
203180

204181

205-
class App(Main):
182+
class App(Main2):
206183

207184
def add_arguments(self, argp: "ArgumentParser"):
208185
argp.prog = f"python -m {__package__}"

0 commit comments

Comments
 (0)