Skip to content

Commit 861e170

Browse files
committed
refactor(file-count): stopping when number of files actually exceed thresholds, therefore dramatically faster for large directories
1 parent e3664e1 commit 861e170

File tree

4 files changed

+132
-45
lines changed

4 files changed

+132
-45
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ Monitoring Plugins:
4141
* by-ssh: add missing `--verbose` parameter
4242
* fs-ro: ignore `/run/credentials` (https://systemd.io/CREDENTIALS/)
4343
* keycloak-stats: fix incorrect symlink for lib
44-
* ntp-*: prevent `TypeError: ''=' not supported between instances of 'int' and 'str'`
44+
* ntp-\*: prevent `TypeError: ''=' not supported between instances of 'int' and 'str'`
4545

4646

4747
### Changed ("refactor", "chore" etc.)
@@ -64,6 +64,7 @@ Grafana:
6464
Monitoring Plugins:
6565

6666
* all plugins: ignore unknown arguments instead of generating an error (this helps with updating Icinga and Nagios service definitions considerably)
67+
* file-count: stop counting as soon as the number of files exceeds the thresholds, which makes the check dramatically faster for large directories
6768
* nextcloud-version: modernize code
6869
* php-status: always assume http://localhost/monitoring.php and, if not found, be tolerant
6970
* redis-status, valkey-status: modernize code and unify both plugins again after [PR #954](https://github.com/Linuxfabrik/monitoring-plugins/pull/954)

check-plugins/file-count/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
Checks the number of matching files or directories found. It can also be used to check the existence or absence of a single file.
66

7-
Depending on the file and user (e.g. running as *icinga*), sudo (sudoers) is needed. It supports globs in accordance with [Python 3](https://docs.python.org/3/library/pathlib.html#pathlib.Path.glob) or [Python 2](https://docs.python.org/2.7/library/glob.html). Beware that using recursive globs might cause high memory usage. Also note that there are small differences in recursive file matching between Python 2 and Python 3. Optionally, the check can be restricted to only consider files that were modified in a given timerange.
7+
Depending on the file and user (e.g. running as *icinga*), sudo (sudoers) is needed. It supports globs in accordance with [Python 3](https://docs.python.org/3/library/pathlib.html#pathlib.Path.glob). Beware that using recursive globs might cause high memory usage. Optionally, the check can be restricted to only consider files that were modified in a given timerange.
88

99

1010
## Fact Sheet
@@ -38,8 +38,8 @@ options:
3838
accordance with https://docs.python.org/3/library/path
3939
lib.html#pathlib.Path.glob. Beware of using recursive
4040
globs. This is mutually exclusive with -u / --url.
41-
--only-dirs Only consider directories.
42-
--only-files Only consider files.
41+
--only-dirs Only count directories.
42+
--only-files Only count files.
4343
--password PASSWORD SMB Password.
4444
--pattern PATTERN The search string to match against the names of SMB
4545
directories or files. This pattern can use '*' as a

check-plugins/file-count/file-count

Lines changed: 123 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -11,25 +11,25 @@
1111
"""See the check's README for more details.
1212
"""
1313

14-
import argparse # pylint: disable=C0413
15-
import sys # pylint: disable=C0413
16-
from pathlib import Path # pylint: disable=C0413
14+
import argparse
15+
import stat
16+
import sys
17+
from pathlib import Path
1718

18-
import lib.base # pylint: disable=C0413
19+
import lib.base
1920

2021
try:
21-
import lib.smb # pylint: disable=C0413
22+
import lib.smb
2223
HAVE_SMB = True
2324
except ModuleNotFoundError as e:
2425
HAVE_SMB = False
2526
missing_lib = e.name
26-
import lib.time # pylint: disable=C0413
27-
import lib.txt # pylint: disable=C0413
28-
from lib.globals import (STATE_CRIT, STATE_OK, # pylint: disable=C0413
29-
STATE_UNKNOWN, STATE_WARN)
27+
import lib.time
28+
import lib.txt
29+
from lib.globals import (STATE_CRIT, STATE_OK, STATE_UNKNOWN, STATE_WARN)
3030

3131
__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
32-
__version__ = '2025100601'
32+
__version__ = '2025122301'
3333

3434
DESCRIPTION = 'Checks the number of matching files.'
3535

@@ -45,7 +45,7 @@ def parse_args():
4545
parser.add_argument(
4646
'-V', '--version',
4747
action='version',
48-
version='%(prog)s: v{} by {}'.format(__version__, __author__)
48+
version=f'%(prog)s: v{__version__} by {__author__}'
4949
)
5050

5151
parser.add_argument(
@@ -64,21 +64,24 @@ def parse_args():
6464

6565
parser.add_argument(
6666
'--filename',
67-
help='File (or directory) name to check. Supports glob in accordance with https://docs.python.org/3/library/pathlib.html#pathlib.Path.glob. Beware of using recursive globs. This is mutually exclusive with -u / --url.',
67+
help='File (or directory) name to check. Supports glob in accordance with '
68+
'https://docs.python.org/3/library/pathlib.html#pathlib.Path.glob. '
69+
'Beware of using recursive globs. '
70+
'This is mutually exclusive with -u / --url.',
6871
dest='FILENAME',
6972
)
7073

7174
parser.add_argument(
7275
'--only-dirs',
73-
help='Only consider directories.',
76+
help='Only count directories.',
7477
dest='ONLY_DIRS',
7578
action='store_true',
7679
default=False,
7780
)
7881

7982
parser.add_argument(
8083
'--only-files',
81-
help='Only consider files.',
84+
help='Only count files.',
8285
dest='ONLY_FILES',
8386
action='store_true',
8487
default=False,
@@ -92,28 +95,35 @@ def parse_args():
9295

9396
parser.add_argument(
9497
'--pattern',
95-
help="The search string to match against the names of SMB directories or files. This pattern can use '*' as a wildcard for multiple chars and '?' as a wildcard for a single char. Does not support regex patterns. Default: %(default)s.",
98+
help="The search string to match against the names of SMB directories or files. "
99+
"This pattern can use '*' as a wildcard for multiple chars and '?' as a wildcard for "
100+
"a single char. "
101+
"Does not support regex patterns. "
102+
"Default: %(default)s.",
96103
dest='PATTERN',
97104
default=DEFAULT_PATTERN,
98105
)
99106

100107
parser.add_argument(
101108
'--timeout',
102-
help='Network timeout in seconds. Default: %(default)s (seconds)',
109+
help='Network timeout in seconds. '
110+
'Default: %(default)s (seconds)',
103111
dest='TIMEOUT',
104112
type=int,
105113
default=DEFAULT_TIMEOUT,
106114
)
107115

108116
parser.add_argument(
109117
'--timerange',
110-
help='Set the timerange (seconds) in which the files should be considered. Supports ranges.',
118+
help='Set the timerange (seconds) in which the files should be considered. '
119+
'Supports ranges.',
111120
dest='TIMERANGE',
112121
)
113122

114123
parser.add_argument(
115124
'-u', '--url',
116-
help='Set the url of the file (or directory) to check, starting with "smb://". This is mutually exclusive with --filename.',
125+
help='Set the url of the file (or directory) to check, starting with "smb://". '
126+
'This is mutually exclusive with --filename.',
117127
dest='URL',
118128
type=str,
119129
)
@@ -126,14 +136,47 @@ def parse_args():
126136

127137
parser.add_argument(
128138
'-w', '--warning',
129-
help='Set the warning number of files. Supports ranges.',
139+
help='Set the warning number of files. '
140+
'Supports ranges.',
130141
dest='WARN',
131142
)
132143

133144
args, _ = parser.parse_known_args()
134145
return args
135146

136147

148+
def get_early_break_threshold(warn, crit):
    """Return the file count at which counting may safely stop, or None.

    Both arguments are Nagios-style range strings (or None / empty), e.g.
    "100", "10:100", ":100", "~:100", "10:" or "@10:100". Once the count is
    larger than every finite bound of both ranges, counting further cannot
    change the resulting state, so the caller may stop iterating there.

    Parameters:
        warn: The warning range string, or None / empty if not set.
        crit: The critical range string, or None / empty if not set.

    Returns:
        The largest finite bound found in `warn` and `crit` plus one, or
        None if neither range has a finite upper bound (there is nothing to
        "exceed", so every item has to be counted) or if a bound is not a
        plain integer (too complex to reason about here).
    """
    upper_bounds = []
    lower_bounds = []
    for threshold in (warn, crit):
        if not threshold:
            continue
        # A leading '@' only inverts the range; the bounds stay the same.
        lower, sep, upper = threshold.lstrip('@').rpartition(':')
        try:
            # Without a ':' the whole string is the upper bound and has to be
            # numeric; with a ':' an empty upper bound means "infinity".
            if upper or not sep:
                upper_bounds.append(int(upper))
            # An empty or '~' lower bound is not a finite number to exceed.
            if lower not in ('', '~'):
                lower_bounds.append(int(lower))
        except ValueError:
            return None

    if not upper_bounds:
        return None

    # Lower bounds have to be taken into account as well: with warn="100"
    # and crit="500:", stopping at 101 would still be below the critical
    # lower bound and therefore report the wrong state.
    # Use the maximum bound + 1 to confirm we're "more than".
    return max(upper_bounds + lower_bounds) + 1
178+
179+
137180
def main():
138181
"""The main function. Hier spielt die Musik.
139182
"""
@@ -146,48 +189,88 @@ def main():
146189

147190
# fetch data
148191
if args.FILENAME and args.URL:
149-
lib.base.cu('The --filename and -u / --url parameter are mutually exclusive. Please only use one.')
192+
lib.base.cu(
193+
'The --filename and -u / --url parameter are mutually exclusive. Please only use one.'
194+
)
150195

151196
now = lib.time.now()
152197
msg = ''
153198
file_count = 0
199+
exceeded_threshold = False
200+
201+
# Determine if we can break early for performance
202+
early_break_at = get_early_break_threshold(args.WARN, args.CRIT)
154203

155204
if args.FILENAME:
156205
path = Path(args.FILENAME)
157-
for item in sorted(Path(path.anchor).glob(str(path.relative_to(path.anchor)))):
158-
if item.is_file() and args.ONLY_DIRS:
206+
for item in Path(path.anchor).glob(str(path.relative_to(path.anchor))):
207+
# Get stat info once to avoid multiple syscalls
208+
try:
209+
stat_info = item.stat()
210+
except (OSError, PermissionError):
211+
# Skip files we can't stat
212+
continue
213+
214+
is_dir = stat.S_ISDIR(stat_info.st_mode)
215+
is_file = stat.S_ISREG(stat_info.st_mode)
216+
217+
if is_file and args.ONLY_DIRS:
159218
continue
160-
if item.is_dir() and args.ONLY_FILES:
219+
if is_dir and args.ONLY_FILES:
161220
continue
162221
if args.TIMERANGE:
163-
age = (now - item.stat().st_mtime)
222+
age = now - stat_info.st_mtime
164223
if not lib.base.coe(lib.base.match_range(age, args.TIMERANGE)):
165224
continue
166225

167226
file_count += 1
168227

228+
# Break early if we've exceeded the threshold because it makes no sense to count any
229+
# further, it just costs time and resources
230+
if early_break_at and file_count >= early_break_at:
231+
exceeded_threshold = True
232+
break
233+
169234
if args.URL:
170235
split_url = args.URL.split('://')
171236
if len(split_url) != 2:
172-
lib.base.cu('Could not parse the protocol of the url "{}".'.format(args.URL))
237+
lib.base.cu(f'Could not parse the protocol of the url "{args.URL}".')
173238
proto, url = split_url
174239

175240
if proto == 'smb':
176241
if not HAVE_SMB:
177-
lib.base.cu('Python module "{}" is not installed.'.format(missing_lib))
178-
for item in lib.base.coe(lib.smb.glob(url, args.USERNAME, args.PASSWORD, args.TIMEOUT, pattern=args.PATTERN)):
179-
if item.is_file() and args.ONLY_DIRS:
242+
lib.base.cu(f'Python module "{missing_lib}" is not installed.') # pylint: disable=E0601
243+
for item in lib.base.coe(
244+
lib.smb.glob(url, args.USERNAME, args.PASSWORD, args.TIMEOUT, pattern=args.PATTERN)
245+
):
246+
# Get stat info once to avoid multiple calls
247+
try:
248+
stat_info = item.stat()
249+
except (OSError, PermissionError):
250+
# Skip files we can't stat
251+
continue
252+
253+
is_dir = stat.S_ISDIR(stat_info.st_mode)
254+
is_file = stat.S_ISREG(stat_info.st_mode)
255+
256+
if is_file and args.ONLY_DIRS:
180257
continue
181-
if item.is_dir() and args.ONLY_FILES:
258+
if is_dir and args.ONLY_FILES:
182259
continue
183260
if args.TIMERANGE:
184-
age = (now - item.stat().st_mtime)
261+
age = now - stat_info.st_mtime
185262
if not lib.base.coe(lib.base.match_range(age, args.TIMERANGE)):
186263
continue
187264

188265
file_count += 1
266+
267+
# Break early if we've exceeded the threshold because it makes no sense to count any
268+
# further, it just costs time and resources
269+
if early_break_at and file_count >= early_break_at:
270+
exceeded_threshold = True
271+
break
189272
else:
190-
lib.base.cu('The protocol "{}" is not supported.'.format(proto))
273+
lib.base.cu(f'The protocol "{proto}" is not supported.')
191274

192275
if not lib.base.coe(lib.base.match_range(file_count, args.CRIT)):
193276
state = STATE_CRIT
@@ -196,12 +279,15 @@ def main():
196279
else:
197280
state = STATE_OK
198281

199-
msg = 'Found {} matching {} (thresholds {}/{})'.format(
200-
file_count,
201-
lib.txt.pluralize('file', file_count),
202-
args.WARN,
203-
args.CRIT,
204-
)
282+
if exceeded_threshold:
283+
# Report "more than" the threshold value we broke at
284+
display_count = file_count - 1
285+
msg = f'Found more than {display_count} matching ' \
286+
f'{lib.txt.pluralize("file", display_count)} (thresholds {args.WARN}/{args.CRIT})'
287+
else:
288+
msg = f'Found {file_count} matching ' \
289+
f'{lib.txt.pluralize("file", file_count)} (thresholds {args.WARN}/{args.CRIT})'
290+
205291
perfdata = lib.base.get_perfdata('file_count', file_count, None, args.WARN, args.CRIT, 0, None)
206292

207293
# over and out

check-plugins/file-count/icingaweb2-module-director/file-count.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,7 @@
443443
"4": {
444444
"varname": "file_count_only_dirs",
445445
"caption": "File Count: Only Dirs?",
446-
"description": "Only consider directories.",
446+
"description": "Only count directories.",
447447
"datatype": "Icinga\\Module\\Director\\DataType\\DataTypeBoolean",
448448
"format": null,
449449
"settings": {},
@@ -452,7 +452,7 @@
452452
"5": {
453453
"varname": "file_count_only_files",
454454
"caption": "File Count: Only Files?",
455-
"description": "Only consider files.",
455+
"description": "Only count files.",
456456
"datatype": "Icinga\\Module\\Director\\DataType\\DataTypeBoolean",
457457
"format": null,
458458
"settings": {},
@@ -569,7 +569,7 @@
569569
"16": {
570570
"varname": "file_count_windows_only_dirs",
571571
"caption": "File Count: Only Dirs?",
572-
"description": "Only consider directories.",
572+
"description": "Only count directories.",
573573
"datatype": "Icinga\\Module\\Director\\DataType\\DataTypeBoolean",
574574
"format": null,
575575
"settings": {},
@@ -578,7 +578,7 @@
578578
"17": {
579579
"varname": "file_count_windows_only_files",
580580
"caption": "File Count: Only Files?",
581-
"description": "Only consider files.",
581+
"description": "Only count files.",
582582
"datatype": "Icinga\\Module\\Director\\DataType\\DataTypeBoolean",
583583
"format": null,
584584
"settings": {},

0 commit comments

Comments
 (0)