Skip to content

Commit cb0d4a5

Browse files
author
Prashant D
committed
mon/LogMonitor: Use generic cluster log level config
We do not control the verbosity of the LogEntry which is getting logged to stderr, graylog and journald. This causes excessive flooding of logs to /var/log, causing the filesystem to fill up quickly. Also we have different config variables, namely mon_cluster_log_file_level and mon_cluster_log_to_syslog_level, to control verbosity for the cluster log file and for syslog respectively. Add a generic cluster log level config variable which controls cluster log verbosity for all external entities. Additionally, this patch addresses the regression of the `mon_cluster_log_file_level` option, which doesn't take effect because of the code refactoring of LogMonitor::update_from_paxos (commit: 7c84e06). Fixes: https://tracker.ceph.com/issues/57061 Fixes: https://tracker.ceph.com/issues/57049 Signed-off-by: Prashant D <[email protected]>
1 parent 6185d08 commit cb0d4a5

File tree

6 files changed

+251
-47
lines changed

6 files changed

+251
-47
lines changed

PendingReleaseNotes

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@
7878
of POOL_APP_NOT_ENABLED health warning for that pool.
7979
The user might temporarily mute this warning using
8080
``ceph health mute POOL_APP_NOT_ENABLED``.
81+
* The `mon_cluster_log_file_level` and `mon_cluster_log_to_syslog_level` options
82+
have been removed. Henceforth, users should use the new generic option
83+
`mon_cluster_log_level` to control the cluster log level verbosity for the cluster
84+
log file as well as for all external entities.
8185
CephFS: Disallow delegating preallocated inode ranges to clients. Config
8286
`mds_client_delegate_inos_pct` defaults to 0 which disables async dirops
8387
in the kclient.

qa/standalone/ceph-helpers.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1744,6 +1744,22 @@ function test_wait_for_peered() {
17441744
teardown $dir || return 1
17451745
}
17461746

1747+
##
# Poll a file once per second until a pattern appears in it.
#
# The number of polls is taken from the TIMEOUT variable (callers usually
# set it for a single call: `TIMEOUT=60 wait_for_string ...`). When TIMEOUT
# is unset or empty this function falls back to 300 polls instead of
# failing on an empty arithmetic expression.
#
# The poll counter and any matching lines are written to stdout.
#
# @param logfile   path of the file to search
# @param searchstr grep basic-regex pattern to look for
# @return 0 as soon as the pattern is found, 1 if it never shows up
#
function wait_for_string() {
    local logfile=$1
    local searchstr=$2
    local max_polls=${TIMEOUT:-300}
    # Keep the counter local so a caller's own $i / $status are not clobbered.
    local i

    for ((i = 0; i < max_polls; i++)); do
        echo $i
        # '--' protects patterns that start with a dash; the quoted path
        # survives directories containing whitespace.
        if grep -- "$searchstr" "$logfile"; then
            return 0
        fi
        sleep 1
    done
    return 1
}
17471763

17481764
#######################################################################
17491765

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Copyright (C) 2022 Red Hat <[email protected]>
4+
#
5+
# Author: Prashant D <[email protected]>
6+
#
7+
# This program is free software; you can redistribute it and/or modify
8+
# it under the terms of the GNU Library Public License as published by
9+
# the Free Software Foundation; either version 2, or (at your option)
10+
# any later version.
11+
#
12+
# This program is distributed in the hope that it will be useful,
13+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
# GNU Library Public License for more details.
16+
#
17+
18+
# Load the shared standalone-test helpers. The path is quoted so that a
# CEPH_ROOT containing whitespace still resolves to a single file name.
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"
19+
20+
#
# Entry point for this test file: export the monitor address and common
# ceph arguments, then run each selected test between setup and teardown.
#
# @param dir   working directory handed to setup, the test and teardown
# @param ...   optional names of the test functions to run; when omitted,
#              every shell function whose name starts with TEST_ is run
# @return 0 if every step succeeded, 1 at the first failing step
#         (teardown is not called for a test that fails)
#
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7156" # git grep '\<7156\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    # Loop variable kept local so it cannot overwrite a caller's $func.
    local func
    # $funcs is intentionally unquoted: it is a whitespace-separated list.
    for func in $funcs ; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}
36+
37+
#
# Check that mon_cluster_log_level filters what is written to the cluster
# log file ($dir/log).
#
# Steps, each contributing at most one error to the final tally:
#   level debug : a DBG deep-scrub entry and an INF osd boot entry must appear
#   level info  : a DBG deep-scrub entry must NOT appear
#   level warn  : a WRN health-check entry must appear, and an INF
#                 "marked osd.0 out" entry must NOT appear in the file even
#                 though `ceph log last` reports it
#
# @param dir  test working directory
# @return 0 when every expectation held, 1 otherwise (or when a daemon
#         fails to start / an osd does not come back up)
#
function TEST_cluster_log_level() {
    local dir=$1
    # Function-local so the tally and pattern do not leak into other tests.
    local errors=0
    local search_str

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1

    ceph config set mon.a mon_cluster_log_level debug
    ceph osd pool create replicated1 8 8
    ceph osd pool set replicated1 size 1 --yes-i-really-mean-it
    ceph osd pool set replicated1 min_size 1

    WAIT_FOR_CLEAN_TIMEOUT=60 wait_for_clean
    # Start from an empty log so only entries produced below are matched.
    truncate -s 0 "$dir/log"

    # --- level debug: DBG entries are expected in the file ---
    ceph pg deep-scrub 1.0
    search_str="cluster [[]DBG[]] 1.0 deep-scrub"
    TIMEOUT=60 wait_for_string "$dir/log" "$search_str"
    if ! grep -q "$search_str" "$dir/log"; then
        echo "Failed : Could not find DBG log in the cluster log file"
        errors=$((errors + 1))
    fi

    # --- level debug: INF entries are expected as well ---
    ceph osd down 0
    TIMEOUT=20 wait_for_osd up 0 || return 1
    if ! grep -q "cluster [[]INF[]] osd.0.*boot" "$dir/log"; then
        echo "Failed : Could not find INF log in the cluster log file"
        errors=$((errors + 1))
    fi

    # --- level info: DBG entries must be filtered out ---
    ceph config set mon.a mon_cluster_log_level info
    ceph pg deep-scrub 1.1
    search_str="cluster [[]DBG[]] 1.1 deep-scrub"
    # The result is ignored on purpose: when the entry is correctly absent
    # this simply polls until TIMEOUT expires before the final check.
    TIMEOUT=60 wait_for_string "$dir/log" "$search_str"
    if grep -q "$search_str" "$dir/log"; then
        echo "Failed : Found DBG log in the cluster log file"
        errors=$((errors + 1))
    fi

    # --- level warn: WRN entries are expected ---
    ceph config set mon.a mon_cluster_log_level warn
    ceph osd set noup
    ceph osd down osd.0
    ceph osd unset noup
    TIMEOUT=60 wait_for_osd up 0 || return 1
    search_str="cluster [[]WRN[]] Health check failed: noup flag(s) set (OSDMAP_FLAGS)"
    if ! grep -q "$search_str" "$dir/log"; then
        echo "Failed : No WRN entries found in the cluster log file"
        errors=$((errors + 1))
    fi

    # --- level warn: INF entries must be filtered out of the file ---
    ceph osd out 0
    ceph osd in 0
    WAIT_FOR_CLEAN_TIMEOUT=60 wait_for_clean
    search_str="cluster [[]INF[]] Client client.admin marked osd.0 out, while it was still marked up"
    # The entry must exist in the cluster log itself, otherwise its absence
    # from the file would prove nothing.
    ceph log last 1000 | grep -q "$search_str" || return 1
    TIMEOUT=60 wait_for_string "$dir/log" "$search_str"
    if grep -q "$search_str" "$dir/log"; then
        echo "Failed : Found INF log in the cluster log file"
        errors=$((errors + 1))
    fi

    if [ "$errors" -gt 0 ]; then
        echo "TEST FAILED WITH $errors ERRORS"
        return 1
    fi

    echo "TEST PASSED"
    return 0
}
116+
117+
#
# Check that mon_cluster_log_level filters the cluster log entries that the
# monitor sends to journald (mon_cluster_log_to_journald = true).
#
# Each step dumps the MESSAGE field of recent ceph-mon journal entries of one
# priority into $dir/journal.log and greps it:
#   level debug : a deep-scrub entry (PRIORITY=7) and an osd boot entry
#                 (PRIORITY=6) must appear
#   level info  : a deep-scrub entry (PRIORITY=7) must NOT appear
#   level warn  : a health-check entry (PRIORITY=4) must appear, and the
#                 "marked osd.0 out" entry (PRIORITY=6) must NOT appear even
#                 though `ceph log last` reports it
#
# @param dir  test working directory
# @return 0 when every expectation held, 1 otherwise (or when a daemon
#         fails to start / an osd does not come back up)
#
function TEST_journald_cluster_log_level() {
    local dir=$1
    # Function-local so the tally and pattern do not leak into other tests.
    local errors=0
    local search_str

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1

    ceph config set mon.a mon_cluster_log_level debug
    ceph osd pool create replicated1 8 8
    ceph osd pool set replicated1 size 1 --yes-i-really-mean-it
    ceph osd pool set replicated1 min_size 1

    WAIT_FOR_CLEAN_TIMEOUT=60 wait_for_clean
    ceph config set mon.a mon_cluster_log_to_journald true

    # --- level debug: DBG entries are expected in the journal ---
    ceph pg deep-scrub 1.0
    search_str="1.0 deep-scrub"
    # Declared local here (not at the top) so that everything above still
    # sees the caller's TIMEOUT, while the value set for this delay no
    # longer overwrites the global once the test returns.
    local TIMEOUT=60
    sleep $TIMEOUT
    journalctl _COMM=ceph-mon CEPH_CHANNEL=cluster PRIORITY=7 \
        --output=json-pretty --since "60 seconds ago" \
        | jq '.MESSAGE' > "$dir/journal.log"
    if ! grep -q "$search_str" "$dir/journal.log"; then
        echo "Failed : Could not find DBG log in the journalctl log file"
        errors=$((errors + 1))
    fi

    # --- level debug: INF entries are expected as well ---
    ceph osd down 0
    TIMEOUT=20 wait_for_osd up 0 || return 1
    search_str="osd.0.*boot"
    journalctl _COMM=ceph-mon CEPH_CHANNEL=cluster PRIORITY=6 \
        --output=json-pretty --since "60 seconds ago" \
        | jq '.MESSAGE' > "$dir/journal.log"
    if ! grep -q "$search_str" "$dir/journal.log"; then
        echo "Failed : Could not find INF log in the journalctl log file"
        errors=$((errors + 1))
    fi

    # --- level info: DBG entries must be filtered out ---
    ceph config set mon.a mon_cluster_log_level info
    ceph pg deep-scrub 1.1
    TIMEOUT=60
    sleep $TIMEOUT
    search_str="1.1 deep-scrub"
    journalctl _COMM=ceph-mon CEPH_CHANNEL=cluster PRIORITY=7 \
        --output=json-pretty --since "60 seconds ago" \
        | jq '.MESSAGE' > "$dir/journal.log"
    if grep -q "$search_str" "$dir/journal.log"; then
        # The old message interpolated $clog_entries, which is never set.
        echo "Failed : Found DBG log in the journalctl log file"
        errors=$((errors + 1))
    fi

    # --- level warn: WRN entries are expected ---
    ceph config set mon.a mon_cluster_log_level warn
    ceph osd set noup
    ceph osd down osd.0
    ceph osd unset noup
    TIMEOUT=60 wait_for_osd up 0 || return 1
    search_str="Health check failed: noup flag(s) set (OSDMAP_FLAGS)"
    journalctl _COMM=ceph-mon CEPH_CHANNEL=cluster PRIORITY=4 \
        --output=json-pretty --since "60 seconds ago" \
        | jq '.MESSAGE' > "$dir/journal.log"
    if ! grep -q "$search_str" "$dir/journal.log"; then
        echo "Failed : No WRN entries found in the journalctl log file"
        errors=$((errors + 1))
    fi

    # --- level warn: INF entries must be filtered out of the journal ---
    ceph osd out 0
    ceph osd in 0
    WAIT_FOR_CLEAN_TIMEOUT=60 wait_for_clean
    search_str="Client client.admin marked osd.0 out, while it was still marked up"
    # The entry must exist in the cluster log itself, otherwise its absence
    # from the journal would prove nothing.
    ceph log last | grep -q "$search_str" || return 1
    journalctl _COMM=ceph-mon CEPH_CHANNEL=cluster PRIORITY=6 \
        --output=json-pretty --since "60 seconds ago" \
        | jq '.MESSAGE' > "$dir/journal.log"
    if grep -q "$search_str" "$dir/journal.log"; then
        # The old message interpolated $clog_entries, which is never set.
        echo "Failed : Found INF log in the journalctl log file"
        errors=$((errors + 1))
    fi

    if [ "$errors" -gt 0 ]; then
        echo "TEST FAILED WITH $errors ERRORS"
        return 1
    fi

    echo "TEST PASSED"
    return 0
}
204+
205+
# Hand control to `main` with this suite's name and the command-line
# arguments. NOTE(review): `main` is not defined in this file; presumably it
# comes from the sourced ceph-helpers.sh and dispatches to run() — confirm.
main mon-cluster-log "$@"

src/common/options/mon.yaml.in

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -112,18 +112,6 @@ options:
112112
flags:
113113
- runtime
114114
with_legacy: true
115-
- name: mon_cluster_log_to_syslog_level
116-
type: str
117-
level: advanced
118-
desc: Syslog level for cluster log messages
119-
default: info
120-
services:
121-
- mon
122-
see_also:
123-
- mon_cluster_log_to_syslog
124-
flags:
125-
- runtime
126-
with_legacy: true
127115
- name: mon_cluster_log_to_syslog_facility
128116
type: str
129117
level: advanced
@@ -172,10 +160,12 @@ options:
172160
flags:
173161
- runtime
174162
with_legacy: true
175-
- name: mon_cluster_log_file_level
163+
- name: mon_cluster_log_level
176164
type: str
177165
level: advanced
178-
desc: Lowest level to include is cluster log file
166+
desc: Lowest level to include in cluster log file and/or in external log server
167+
long_desc: Log level to control the cluster log message verbosity for the cluster
168+
log file as well as for all external entities.
179169
default: debug
180170
services:
181171
- mon

src/mon/LogMonitor.cc

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -208,11 +208,10 @@ ceph::logging::JournaldClusterLogger &LogMonitor::log_channel_info::get_journald
208208
void LogMonitor::log_channel_info::clear()
209209
{
210210
log_to_syslog.clear();
211-
syslog_level.clear();
212211
syslog_facility.clear();
213212
log_file.clear();
214213
expanded_log_file.clear();
215-
log_file_level.clear();
214+
log_level.clear();
216215
log_to_graylog.clear();
217216
log_to_graylog_host.clear();
218217
log_to_graylog_port.clear();
@@ -356,16 +355,25 @@ void LogMonitor::log_external(const LogEntry& le)
356355
channel = CLOG_CHANNEL_CLUSTER;
357356
}
358357

358+
string level = channels.get_log_level(channel);
359+
if (int log_level = LogEntry::str_to_level(level);log_level > le.prio) {
360+
// Do not log LogEntry to any external entity if le.prio is
361+
// less than channel log level.
362+
return;
363+
}
364+
365+
if (g_conf().get_val<bool>("mon_cluster_log_to_stderr")) {
366+
cerr << channel << " " << le << std::endl;
367+
}
368+
359369
if (channels.do_log_to_syslog(channel)) {
360-
string level = channels.get_level(channel);
361370
string facility = channels.get_facility(channel);
362371
if (level.empty() || facility.empty()) {
363372
derr << __func__ << " unable to log to syslog -- level or facility"
364373
<< " not defined (level: " << level << ", facility: "
365374
<< facility << ")" << dendl;
366375
} else {
367-
le.log_to_syslog(channels.get_level(channel),
368-
channels.get_facility(channel));
376+
le.log_to_syslog(level, facility);
369377
}
370378
}
371379

@@ -1191,16 +1199,6 @@ void LogMonitor::update_log_channels()
11911199
return;
11921200
}
11931201

1194-
r = get_conf_str_map_helper(
1195-
g_conf().get_val<string>("mon_cluster_log_to_syslog_level"),
1196-
oss, &channels.syslog_level,
1197-
CLOG_CONFIG_DEFAULT_KEY);
1198-
if (r < 0) {
1199-
derr << __func__ << " error parsing 'mon_cluster_log_to_syslog_level'"
1200-
<< dendl;
1201-
return;
1202-
}
1203-
12041202
r = get_conf_str_map_helper(
12051203
g_conf().get_val<string>("mon_cluster_log_to_syslog_facility"),
12061204
oss, &channels.syslog_facility,
@@ -1221,11 +1219,11 @@ void LogMonitor::update_log_channels()
12211219
}
12221220

12231221
r = get_conf_str_map_helper(
1224-
g_conf().get_val<string>("mon_cluster_log_file_level"), oss,
1225-
&channels.log_file_level,
1222+
g_conf().get_val<string>("mon_cluster_log_level"), oss,
1223+
&channels.log_level,
12261224
CLOG_CONFIG_DEFAULT_KEY);
12271225
if (r < 0) {
1228-
derr << __func__ << " error parsing 'mon_cluster_log_file_level'"
1226+
derr << __func__ << " error parsing 'mon_cluster_log_level'"
12291227
<< dendl;
12301228
return;
12311229
}
@@ -1279,10 +1277,9 @@ void LogMonitor::handle_conf_change(const ConfigProxy& conf,
12791277
const std::set<std::string> &changed)
12801278
{
12811279
if (changed.count("mon_cluster_log_to_syslog") ||
1282-
changed.count("mon_cluster_log_to_syslog_level") ||
12831280
changed.count("mon_cluster_log_to_syslog_facility") ||
12841281
changed.count("mon_cluster_log_file") ||
1285-
changed.count("mon_cluster_log_file_level") ||
1282+
changed.count("mon_cluster_log_level") ||
12861283
changed.count("mon_cluster_log_to_graylog") ||
12871284
changed.count("mon_cluster_log_to_graylog_host") ||
12881285
changed.count("mon_cluster_log_to_graylog_port") ||

0 commit comments

Comments
 (0)