
Commit 75bcfd1

Merge PR ceph#55758 into main
* refs/pull/55758/head:
  doc: update 'journal reset' command with --yes-i-really-really-mean-it
  qa: fix cephfs-journal-tool command options and make fs inactive
  cephfs-journal-tool: Add warning messages during 'journal reset' and prevent execution on active fs

Reviewed-by: Dhairya Parmar <[email protected]>
Reviewed-by: Venky Shankar <[email protected]>
2 parents d00314d + 42953ec

14 files changed: +65 -23 lines changed

PendingReleaseNotes

Lines changed: 4 additions & 0 deletions
@@ -201,6 +201,10 @@ CephFS: Disallow delegating preallocated inode ranges to clients. Config
   and the new feature bit for more information.
 
 * cls_cxx_gather is marked as deprecated.
+* CephFS: cephfs-journal-tool is guarded against running on an online file system.
+  The 'cephfs-journal-tool --rank <fs_name>:<mds_rank> journal reset' and
+  'cephfs-journal-tool --rank <fs_name>:<mds_rank> journal reset --force'
+  commands require '--yes-i-really-really-mean-it'.
 
 * Dashboard: Rearranged Navigation Layout: The navigation layout has been reorganized
   for improved usability and easier access to key features.

doc/cephfs/cephfs-journal-tool.rst

Lines changed: 2 additions & 1 deletion
@@ -15,7 +15,8 @@ examining, modifying, and extracting data from journals.
 
     This tool is **dangerous** because it directly modifies internal
     data structures of the file system. Make backups, be careful, and
-    seek expert advice. If you are unsure, do not run this tool.
+    seek expert advice. If you are unsure, do not run this tool. As a
+    precaution, cephfs-journal-tool doesn't work on an active filesystem.
 
 Syntax
 ------
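Taken together with the release note above, the documented workflow becomes: take the file system offline, run the reset with the new acknowledgement flag, then let the MDS daemons rejoin. Below is a minimal sketch of that sequence as a Python wrapper around the CLI; the file system name "cephfs" and the single rank 0 are placeholders chosen for illustration, not part of this change.

    import subprocess

    FS_NAME = "cephfs"          # placeholder file system name, not from this commit
    RANK = f"{FS_NAME}:0"       # assumes a single active MDS rank

    def run(*cmd):
        # Run a CLI command and raise if it exits non-zero.
        subprocess.run(cmd, check=True)

    # 1. Take the file system offline; cephfs-journal-tool now refuses to run otherwise.
    run("ceph", "fs", "fail", FS_NAME)

    # 2. Reset the journal; the new flag acknowledges the destructive-operation warning.
    run("cephfs-journal-tool", "--rank", RANK,
        "journal", "reset", "--yes-i-really-really-mean-it")

    # 3. Allow MDS daemons to join again and bring the file system back up.
    run("ceph", "fs", "set", FS_NAME, "joinable", "true")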

doc/cephfs/disaster-recovery-experts.rst

Lines changed: 4 additions & 4 deletions
@@ -68,9 +68,9 @@ truncate it like so:
 
 ::
 
-    cephfs-journal-tool [--rank=N] journal reset
+    cephfs-journal-tool [--rank=<fs_name>:{mds-rank|all}] journal reset --yes-i-really-really-mean-it
 
-Specify the MDS rank using the ``--rank`` option when the file system has/had
+Specify the filesystem and the MDS rank using the ``--rank`` option when the file system has/had
 multiple active MDS.
 
 .. warning::

@@ -135,7 +135,7 @@ objects.
     # InoTable
     cephfs-table-tool 0 reset inode
     # Journal
-    cephfs-journal-tool --rank=0 journal reset
+    cephfs-journal-tool --rank=<fs_name>:0 journal reset --yes-i-really-really-mean-it
     # Root inodes ("/" and MDS directory)
     cephfs-data-scan init
 

@@ -253,7 +253,7 @@ Next, we will create the intial metadata for the fs:
     cephfs-table-tool cephfs_recovery:0 reset session
     cephfs-table-tool cephfs_recovery:0 reset snap
     cephfs-table-tool cephfs_recovery:0 reset inode
-    cephfs-journal-tool --rank cephfs_recovery:0 journal reset --force
+    cephfs-journal-tool --rank cephfs_recovery:0 journal reset --force --yes-i-really-really-mean-it
 
 Now perform the recovery of the metadata pool from the data pool:
 
qa/tasks/cephfs/test_damage.py

Lines changed: 1 addition & 1 deletion
@@ -498,7 +498,7 @@ def test_open_ino_errors(self):
 
         # Drop everything from the MDS cache
         self.fs.fail()
-        self.fs.journal_tool(['journal', 'reset'], 0)
+        self.fs.journal_tool(['journal', 'reset', '--yes-i-really-really-mean-it'], 0)
         self.fs.set_joinable()
         self.fs.wait_for_daemons()

qa/tasks/cephfs/test_data_scan.py

Lines changed: 2 additions & 2 deletions
@@ -447,9 +447,9 @@ def get_state(mds_id):
         if False:
             with self.assertRaises(CommandFailedError):
                 # Normal reset should fail when no objects are present, we'll use --force instead
-                self.fs.journal_tool(["journal", "reset"], 0)
+                self.fs.journal_tool(["journal", "reset", "--yes-i-really-really-mean-it"], 0)
 
-        self.fs.journal_tool(["journal", "reset", "--force"], 0)
+        self.fs.journal_tool(["journal", "reset", "--force", "--yes-i-really-really-mean-it"], 0)
         self.fs.data_scan(["init"])
         self.fs.data_scan(["scan_extents"], worker_count=workers)
         self.fs.data_scan(["scan_inodes"], worker_count=workers)

qa/tasks/cephfs/test_flush.py

Lines changed: 7 additions & 1 deletion
@@ -3,7 +3,6 @@
 from tasks.cephfs.cephfs_test_case import CephFSTestCase
 from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO
 
-
 class TestFlush(CephFSTestCase):
     def test_flush(self):
         self.mount_a.run_shell(["mkdir", "mydir"])

@@ -44,7 +43,10 @@ def test_flush(self):
 
         # ...and the journal is truncated to just a single subtreemap from the
         # newly created segment
+        self.fs.fail()
         summary_output = self.fs.journal_tool(["event", "get", "summary"], 0)
+        self.fs.set_joinable()
+        self.fs.wait_for_daemons()
         try:
             self.assertEqual(summary_output,
                              dedent(

@@ -72,6 +74,8 @@ def test_flush(self):
                              ).strip())
         flush_data = self.fs.mds_asok(["flush", "journal"])
         self.assertEqual(flush_data['return_code'], 0)
+
+        self.fs.fail()
         self.assertEqual(self.fs.journal_tool(["event", "get", "summary"], 0),
                          dedent(
                              """

@@ -80,6 +84,8 @@ def test_flush(self):
                              Errors: 0
                              """
                          ).strip())
+        self.fs.set_joinable()
+        self.fs.wait_for_daemons()
 
         # Now for deletion!
         # We will count the RADOS deletions and MDS file purges, to verify that
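Several of the qa changes in this commit repeat the same fail / journal_tool / set_joinable / wait_for_daemons sequence around each cephfs-journal-tool call. A hypothetical helper, not part of this commit, could wrap that pattern as a context manager; it assumes the qa Filesystem object seen above, with fail(), set_joinable() and wait_for_daemons() methods.

    from contextlib import contextmanager

    @contextmanager
    def offline_fs(fs):
        # Take the file system offline so cephfs-journal-tool is permitted to run,
        # then bring the MDS daemons back regardless of how the block exits.
        fs.fail()
        try:
            yield fs
        finally:
            fs.set_joinable()
            fs.wait_for_daemons()

    # Hypothetical usage inside a test:
    #   with offline_fs(self.fs):
    #       summary_output = self.fs.journal_tool(["event", "get", "summary"], 0)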

qa/tasks/cephfs/test_forward_scrub.py

Lines changed: 2 additions & 2 deletions
@@ -184,7 +184,7 @@ def test_orphan_scan(self):
         # inotable versions (due to scan_links)
         self.fs.flush()
         self.fs.fail()
-        self.fs.journal_tool(["journal", "reset", "--force"], 0)
+        self.fs.journal_tool(["journal", "reset", "--force", "--yes-i-really-really-mean-it"], 0)
 
         # Run cephfs-data-scan targeting only orphans
         self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])

@@ -411,7 +411,7 @@ def test_health_status_after_dirfrag_repair(self):
 
         self.fs.radosm(["rm", "{0:x}.00000000".format(dir_ino)])
 
-        self.fs.journal_tool(['journal', 'reset'], 0)
+        self.fs.journal_tool(['journal', 'reset', '--yes-i-really-really-mean-it'], 0)
         self.fs.set_joinable()
         self.fs.wait_for_daemons()
         self.mount_a.mount_wait()

qa/tasks/cephfs/test_journal_migration.py

Lines changed: 3 additions & 0 deletions
@@ -67,6 +67,7 @@ def test_journal_migration(self):
         ))
 
         # Verify that cephfs-journal-tool can now read the rewritten journal
+        self.fs.fail()
         inspect_out = self.fs.journal_tool(["journal", "inspect"], 0)
         if not inspect_out.endswith(": OK"):
             raise RuntimeError("Unexpected journal-tool result: '{0}'".format(

@@ -84,6 +85,8 @@ def test_journal_migration(self):
         if event_count < 1000:
             # Approximate value of "lots", expected from having run fsstress
             raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))
+        self.fs.set_joinable()
+        self.fs.wait_for_daemons()
 
         # Do some client work to check that writing the log is still working
         with self.mount_a.mounted_wait():

qa/tasks/cephfs/test_journal_repair.py

Lines changed: 2 additions & 2 deletions
@@ -86,7 +86,7 @@ def test_inject_to_empty(self):
 
         # Now check the MDS can read what we wrote: truncate the journal
         # and start the mds.
-        self.fs.journal_tool(['journal', 'reset'], 0)
+        self.fs.journal_tool(['journal', 'reset', '--yes-i-really-really-mean-it'], 0)
         self.fs.set_joinable()
         self.fs.wait_for_daemons()
 

@@ -231,7 +231,7 @@ def is_marked_damaged():
         self.fs.journal_tool(["event", "recover_dentries", "summary"], 0, quiet=True)
         self.fs.journal_tool(["event", "recover_dentries", "summary"], 1, quiet=True)
         self.fs.table_tool(["0", "reset", "session"])
-        self.fs.journal_tool(["journal", "reset"], 0)
+        self.fs.journal_tool(["journal", "reset", "--yes-i-really-really-mean-it"], 0)
         self.fs.erase_mds_objects(1)
         self.run_ceph_cmd('fs', 'reset', self.fs.name,
                           '--yes-i-really-mean-it')

qa/tasks/cephfs/test_recovery_pool.py

Lines changed: 2 additions & 2 deletions
@@ -138,7 +138,7 @@ def _rebuild_metadata(self, workload, other_pool=None, workers=1):
         if False:
             with self.assertRaises(CommandFailedError):
                 # Normal reset should fail when no objects are present, we'll use --force instead
-                self.fs.journal_tool(["journal", "reset"], 0)
+                self.fs.journal_tool(["journal", "reset", "--yes-i-really-really-mean-it"], 0)
 
         recovery_fs.data_scan(['scan_extents', '--alternate-pool',
                                recovery_pool, '--filesystem', self.fs.name,

@@ -150,7 +150,7 @@ def _rebuild_metadata(self, workload, other_pool=None, workers=1):
         recovery_fs.data_scan(['scan_links', '--filesystem', recovery_fs.name])
         recovery_fs.journal_tool(['event', 'recover_dentries', 'list',
                                   '--alternate-pool', recovery_pool], 0)
-        recovery_fs.journal_tool(["journal", "reset", "--force"], 0)
+        recovery_fs.journal_tool(["journal", "reset", "--force", "--yes-i-really-really-mean-it"], 0)
 
         # Start the MDS
         recovery_fs.set_joinable()
