|
3 | 3 | Test our tools for recovering the content of damaged journals |
4 | 4 | """ |
5 | 5 |
|
| 6 | +from io import StringIO |
| 7 | +import re |
6 | 8 | import json |
7 | 9 | import logging |
8 | 10 | from textwrap import dedent |
@@ -145,6 +147,67 @@ def test_inject_to_empty(self): |
145 | 147 | # Check that we can do metadata ops in the recovered directory |
146 | 148 | self.mount_a.run_shell(["touch", "subdir/subsubdir/subsubdirfile"]) |
147 | 149 |
|
| 150 | + def test_reset_trim(self): |
| 151 | + """ |
| 152 | + That after forcibly resetting the journal during disaster recovery, the old |
| 153 | + journal objects are trimmed once the fs is back online, so that the space |
| 154 | + they occupy in the metadata pool is reclaimed |
| 155 | + """ |
| 156 | + |
| 157 | + self.fs.set_joinable(False) # no unintended failover |
| 158 | + |
| 159 | + # Create dirs |
| 160 | + self.mount_a.run_shell_payload("mkdir {alpha,bravo} && touch {alpha,bravo}/file") |
| 161 | + |
| 162 | + # Do some IO to create multiple journal objects |
| 163 | + self.mount_a.create_n_files("alpha/file", 5000) |
| 164 | + self.mount_a.create_n_files("bravo/file", 5000) |
| 165 | + |
| 166 | + # Stop (hard) the MDS daemon |
| 167 | + self.fs.rank_fail(rank=0) |
| 168 | + |
| 169 | + # journal objects before reset |
| 170 | + objects = self.fs.radosmo(["ls"], stdout=StringIO()).strip().split("\n") |
| 171 | + journal_objs_before_reset = [ |
| 172 | + o for o in objects |
| 173 | + if re.match(r"200\.[0-9A-Fa-f]{8}$", o) and o != "200.00000000" |
| 174 | + ] |
| 175 | + |
| 176 | + # Kill the mount as dentries aren't being recovered |
| 177 | + log.info("Killing mount, it's blocked on the MDS we killed") |
| 178 | + self.mount_a.kill() |
| 179 | + self.mount_a.kill_cleanup() |
| 180 | + |
| 181 | + # Run journal reset to validate it doesn't reset journal trim position |
| 182 | + self.fs.fail() |
| 183 | + self.fs.journal_tool(["journal", "reset", "--yes-i-really-really-mean-it"], 0) |
| 184 | + |
| 185 | + # It may have incorrect dir stats |
| 186 | + self.config_set('mds', 'mds_verify_scatter', 'false') |
| 187 | + self.config_set('mds', 'mds_debug_scatterstat', 'false') |
| 188 | + |
| 189 | + # Bring an MDS back online |
| 190 | + self.fs.set_joinable(True) |
| 191 | + self.fs.wait_for_daemons() |
| 192 | + self.mount_a.mount_wait() |
| 193 | + |
| 194 | + # Create a few more files to validate that the fs is intact |
| 195 | + self.mount_a.run_shell_payload("mkdir dir1 && touch dir1/file_after_reset") |
| 196 | + self.mount_a.create_n_files("dir1/file_after_reset", 100) |
| 197 | + |
| 198 | + # Flush the journal, which should trim the old journal objects |
| 199 | + self.fs.rank_asok(["flush", "journal"], rank=0) |
| 200 | + |
| 201 | + # journal objects after reset |
| 202 | + objects = self.fs.radosmo(["ls"], stdout=StringIO()).strip().split("\n") |
| 203 | + journal_objs_after_reset = [ |
| 204 | + o for o in objects |
| 205 | + if re.match(r"200\.[0-9A-Fa-f]{8}$", o) and o != "200.00000000" |
| 206 | + ] |
| 207 | + |
| 208 | + # Validate that the journal flush has trimmed the old journal objects |
| 209 | + self.assertGreater(len(journal_objs_before_reset), len(journal_objs_after_reset)) |
| 210 | + |
148 | 211 | @for_teuthology # 308s |
149 | 212 | def test_reset(self): |
150 | 213 | """ |
|
0 commit comments