Skip to content

Commit 1e1258d

Browse files
authored
Merge branch 'ceph:main' into main
2 parents 947b5d2 + 8a7e670 commit 1e1258d

File tree

15 files changed

+270
-40
lines changed

15 files changed

+270
-40
lines changed

qa/tasks/cephfs/test_exports.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from tasks.cephfs.fuse_mount import FuseMount
55
from tasks.cephfs.cephfs_test_case import CephFSTestCase
66
from teuthology.exceptions import CommandFailedError
7+
from teuthology.contextutil import safe_while, MaxWhileTries
78

89
log = logging.getLogger(__name__)
910

@@ -628,3 +629,98 @@ def test_ephemeral_pin_shrink_mds(self):
628629
log.info("{0} migrations have occured due to the cluster resizing".format(count))
629630
# rebalancing from 3 -> 2 may cause half of rank 0/1 to move and all of rank 2
630631
self.assertLessEqual((count/len(subtrees_old)), (1.0/3.0/2.0 + 1.0/3.0/2.0 + 1.0/3.0)*1.25) # aka .66 with 25% overbudget
632+
633+
class TestDumpExportStates(CephFSTestCase):
    """
    Exercise the MDS `dump_export_states` admin-socket command.

    Each test starts a subtree export from rank 0 to rank 1, freezes the
    import on the target rank at a chosen point (via `mds_kill_import_at`),
    and then verifies the exporter's dumped state machine snapshot.
    """
    MDSS_REQUIRED = 2
    CLIENTS_REQUIRED = 1

    # Export state names in state-machine order; indices below refer to this list.
    EXPORT_STATES = ['locking', 'discovering', 'freezing', 'prepping', 'warning', 'exporting']

    def setUp(self):
        super().setUp()

        self.fs.set_max_mds(self.MDSS_REQUIRED)
        self.status = self.fs.wait_for_daemons()

        # Directory that will be migrated between ranks.
        self.mount_a.run_shell_payload('mkdir -p test/export')

    def _wait_for_export_target(self, source, target, sleep=2, timeout=10):
        """
        Poll rank `source` until `target` shows up in its export_targets.

        :raises RuntimeError: if `target` is not registered within `timeout`s.
        """
        try:
            with safe_while(sleep=sleep, tries=timeout//sleep) as proceed:
                while proceed():
                    info = self.fs.getinfo().get_rank(self.fs.id, source)
                    log.info(f'waiting for rank {target} to be added to the export target')
                    if target in info['export_targets']:
                        return
        except MaxWhileTries as e:
            raise RuntimeError(f'rank {target} has not been added to export target after {timeout}s') from e

    def _dump_export_state(self, rank):
        """Dump export states on `rank` and return the single expected entry."""
        states = self.fs.rank_asok(['dump_export_states'], rank=rank, status=self.status)
        self.assertIsInstance(states, list)
        self.assertEqual(len(states), 1)
        return states[0]

    def _test_base(self, path, source, target, state_index, kill):
        """
        Start an export of `path` from `source` to `target`, kill the import
        on the target at point `kill`, and check the exporter is stuck in
        EXPORT_STATES[state_index]. Returns the dumped state for further checks.
        """
        # Arrange for the importing MDS to die at the chosen import stage.
        self.fs.rank_asok(['config', 'set', 'mds_kill_import_at', str(kill)], rank=target, status=self.status)

        self.fs.rank_asok(['export', 'dir', path, str(target)], rank=source, status=self.status)
        self._wait_for_export_target(source, target)

        # The target was killed deliberately; clean up its coredump so the
        # test harness does not flag it as a failure.
        target_rank = self.fs.get_rank(rank=target, status=self.status)
        self.delete_mds_coredump(target_rank['name'])

        state = self._dump_export_state(source)

        self.assertIsInstance(state['tid'], int)
        self.assertEqual(state['path'], path)
        self.assertEqual(state['state'], self.EXPORT_STATES[state_index])
        self.assertEqual(state['peer'], target)

        return state

    def _test_state_history(self, state):
        """Check state_history holds one dict per state up to the current one."""
        history = state['state_history']
        self.assertIsInstance(history, dict)
        size = 0
        for name in self.EXPORT_STATES:
            self.assertIsInstance(history[name], dict)
            size += 1
            if name == state['state']:
                break
        self.assertEqual(len(history), size)

    def _test_freeze_tree(self, state, waiters):
        """Check freeze timing is reported and the waiter count matches."""
        self.assertIsInstance(state['freeze_tree_time'], float)
        self.assertEqual(state['unfreeze_tree_waiters'], waiters)

    def test_discovering(self):
        """Exporter stuck in 'discovering' when import dies at stage 1."""
        state = self._test_base('/test', 0, 1, 1, 1)

        self._test_state_history(state)
        self._test_freeze_tree(state, 0)

        self.assertEqual(state['last_cum_auth_pins'], 0)
        self.assertEqual(state['num_remote_waiters'], 0)

    def test_prepping(self):
        """Exporter stuck in 'prepping' when import dies at stage 3."""
        client_id = self.mount_a.get_global_id()

        state = self._test_base('/test', 0, 1, 3, 3)

        self._test_state_history(state)
        self._test_freeze_tree(state, 0)

        self.assertEqual(state['flushed_clients'], [client_id])
        self.assertIsInstance(state['warning_ack_waiting'], list)

    def test_exporting(self):
        """Exporter stuck in 'exporting' when import dies at stage 5."""
        state = self._test_base('/test', 0, 1, 5, 5)

        self._test_state_history(state)
        self._test_freeze_tree(state, 0)

        self.assertIsInstance(state['notify_ack_waiting'], list)

qa/workunits/fs/snaps/snaptest-git-ceph.sh

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,14 @@ set -e
44

55
# increase the cache size
66
sudo git config --global http.sslVerify false
7-
sudo git config --global http.postBuffer 1048576000
7+
sudo git config --global http.postBuffer 1024MB # default is 1MB
8+
sudo git config --global http.maxRequestBuffer 100M # default is 10MB
9+
sudo git config --global core.compression 0
10+
11+
# enable the debug logs for git clone
12+
export GIT_TRACE_PACKET=1
13+
export GIT_TRACE=1
14+
export GIT_CURL_VERBOSE=1
815

916
# try it again if the clone is slow and the second time
1017
retried=false
@@ -19,6 +26,11 @@ timeout 1800 git clone https://git.ceph.com/ceph.git
1926
trap - EXIT
2027
cd ceph
2128

29+
# disable the debug logs for git clone
30+
export GIT_TRACE_PACKET=0
31+
export GIT_TRACE=0
32+
export GIT_CURL_VERBOSE=0
33+
2234
versions=`seq 1 90`
2335

2436
for v in $versions

src/mds/CDir.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,16 @@ class CDir : public MDSCacheObject, public Counter<CDir> {
546546

547547
void maybe_finish_freeze();
548548

549+
size_t count_unfreeze_tree_waiters() {
550+
size_t n = count_unfreeze_dir_waiters();
551+
_walk_tree([&n](CDir *dir) {
552+
n += dir->count_unfreeze_dir_waiters();
553+
return true;
554+
});
555+
return n;
556+
}
557+
inline size_t count_unfreeze_dir_waiters() const { return count_waiters(WAIT_UNFREEZE); }
558+
549559
std::pair<bool,bool> is_freezing_or_frozen_tree() const {
550560
if (freeze_tree_state) {
551561
if (freeze_tree_state->frozen)

src/mds/MDSCacheObject.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,8 @@ class MDSCacheObject {
260260
}
261261
bool is_waiter_for(waitmask_t mask);
262262

// NOTE(review): delegates to waiting.count(mask) — appears to count the
// waiters registered under this wait mask (e.g. WAIT_UNFREEZE backlog
// reported by dump_export_states); confirm exact-key vs. bitmask semantics
// against the waitmask_t container declaration.
inline size_t count_waiters(uint64_t mask) const { return waiting.count(mask); }
263265
virtual void add_waiter(uint64_t mask, MDSContext *c) {
264266
add_waiter(waitmask_t(mask), c);
265267
}

src/mds/MDSDaemon.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,10 @@ void MDSDaemon::set_up_admin_socket()
304304
asok_hook,
305305
"show recent ops, sorted by op duration");
306306
ceph_assert(r == 0);
307+
r = admin_socket->register_command("dump_export_states",
308+
asok_hook,
309+
"dump export states");
310+
ceph_assert(r == 0);
307311
r = admin_socket->register_command("scrub_path name=path,type=CephString "
308312
"name=scrubops,type=CephChoices,"
309313
"strings=force|recursive|repair,n=N,req=false "

src/mds/MDSRank.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2769,6 +2769,9 @@ void MDSRankDispatcher::handle_asok_command(
27692769
if (!op_tracker.dump_historic_ops(f, true)) {
27702770
*css << "op_tracker disabled; set mds_enable_op_tracker=true to enable";
27712771
}
2772+
} else if (command == "dump_export_states") {
2773+
std::lock_guard l(mds_lock);
2774+
mdcache->migrator->dump_export_states(f);
27722775
} else if (command == "osdmap barrier") {
27732776
int64_t target_epoch = 0;
27742777
bool got_val = cmd_getval(cmdmap, "target_epoch", target_epoch);

0 commit comments

Comments
 (0)