|
4 | 4 | import time |
5 | 5 | import logging |
6 | 6 | from io import BytesIO, StringIO |
| 7 | +import yaml |
7 | 8 |
|
8 | 9 | from tasks.mgr.mgr_test_case import MgrTestCase |
9 | 10 | from teuthology import contextutil |
@@ -497,6 +498,121 @@ def update_export(self, cluster_id, path, pseudo, fs_name): |
497 | 498 | } |
498 | 499 | })) |
499 | 500 |
|
| 501 | + def apply_ganesha_spec(self, spec): |
| 502 | + """ |
| 503 | + apply spec and wait for redeploy otherwise it will reset any conf changes |
| 504 | + :param spec: ganesha daemon spec (YAML) |
| 505 | + """ |
| 506 | + ganesha_daemon_pid_init = (self.ctx.cluster.run(args=["sudo", "pgrep", "ganesha.nfsd"], |
| 507 | + stdout=StringIO(), |
| 508 | + stderr=StringIO()))[0].stdout.getvalue().strip() |
| 509 | + self.ctx.cluster.run(args=['ceph', 'orch', 'apply', '-i', '-'], |
| 510 | + stdin=spec) |
| 511 | + with contextutil.safe_while(sleep=4, tries=15) as proceed: |
| 512 | + while proceed(): |
| 513 | + try: |
| 514 | + ganesha_daemon_pid = (self.ctx.cluster.run(args=["sudo", "pgrep", "ganesha.nfsd"], |
| 515 | + stdout=StringIO(), |
| 516 | + stderr=StringIO()))[0].stdout.getvalue().strip() |
| 517 | + if ganesha_daemon_pid != ganesha_daemon_pid_init: |
| 518 | + # new pid i.e. redeployment done |
| 519 | + break |
| 520 | + except CommandFailedError: |
| 521 | + # no pid if the redeployment is in progress |
| 522 | + log.info('waiting for ganesha daemon redeployment') |
| 523 | + |
| 524 | + def enable_libcephfs_logging(self, cluster_name): |
| 525 | + """ |
| 526 | + enable ceph client logs by adding a volume mount to ganesha daemon's |
| 527 | + unit.run using `ceph orch apply -i <spec>` and adding client log path |
| 528 | + to /var/lib/ceph/{fsid}/{ganesha_daemon}/config |
| 529 | + :param cluster_name: nfs cluster name |
| 530 | + """ |
| 531 | + fsid = self._cmd("fsid").strip() |
| 532 | + |
| 533 | + # add volume mount for ceph client logging from /var/log/ceph/$fsid:/var/log/ceph:z |
| 534 | + ganesha_spec = self._cmd("orch", "ls", "--service-name", |
| 535 | + f"nfs.{cluster_name}", "--export").strip() |
| 536 | + parsed_ganesha_spec = yaml.safe_load(ganesha_spec) |
| 537 | + original_ganesha_spec = yaml.dump(parsed_ganesha_spec) |
| 538 | + parsed_ganesha_spec["extra_container_args"] = ["-v", |
| 539 | + f"/var/log/ceph/{fsid}:/var/log/ceph:z"] |
| 540 | + debug_enabled_ganesha_spec = yaml.dump(parsed_ganesha_spec).replace("- -v", '- "-v"').replace( |
| 541 | + f"- /var/log/ceph/{fsid}:/var/log/ceph:z", f'- "/var/log/ceph/{fsid}:/var/log/ceph:z"') |
| 542 | + log.debug(f"debug enabled ganesha spec: {debug_enabled_ganesha_spec}") |
| 543 | + |
| 544 | + self.apply_ganesha_spec(debug_enabled_ganesha_spec) |
| 545 | + |
| 546 | + # add client debug to /var/lib/ceph/$fsid/$ganesha_daemon/config |
| 547 | + ganesha_daemon = ((self._orch_cmd("ps", "--daemon-type", "nfs")).split("\n")[1].split(' ')[0]).strip() |
| 548 | + GANESHA_CONF_FILE_PATH = f"/var/lib/ceph/{fsid}/{ganesha_daemon}/config" |
| 549 | + |
| 550 | + original_ganesha_conf = (self.ctx.cluster.run(args=["sudo", "cat", GANESHA_CONF_FILE_PATH], |
| 551 | + stdout=StringIO(), |
| 552 | + stderr=StringIO()))[0].stdout.getvalue().strip() |
| 553 | + if "[client]" not in original_ganesha_conf: |
| 554 | + s = f"[client]\n\tdebug client = 20\n\tlog file = /var/log/ceph/ceph-client.nfs.{cluster_name}.log" |
| 555 | + self._sys_cmd(["echo", Raw(f'"{s}"'), Raw("|"), "sudo", "tee", Raw("-a"), GANESHA_CONF_FILE_PATH]) |
| 556 | + # restart ganesha daemon for the changes to take effect |
| 557 | + self._orch_cmd("restart", f"nfs.{cluster_name}") |
| 558 | + |
| 559 | + # ensure log level and file path exists |
| 560 | + ganesha_conf_debug_enabled = (self.ctx.cluster.run(args=["sudo", "cat", GANESHA_CONF_FILE_PATH], |
| 561 | + stdout=StringIO(), |
| 562 | + stderr=StringIO()))[0].stdout.getvalue().strip() |
| 563 | + self.assertIn("[client]", ganesha_conf_debug_enabled) |
| 564 | + self.assertIn("debug client = 20", ganesha_conf_debug_enabled) |
| 565 | + self.assertIn(f"log file = /var/log/ceph/ceph-client.nfs.{cluster_name}.log", |
| 566 | + ganesha_conf_debug_enabled) |
| 567 | + |
| 568 | + def check_libcephfs_log(): |
| 569 | + LIBCEPHFS_LOG_FILE_PATH = f"/var/log/ceph/{fsid}/ceph-client.nfs.{cluster_name}.log" |
| 570 | + libcephfs_log = (self.ctx.cluster.run(args=["sudo", "cat", |
| 571 | + LIBCEPHFS_LOG_FILE_PATH, |
| 572 | + Raw("|"), "tail", "-n", "2"], |
| 573 | + check_status=False, |
| 574 | + stdout=StringIO(), |
| 575 | + stderr=StringIO())) |
| 576 | + if libcephfs_log[0].returncode != 0: |
| 577 | + log.debug(f"failed to read {LIBCEPHFS_LOG_FILE_PATH}, retrying") |
| 578 | + return False |
| 579 | + if len(libcephfs_log[0].stdout.getvalue().strip()) == 0: |
| 580 | + log.debug(f"log file {LIBCEPHFS_LOG_FILE_PATH} empty, retrying") |
| 581 | + return False |
| 582 | + return True |
| 583 | + |
| 584 | + # usually appears in no time, sometimes might take a second or two for the log file to appear |
| 585 | + self.wait_until_true(check_libcephfs_log, timeout=60) |
| 586 | + |
| 587 | + return original_ganesha_spec, GANESHA_CONF_FILE_PATH, original_ganesha_conf |
| 588 | + |
| 589 | + def disable_libcephfs_logging(self, cluster_name, ganesha_spec, conf_path, ganesha_conf): |
| 590 | + """ |
| 591 | + disable ceph client logs by reverting back to the primary ganesha spec and removing debug level |
| 592 | + and file path from /var/lib/ceph/{fsid}/{ganesha_daemon}/config |
| 593 | + :param cluster_name: nfs cluster name |
| 594 | + :param ganesha_spec: primary spec (spec prior to adding debug volume mount) |
| 595 | + :param conf_path: ganesha conf file path |
| 596 | + :param ganesha_conf: primary ganesha conf (conf prior to adding debug level and path) |
| 597 | + """ |
| 598 | + self.apply_ganesha_spec(ganesha_spec) |
| 599 | + |
| 600 | + # remove ceph client debug info from ganesha conf |
| 601 | + conf_content = (self.ctx.cluster.run(args=["sudo", "cat", conf_path], |
| 602 | + stdout=StringIO(), |
| 603 | + stderr=StringIO()))[0].stdout.getvalue().strip() |
| 604 | + if "[client]" in conf_content: |
| 605 | + self.ctx.cluster.run(args=['sudo', 'truncate', Raw("-s"), "0", conf_path]) |
| 606 | + self._sys_cmd(["echo", Raw(f'"{ganesha_conf}"'), Raw("|"), "sudo", "tee", conf_path]) |
| 607 | + default_conf = (self.ctx.cluster.run(args=["sudo", "cat", conf_path], |
| 608 | + stdout=StringIO(), |
| 609 | + stderr=StringIO()))[0].stdout.getvalue().strip() |
| 610 | + self.assertNotIn("[client]", default_conf) |
| 611 | + self.assertNotIn("debug client = 20", default_conf) |
| 612 | + self.assertNotIn(f"log file = /var/log/ceph/ceph-client.nfs.{cluster_name}.log", default_conf) |
| 613 | + # restart ganesha daemon for the changes to take effect |
| 614 | + self._orch_cmd("restart", f"nfs.{cluster_name}") |
| 615 | + |
500 | 616 | def test_create_and_delete_cluster(self): |
501 | 617 | ''' |
502 | 618 | Test successful creation and deletion of the nfs cluster. |
@@ -681,11 +797,13 @@ def test_async_io_fio(self): |
681 | 797 | Test async io using fio. Expect completion without hang or crash |
682 | 798 | ''' |
683 | 799 | self._test_create_cluster() |
| 800 | + ganesha_spec, conf_path, conf = self.enable_libcephfs_logging(self.cluster_id) |
684 | 801 | self._create_export(export_id='1', create_fs=True, |
685 | 802 | extra_cmd=['--pseudo-path', self.pseudo_path]) |
686 | 803 | port, ip = self._get_port_ip_info() |
687 | 804 | self._check_nfs_cluster_status('running', 'NFS Ganesha cluster restart failed') |
688 | 805 | self._test_fio(self.pseudo_path, port, ip) |
| 806 | + self.disable_libcephfs_logging(self.cluster_id, ganesha_spec, conf_path, conf) |
689 | 807 | self._test_delete_cluster() |
690 | 808 |
|
691 | 809 | def test_cluster_info(self): |
|
0 commit comments