Skip to content

Commit 67a57d9

Browse files
[SWDEV-552020/SWDEV-563971] Fail memory partition tests if ASIC supports memory partitions & kmod not installed (#2695)
* [SWDEV-552020/SWDEV-563971] Fail memory partition tests if ASIC supports memory partitions. Updates: (1) now provides more details when kmod is missing and explains how to install it; (2) cleaned up the comparison logic so it is handled consistently regardless of whether the memory partition set was successful. * Change the kmod install check to be a one-time check.
1 parent 807b3ac commit 67a57d9

File tree

1 file changed

+124
-10
lines changed

1 file changed

+124
-10
lines changed

projects/amdsmi/tests/amd_smi_test/functional/memorypartition_read_write.cc

Lines changed: 124 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@
2020
* THE SOFTWARE.
2121
*/
2222

23-
#include <cstdint>
23+
#include <gtest/gtest.h>
24+
#include <sys/stat.h>
2425

2526
#include <cstdint>
2627
#include <iostream>
2728
#include <string>
2829
#include <map>
2930
#include <limits>
3031

31-
#include <gtest/gtest.h>
3232
#include "../test_base.h"
3333
#include "../test_common.h"
3434
#include "amd_smi/amdsmi.h"
@@ -92,9 +92,31 @@ void ReloadDriverWithMessages(bool isVerbose,
9292
// 2) Containers must run with extra parameters:
9393
// --cap-add=SYS_ADMIN -v /lib/modules:/lib/modules
9494
// See: https://rocm.docs.amd.com/projects/amdsmi/en/latest/how-to/setup-docker-container.html
95-
#if 0
96-
ASSERT_EQ(driver_reload_status, AMDSMI_STATUS_SUCCESS);
97-
#endif
95+
// 3) Require kmod to be installed within the docker container
96+
// (if ASIC supports memory partitions)
97+
}
98+
99+
// Helper function to check if kmod (the package providing `modprobe`) is
// available on this system.
//
// Returns true when an executable regular file named `modprobe` exists at one
// of the well-known locations probed below, false otherwise.
//
// The filesystem is probed only once: the result is computed on the first call
// (function-local static initialization) and the cached value is returned on
// every later call.
bool IsKmodInstalled() {
  // One time check for modprobe existence
  static const bool installed = [] {
    // Common install locations for modprobe. A plain constexpr array is used
    // so this helper does not depend on <array> being included.
    constexpr const char *kModprobePaths[] = {
      "/usr/sbin/modprobe",
      "/sbin/modprobe",
      "/usr/bin/modprobe",
      "/bin/modprobe"
    };
    // Any execute permission bit (owner, group or other) counts.
    constexpr mode_t kAnyExecBit = S_IXUSR | S_IXGRP | S_IXOTH;

    for (const char *path : kModprobePaths) {
      struct stat st{};
      // stat() follows symbolic links, so a modprobe symlink is judged by
      // its target.
      if (stat(path, &st) != 0) {
        continue;  // path does not exist or cannot be inspected
      }
      // Require a regular file so that a directory (whose mode normally has
      // the execute/search bit set) is not mistaken for the binary.
      if (S_ISREG(st.st_mode) && (st.st_mode & kAnyExecBit) != 0) {
        return true;
      }
    }
    return false;
  }();
  return installed;
}
99121

100122
TestMemoryPartitionReadWrite::TestMemoryPartitionReadWrite() : TestBase() {
@@ -420,6 +442,7 @@ void TestMemoryPartitionReadWrite::Run(void) {
420442
uint32_t num_devices_to_test = current_num_devices;
421443
for (uint32_t dv_ind = 0; dv_ind < num_devices_to_test; ++dv_ind) {
422444
bool wasSetSuccess = false;
445+
bool isNewNPSMode = false;
423446
if (dv_ind != 0) {
424447
IF_VERB(STANDARD) {
425448
std::cout << std::endl;
@@ -632,6 +655,20 @@ void TestMemoryPartitionReadWrite::Run(void) {
632655
}
633656
ASSERT_TRUE((ret_caps == AMDSMI_STATUS_NOT_SUPPORTED) ||
634657
(ret_caps == AMDSMI_STATUS_SUCCESS));
658+
// Save original memory partition
659+
amdsmi_memory_partition_type_t saved_orig_memory_partition = current_memory_config.mp_mode;
660+
// Detect if we're changing to a different NPS mode
661+
if (ret_caps == AMDSMI_STATUS_SUCCESS) {
662+
isNewNPSMode = (current_memory_config.mp_mode != new_memory_partition);
663+
IF_VERB(STANDARD) {
664+
std::cout << "\t**" << "NPS mode change detected: "
665+
<< (isNewNPSMode ? "YES" : "NO")
666+
<< " (current (Saved): |"
667+
<< memoryPartitionString(current_memory_config.mp_mode)
668+
<< "| -> Requested: |" << memoryPartitionString(new_memory_partition) << "|)"
669+
<< std::endl;
670+
}
671+
}
635672

636673
ret_set = amdsmi_set_gpu_memory_partition_mode(processor_handles_[dv_ind],
637674
new_memory_partition);
@@ -682,6 +719,30 @@ void TestMemoryPartitionReadWrite::Run(void) {
682719
if (driver_reload_status == AMDSMI_STATUS_SUCCESS) {
683720
wasSetSuccess = true;
684721
}
722+
if (driver_reload_status == AMDSMI_STATUS_AMDGPU_RESTART_ERR) {
723+
// Check kmod availability for driver reload operations
724+
// This is required in order to fully test changing memory partitions works
725+
726+
bool kmod_available = IsKmodInstalled();
727+
728+
IF_VERB(STANDARD) {
729+
std::cout << "\t** kmod (modprobe) installed: " << (kmod_available ? "YES" : "NO")
730+
<< std::endl;
731+
}
732+
733+
if (!kmod_available) {
734+
IF_VERB(STANDARD) {
735+
std::cout << "** ERROR: kmod is not installed. "
736+
<< "This device has been detected as supporting memory partitions. "
737+
<< "\n** Memory partition tests require kmod for "
738+
<< "driver reload operations to fully validate functionality. "
739+
<< "\n** Install with: apt-get install kmod (Debian/Ubuntu) "
740+
<< "or dnf install kmod (RHEL) **"
741+
<< std::endl;
742+
}
743+
ASSERT_TRUE(IsKmodInstalled());
744+
}
745+
}
685746
}
686747

687748
ret = amdsmi_get_gpu_memory_partition_config(processor_handles_[dv_ind],
@@ -700,17 +761,70 @@ void TestMemoryPartitionReadWrite::Run(void) {
700761
<< memoryPartitionString(current_memory_config.mp_mode)
701762
<< std::endl;
702763
}
703-
if (wasSetSuccess) {
764+
IF_VERB(STANDARD) {
765+
std::cout << "\t**WasSetSuccess (Set Memory Partition AND Driver reload was successful): "
766+
<< (wasSetSuccess ? "true" : "false")
767+
<< ", isNewNPSMode: " << (isNewNPSMode ? "true" : "false")
768+
<< "\n\t**Saved Memory Partition: "
769+
<< memoryPartitionString(saved_orig_memory_partition)
770+
<< "\n\t**Current Memory Partition: "
771+
<< memoryPartitionString(current_memory_config.mp_mode)
772+
<< "\n\t**Requested Memory Partition: "
773+
<< memoryPartitionString(new_memory_partition)
774+
<< std::endl;
775+
}
776+
777+
if (wasSetSuccess) { // driver reload was successful
704778
ASSERT_EQ(AMDSMI_STATUS_SUCCESS, ret_set);
705-
ASSERT_STREQ(memoryPartitionString(new_memory_partition).c_str(),
706-
memoryPartitionString(current_memory_config.mp_mode).c_str());
707779
CHK_ERR_ASRT(ret_set)
780+
if (isNewNPSMode) {
781+
IF_VERB(STANDARD) {
782+
std::cout << "\t**Since driver reload (and set) was successful and a new NPS mode "
783+
<< "was requested; current memory partition ("
784+
<< memoryPartitionString(current_memory_config.mp_mode)
785+
<< ") is expected to be different than original ("
786+
<< memoryPartitionString(saved_orig_memory_partition)
787+
<< ") and equal to requested ("
788+
<< memoryPartitionString(new_memory_partition) << ")"
789+
<< std::endl;
790+
}
791+
ASSERT_STRNE(memoryPartitionString(current_memory_config.mp_mode).c_str(),
792+
memoryPartitionString(saved_orig_memory_partition).c_str());
793+
ASSERT_STREQ(memoryPartitionString(current_memory_config.mp_mode).c_str(),
794+
memoryPartitionString(new_memory_partition).c_str());
795+
} else {
796+
// if driver reload (and set) was successful, but not a new NPS mode
797+
IF_VERB(STANDARD) {
798+
std::cout << "\t**"
799+
<< "Since driver reload (and set) was successful, but no new NPS mode "
800+
<< "was requested; current memory partition ("
801+
<< memoryPartitionString(current_memory_config.mp_mode)
802+
<< ") is expected to be equal to original ("
803+
<< memoryPartitionString(saved_orig_memory_partition)
804+
<< ") and equal to requested ("
805+
<< memoryPartitionString(new_memory_partition) << ")"
806+
<< std::endl;
807+
}
808+
ASSERT_STREQ(memoryPartitionString(current_memory_config.mp_mode).c_str(),
809+
memoryPartitionString(saved_orig_memory_partition).c_str());
810+
ASSERT_STREQ(memoryPartitionString(current_memory_config.mp_mode).c_str(),
811+
memoryPartitionString(new_memory_partition).c_str());
812+
}
708813
} else {
709814
ASSERT_TRUE(ret_set == AMDSMI_STATUS_SUCCESS
710815
|| ret_set == AMDSMI_STATUS_INVAL
711816
|| ret_set == AMDSMI_STATUS_NOT_SUPPORTED);
712-
ASSERT_STRNE(memoryPartitionString(new_memory_partition).c_str(),
713-
memoryPartitionString(current_memory_config.mp_mode).c_str());
817+
// Since driver reload or set memory partition was not successful
818+
// we don't care about comparison
819+
// There are times when these can be equal or not
820+
IF_VERB(STANDARD) {
821+
std::cout << "\t**Since driver reload or set memory partition was NOT successful, "
822+
<< "we cannot guarantee current memory partition ("
823+
<< memoryPartitionString(current_memory_config.mp_mode)
824+
<< ") will or will not match requested ("
825+
<< memoryPartitionString(new_memory_partition) << ")"
826+
<< std::endl;
827+
}
714828
}
715829
} // END MEMORY PARTITION FOR LOOP
716830

0 commit comments

Comments
 (0)