2020 * THE SOFTWARE.
2121 */
2222
23- #include < cstdint>
23+ #include < gtest/gtest.h>
24+ #include < sys/stat.h>
2425
2526#include < cstdint>
2627#include < iostream>
2728#include < string>
2829#include < map>
2930#include < limits>
3031
31- #include < gtest/gtest.h>
3232#include " ../test_base.h"
3333#include " ../test_common.h"
3434#include " amd_smi/amdsmi.h"
@@ -92,9 +92,31 @@ void ReloadDriverWithMessages(bool isVerbose,
9292 // 2) Containers must run with extra parameters:
9393 // --cap-add=SYS_ADMIN -v /lib/modules:/lib/modules
9494 // See: https://rocm.docs.amd.com/projects/amdsmi/en/latest/how-to/setup-docker-container.html
95- #if 0
96- ASSERT_EQ(driver_reload_status, AMDSMI_STATUS_SUCCESS);
97- #endif
95+ // 3) Require kmod to be installed within the docker container
96+ // (if ASIC supports memory partitions)
97+ }
98+
// Reports whether kmod's `modprobe` executable is present on this system.
//
// The filesystem is probed only on the first call; the outcome is cached in a
// function-local static and returned unchanged by every later call.
//
// Returns true when at least one of the well-known modprobe locations
// resolves to a regular file that has an execute permission bit set,
// false otherwise.
bool IsKmodInstalled() {
  static const bool installed = [] {
    // Absolute locations probed for modprobe, in order.
    constexpr const char* kModprobePaths[] = {
        "/usr/sbin/modprobe",
        "/sbin/modprobe",
        "/usr/bin/modprobe",
        "/bin/modprobe",
    };
    constexpr mode_t kAnyExecBit = S_IXUSR | S_IXGRP | S_IXOTH;

    for (const char* path : kModprobePaths) {
      struct stat st {};
      if (stat(path, &st) != 0) {
        continue;  // Path is missing or not reachable; try the next one.
      }
      // Require a regular file so that a directory carrying the search (x)
      // bit at one of these paths is not mistaken for the binary.
      if (S_ISREG(st.st_mode) && (st.st_mode & kAnyExecBit) != 0) {
        return true;
      }
    }
    return false;
  }();
  return installed;
}
99121
100122TestMemoryPartitionReadWrite::TestMemoryPartitionReadWrite () : TestBase() {
@@ -420,6 +442,7 @@ void TestMemoryPartitionReadWrite::Run(void) {
420442 uint32_t num_devices_to_test = current_num_devices;
421443 for (uint32_t dv_ind = 0 ; dv_ind < num_devices_to_test; ++dv_ind) {
422444 bool wasSetSuccess = false ;
445+ bool isNewNPSMode = false ;
423446 if (dv_ind != 0 ) {
424447 IF_VERB (STANDARD) {
425448 std::cout << std::endl;
@@ -632,6 +655,20 @@ void TestMemoryPartitionReadWrite::Run(void) {
632655 }
633656 ASSERT_TRUE ((ret_caps == AMDSMI_STATUS_NOT_SUPPORTED) ||
634657 (ret_caps == AMDSMI_STATUS_SUCCESS));
658+ // Save original memory partition
659+ amdsmi_memory_partition_type_t saved_orig_memory_partition = current_memory_config.mp_mode ;
660+ // Detect if we're changing to a different NPS mode
661+ if (ret_caps == AMDSMI_STATUS_SUCCESS) {
662+ isNewNPSMode = (current_memory_config.mp_mode != new_memory_partition);
663+ IF_VERB (STANDARD) {
664+ std::cout << " \t **" << " NPS mode change detected: "
665+ << (isNewNPSMode ? " YES" : " NO" )
666+ << " (current (Saved): |"
667+ << memoryPartitionString (current_memory_config.mp_mode )
668+ << " | -> Requested: |" << memoryPartitionString (new_memory_partition) << " |)"
669+ << std::endl;
670+ }
671+ }
635672
636673 ret_set = amdsmi_set_gpu_memory_partition_mode (processor_handles_[dv_ind],
637674 new_memory_partition);
@@ -682,6 +719,30 @@ void TestMemoryPartitionReadWrite::Run(void) {
682719 if (driver_reload_status == AMDSMI_STATUS_SUCCESS) {
683720 wasSetSuccess = true ;
684721 }
722+ if (driver_reload_status == AMDSMI_STATUS_AMDGPU_RESTART_ERR) {
723+ // Check kmod availability for driver reload operations
724+ // This is required in order to fully test changing memory partitions works
725+
726+ bool kmod_available = IsKmodInstalled ();
727+
728+ IF_VERB (STANDARD) {
729+ std::cout << " \t ** kmod (modprobe) installed: " << (kmod_available ? " YES" : " NO" )
730+ << std::endl;
731+ }
732+
733+ if (!kmod_available) {
734+ IF_VERB (STANDARD) {
735+ std::cout << " ** ERROR: kmod is not installed. "
736+ << " This device has been detected as supporting memory partitions. "
737+ << " \n ** Memory partition tests require kmod for "
738+ << " driver reload operations to fully validate functionality. "
739+ << " \n ** Install with: apt-get install kmod (Debian/Ubuntu) "
740+ << " or dnf install kmod (RHEL) **"
741+ << std::endl;
742+ }
743+ ASSERT_TRUE (IsKmodInstalled ());
744+ }
745+ }
685746 }
686747
687748 ret = amdsmi_get_gpu_memory_partition_config (processor_handles_[dv_ind],
@@ -700,17 +761,70 @@ void TestMemoryPartitionReadWrite::Run(void) {
700761 << memoryPartitionString (current_memory_config.mp_mode )
701762 << std::endl;
702763 }
703- if (wasSetSuccess) {
764+ IF_VERB (STANDARD) {
765+ std::cout << " \t **WasSetSuccess (Set Memory Partition AND Driver reload was successful): "
766+ << (wasSetSuccess ? " true" : " false" )
767+ << " , isNewNPSMode: " << (isNewNPSMode ? " true" : " false" )
768+ << " \n\t **Saved Memory Partition: "
769+ << memoryPartitionString (saved_orig_memory_partition)
770+ << " \n\t **Current Memory Partition: "
771+ << memoryPartitionString (current_memory_config.mp_mode )
772+ << " \n\t **Requested Memory Partition: "
773+ << memoryPartitionString (new_memory_partition)
774+ << std::endl;
775+ }
776+
777+ if (wasSetSuccess) { // driver reload was successful
704778 ASSERT_EQ (AMDSMI_STATUS_SUCCESS, ret_set);
705- ASSERT_STREQ (memoryPartitionString (new_memory_partition).c_str (),
706- memoryPartitionString (current_memory_config.mp_mode ).c_str ());
707779 CHK_ERR_ASRT (ret_set)
780+ if (isNewNPSMode) {
781+ IF_VERB (STANDARD) {
782+ std::cout << " \t **Since driver reload (and set) was successful and a new NPS mode "
783+ << " was requested; current memory partition ("
784+ << memoryPartitionString (current_memory_config.mp_mode )
785+ << " ) is expected to be different than original ("
786+ << memoryPartitionString (saved_orig_memory_partition)
787+ << " ) and equal to requested ("
788+ << memoryPartitionString (new_memory_partition) << " )"
789+ << std::endl;
790+ }
791+ ASSERT_STRNE (memoryPartitionString (current_memory_config.mp_mode ).c_str (),
792+ memoryPartitionString (saved_orig_memory_partition).c_str ());
793+ ASSERT_STREQ (memoryPartitionString (current_memory_config.mp_mode ).c_str (),
794+ memoryPartitionString (new_memory_partition).c_str ());
795+ } else {
796+ // if driver reload (and set) was successful, but not a new NPS mode
797+ IF_VERB (STANDARD) {
798+ std::cout << " \t **"
799+ << " Since driver reload (and set) was successful, but no new NPS mode "
800+ << " was requested; current memory partition ("
801+ << memoryPartitionString (current_memory_config.mp_mode )
802+ << " ) is expected to be equal to original ("
803+ << memoryPartitionString (saved_orig_memory_partition)
804+ << " ) and equal to requested ("
805+ << memoryPartitionString (new_memory_partition) << " )"
806+ << std::endl;
807+ }
808+ ASSERT_STREQ (memoryPartitionString (current_memory_config.mp_mode ).c_str (),
809+ memoryPartitionString (saved_orig_memory_partition).c_str ());
810+ ASSERT_STREQ (memoryPartitionString (current_memory_config.mp_mode ).c_str (),
811+ memoryPartitionString (new_memory_partition).c_str ());
812+ }
708813 } else {
709814 ASSERT_TRUE (ret_set == AMDSMI_STATUS_SUCCESS
710815 || ret_set == AMDSMI_STATUS_INVAL
711816 || ret_set == AMDSMI_STATUS_NOT_SUPPORTED);
712- ASSERT_STRNE (memoryPartitionString (new_memory_partition).c_str (),
713- memoryPartitionString (current_memory_config.mp_mode ).c_str ());
817+ // Since driver reload or set memory partition was not successful
818+ // we don't care about comparison
819+ // There are times when these can be equal or not
820+ IF_VERB (STANDARD) {
821+ std::cout << " \t **Since driver reload or set memory partition was NOT successful, "
822+ << " we cannot guarantee current memory partition ("
823+ << memoryPartitionString (current_memory_config.mp_mode )
824+ << " ) will or will not match requested ("
825+ << memoryPartitionString (new_memory_partition) << " )"
826+ << std::endl;
827+ }
714828 }
715829 } // END MEMORY PARTITION FOR LOOP
716830
0 commit comments