From e03fedc3bd15eb4eada4c1620fdcfc3dd9f3c7ed Mon Sep 17 00:00:00 2001 From: Oskar Hubert Weber Date: Thu, 13 Nov 2025 21:52:33 +0100 Subject: [PATCH] Add indirect access memory stress test Signed-off-by: Oskar Hubert Weber --- .../common/src/stress_common_func.cpp | 4 +- .../test_multiple_memory_allocations.cl | 77 ++++++- .../test_multiple_memory_allocations.spv | Bin 8252 -> 24344 bytes .../src/test_memory_allocation.cpp | 188 +++++++++++++----- 4 files changed, 216 insertions(+), 53 deletions(-) diff --git a/stress_tests/common/src/stress_common_func.cpp b/stress_tests/common/src/stress_common_func.cpp index d028cffd7..c940d7891 100644 --- a/stress_tests/common/src/stress_common_func.cpp +++ b/stress_tests/common/src/stress_common_func.cpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2019-2023 Intel Corporation + * Copyright (C) 2019-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -43,7 +43,7 @@ uint64_t total_available_host_memory() { stat.dwLength = sizeof(stat); GlobalMemoryStatusEx(&stat); - return stat.ullAvailVirtual; + return stat.ullAvailPhys; } uint64_t get_page_size() { SYSTEM_INFO si; diff --git a/stress_tests/test_memory_allocation/kernels/test_multiple_memory_allocations.cl b/stress_tests/test_memory_allocation/kernels/test_multiple_memory_allocations.cl index 5cc7d8f9e..0779b40e4 100644 --- a/stress_tests/test_memory_allocation/kernels/test_multiple_memory_allocations.cl +++ b/stress_tests/test_memory_allocation/kernels/test_multiple_memory_allocations.cl @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -86,3 +86,78 @@ kernel void test_device_memory10_unit_size4(__global uint *src, __global uint *d size_t tid = get_global_id(0); dst[tid] = src[tid]; } + +struct buffer { + uint *data; +}; + +kernel void test_device_memory1_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory2_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory3_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory4_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory5_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory6_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory7_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory8_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory9_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory10_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + diff --git a/stress_tests/test_memory_allocation/kernels/test_multiple_memory_allocations.spv b/stress_tests/test_memory_allocation/kernels/test_multiple_memory_allocations.spv index fc0cb5370d8a9acc19a8bf194cef150314666e56..e21b3720827d6dd015d19361372a7cf59c2a7ece 100644 GIT binary patch literal 24344 zcmb812b^71)x{soB!LhJB=nj9p@a|!2`SXj5<>4Ch9Q~2KpK+?5`qXL* z)E2AtuI;^Gt-hA1Em&(M(%!4sdl&m6#lA+dugSg$tOeM2p4Bm9s~v{V?w(R(UJKo| z`qjVIkLk7Av5>kuW_Pzu={Tx$a!1?rj_EVI=8kBaGo!P+ZFc7|9V2S>^UOa!%io}q ze{z;TvXXy#mOrYJ|J^Kq!%F^nS^nrs{zX~-m`eWTS^n5c{?%FjxJv%@S^kLiV}rgw z{CfQ~&u`F2HNU<$XZhjvP58HE`Qi0V_`k{W!|R*y@6PhW>znW&$nwMMoA4ja^26(! z@Sn)?!|R*ypU(2b>znYO&+_BzJF4m5<12Z7Tjz`^on0N1yZKgV;{Pqn-=LEJc9uV~ zlK(-LKdO@d?<{}AO8ysF{^&}63pc#-{cTJof8i{DY$d;cmOrkNzf_jb>l!^>YwyjKsOd{}$e&|y9Q%Z$ZqE0$-p9XY3c+W$9b z*svZAE0y~*t>OMKqO4)%@{B4CO@pjqm2!VZL%8Q;Gz==wsM63h$QlNh`!gECeJrD4 z)$)ug4NZfrVYPC9MnkwaW;Co`o>8TtX^=GxDfeeIg!^qq!y4roRT`QGS;Ly;{)~oj z56@^=t30DhL(?E@Si9Vx(GY$vWHbydPSG{_p(DfeeIgr7AT4a3Sasx&kW>d`Qu zwr;sUqaptMN<`G}b&J=Am*-UJXdZ?T-7D8C_vdtk_dUbsbk67=I;`h^>-Wm_%QGr9 z#6f;NMwI(A8p17ZR9V9Yj_GsM63h$Qm{*_h&SOTUE4*06E8KcgYs z;xZaGDbJ|V&@{*zHZAvOG=y7RM#E<18C4pZ23f=A<^GI@aEr@m7+;=IrJ-q1kA?xY zEz12F4e|Dth$!xrTbAck>1ZB?kUq6#YFm~2b2{d|CFSWs*PO}SgASQ9b!rE1FOyVv zP~I)-{o>-at;@43wKWbHCX4FYwkh{#v^74j%#XGS<=Iu*!T}9!6U+S>ZH>=H^P_Fs z^6V;Y;edv=N#*{Gw#MhM`O&srd3Keya6m)b_T~PJw#H|^`O&sRd3Keya6m)bj^+N0 zw#Mhn`O&shd3Keya6m)b&gK4$w#H}K`O&sZd3Keya6m)buI2uWw#Mh?`O&spd3Key za6m)b?&bcBw#H}l`O&sVd3Keya6m)bp5^|GHg4N~9`HYk)4g}E^6V;Y@qoIx7M=~a za1Va<+*Y0!>FIH;*48$AR%h2yZClRioYp;Q#>8ne4{4t^X~t1AC%1QZ&YUrcN6PT} z@_)H!Eb+O7cr8q=)n<21uE+g|r_AmqZmlg!+%coOYc9L;{_K<6r%huc@5NVEtF?D^ zwa@LGasX68 z>KIBMb*w|4>R2~m)-fEctRuW-E_AF1&pOs8whqmSI!2O59XvuU=@=a_>lg!8))C$| z7dpnmvyO4Z*0B+B)UgS9)Uhdf>c{2*vySm#WgX$IbD?7kc-FBcv2|=k9Cd6%9(7D0 zPjze?Fzc8ER@M>TJ{LN+gJ&Jv6I;g)#8JmimtMCvnuVH+j^t4|%F%zkpfC{$OPt@h$WS?lUd51K?T5fyCBv5OLITFnQF`PM+$R z95CxR6fC17JaXu{pB@I!Iy#B1<8b1rLpRc>V>)@NV`jjtV-{FhN4THz4^Ms^3C}vZ zh^=Ebanvz~JnA@#Jk_BayfN$0&xNv%a6c_{90SifjwQB^j?MLLdUn@S;tAl)^Rd%)Nv|#)NvYls^j#4S;rY*WgX#uTIe_vo^_l> zY#rYrjyldJk2=mFPj#FdFzYxEtgIv4PYWIA!?TVHh^^y7;;7>y@~Got@>IvA0ke+F zz{)zp{j|_=IXvsQg4jB)B#t_+B9A(*CQo%-8!+p*4y>#r+)oP~KZIu;*ArXE4a8B$ zkIAEspOB|IZVZ@p+yqwE5$>mjj-SD^j-L}-$IZl1$1UVh$F1b4j@tre9k+v(b;SFr zewW!&y91te{EFB*eoY*8+({mF+(n-1_+7wybo8#>4c7C_VtCIZ4063+yq`UzZ{AyP z%JqA}1MKK)o7LT=TUs4^kR6>(*h9iPXU}Tyo_tta=M;FYwTHpt*YF5AZ)0b5&75AJ z^C)@q*U(z~z2>xc>&Hc%^8|a*qc5qBJ<0CA4eD5H?J0KkjV$t~*`r_2kh_O(f%=?h z1GZru>sxz{z3fZlkqG5_@AK@bFE4`UWu4CT5n@35!7ybG54@}987m-oS< zFaIQuzI;ILuaB_q%ZKdImygL)Up@hIpMCjP(Pv*iBagm(9(wG{e*j%=2ZP&}UzU6@B()J@V+w`k}|ZY!EQ}G7_xpOXKmI zeHjHW^<_h_)R)o15?{uEMPJ5}M_tn5qU(Vu;p05A1rB3SCnw!#u$CV@p?wj++z+f zb{rUAFJE~}?d!yRo%Q&HfS(xfQv!Z!z|RQynE^jL;O7MV{D5B&@QVX}Nx-iN_>}>_ zCg9fw{Dy%4DBw2+{HB2aBH*_K{Puv~5%4<$epkTn3HZGMe=y(=1^lsqKOXQW1O8ON zp9%P%1O7t5Uz~^cs=eBSH+?2~t*73Ey)LZj`?*)`@0s2=GQDqRdf&A&_YQeI z;aOSWkFVs1XIgvd=~zw!F&2Ng^FS?u}w z;q?vtxt09z`Ud`qmHhDf2L2h9{P6k){&|)B@cIV6o>%6--thVczMd=Q`Qh~qd_Dim z^TX>K_xm)Q!-!AS@QhmXD#OI`9q%H{0zeU z{@kSU{N~T*<`1sqH-Ba|e`F=U`Lmq)`h1hu)BG97e7*0_^PAuMo3Ho7d4BVIV)2*a zIU7F$YU7o#l=op@sGjxY`n~_4B43BP^2dsN8S)ll&CouGyp(m}qcg;r$}Wy@UE zi>JPg$ysrMdEB5ZA6`iu-e^ zrZoXfvli#lJk``MUzXq!t{!PIcL1@@6L&NEm5XcM7qG{2S^Ge0T2cFwGV7nzgS+EZmyRT^&s4i7V~O#WgS0K7^W9)V>C}wXaDm?Q5}H z``W}g?L)y;)3Tm*h&4|&Yad1|+?vc?7fk1gEA7g~H80h!_c>Zo`+DTozCN+E>nFdp z_k3pUnK=mD-|zbDs+yMd=qHNisb=l^tSa1^%pDD;^Td^Q<>H!`YS()%t*Cu0xwVfY zmiCRQVrk!j z-P(5~&S~EXTs1B0*_l}LRI~P7h=p5|xx0etJaMI6xwz(~-!r>W(~8=6C%5)Jh^2i` zc5B~@IH!GYaMiS|XCGqCQ_b4H!`YTuulR@8m~xwRiiEbRxe zTYDREPW!>&s%cqIJF(^+%w_F|5DT{^b0>r8yo0%drrZtsIdOO$;3vnm0 zv>(oH?MD#jv`+(9O-p(-YdV+asiyuZ>@&E8OH(VCxig7%p19JkTwL=~?X#$9MeRqD z`|Ic;miF1~*4|B=(>@1WH7)Boidgehv-YnM3%4e7j|S6u;!3-6am`D$&!wgnwI4%n z?Z*;J`*G~n{&nJ<_HTfzre!_fB-T9Dto?Xm;nrmC31B);TxnN+JePT?_7kaTMeW}r zxAv2WrTt`fYd?iJr~OoL)wHbVG-AzD&Dy_BEZmyRJsnKvi7V~O#WgS0eg-wIsQpZG zYd?!v+P}kY?cXKNX+IlWH7)BohgkDev-a;13%4e7&jr(Y;!3-6am`D$pGQqAYCoUc z+AkoM_6ymq{rki@?H7Tore!@B6KkGo)_w`GaBDL6QZSt-uCyx`*Su8wWz@8y_RGnw z{R(1fzmnbBe?XkmeigWCTGn$lvF52}?bi?swHe+;ggmi7FESo2h~_MZ|9w1oum-ODt z{y>NyEaEBb4{_=1c$oc2ej;k-BL%MW)%z%y`h`#6=jh|?zvmMESaIoVxQF-;T&ii# zLR_Bt1hLNY%qNMZL%3%?MJzm>`A0C-Jkx&v2`uOL)8MLU={)IrhD+L1v)_L%aLsMy zGWS_xohh#TRxYl2so&30(~5pSPj2sDC}R2jBA5JriQV43T;TTR6>{|pw>Pg63s1dy z4NNtAng1ddp3eL?m};JBzdr@b`TZHVYFhU2b7E;z&3^xf zSh#&O_rG8|Q(XD2TwL=~zrUcS75)B_+}?jx#PVAYtK|0r?Dl3sV&V2if3u~2;r6DL zSa|A9FEG{Yjc4`-i!=L>ONVgJT!>hBI&)z#)jZRF_XW%Oy$HB!TK2G?xLm5)??s7) z+edR31Jjw}%5UZ3nwR>m=hRx!@5RCFxBf;_j9r9-%9E>A2xow)*-YMyDoR|L!Xy%M-;TJ~^da%oe|ey>6- z+&-E+2ux>+E5DVCYhLR2U}{>??^VI<_i9BfzgH)g-$U5#%^JkQ?f06*>KATr)*=?3 zdb2i|YWBu6hl0hK>yS%_aL*h@EIggLE|_YbX}^bq<@{a`Ts19wxIVeGsb;@N5DT}D z=57F{GsTtP%EdJ=^;^Fu(TaYL0<+)xn>K%ck0zGiW7zG@SYqM!dmOR)h1;8rh=r%# zYz(HFz46RVz~anJ$)!WMXKqF;Je|2Ym};JBzsG~+{N4gwH7$F%CAqYzX1}*07H%KS z-5N}1iYvdBi)&u$_cqkDqTdt1?DxbX*7x_e#PWL*yS>?tSh)S(UNM(&d$R+v@YI_f z!Bn$1p1BiPoVhc(bO`s%U5JIJGj|13%`@%yZeTgTcL!HZ%O37QE^VsW?>&iy+edTv z0@In|%5UZ3nwR>m-_vPDzxM&N-}@G^{N9gPe(%q2Zw??9ZodyCR=;q2a}crc)SEUi z)$EOD9t;*|wv$VTaL+u1Sa>>fGMH+fX}_m{<^1jdS53K%)$I47#KP^Pxrc%2 zOmXG6a&gT|{qCfu75zRO%zhtH#PWL@vHYISZf|A~3%B1hiPbOM-pnEvo_cd6m}>UM zGrPdz%-Q79A>1>&iG`;#=YXl^nfCiAu$rkqjstW2bz*t^4fb(-F6>SHo5Xz!p6cn$pUxU|MnJ8DO4yCb4Fo z#qOEkAs#}Hc&eu}zYC@nXPyn_ndcB|=J(h=^IYQL^oXZ=I`cd*tvK_1FweYzSTiqV z_ss9t>pX{JS3RA15m-9&;(AZd_rN8@nt3U^XI@4;o+j~BPiI~ZrWJKw0cM?75^LrU z*gf+q;z{&~r+PZ`YA~%h^BOSEyp~uquVeSj9}@3Ik9ex5Gp`5JiZgEj^UNO+YvzyH zJ@Y5T`_m(y>gmj%f@w|V(wq+V8^IjkM68)VWB1IT6CYaeR8ME#45k%l{sPQ1Zz0yq zTiHGHm&CK^5l{7W=51hFapvt{o_PncX8wxZGk;Aymmcv{PiOuHOe@a36U;O3BG$~` zvU}$5h)<+PJk`^gcY|rgnfHKs=Doz4c^|uH-cNi6J>sdJ&U^q&E6#in%rhS%*35_5 zJ@XOb^XL&z^>pT=U|MnJV_=^7II(8_p4~J5KztcJ;;Ej_d;&}>&U_NgGoK>X%s;Yw z=AVeKqenc|)0t0$X~mh(fO+Phi8b?CcF%l{SkHaMQ$3yeJeXFT`2v_{zDTT@FR^>( z%fz?SBcAH%%vZp);?MtA!8B9PFJCKS{rrEOSo8nFK90Y%=?(T*;``avBd$2<8Gi%Q zif8;C%xAn&_mVx931cAxPU@pIJFBd$2<8E=DW#WUUk^BM0JvCeppSZBP??lb;L ztY>5D5m%h_j1R!H;u#-;`HYW>SZ91ptTR4g_Zj~p*7G3sh$~Kd#=pU|;u)WU`Hats zSZ91rtTX<@?lb;N++R4CxZxtDR*$&iq-W^AJFFGY(BF#t4E>F`&(Pn7 z>x@O&ea2$M+i-?@#1$t!L;tPf^bGw?xzEtwlKTw(4Y|%3$nGeUJZ_j;({^s0g=x@z+#&YaFV|n6E&QOoI;`ogJ11NTxg8%>k delta 1085 zcmYk4OK1~O6oyZlbQ&|6nIz&&(o7y|TTz6Gf{C@7q}m5A47#uzq0UA(LJ6ol-=G^I zQjbF26x|4QQCi$6gotlzZR;CcRCn$aSMCJwJ(D}V3_r|w&VSDN8D^>#Xjb)L$L218 z0O+8=7nszyO;)$#oY|)W7!YMefc1EJlZA&ZeVt#!C)O6p(Tnpv7P?8C7d@m z;6y4W?f8+B9S6x#F;rX)82G1KPH>F%Ow6lMtaNBMZMjvaaCdF_w2_e)U!ibw8QGD` z%8pxfx|Nl~IXT&Jza1`$t)Pz8yqw@sJDiHykfL-!9G(KsMtadv(xEs!18gW&_VOIC z9Cff?i8FY?t3BvwYP;~#E0n`F>%Ib<4)dAUfHhsryzwmj6M0J(yh1HvhZ69XA01*d zi{g3&BQb*lz2ip5$0eSGuV3~SIGTphm`2tWuT&1JzWqH*A?2dd?P7z;OuIPE)MvXm z%T&y`*kT&Vy12|VmUHnt)5*Mxzw_hiQ3ydN+t2wI z8c{=(L}%Sj{8pN2vqgM+x8OYSk$%BjiM2g~cM>o7JV1PPK=g-*2lom-MEuX^G2+Mj zME^MP*|OkM#5*d2&k+Ce`8@GVRrIeBpBNN;gLvb9!BfPmKHnq0b3pVT5FZ^9{E#?4 LEVxO$G<^C$1p3fj diff --git a/stress_tests/test_memory_allocation/src/test_memory_allocation.cpp b/stress_tests/test_memory_allocation/src/test_memory_allocation.cpp index 9cfcb3c37..f1ba65512 100644 --- a/stress_tests/test_memory_allocation/src/test_memory_allocation.cpp +++ b/stress_tests/test_memory_allocation/src/test_memory_allocation.cpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -25,11 +25,20 @@ using lzt::to_u32; class zeDriverMemoryAllocationStressTest : public ::testing::Test, public ::testing::WithParamInterface< - std::tuple> { + std::tuple> { protected: typedef uint32_t kernel_copy_unit_t; const size_t kernel_copy_unit_size = sizeof(kernel_copy_unit_t); + struct MemoryAllocationTestArguments : public TestArguments_t { + bool immediate; + bool indirect_access; + } test_arguments_; + + struct Buffer { + kernel_copy_unit_t *data; + }; + bool verify_results(kernel_copy_unit_t *allocation, uint64_t test_single_allocation_count) { for (uint64_t i = 0; i < test_single_allocation_count; i++) { @@ -42,6 +51,7 @@ class zeDriverMemoryAllocationStressTest } return false; } + void dispatch_kernels( const ze_device_handle_t device, ze_memory_type_t memory_type, ze_module_handle_t module_handle, @@ -51,10 +61,50 @@ class zeDriverMemoryAllocationStressTest const std::vector &test_kernel_names, uint32_t number_of_dispatch, uint64_t one_case_allocation_count, ze_context_handle_t context) { + auto cmd_bundle = lzt::create_command_bundle( + context, device, 0, ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, + ZE_COMMAND_QUEUE_PRIORITY_NORMAL, 0, 0, 0, test_arguments_.immediate); std::vector test_functions; - ze_command_list_handle_t command_list = - lzt::create_command_list(context, device, 0); + [[maybe_unused]] std::vector host_src_ptrs(number_of_dispatch); + [[maybe_unused]] std::vector host_dst_ptrs(number_of_dispatch); + [[maybe_unused]] kernel_copy_unit_t *src_allocation_ptrs = nullptr; + allocate_memory( + context, device, test_arguments_.memory_type, + number_of_dispatch * sizeof(void *), false); + [[maybe_unused]] kernel_copy_unit_t *dst_allocation_ptrs = nullptr; + allocate_memory( + context, device, test_arguments_.memory_type, + number_of_dispatch * sizeof(void *), false); + + if (test_arguments_.indirect_access) { + src_allocation_ptrs = allocate_memory( + context, device, test_arguments_.memory_type, + number_of_dispatch * sizeof(void *), false); + dst_allocation_ptrs = allocate_memory( + context, device, test_arguments_.memory_type, + number_of_dispatch * sizeof(void *), false); + for (uint32_t i = 0; i < number_of_dispatch; i++) { + host_src_ptrs[i].data = src_allocations[i]; + host_dst_ptrs[i].data = dst_allocations[i]; + } + + if (test_arguments_.memory_type == ZE_MEMORY_TYPE_DEVICE) { + lzt::append_memory_copy(cmd_bundle.list, src_allocation_ptrs, + host_src_ptrs.data(), + number_of_dispatch * sizeof(void *), nullptr); + lzt::append_memory_copy(cmd_bundle.list, dst_allocation_ptrs, + host_dst_ptrs.data(), + number_of_dispatch * sizeof(void *), nullptr); + lzt::append_barrier(cmd_bundle.list); + } else { + std::memcpy(src_allocation_ptrs, host_src_ptrs.data(), + number_of_dispatch * sizeof(void *)); + std::memcpy(dst_allocation_ptrs, host_dst_ptrs.data(), + number_of_dispatch * sizeof(void *)); + } + } + for (uint64_t dispatch_id = 0; dispatch_id < number_of_dispatch; dispatch_id++) { @@ -65,51 +115,76 @@ class zeDriverMemoryAllocationStressTest lzt::create_function(module_handle, test_kernel_names[dispatch_id]); lzt::set_group_size(kernel_handle, workgroup_size_x_, 1, 1); - lzt::set_argument_value(kernel_handle, 0, sizeof(src_allocation), - &src_allocation); - lzt::set_argument_value(kernel_handle, 1, sizeof(dst_allocation), - &dst_allocation); + + if (test_arguments_.indirect_access) { + switch (test_arguments_.memory_type) { + case ZE_MEMORY_TYPE_DEVICE: + lzt::kernel_set_indirect_access( + kernel_handle, ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE); + break; + case ZE_MEMORY_TYPE_HOST: + lzt::kernel_set_indirect_access(kernel_handle, + ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST); + break; + case ZE_MEMORY_TYPE_SHARED: + lzt::kernel_set_indirect_access( + kernel_handle, ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED); + break; + default: + break; + } + lzt::set_argument_value(kernel_handle, 0, sizeof(src_allocation_ptrs), + &src_allocation_ptrs); + lzt::set_argument_value(kernel_handle, 1, sizeof(dst_allocation_ptrs), + &dst_allocation_ptrs); + lzt::set_argument_value(kernel_handle, 2, sizeof(uint32_t), + &dispatch_id); + } else { + lzt::set_argument_value(kernel_handle, 0, sizeof(src_allocation), + &src_allocation); + lzt::set_argument_value(kernel_handle, 1, sizeof(dst_allocation), + &dst_allocation); + } uint32_t group_count_x = to_u32(one_case_allocation_count / workgroup_size_x_); ze_group_count_t thread_group_dimensions = {group_count_x, 1, 1}; - lzt::append_memory_fill( - command_list, src_allocation, &init_value_2_, sizeof(init_value_2_), - one_case_allocation_count * kernel_copy_unit_size, nullptr); + lzt::append_memory_fill(cmd_bundle.list, src_allocation, &init_value_2_, + sizeof(init_value_2_), + one_case_allocation_count * kernel_copy_unit_size, + nullptr); - lzt::append_memory_fill( - command_list, dst_allocation, &init_value_3_, sizeof(init_value_3_), - one_case_allocation_count * kernel_copy_unit_size, nullptr); + lzt::append_memory_fill(cmd_bundle.list, dst_allocation, &init_value_3_, + sizeof(init_value_3_), + one_case_allocation_count * kernel_copy_unit_size, + nullptr); - lzt::append_barrier(command_list, nullptr); + lzt::append_barrier(cmd_bundle.list, nullptr); - lzt::append_launch_function(command_list, kernel_handle, + lzt::append_launch_function(cmd_bundle.list, kernel_handle, &thread_group_dimensions, nullptr, 0, nullptr); - lzt::append_barrier(command_list, nullptr); + lzt::append_barrier(cmd_bundle.list, nullptr); if (memory_type == ZE_MEMORY_TYPE_DEVICE) { lzt::append_memory_copy( - command_list, data_out[dispatch_id].data(), dst_allocation, + cmd_bundle.list, data_out[dispatch_id].data(), dst_allocation, one_case_allocation_count * kernel_copy_unit_size, nullptr); } - lzt::append_barrier(command_list, nullptr); + lzt::append_barrier(cmd_bundle.list, nullptr); test_functions.push_back(kernel_handle); } - ze_command_queue_handle_t command_queue = lzt::create_command_queue( - context, device, 0, ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, - ZE_COMMAND_QUEUE_PRIORITY_NORMAL, 0); + if (!test_arguments_.immediate) { + lzt::close_command_list(cmd_bundle.list); + } - lzt::close_command_list(command_list); - lzt::execute_command_lists(command_queue, 1, &command_list, nullptr); - lzt::synchronize(command_queue, UINT64_MAX); + lzt::execute_and_sync_command_bundle(cmd_bundle, UINT64_MAX); + lzt::destroy_command_bundle(cmd_bundle); - lzt::destroy_command_queue(command_queue); - lzt::destroy_command_list(command_list); for (uint64_t dispatch_id = 0; dispatch_id < test_functions.size(); dispatch_id++) { EXPECT_ZE_RESULT_SUCCESS(zeKernelDestroy(test_functions[dispatch_id])); @@ -121,17 +196,21 @@ class zeDriverMemoryAllocationStressTest kernel_copy_unit_t init_value_1_ = 0; kernel_copy_unit_t init_value_2_ = 0xAAAAAAAA; // 1010 1010 kernel_copy_unit_t init_value_3_ = 0x55555555; // 0101 0101 + bool indirect_access = false; + bool immediate = false; }; LZT_TEST_P( zeDriverMemoryAllocationStressTest, AlocateFullAvailableMemoryNumberOfKernelDispatchesDependsOnUserChunkAllocaitonRequest) { - TestArguments_t test_arguments = { + test_arguments_ = { std::get<0>(GetParam()), // total memory size limit std::get<1>(GetParam()), // one allocation size limit std::get<2>(GetParam()), // dispatch multiplier - std::get<3>(GetParam()) // memory type + std::get<3>(GetParam()), // memory type + std::get<4>(GetParam()), // immediate + std::get<5>(GetParam()) // indirect access }; auto driver = lzt::get_default_driver(); @@ -139,14 +218,14 @@ LZT_TEST_P( auto device = lzt::get_default_device(driver); ze_device_properties_t device_properties = lzt::get_device_properties(device); - test_arguments.print_test_arguments(device_properties); + test_arguments_.print_test_arguments(device_properties); std::vector device_memory_properties = lzt::get_memory_properties(device); const uint32_t used_vectors_in_test = - test_arguments.memory_type == ZE_MEMORY_TYPE_DEVICE ? 4 : 3; - uint32_t number_of_dispatches = to_u32(test_arguments.multiplier); + test_arguments_.memory_type == ZE_MEMORY_TYPE_DEVICE ? 4 : 3; + uint32_t number_of_dispatches = to_u32(test_arguments_.multiplier); uint64_t number_of_all_allocations = used_vectors_in_test * number_of_dispatches; uint64_t test_single_allocation_memory_size = 0; @@ -156,15 +235,16 @@ LZT_TEST_P( adjust_max_memory_allocation( driver, device_properties, device_memory_properties, test_total_memory_size, test_single_allocation_memory_size, - number_of_all_allocations, test_arguments, relax_memory_capability); + number_of_all_allocations, test_arguments_, relax_memory_capability); if (number_of_all_allocations != used_vectors_in_test * number_of_dispatches) { LOG_INFO << "Need to limit dispatches from : " << number_of_dispatches << " to: " << number_of_all_allocations / used_vectors_in_test; - number_of_dispatches = - to_u32(number_of_all_allocations / used_vectors_in_test); // bacause number_of_all_allocations can change; + number_of_dispatches = to_u32( + number_of_all_allocations / + used_vectors_in_test); // bacause number_of_all_allocations can change; } if (test_single_allocation_memory_size < kernel_copy_unit_size) { @@ -197,10 +277,10 @@ LZT_TEST_P( for (uint32_t dispatch_id = 0; dispatch_id < number_of_dispatches; dispatch_id++) { kernel_copy_unit_t *input_allocation = allocate_memory( - context, device, test_arguments.memory_type, + context, device, test_arguments_.memory_type, test_single_allocation_memory_size, relax_memory_capability); kernel_copy_unit_t *output_allocation = allocate_memory( - context, device, test_arguments.memory_type, + context, device, test_arguments_.memory_type, test_single_allocation_memory_size, relax_memory_capability); if (input_allocation == nullptr || output_allocation == nullptr) { LOG_WARNING << "Cannot allocate " @@ -217,18 +297,22 @@ LZT_TEST_P( } input_allocations.push_back(input_allocation); output_allocations.push_back(output_allocation); - if (test_arguments.memory_type == ZE_MEMORY_TYPE_DEVICE) { + if (test_arguments_.memory_type == ZE_MEMORY_TYPE_DEVICE) { std::vector data_out( test_single_allocation_count * kernel_copy_unit_size, init_value_1_); data_out_vector.push_back(data_out); } - std::string kernel_name; - kernel_name = - "test_device_memory" + - std::to_string((dispatch_id % number_of_kernels_in_module_) + 1) + - "_unit_size" + std::to_string(kernel_copy_unit_size); - test_kernel_names.push_back(kernel_name); + std::stringstream kernel_name_ss; + kernel_name_ss << "test_device_memory" + + std::to_string( + (dispatch_id % number_of_kernels_in_module_) + 1); + if (test_arguments_.indirect_access) { + kernel_name_ss << "_indirect"; + } else { + kernel_name_ss << "_unit_size" << std::to_string(kernel_copy_unit_size); + } + test_kernel_names.push_back(kernel_name_ss.str()); } LOG_INFO << "call create module"; @@ -238,7 +322,7 @@ LZT_TEST_P( nullptr); LOG_INFO << "call dispatch_kernels"; - dispatch_kernels(device, test_arguments.memory_type, module_handle, + dispatch_kernels(device, test_arguments_.memory_type, module_handle, input_allocations, output_allocations, data_out_vector, test_kernel_names, number_of_dispatches, test_single_allocation_count, context); @@ -247,7 +331,7 @@ LZT_TEST_P( bool memory_test_failure = false; uint32_t counter = 0; - if (test_arguments.memory_type == ZE_MEMORY_TYPE_DEVICE) { + if (test_arguments_.memory_type == ZE_MEMORY_TYPE_DEVICE) { for (auto output : data_out_vector) { counter++; memory_test_failure |= @@ -290,30 +374,34 @@ struct CombinationsTestNameSuffix { std::stringstream ss; ss << "dispatches_" << std::get<2>(info.param); ss << "_memoryType_" << print_allocation_type(std::get<3>(info.param)); + ss << (std::get<4>(info.param) ? "_immediate" : ""); + ss << (std::get<5>(info.param) ? "_indirectAccess" : ""); return ss.str(); } }; std::vector multiple_dispatches = {1, 10, 1000, 5000, 10000}; -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( TestAllocationMemoryMatrixMaxMemory, zeDriverMemoryAllocationStressTest, ::testing::Combine(::testing::Values(hundred_percent), ::testing::Values(hundred_percent), ::testing::ValuesIn(multiple_dispatches), ::testing::Values(ZE_MEMORY_TYPE_HOST, ZE_MEMORY_TYPE_SHARED, - ZE_MEMORY_TYPE_DEVICE)), + ZE_MEMORY_TYPE_DEVICE), + ::testing::Bool(), ::testing::Bool()), CombinationsTestNameSuffix()); -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( TestAllocationMemoryMatrixMinMemory, zeDriverMemoryAllocationStressTest, ::testing::Combine(::testing::Values(hundred_percent), ::testing::Values(ten_percent), ::testing::ValuesIn(multiple_dispatches), ::testing::Values(ZE_MEMORY_TYPE_HOST, ZE_MEMORY_TYPE_SHARED, - ZE_MEMORY_TYPE_DEVICE)), + ZE_MEMORY_TYPE_DEVICE), + ::testing::Bool(), ::testing::Bool()), CombinationsTestNameSuffix()); } // namespace