@@ -144,7 +144,7 @@ bool testUSM(queue Q, uint32_t Groups, uint32_t Threads,
144144 return Passed;
145145}
146146
147- template <typename T, bool TestPVCFeatures > bool testUSM (queue Q) {
147+ template <typename T, TestFeatures Features > bool testUSM (queue Q) {
148148 constexpr bool CheckMerge = true ;
149149 constexpr bool CheckMask = true ;
150150 constexpr bool CheckProperties = true ;
@@ -154,7 +154,7 @@ template <typename T, bool TestPVCFeatures> bool testUSM(queue Q) {
154154
155155 bool Passed = true ;
156156
157- // Test block_load() that is available on Gen12 and PVC.
157+ // Test block_load() that is available on Gen12, DG2 and PVC.
158158 Passed &= testUSM<T, 1 , !CheckMask, !CheckMerge, CheckProperties>(
159159 Q, 2 , 4 , AlignElemProps);
160160 Passed &= testUSM<T, 2 , !CheckMask, !CheckMerge, CheckProperties>(
@@ -196,53 +196,56 @@ template <typename T, bool TestPVCFeatures> bool testUSM(queue Q) {
196196 Passed &= testUSM<T, 32 , !CheckMask, !CheckMerge, !CheckProperties>(
197197 Q, 2 , 4 , Align16Props);
198198
199- if constexpr (TestPVCFeatures) {
200- // Using mask or cache hints adds the requirement to run tests on PVC.
201- // Also, PVC variant currently requires a) power-or-two elements,
199+ if constexpr (Features == TestFeatures::PVC ||
200+ Features == TestFeatures::DG2) {
201+ // Using mask or cache hints adds the requirement to run tests on DG2/PVC.
202+ // Also, DG2/DG2/PVC variant currently requires a) power-or-two elements,
202203 // b) the number of bytes loaded per call must not exceed 512,
203204 // c) the alignment of USM ptr + offset to be 4 or 8-bytes(for 8-byte
204205 // element vectors).
205206
206207 constexpr size_t RequiredAlignment = sizeof (T) <= 4 ? 4 : 8 ;
207- properties PVCProps {cache_hint_L1<cache_hint::streaming>,
208- cache_hint_L2<cache_hint::cached>,
209- alignment<RequiredAlignment>};
208+ properties DG2OrPVCProps {cache_hint_L1<cache_hint::streaming>,
209+ cache_hint_L2<cache_hint::cached>,
210+ alignment<RequiredAlignment>};
210211
211212 // Only d/q-words are supported now.
212213 // Thus we use this I32Factor for testing purposes and convenience.
213214 constexpr int I32Factor =
214215 std::max (static_cast <int >(sizeof (int ) / sizeof (T)), 1 );
215216 Passed &=
216217 testUSM<T, 1 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
217- Q, 2 , 4 , PVCProps );
218+ Q, 2 , 4 , DG2OrPVCProps );
218219 Passed &=
219220 testUSM<T, 2 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
220- Q, 5 , 5 , PVCProps );
221+ Q, 5 , 5 , DG2OrPVCProps );
221222 Passed &=
222223 testUSM<T, 4 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
223- Q, 5 , 5 , PVCProps );
224+ Q, 5 , 5 , DG2OrPVCProps );
224225 Passed &=
225226 testUSM<T, 8 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
226- Q, 5 , 5 , PVCProps );
227+ Q, 5 , 5 , DG2OrPVCProps );
227228 Passed &=
228229 testUSM<T, 16 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
229- Q, 5 , 5 , PVCProps );
230+ Q, 5 , 5 , DG2OrPVCProps );
230231 Passed &=
231232 testUSM<T, 32 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
232- Q, 2 , 4 , PVCProps );
233+ Q, 2 , 4 , DG2OrPVCProps );
233234
234235 // This call (potentially) and the next call (guaranteed) load the biggest
235236 // load-able chunk, which requires loading with 8-byte elements, which
236237 // requires the alignment to be 8-bytes or more.
237238 properties PVCAlign8Props{cache_hint_L1<cache_hint::streaming>,
238239 cache_hint_L2<cache_hint::cached>, alignment<8 >};
239- Passed &=
240- testUSM<T, 64 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
241- Q, 7 , 1 , PVCAlign8Props);
242- if constexpr (sizeof (T) <= 4 )
240+ if constexpr (Features == TestFeatures::PVC) {
243241 Passed &=
244- testUSM<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
245- Q, 1 , 4 , PVCAlign8Props);
242+ testUSM<T, 64 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
243+ Q, 7 , 1 , PVCAlign8Props);
244+ if constexpr (sizeof (T) <= 4 )
245+ Passed &=
246+ testUSM<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
247+ Q, 1 , 4 , PVCAlign8Props);
248+ }
246249 } // TestPVCFeatures
247250
248251 return Passed;
@@ -350,7 +353,7 @@ bool testACC(queue Q, uint32_t Groups, uint32_t Threads,
350353 return Passed;
351354}
352355
353- template <typename T, bool TestPVCFeatures > bool testACC (queue Q) {
356+ template <typename T, TestFeatures Features > bool testACC (queue Q) {
354357 constexpr bool CheckMerge = true ;
355358 constexpr bool CheckMask = true ;
356359 constexpr bool CheckProperties = true ;
@@ -361,7 +364,7 @@ template <typename T, bool TestPVCFeatures> bool testACC(queue Q) {
361364
362365 bool Passed = true ;
363366
364- // Test block_load() that is available on Gen12 and PVC:
367+ // Test block_load() that is available on Gen12, DG2 and PVC:
365368 // 1, 2, 4 or 8 16-byte loads.
366369 constexpr int NElemsInOword = 16 / sizeof (T);
367370 Passed &= testACC<T, NElemsInOword, !CheckMask, !CheckMerge, CheckProperties>(
@@ -381,18 +384,19 @@ template <typename T, bool TestPVCFeatures> bool testACC(queue Q) {
381384 testACC<T, NElemsInOword, !CheckMask, !CheckMerge, !CheckProperties>(
382385 Q, 2 , 4 , Align16Props);
383386
384- if constexpr (TestPVCFeatures) {
385- // Using mask or cache hints adds the requirement to run tests on PVC.
386- // Also, PVC variant currently requires power-or-two elements and
387+ if constexpr (Features == TestFeatures::PVC ||
388+ Features == TestFeatures::DG2) {
389+ // Using mask or cache hints adds the requirement to run tests on DG2/PVC.
390+ // Also, DG2/PVC variant currently requires power-or-two elements and
387391 // the number of bytes loaded per call must not exceed 512.
388392
389393 constexpr int I32Factor =
390394 std::max (static_cast <int >(sizeof (int ) / sizeof (T)), 1 );
391- properties PVCProps {cache_hint_L1<cache_hint::streaming>,
392- cache_hint_L2<cache_hint::cached>,
393- alignment<RequiredAlignment>};
395+ properties DG2OrPVCProps {cache_hint_L1<cache_hint::streaming>,
396+ cache_hint_L2<cache_hint::cached>,
397+ alignment<RequiredAlignment>};
394398
395- // Test block_load() that is available on PVC:
399+ // Test block_load() that is available on DG2/ PVC:
396400 // 1, 2, 3, 4, 8, ... N elements (up to 512-bytes).
397401 Passed &=
398402 testACC<T, 1 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
@@ -404,29 +408,31 @@ template <typename T, bool TestPVCFeatures> bool testACC(queue Q) {
404408 testACC<T, 3 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
405409 Q, 2 , 8 , MinReqAlignProps);
406410 Passed &= testACC<T, 4 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
407- Q, 2 , 4 , PVCProps );
411+ Q, 2 , 4 , DG2OrPVCProps );
408412 Passed &= testACC<T, 8 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
409413 Q, 2 , 4 , MinReqAlignProps);
410414 Passed &=
411415 testACC<T, 16 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
412416 Q, 2 , 4 , MinReqAlignProps);
413417 Passed &=
414418 testACC<T, 32 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
415- Q, 2 , 4 , PVCProps );
419+ Q, 2 , 4 , DG2OrPVCProps );
416420
417421 // This call (potentially) and the next call (guaranteed) load the biggest
418422 // load-able chunk, which requires loading with 8-byte elements, which
419423 // requires the alignment to be 8-bytes or more.
420424 properties PVCAlign8Props{cache_hint_L1<cache_hint::streaming>,
421425 cache_hint_L2<cache_hint::cached>, alignment<8 >};
422- Passed &=
423- testACC<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
424- Q, 2 , 4 , PVCAlign8Props);
425-
426- if constexpr (sizeof (T) <= 4 )
426+ if constexpr (Features == TestFeatures::PVC) {
427427 Passed &=
428- testACC<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
428+ testACC<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
429429 Q, 2 , 4 , PVCAlign8Props);
430+
431+ if constexpr (sizeof (T) <= 4 )
432+ Passed &=
433+ testACC<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
434+ Q, 2 , 4 , PVCAlign8Props);
435+ }
430436 } // TestPVCFeatures
431437
432438 return Passed;
@@ -540,7 +546,7 @@ bool testSLMAcc(queue Q, uint32_t Groups, uint32_t GroupSize,
540546 return Passed;
541547}
542548
543- template <typename T, bool TestPVCFeatures > bool testSLMAcc (queue Q) {
549+ template <typename T, TestFeatures Features > bool testSLMAcc (queue Q) {
544550 constexpr bool CheckMerge = true ;
545551 constexpr bool CheckMask = true ;
546552 constexpr bool CheckProperties = true ;
@@ -592,46 +598,50 @@ template <typename T, bool TestPVCFeatures> bool testSLMAcc(queue Q) {
592598 Q, 2 , 4 , AlignElemProps);
593599 }
594600
595- if constexpr (TestPVCFeatures) {
596- // Using the mask adds the requirement to run tests on PVC.
597- // Also, PVC variant currently requires power-or-two elements and
601+ if constexpr (Features == TestFeatures::PVC ||
602+ Features == TestFeatures::DG2) {
603+
604+ // Using the mask adds the requirement to run tests on DG2/PVC.
605+ // Also, DG2/PVC variant currently requires power-or-two elements and
598606 // the number of bytes loaded per call must not exceed 512.
599607
600608 constexpr int I32Factor =
601609 std::max (static_cast <int >(sizeof (int ) / sizeof (T)), 1 );
602610 constexpr size_t ReqiredAlignment = sizeof (T) <= 4 ? 4 : 8 ;
603- properties PVCProps {alignment<ReqiredAlignment>};
611+ properties DG2OrPVCProps {alignment<ReqiredAlignment>};
604612
605- // Test block_load() that is available on PVC:
613+ // Test block_load() that is available on DG2/ PVC:
606614 // 1, 2, 3, 4, 8, ... N elements (up to 512-bytes).
607615 Passed &=
608616 testSLMAcc<T, 1 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
609- Q, 2 , 4 , PVCProps );
617+ Q, 2 , 4 , DG2OrPVCProps );
610618 Passed &=
611619 testSLMAcc<T, 2 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
612- Q, 1 , 4 , PVCProps );
620+ Q, 1 , 4 , DG2OrPVCProps );
613621 Passed &=
614622 testSLMAcc<T, 3 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
615- Q, 2 , 8 , PVCProps );
623+ Q, 2 , 8 , DG2OrPVCProps );
616624 Passed &=
617625 testSLMAcc<T, 4 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
618- Q, 2 , 4 , PVCProps );
626+ Q, 2 , 4 , DG2OrPVCProps );
619627 Passed &=
620628 testSLMAcc<T, 8 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
621- Q, 2 , 4 , PVCProps );
629+ Q, 2 , 4 , DG2OrPVCProps );
622630 Passed &=
623631 testSLMAcc<T, 16 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
624- Q, 2 , 4 , PVCProps );
632+ Q, 2 , 4 , DG2OrPVCProps );
625633 Passed &=
626634 testSLMAcc<T, 32 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
627- Q, 2 , 4 , PVCProps);
628- Passed &=
629- testSLMAcc<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
630- Q, 2 , 4 , PVCProps);
635+ Q, 2 , 4 , DG2OrPVCProps);
636+ if constexpr (Features == TestFeatures::PVC) {
637+ Passed &=
638+ testSLMAcc<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
639+ Q, 2 , 4 , DG2OrPVCProps);
631640
632- if constexpr (sizeof (T) <= 4 )
633- Passed &= testSLMAcc<T, 128 * I32Factor, CheckMask, CheckMerge,
634- CheckProperties>(Q, 2 , 4 , Align16Props);
641+ if constexpr (sizeof (T) <= 4 )
642+ Passed &= testSLMAcc<T, 128 * I32Factor, CheckMask, CheckMerge,
643+ CheckProperties>(Q, 2 , 4 , Align16Props);
644+ }
635645 } // TestPVCFeatures
636646
637647 return Passed;
@@ -735,7 +745,7 @@ bool testSLM(queue Q, uint32_t Groups, LoadPropertiesT LoadProperties) {
735745 return Passed;
736746}
737747
738- template <typename T, bool TestPVCFeatures > bool testSLM (queue Q) {
748+ template <typename T, TestFeatures Features > bool testSLM (queue Q) {
739749 constexpr bool CheckMerge = true ;
740750 constexpr bool CheckMask = true ;
741751 constexpr bool CheckProperties = true ;
@@ -786,45 +796,48 @@ template <typename T, bool TestPVCFeatures> bool testSLM(queue Q) {
786796 Q, 2 , AlignElemProps);
787797 }
788798
789- if constexpr (TestPVCFeatures) {
790- // Using the mask adds the requirement to run tests on PVC.
791- // Also, PVC variant currently requires power-or-two elements and
799+ if constexpr (Features == TestFeatures::PVC ||
800+ Features == TestFeatures::DG2) {
801+ // Using the mask adds the requirement to run tests on DG2/PVC.
802+ // Also, DG2/PVC variant currently requires power-or-two elements and
792803 // the number of bytes loaded per call must not exceed 512.
793804
794805 constexpr int I32Factor =
795806 std::max (static_cast <int >(sizeof (int ) / sizeof (T)), 1 );
796807 constexpr size_t RequiredAlignment = sizeof (T) <= 4 ? 4 : 8 ;
797- properties PVCProps {alignment<RequiredAlignment>};
808+ properties DG2OrPVCProps {alignment<RequiredAlignment>};
798809
799- // Test block_load() that is available on PVC:
810+ // Test block_load() that is available on DG2/ PVC:
800811 // 1, 2, 3, 4, 8, ... N elements (up to 512-bytes).
801812 Passed &=
802813 testSLM<T, 1 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
803- Q, 2 , PVCProps );
814+ Q, 2 , DG2OrPVCProps );
804815 Passed &= testSLM<T, 2 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
805- Q, 1 , PVCProps );
816+ Q, 1 , DG2OrPVCProps );
806817 Passed &=
807818 testSLM<T, 3 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
808- Q, 2 , PVCProps );
819+ Q, 2 , DG2OrPVCProps );
809820 Passed &= testSLM<T, 4 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
810- Q, 2 , PVCProps );
821+ Q, 2 , DG2OrPVCProps );
811822 Passed &=
812823 testSLM<T, 8 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
813- Q, 2 , PVCProps );
824+ Q, 2 , DG2OrPVCProps );
814825 Passed &=
815826 testSLM<T, 16 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
816- Q, 2 , PVCProps );
827+ Q, 2 , DG2OrPVCProps );
817828 Passed &=
818829 testSLM<T, 32 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
819- Q, 2 , PVCProps);
820- Passed &=
821- testSLM<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
822- Q, 2 , PVCProps);
823-
824- if constexpr (sizeof (T) <= 4 )
830+ Q, 2 , DG2OrPVCProps);
831+ if constexpr (Features == TestFeatures::PVC) {
825832 Passed &=
826- testSLM<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
827- Q, 2 , Align16Props);
833+ testSLM<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
834+ Q, 2 , DG2OrPVCProps);
835+
836+ if constexpr (sizeof (T) <= 4 )
837+ Passed &=
838+ testSLM<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
839+ Q, 2 , Align16Props);
840+ }
828841 } // TestPVCFeatures
829842
830843 return Passed;
0 commit comments