@@ -55,7 +55,7 @@ template <int NChannels, typename DType>
5555static sycl::vec<DType, NChannels>
5656linearOp (sycl::vec<DType, NChannels> pix1, sycl::vec<DType, NChannels> pix2,
5757 sycl::vec<DType, NChannels> pix3, sycl::vec<DType, NChannels> pix4,
58- float weight1, float weight2) {
58+ float weight1, float weight2, sycl::backend backend ) {
5959
6060 sycl::vec<float , NChannels> weightArr1 (weight1);
6161 sycl::vec<float , NChannels> weightArr2 (weight2);
@@ -73,14 +73,41 @@ linearOp(sycl::vec<DType, NChannels> pix1, sycl::vec<DType, NChannels> pix2,
7373 (one - weightArr1) * weightArr2 * Ti0j1 +
7474 weightArr1 * weightArr2 * Ti1j1));
7575
76- // Round to nearest whole number.
77- // There is no option to do this via sycl::rounding_mode.
78- if constexpr (std::is_same_v<DType, short > ||
79- std::is_same_v<DType, unsigned short > ||
80- std::is_same_v<DType, signed char > ||
81- std::is_same_v<DType, unsigned char >) {
82- for (int i = 0 ; i < NChannels; i++) {
83- result[i] = std::round (result[i]);
76+ if (backend == sycl::backend::ext_oneapi_cuda) {
77+ // On Nvidia devices, if the image being accessed contains smaller than
78+ // 32-bit integer data, then the fractional result of linear interpolation
79+ // is rounded to the nearest number.
80+ if constexpr (std::is_same_v<DType, short > ||
81+ std::is_same_v<DType, unsigned short > ||
82+ std::is_same_v<DType, signed char > ||
83+ std::is_same_v<DType, unsigned char >) {
84+ for (int i = 0 ; i < NChannels; i++) {
85+ result[i] = std::round (result[i]);
86+ }
87+ }
88+
89+ // On Nvidia devices, if the image being accessed contains 32-bit integer
90+ // data, then the fractional result of linear interpolation is rounded down.
91+ if constexpr (std::is_same_v<DType, int > ||
92+ std::is_same_v<DType, unsigned int >) {
93+ for (int i = 0 ; i < NChannels; i++) {
94+ result[i] = std::floor (result[i]);
95+ }
96+ }
97+ }
98+
99+ if (backend == sycl::backend::ext_oneapi_level_zero) {
100+ // On Intel devices, if the image being accessed contains integer data, then
101+ // the fractional result of linear interpolation is rounded down.
102+ if constexpr (std::is_same_v<DType, short > ||
103+ std::is_same_v<DType, unsigned short > ||
104+ std::is_same_v<DType, signed char > ||
105+ std::is_same_v<DType, unsigned char > ||
106+ std::is_same_v<DType, int > ||
107+ std::is_same_v<DType, unsigned int >) {
108+ for (int i = 0 ; i < NChannels; i++) {
109+ result[i] = std::floor (result[i]);
110+ }
84111 }
85112 }
86113
@@ -360,7 +387,8 @@ struct InterpolRes {
360387template <typename DType, int NChannels>
361388static sycl::vec<DType, NChannels>
362389clampLinear (sycl::vec<float , 2 > coords, sycl::range<2 > globalSize,
363- const std::vector<sycl::vec<DType, NChannels>> &inputImage) {
390+ const std::vector<sycl::vec<DType, NChannels>> &inputImage,
391+ sycl::backend backend) {
364392 using VecType = sycl::vec<DType, NChannels>;
365393
366394 float coordX = coords[0 ];
@@ -391,14 +419,16 @@ clampLinear(sycl::vec<float, 2> coords, sycl::range<2> globalSize,
391419 clampLinearCheckBounds<VecType>(i1, j1, width, height, inputImage);
392420
393421 // Perform linear sampling
394- return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY);
422+ return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY,
423+ backend);
395424}
396425
397426// Out of range coords are clamped to the extent.
398427template <typename DType, int NChannels>
399428static sycl::vec<DType, NChannels>
400429clampToEdgeLinear (sycl::vec<float , 2 > coords, sycl::range<2 > globalSize,
401- const std::vector<sycl::vec<DType, NChannels>> &inputImage) {
430+ const std::vector<sycl::vec<DType, NChannels>> &inputImage,
431+ sycl::backend backend) {
402432 using VecType = sycl::vec<DType, NChannels>;
403433
404434 float coordX = coords[0 ];
@@ -428,7 +458,8 @@ clampToEdgeLinear(sycl::vec<float, 2> coords, sycl::range<2> globalSize,
428458 VecType pix4 = inputImage[i1 + (width * j1)];
429459
430460 // Perform linear sampling
431- return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY);
461+ return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY,
462+ backend);
432463}
433464
434465// Out of range coords return a border color
@@ -451,7 +482,8 @@ static InterpolRes repeatLinearCoord(float coord, int dimSize) {
451482template <typename DType, int NChannels>
452483static sycl::vec<DType, NChannels>
453484repeatLinear (sycl::vec<float , 2 > coords, sycl::range<2 > globalSize,
454- const std::vector<sycl::vec<DType, NChannels>> &inputImage) {
485+ const std::vector<sycl::vec<DType, NChannels>> &inputImage,
486+ sycl::backend backend) {
455487 using VecType = sycl::vec<DType, NChannels>;
456488
457489 float coordX = coords[0 ];
@@ -482,7 +514,8 @@ repeatLinear(sycl::vec<float, 2> coords, sycl::range<2> globalSize,
482514 VecType pix4 = inputImage[i1 + (width * j1)];
483515
484516 // Perform linear sampling
485- return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY);
517+ return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY,
518+ backend);
486519}
487520
488521// Out of range coordinates are flipped at every integer junction
@@ -517,9 +550,10 @@ static InterpolRes mirroredRepeatLinearCoord(float coord, int dimSize) {
517550
518551// Out of range coordinates are flipped at every integer junction
519552template <typename DType, int NChannels>
520- static sycl::vec<DType, NChannels> mirroredRepeatLinear (
521- sycl::vec<float , 2 > coords, sycl::range<2 > globalSize,
522- const std::vector<sycl::vec<DType, NChannels>> &inputImage) {
553+ static sycl::vec<DType, NChannels>
554+ mirroredRepeatLinear (sycl::vec<float , 2 > coords, sycl::range<2 > globalSize,
555+ const std::vector<sycl::vec<DType, NChannels>> &inputImage,
556+ sycl::backend backend) {
523557 using VecType = sycl::vec<DType, NChannels>;
524558
525559 float coordX = coords[0 ];
@@ -551,7 +585,8 @@ static sycl::vec<DType, NChannels> mirroredRepeatLinear(
551585 VecType pix4 = inputImage[i1 + (width * j1)];
552586
553587 // Perform linear sampling
554- return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY);
588+ return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY,
589+ backend);
555590}
556591
557592// Some vector sizes here are hardcoded because the sampling functions are
@@ -560,7 +595,8 @@ template <int NDims, typename DType, int NChannels>
560595static sycl::vec<DType, NChannels>
561596read (sycl::range<2 > globalSize, sycl::vec<float , 2 > coords, float offset,
562597 const sycl::ext::oneapi::experimental::bindless_image_sampler &samp,
563- const std::vector<sycl::vec<DType, NChannels>> &inputImage) {
598+ const std::vector<sycl::vec<DType, NChannels>> &inputImage,
599+ sycl::backend backend) {
564600 using VecType = sycl::vec<DType, NChannels>;
565601
566602 // Add offset to coords
@@ -624,26 +660,28 @@ read(sycl::range<2> globalSize, sycl::vec<float, 2> coords, float offset,
624660 } else { // linear
625661 sycl::addressing_mode SampAddrMode = samp.addressing [0 ];
626662 if (SampAddrMode == sycl::addressing_mode::ext_oneapi_clamp_to_border) {
627- return clampLinear<DType, NChannels>(coords, globalSize, inputImage);
663+ return clampLinear<DType, NChannels>(coords, globalSize, inputImage,
664+ backend);
628665 }
629666 if (SampAddrMode == sycl::addressing_mode::clamp_to_edge) {
630- return clampToEdgeLinear<DType, NChannels>(coords, globalSize,
631- inputImage );
667+ return clampToEdgeLinear<DType, NChannels>(coords, globalSize, inputImage,
668+ backend );
632669 }
633670 if (SampAddrMode == sycl::addressing_mode::repeat) {
634671 if (SampNormMode == sycl::coordinate_normalization_mode::unnormalized) {
635672 assert (false &&
636673 " Repeat addressing mode must be used with normalized coords" );
637674 }
638- return repeatLinear<DType, NChannels>(coords, globalSize, inputImage);
675+ return repeatLinear<DType, NChannels>(coords, globalSize, inputImage,
676+ backend);
639677 }
640678 if (SampAddrMode == sycl::addressing_mode::mirrored_repeat) {
641679 if (SampNormMode == sycl::coordinate_normalization_mode::unnormalized) {
642680 assert (false && " Mirrored repeat addressing mode must be used with "
643681 " normalized coords" );
644682 }
645683 return mirroredRepeatLinear<DType, NChannels>(coords, globalSize,
646- inputImage);
684+ inputImage, backend );
647685 }
648686 if (SampAddrMode == sycl::addressing_mode::none) {
649687 // Ensure no access out of bounds when addressing_mode is none
0 commit comments