diff --git a/CHANGELOG.md b/CHANGELOG.md index 834a9790a..b846de69f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ The full documentation for MIVisionX is available at [https://rocm.docs.amd.com/ ### Added * Added the `PythonFunction` extension to VX_RPP +### Removed +* Removed the batchPD extensions from VX_RPP + ### Changed * Updated vx_rpp extension for Gaussian Filter diff --git a/amd_openvx_extensions/CMakeLists.txt b/amd_openvx_extensions/CMakeLists.txt index 2d3ce70b9..0087be4ed 100644 --- a/amd_openvx_extensions/CMakeLists.txt +++ b/amd_openvx_extensions/CMakeLists.txt @@ -48,7 +48,7 @@ if(NOT MIN_DEPS_MODE) find_package(OpenCV QUIET) find_package(FFmpeg QUIET) endif() -find_package(rpp 2.2.2 QUIET) +find_package(rpp 2.3.0 QUIET) if(GPU_SUPPORT) if("${BACKEND}" STREQUAL "OPENCL") find_package(OpenCL QUIET) diff --git a/amd_openvx_extensions/amd_rpp/CMakeLists.txt b/amd_openvx_extensions/amd_rpp/CMakeLists.txt index 05e9071b6..5f8eb4111 100644 --- a/amd_openvx_extensions/amd_rpp/CMakeLists.txt +++ b/amd_openvx_extensions/amd_rpp/CMakeLists.txt @@ -31,14 +31,14 @@ endif() # VX_RPP Version # * must match with include/vx_ext_rpp_version.h -set(VERSION "3.1.7") +set(VERSION "3.2.0") project(vx_rpp VERSION ${VERSION} LANGUAGES CXX) list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/lib/cmake) list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../cmake) -find_package(rpp 2.2.2 REQUIRED) +find_package(rpp 2.3.0 REQUIRED) message("-- ${White}${PROJECT_NAME}: rpp found with find_package(rpp REQUIRED)${ColourReset}") message("-- \t${White}rpp_INCLUDE_DIR -- ${rpp_INCLUDE_DIR}${ColourReset}") @@ -147,100 +147,6 @@ list(APPEND SOURCES source/internal_publishKernels.cpp ) -set(RPP_LEGACY_SUPPORT_FOUND 0) -if(EXISTS ${rpp_INCLUDE_DIR}/rpp/rpp_legacy_support.h) - file(READ ${rpp_INCLUDE_DIR}/rpp/rpp_legacy_support.h RPP_LEGACY_SUPPORT_FILE) - string(REGEX MATCH "RPP_LEGACY_SUPPORT ([0-1]*)" _ ${RPP_LEGACY_SUPPORT_FILE}) - set(RPP_LEGACY_SUPPORT_FOUND ${CMAKE_MATCH_1}) -endif() - -if(RPP_LEGACY_SUPPORT_FOUND) - list(APPEND SOURCES - source/image/AbsoluteDifferencebatchPD.cpp - source/image/AccumulatebatchPD.cpp - source/image/AccumulateSquaredbatchPD.cpp - source/image/AccumulateWeightedbatchPD.cpp - source/image/AddbatchPD.cpp - source/image/BitwiseANDbatchPD.cpp - source/image/BitwiseNOTbatchPD.cpp - source/image/BlendbatchPD.cpp - source/image/BlurbatchPD.cpp - source/image/BoxFilterbatchPD.cpp - source/image/BrightnessbatchPD.cpp - source/image/CannyEdgeDetector.cpp - source/image/ChannelCombinebatchPD.cpp - source/image/ChannelExtractbatchPD.cpp - source/image/ColorTemperaturebatchPD.cpp - source/image/ColorTwistbatchPD.cpp - source/image/ContrastbatchPD.cpp - source/image/CopybatchPD.cpp - source/image/CropMirrorNormalizePD.cpp - source/image/CropPD.cpp - source/image/CustomConvolutionbatchPD.cpp - source/image/DataObjectCopybatchPD.cpp - source/image/DilatebatchPD.cpp - source/image/ErodebatchPD.cpp - source/image/ExclusiveORbatchPD.cpp - source/image/ExposurebatchPD.cpp - source/image/FastCornerDetector.cpp - source/image/FisheyebatchPD.cpp - source/image/FlipbatchPD.cpp - source/image/FogbatchPD.cpp - source/image/GammaCorrectionbatchPD.cpp - source/image/GaussianFilterbatchPD.cpp - source/image/GaussianImagePyramidbatchPD.cpp - source/image/HarrisCornerDetector.cpp - source/image/Histogram.cpp - source/image/HistogramBalancebatchPD.cpp - source/image/HistogramEqualizebatchPD.cpp - source/image/HuebatchPD.cpp - source/image/InclusiveORbatchPD.cpp - source/image/JitterbatchPD.cpp - source/image/LaplacianImagePyramid.cpp - source/image/LensCorrectionbatchPD.cpp - source/image/LocalBinaryPatternbatchPD.cpp - source/image/LookUpTablebatchPD.cpp - source/image/MagnitudebatchPD.cpp - source/image/MaxbatchPD.cpp - source/image/MeanStddev.cpp - source/image/MedianFilterbatchPD.cpp - source/image/MinbatchPD.cpp - source/image/MinMaxLoc.cpp - source/image/MultiplybatchPD.cpp - source/image/NoisebatchPD.cpp - source/image/NonLinearFilterbatchPD.cpp - source/image/NonMaxSupressionbatchPD.cpp - source/image/NopbatchPD.cpp - source/image/PhasebatchPD.cpp - source/image/PixelatebatchPD.cpp - source/image/RainbatchPD.cpp - source/image/RandomCropLetterBoxbatchPD.cpp - source/image/RandomShadowbatchPD.cpp - source/image/Remap.cpp - source/image/ResizebatchPD.cpp - source/image/ResizeCropbatchPD.cpp - source/image/ResizeCropMirrorPD.cpp - source/image/ResizeMirrorNormalizeTensor.cpp - source/image/Resizetensor.cpp - source/image/RotatebatchPD.cpp - source/image/SaturationbatchPD.cpp - source/image/ScalebatchPD.cpp - source/image/SequenceRearrangebatchPD.cpp - source/image/SnowbatchPD.cpp - source/image/SobelbatchPD.cpp - source/image/SubtractbatchPD.cpp - source/image/TensorAdd.cpp - source/image/TensorLookup.cpp - source/image/TensorMatrixMultiply.cpp - source/image/TensorMultiply.cpp - source/image/TensorSubtract.cpp - source/image/ThresholdingbatchPD.cpp - source/image/VignettebatchPD.cpp - source/image/WarpAffinebatchPD.cpp - source/image/WarpPerspectivebatchPD.cpp - ) -endif() - if(GPU_SUPPORT AND "${BACKEND}" STREQUAL "OPENCL" AND OpenCL_FOUND) message("-- ${Green}vx_rpp -- Building with OpenCL${ColourReset}") set(ENABLE_OPENCL 1) @@ -290,15 +196,6 @@ else() message("-- ${Yellow}vx_rpp Audio Features Excluded${ColourReset}") endif() -if(RPP_LEGACY_SUPPORT_FOUND) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLEGACY_SUPPORT=1") - target_compile_definitions(vx_rpp PUBLIC RPP_LEGACY_SUPPORT=1) - message("-- ${White}vx_rpp BatchPD augmentations included${ColourReset}") -else() - target_compile_definitions(vx_rpp PUBLIC RPP_LEGACY_SUPPORT=0) - message("-- ${Yellow}vx_rpp BatchPD augmentations excluded${ColourReset}") -endif() - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} OpenVX) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT") diff --git a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h index dff511cfd..ecd0f523c 100644 --- a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h +++ b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h @@ -39,91 +39,6 @@ extern "C" SHARED_PUBLIC vx_status VX_API_CALL vxPublishKernels(vx_context conte vx_status ADD_KERNEL(std::function); vx_status get_kernels_to_publish(); -#if RPP_LEGACY_SUPPORT -vx_status AbsoluteDifferencebatchPD_Register(vx_context); -vx_status AccumulatebatchPD_Register(vx_context); -vx_status AccumulateSquaredbatchPD_Register(vx_context); -vx_status AccumulateWeightedbatchPD_Register(vx_context); -vx_status AddbatchPD_Register(vx_context); -vx_status BitwiseANDbatchPD_Register(vx_context); -vx_status BitwiseNOTbatchPD_Register(vx_context); -vx_status BlendbatchPD_Register(vx_context); -vx_status BlurbatchPD_Register(vx_context); -vx_status BoxFilterbatchPD_Register(vx_context); -vx_status BrightnessbatchPD_Register(vx_context); -vx_status CannyEdgeDetector_Register(vx_context); -vx_status ChannelCombinebatchPD_Register(vx_context); -vx_status ChannelExtractbatchPD_Register(vx_context); -vx_status ColorTemperaturebatchPD_Register(vx_context); -vx_status ColorTwistbatchPD_Register(vx_context); -vx_status ContrastbatchPD_Register(vx_context); -vx_status CopybatchPD_Register(vx_context); -vx_status CropMirrorNormalizePD_Register(vx_context); -vx_status CropPD_Register(vx_context); -vx_status CustomConvolutionbatchPD_Register(vx_context); -vx_status DataObjectCopybatchPD_Register(vx_context); -vx_status DilatebatchPD_Register(vx_context); -vx_status ErodebatchPD_Register(vx_context); -vx_status ExclusiveORbatchPD_Register(vx_context); -vx_status ExposurebatchPD_Register(vx_context); -vx_status FastCornerDetector_Register(vx_context); -vx_status FisheyebatchPD_Register(vx_context); -vx_status FlipbatchPD_Register(vx_context); -vx_status FogbatchPD_Register(vx_context); -vx_status GammaCorrectionbatchPD_Register(vx_context); -vx_status GaussianFilterbatchPD_Register(vx_context); -vx_status GaussianImagePyramidbatchPD_Register(vx_context); -vx_status HarrisCornerDetector_Register(vx_context); -vx_status Histogram_Register(vx_context); -vx_status HistogramBalancebatchPD_Register(vx_context); -vx_status HistogramEqualizebatchPD_Register(vx_context); -vx_status HuebatchPD_Register(vx_context); -vx_status InclusiveORbatchPD_Register(vx_context); -vx_status JitterbatchPD_Register(vx_context); -vx_status LaplacianImagePyramid_Register(vx_context); -vx_status LensCorrectionbatchPD_Register(vx_context); -vx_status LocalBinaryPatternbatchPD_Register(vx_context); -vx_status LookUpTablebatchPD_Register(vx_context); -vx_status MagnitudebatchPD_Register(vx_context); -vx_status MaxbatchPD_Register(vx_context); -vx_status MeanStddev_Register(vx_context); -vx_status MedianFilterbatchPD_Register(vx_context); -vx_status MinbatchPD_Register(vx_context); -vx_status MinMaxLoc_Register(vx_context); -vx_status MultiplybatchPD_Register(vx_context); -vx_status NoisebatchPD_Register(vx_context); -vx_status NonLinearFilterbatchPD_Register(vx_context); -vx_status NonMaxSupressionbatchPD_Register(vx_context); -vx_status NopbatchPD_Register(vx_context); -vx_status PhasebatchPD_Register(vx_context); -vx_status PixelatebatchPD_Register(vx_context); -vx_status RainbatchPD_Register(vx_context); -vx_status RandomCropLetterBoxbatchPD_Register(vx_context); -vx_status RandomShadowbatchPD_Register(vx_context); -vx_status remap_Register(vx_context); -vx_status ResizebatchPD_Register(vx_context); -vx_status ResizeCropbatchPD_Register(vx_context); -vx_status ResizeCropMirrorPD_Register(vx_context); -vx_status ResizeMirrorNormalizeTensor_Register(vx_context); -vx_status Resizetensor_Register(vx_context); -vx_status RotatebatchPD_Register(vx_context); -vx_status SaturationbatchPD_Register(vx_context); -vx_status ScalebatchPD_Register(vx_context); -vx_status SequenceRearrangebatchPD_Register(vx_context); -vx_status SnowbatchPD_Register(vx_context); -vx_status SobelbatchPD_Register(vx_context); -vx_status SubtractbatchPD_Register(vx_context); -vx_status TensorAdd_Register(vx_context); -vx_status TensorLookup_Register(vx_context); -vx_status TensorMatrixMultiply_Register(vx_context); -vx_status TensorMultiply_Register(vx_context); -vx_status TensorSubtract_Register(vx_context); -vx_status ThresholdingbatchPD_Register(vx_context); -vx_status VignettebatchPD_Register(vx_context); -vx_status WarpAffinebatchPD_Register(vx_context); -vx_status WarpPerspectivebatchPD_Register(vx_context); -#endif - vx_status Blend_Register(vx_context); vx_status Blur_Register(vx_context); vx_status Brightness_Register(vx_context); @@ -205,90 +120,6 @@ vx_status TensorMax_Register(vx_context); vx_status TensorMean_Register(vx_context); vx_status TensorStdDev_Register(vx_context); -// kernel names -#define VX_KERNEL_RPP_NOPBATCHPD_NAME "org.rpp.NopbatchPD" -#define VX_KERNEL_RPP_COPYBATCHPD_NAME "org.rpp.CopybatchPD" -#define VX_KERNEL_RPP_BRIGHTNESSBATCHPD_NAME "org.rpp.BrightnessbatchPD" -#define VX_KERNEL_RPP_GAMMACORRECTIONBATCHPD_NAME "org.rpp.GammaCorrectionbatchPD" -#define VX_KERNEL_RPP_BLENDBATCHPD_NAME "org.rpp.BlendbatchPD" -#define VX_KERNEL_RPP_BLURBATCHPD_NAME "org.rpp.BlurbatchPD" -#define VX_KERNEL_RPP_CONTRASTBATCHPD_NAME "org.rpp.ContrastbatchPD" -#define VX_KERNEL_RPP_PIXELATEBATCHPD_NAME "org.rpp.PixelatebatchPD" -#define VX_KERNEL_RPP_JITTERBATCHPD_NAME "org.rpp.JitterbatchPD" -#define VX_KERNEL_RPP_SNOWBATCHPD_NAME "org.rpp.SnowbatchPD" -#define VX_KERNEL_RPP_NOISEBATCHPD_NAME "org.rpp.NoisebatchPD" -#define VX_KERNEL_RPP_RANDOMSHADOWBATCHPD_NAME "org.rpp.RandomShadowbatchPD" -#define VX_KERNEL_RPP_FOGBATCHPD_NAME "org.rpp.FogbatchPD" -#define VX_KERNEL_RPP_RAINBATCHPD_NAME "org.rpp.RainbatchPD" -#define VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPD_NAME "org.rpp.RandomCropLetterBoxbatchPD" -#define VX_KERNEL_RPP_EXPOSUREBATCHPD_NAME "org.rpp.ExposurebatchPD" -#define VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPD_NAME "org.rpp.HistogramBalancebatchPD" -#define VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPD_NAME "org.rpp.AbsoluteDifferencebatchPD" -#define VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPD_NAME "org.rpp.AccumulateWeightedbatchPD" -#define VX_KERNEL_RPP_ACCUMULATEBATCHPD_NAME "org.rpp.AccumulatebatchPD" -#define VX_KERNEL_RPP_ADDBATCHPD_NAME "org.rpp.AddbatchPD" -#define VX_KERNEL_RPP_SUBTRACTBATCHPD_NAME "org.rpp.SubtractbatchPD" -#define VX_KERNEL_RPP_MAGNITUDEBATCHPD_NAME "org.rpp.MagnitudebatchPD" -#define VX_KERNEL_RPP_MULTIPLYBATCHPD_NAME "org.rpp.MultiplybatchPD" -#define VX_KERNEL_RPP_PHASEBATCHPD_NAME "org.rpp.PhasebatchPD" -#define VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPD_NAME "org.rpp.AccumulateSquaredbatchPD" -#define VX_KERNEL_RPP_BITWISEANDBATCHPD_NAME "org.rpp.BitwiseANDbatchPD" -#define VX_KERNEL_RPP_BITWISENOTBATCHPD_NAME "org.rpp.BitwiseNOTbatchPD" -#define VX_KERNEL_RPP_EXCLUSIVEORBATCHPD_NAME "org.rpp.ExclusiveORbatchPD" -#define VX_KERNEL_RPP_INCLUSIVEORBATCHPD_NAME "org.rpp.InclusiveORbatchPD" -#define VX_KERNEL_RPP_HISTOGRAM_NAME "org.rpp.Histogram" -#define VX_KERNEL_RPP_THRESHOLDINGBATCHPD_NAME "org.rpp.ThresholdingbatchPD" -#define VX_KERNEL_RPP_MAXBATCHPD_NAME "org.rpp.MaxbatchPD" -#define VX_KERNEL_RPP_MINBATCHPD_NAME "org.rpp.MinbatchPD" -#define VX_KERNEL_RPP_MINMAXLOC_NAME "org.rpp.MinMaxLoc" -#define VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPD_NAME "org.rpp.HistogramEqualizebatchPD" -#define VX_KERNEL_RPP_MEANSTDDEV_NAME "org.rpp.MeanStddev" -#define VX_KERNEL_RPP_FLIPBATCHPD_NAME "org.rpp.FlipbatchPD" -#define VX_KERNEL_RPP_RESIZEBATCHPD_NAME "org.rpp.ResizebatchPD" -#define VX_KERNEL_RPP_RESIZECROPBATCHPD_NAME "org.rpp.ResizeCropbatchPD" -#define VX_KERNEL_RPP_ROTATEBATCHPD_NAME "org.rpp.RotatebatchPD" -#define VX_KERNEL_RPP_WARPAFFINEBATCHPD_NAME "org.rpp.WarpAffinebatchPD" -#define VX_KERNEL_RPP_FISHEYEBATCHPD_NAME "org.rpp.FisheyebatchPD" -#define VX_KERNEL_RPP_LENSCORRECTIONBATCHPD_NAME "org.rpp.LensCorrectionbatchPD" -#define VX_KERNEL_RPP_SCALEBATCHPD_NAME "org.rpp.ScalebatchPD" -#define VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPD_NAME "org.rpp.WarpPerspectivebatchPD" -#define VX_KERNEL_RPP_DILATEBATCHPD_NAME "org.rpp.DilatebatchPD" -#define VX_KERNEL_RPP_ERODEBATCHPD_NAME "org.rpp.ErodebatchPD" -#define VX_KERNEL_RPP_HUEBATCHPD_NAME "org.rpp.HuebatchPD" -#define VX_KERNEL_RPP_SATURATIONBATCHPD_NAME "org.rpp.SaturationbatchPD" -#define VX_KERNEL_RPP_COLORTEMPERATUREBATCHPD_NAME "org.rpp.ColorTemperaturebatchPD" -#define VX_KERNEL_RPP_VIGNETTEBATCHPD_NAME "org.rpp.VignettebatchPD" -#define VX_KERNEL_RPP_CHANNELEXTRACTBATCHPD_NAME "org.rpp.ChannelExtractbatchPD" -#define VX_KERNEL_RPP_CHANNELCOMBINEBATCHPD_NAME "org.rpp.ChannelCombinebatchPD" -#define VX_KERNEL_RPP_LOOKUPTABLEBATCHPD_NAME "org.rpp.LookUpTablebatchPD" -#define VX_KERNEL_RPP_BOXFILTERBATCHPD_NAME "org.rpp.BoxFilterbatchPD" -#define VX_KERNEL_RPP_SOBELBATCHPD_NAME "org.rpp.SobelbatchPD" -#define VX_KERNEL_RPP_MEDIANFILTERBATCHPD_NAME "org.rpp.MedianFilterbatchPD" -#define VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPD_NAME "org.rpp.CustomConvolutionbatchPD" -#define VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPD_NAME "org.rpp.NonMaxSupressionbatchPD" -#define VX_KERNEL_RPP_GAUSSIANFILTERBATCHPD_NAME "org.rpp.GaussianFilterbatchPD" -#define VX_KERNEL_RPP_NONLINEARFILTERBATCHPD_NAME "org.rpp.NonLinearFilterbatchPD" -#define VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPD_NAME "org.rpp.LocalBinaryPatternbatchPD" -#define VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPD_NAME "org.rpp.DataObjectCopybatchPD" -#define VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMIDBATCHPD_NAME "org.rpp.GaussianImagePyramidbatchPD" -#define VX_KERNEL_RPP_LAPLACIANIMAGEPYRAMID_NAME "org.rpp.LaplacianImagePyramid" -#define VX_KERNEL_RPP_CANNYEDGEDETECTOR_NAME "org.rpp.CannyEdgeDetector" -#define VX_KERNEL_RPP_HARRISCORNERDETECTOR_NAME "org.rpp.HarrisCornerDetector" -#define VX_KERNEL_RPP_FASTCORNERDETECTOR_NAME "org.rpp.FastCornerDetector" -#define VX_KERNEL_RPP_REMAPBATCHPD_NAME "org.rpp.remap" -#define VX_KERNEL_RPP_TENSORADD_NAME "org.rpp.TensorAdd" -#define VX_KERNEL_RPP_TENSORSUBTRACT_NAME "org.rpp.TensorSubtract" -#define VX_KERNEL_RPP_TENSORMULTIPLY_NAME "org.rpp.TensorMultiply" -#define VX_KERNEL_RPP_TENSORMATRIXMULTIPLY_NAME "org.rpp.TensorMatrixMultiply" -#define VX_KERNEL_RPP_TENSORLOOKUP_NAME "org.rpp.TensorLookup" -#define VX_KERNEL_RPP_COLORTWISTBATCHPD_NAME "org.rpp.ColorTwistbatchPD" -#define VX_KERNEL_RPP_CROPMIRRORNORMALIZEBATCHPD_NAME "org.rpp.CropMirrorNormalizebatchPD" -#define VX_KERNEL_RPP_CROPPD_NAME "org.rpp.CropPD" -#define VX_KERNEL_RPP_RESIZECROPMIRRORPD_NAME "org.rpp.ResizeCropMirrorPD" -#define VX_KERNEL_RPP_RESIZEMIRRORNORMALIZETENSOR_NAME "org.rpp.ResizeMirrorNormalizeTensor" -#define VX_KERNEL_RPP_SEQUENCEREARRANGEBATCHPD_NAME "org.rpp.SequenceRearrangebatchPD" -#define VX_KERNEL_RPP_RESIZETENSOR_NAME "org.rpp.Resizetensor" - //tensor #define VX_KERNEL_RPP_BLEND_NAME "org.rpp.Blend" #define VX_KERNEL_RPP_BLUR_NAME "org.rpp.Blur" diff --git a/amd_openvx_extensions/amd_rpp/include/internal_rpp.h b/amd_openvx_extensions/amd_rpp/include/internal_rpp.h index fed568bac..4468acd0b 100644 --- a/amd_openvx_extensions/amd_rpp/include/internal_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/internal_rpp.h @@ -30,9 +30,6 @@ THE SOFTWARE. #include "rpp/rpp.h" #include "rpp/rppdefs.h" -#if RPP_LEGACY_SUPPORT -#include "rpp/rppi.h" -#endif #if ENABLE_OPENCL #include diff --git a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h index 62a7c87a4..5b75033a8 100644 --- a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h @@ -32,91 +32,6 @@ extern "C" enum vx_kernel_ext_amd_rpp_e { -#if RPP_LEGACY_SUPPORT - VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x0, - VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x1, - VX_KERNEL_RPP_ACCUMULATEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x2, - VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x3, - VX_KERNEL_RPP_ADDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4, - VX_KERNEL_RPP_BLENDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x5, - VX_KERNEL_RPP_BLURBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x6, - VX_KERNEL_RPP_BITWISEANDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x7, - VX_KERNEL_RPP_BITWISENOTBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x8, - VX_KERNEL_RPP_BRIGHTNESSBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0xa, - VX_KERNEL_RPP_BOXFILTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0xb, - VX_KERNEL_RPP_CONTRASTBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0xc, - VX_KERNEL_RPP_COLORTEMPERATUREBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0xd, - VX_KERNEL_RPP_CHANNELEXTRACTBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0xe, - VX_KERNEL_RPP_CHANNELCOMBINEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0xf, - VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x10, - VX_KERNEL_RPP_CANNYEDGEDETECTOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x11, - VX_KERNEL_RPP_COLORTWISTBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x13, - VX_KERNEL_RPP_CROPMIRRORNORMALIZEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x14, - VX_KERNEL_RPP_CROPPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x15, - VX_KERNEL_RPP_COPYBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x16, - VX_KERNEL_RPP_DILATEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x17, - VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x18, - VX_KERNEL_RPP_EXPOSUREBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x19, - VX_KERNEL_RPP_EXCLUSIVEORBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x1a, - VX_KERNEL_RPP_ERODEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x1b, - VX_KERNEL_RPP_FLIPBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x1c, - VX_KERNEL_RPP_FOGBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x1d, - VX_KERNEL_RPP_FISHEYEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x1e, - VX_KERNEL_RPP_FASTCORNERDETECTOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x1f, - VX_KERNEL_RPP_GAMMACORRECTIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x20, - VX_KERNEL_RPP_GAUSSIANFILTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x21, - VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMIDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x22, - VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x23, - VX_KERNEL_RPP_HISTOGRAM = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x24, - VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x25, - VX_KERNEL_RPP_HUEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x26, - VX_KERNEL_RPP_HARRISCORNERDETECTOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x27, - VX_KERNEL_RPP_INCLUSIVEORBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x28, - VX_KERNEL_RPP_JITTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x29, - VX_KERNEL_RPP_LENSCORRECTIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x2a, - VX_KERNEL_RPP_LOOKUPTABLEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x2b, - VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x2c, - VX_KERNEL_RPP_LAPLACIANIMAGEPYRAMID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x2d, - VX_KERNEL_RPP_MAGNITUDEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x2e, - VX_KERNEL_RPP_MULTIPLYBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x2f, - VX_KERNEL_RPP_MAXBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x30, - VX_KERNEL_RPP_MINBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x31, - VX_KERNEL_RPP_MINMAXLOC = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x32, - VX_KERNEL_RPP_MEANSTDDEV = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x33, - VX_KERNEL_RPP_MEDIANFILTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x34, - VX_KERNEL_RPP_NOISEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x35, - VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x36, - VX_KERNEL_RPP_NONLINEARFILTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x37, - VX_KERNEL_RPP_NOPBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x38, - VX_KERNEL_RPP_PIXELATEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x39, - VX_KERNEL_RPP_PHASEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x3a, - VX_KERNEL_RPP_RANDOMSHADOWBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x3b, - VX_KERNEL_RPP_RAINBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x3c, - VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x3d, - VX_KERNEL_RPP_RESIZEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x3e, - VX_KERNEL_RPP_RESIZECROPBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x3f, - VX_KERNEL_RPP_ROTATEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x40, - VX_KERNEL_RPP_REMAPBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x41, - VX_KERNEL_RPP_RESIZECROPMIRRORPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x42, - VX_KERNEL_RPP_SNOWBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x43, - VX_KERNEL_RPP_SUBTRACTBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x44, - VX_KERNEL_RPP_SCALEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x45, - VX_KERNEL_RPP_SATURATIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x46, - VX_KERNEL_RPP_SOBELBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x47, - VX_KERNEL_RPP_SEQUENCEREARRANGEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x48, - VX_KERNEL_RPP_THRESHOLDINGBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x49, - VX_KERNEL_RPP_TENSORADD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4a, - VX_KERNEL_RPP_TENSORSUBTRACT = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4b, - VX_KERNEL_RPP_TENSORMULTIPLY = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4c, - VX_KERNEL_RPP_TENSORMATRIXMULTIPLY = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4d, - VX_KERNEL_RPP_TENSORLOOKUP = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4e, - VX_KERNEL_RPP_VIGNETTEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4f, - VX_KERNEL_RPP_WARPAFFINEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x50, - VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x51, - VX_KERNEL_RPP_RESIZETENSOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x52, - VX_KERNEL_RPP_RESIZEMIRRORNORMALIZETENSOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x53, -#endif - //tensor VX_KERNEL_RPP_BLEND = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x54, VX_KERNEL_RPP_BLUR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x55, diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h index d8e5ecb21..36e2ac3f9 100644 --- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h @@ -72,1312 +72,6 @@ extern "C" /*!*********************************************************************************************************** RPP VX_API_ENTRY C Function NODE *************************************************************************************************************/ - /*! \brief [Graph] Creates a RPP Absolute Difference function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AbsoluteDifferencebatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Accumulate function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [inout] pSrc1 The bidirectional image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data that acts as the first input and output. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulatebatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Accumulate Squared function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [inout] pSrc The bidirectional image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data that acts as the input and output. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulateSquaredbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Accumulate Weighted function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [inout] pSrc1 The bidirectional image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data that acts as the first input and output. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [in] alpha The input array in \ref VX_TYPE_FLOAT32 format containing the alpha data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulateWeightedbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_array alpha, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Add function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AddbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Bitwise And function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BitwiseANDbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Bitwise NOT function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BitwiseNOTbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Blend function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] alpha The input array in \ref VX_TYPE_FLOAT32 format containing the alpha data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BlendbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array alpha, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Blur function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] kernelSize The input array in \ref VX_TYPE_UINT32 format containing the kernel size data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BlurbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Box Filter function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] kernelSize The input array in \ref VX_TYPE_UINT32 format containing the kernel size data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BoxFilterbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Brightness function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] alpha The input array in \ref VX_TYPE_FLOAT32 format containing the alpha data. - * \param [in] beta The input array in \ref VX_TYPE_FLOAT32 format containing the beta data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BrightnessbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array alpha, vx_array beta, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Canny Edge Detector function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] max The input array in unsigned char format containing the max data. - * \param [in] min The input array in unsigned char format containing the min data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_CannyEdgeDetector(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array max, vx_array min, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Channel Combine function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [in] pSrc3 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ChannelCombinebatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pSrc3, vx_image pDst, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Channel Extract function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] extractChannelNumber The input array in \ref VX_TYPE_UINT32 format containing the data for channel number to be extracted. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ChannelExtractbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array extractChannelNumber, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Color Temperature function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] adjustmentValue The input array in \ref VX_TYPE_UINT32 format containing the data for the adjustment value. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ColorTemperaturebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array adjustmentValue, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Color Twist function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] alpha The input array in \ref VX_TYPE_FLOAT32 format containing the alpha data. - * \param [in] beta The input array in \ref VX_TYPE_FLOAT32 format containing the beta data. - * \param [in] hue The input array in \ref VX_TYPE_FLOAT32 format containing the hue data. - * \param [in] sat The input array in \ref VX_TYPE_FLOAT32 format containing the saturation data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ColorTwistbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array alpha, vx_array beta, vx_array hue, vx_array sat, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Contrast function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] min The input array in \ref VX_TYPE_UINT32 format containing the min data. - * \param [in] max The input array in \ref VX_TYPE_UINT32 format containing the max data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ContrastbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array min, vx_array max, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Copy function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [out] pDst The output image data. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_CopybatchPD(vx_graph graph, vx_image pSrc, vx_image pDst); - - /*! - * \brief [Graph] Creates a RPP Crop Mirror Normalize function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] dstImgWidth The input array of batch size in unsigned int containing the width data for output image. - * \param [in] dstImgHeight The input array of batch size in unsigned int containing the height data for output image. - * \param [in] x1 The input array of batch size in VX_TYPE_UINT32 containing the start x-position for each image. - * \param [in] y1 The input array of batch size in VX_TYPE_UINT32t containing the start y-position for each image. - * \param [in] mean The input array of batch size in VX_TYPE_FLOAT containing the mean value. - * \param [in] std_dev The input array of batch size in VX_TYPE_FLOAT containing the standard deviation value. - * \param [in] flip The input array of batch size in VX_TYPE_UINT32 containing the flip value. - * \param [in] chnShift An input scalar value of type VX_TYPE_UINT32 containing the channel shift value. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_CropMirrorNormalizebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array x1, vx_array y1, vx_array mean, vx_array std_dev, vx_array flip, vx_scalar chnShift, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Crop function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] dstImgWidth The input array of batch size in unsigned int containing the width data for output image. - * \param [in] dstImgHeight The input array of batch size in unsigned int containing the height data for output image. - * \param [in] x1 The input array of batch size in VX_TYPE_UINT32 containing the start x-position for each image. - * \param [in] y1 The input array of batch size in VX_TYPE_UINT32t containing the start y-position for each image. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_CropPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array x1, vx_array y1, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Custom Convolution Normalize function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] kernel The input array of mxn size in VX_TYPE_ARRAY containing the data for convolution kernel. - * \param [in] kernelWidth The input array in VX_TYPE_UINT32 containing the kernel width data. - * \param [in] kernelHeight The input array in VX_TYPE_UINT32 containing the kernel height data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_CustomConvolutionbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernel, vx_array kernelWidth, vx_array kernelHeight, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Data Object Copy function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_DataObjectCopybatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Dilate function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] kernelSize The input array in \ref VX_TYPE_UINT32 format containing the kernel size data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_DilatebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Erade function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] kernelSize The input array in \ref VX_TYPE_UINT32 format containing the kernel size data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ErodebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP ExclusiveORbatchPD function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ExclusiveORbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Exposure function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] exposureValue The input array in \ref VX_TYPE_FLOAT32 format containing the exposure value data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ExposurebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array exposureValue, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Fast Corner Detector function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] noOfPixels The input array in \ref VX_TYPE_UINT32 format containing minimum number of contiguous pixel to detect a corner. - * \param [in] threshold The input array of batch size in \ref VX_TYPE_UINT8 format containing the intensity-difference threshold for corners. - * \param [in] nonMaxKernelSize The input array in \ref VX_TYPE_UINT32 format containing the nonmax suppression kernel size. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_FastCornerDetector(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array noOfPixels, vx_array threshold, vx_array nonMaxKernelSize, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Fish Eye function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_FisheyebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Flip function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] flipAxis The input array in \ref VX_TYPE_FLOAT32 format containing the flip axis data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_FlipbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array flipAxis, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Fog function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] fogValue The input array in \ref VX_TYPE_FLOAT32 format containing the fog value data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_FogbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array fogValue, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Gamma Correction function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] gamma The input array in \ref VX_TYPE_FLOAT32 format containing the gamma data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_GammaCorrectionbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array gamma, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Gaussian Filter function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] stdDev The input array in \ref VX_TYPE_FLOAT32 format containing the standard deviation data. - * \param [in] kernelSize The input array in \ref VX_TYPE_UINT32 format containing the kernel size data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_GaussianFilterbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array stdDev, vx_array kernelSize, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Gaussian Image Pyramid function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] stdDev The input array in \ref VX_TYPE_FLOAT32 format containing the standard deviation data. - * \param [in] kernelSize The input array in \ref VX_TYPE_UINT32 format containing the kernel size data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_GaussianImagePyramidbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array stdDev, vx_array kernelSize, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP HarrisCornerDetector function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] gaussianKernelSize The input array of batch size in \ref VX_TYPE_UINT32 format containing gaussian kernel size. - * \param [in] stdDev The input array in float format containing the standard deviation data. - * \param [in] kernelSize The input array of batch size in \ref VX_TYPE_UINT32 format containing the corner detection kernel size. - * \param [in] kValue The input array in \ref VX_TYPE_FLOAT32 format containing the 'k' value. - * \param [in] threshold The input array of batch size in \ref VX_TYPE_FLOAT32 format containing the threshold for corners. - * \param [in] nonMaxKernelSize The input array in \ref VX_TYPE_UINT32 format containing the nonmax suppression kernel size. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HarrisCornerDetector(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array gaussianKernelSize, vx_array stdDev, vx_array kernelSize, vx_array kValue, vx_array threshold, vx_array nonMaxKernelSize, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Gaussian Image Pyramid function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [inout] pSrc The bidirectional image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data that acts as the input and output.. - * \param [in] outputHistogram The input array of given size in unsigned int containing the output histogram data. - * \param [in] bins The input scalar in unsigned int to set bins value. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Histogram(vx_graph graph, vx_image pSrc, vx_array outputHistogram, vx_scalar bins); - - /*! \brief [Graph] Creates a RPP Histogram Balance function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HistogramBalancebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Histogram Equalize function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HistogramEqualizebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Gamma Correction function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] hueShift The input array in \ref VX_TYPE_FLOAT32 format containing the hue shift data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HuebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array hueShift, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Inclusive Or function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_InclusiveORbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Jitter function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] kernelSize The input array in \ref VX_TYPE_UINT32 format containing the kernel size data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_JitterbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Laplacian Image Pyramid function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] stdDev The input array in float format containing the standard deviation data. - * \param [in] kernelSize The input array in \ref VX_TYPE_UINT32 format containing the kernel size data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LaplacianImagePyramid(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array stdDev, vx_array kernelSize, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Lens Correction function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] strength The input array in \ref VX_TYPE_FLOAT32 format containing the strength data. - * \param [in] zoom The input array in \ref VX_TYPE_FLOAT32 format containing the zoom data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LensCorrectionbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array strength, vx_array zoom, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Local Binary Pattern function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LocalBinaryPatternbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Lookup Table function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] lutPtr The input array in unsigned char format containing the strength data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LookUpTablebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array lutPtr, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Magnitude function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MagnitudebatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Max function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MaxbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Mean Standard Deviation function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [out] mean The output scalar of \ref VX_TYPE_FLOAT32 for the computed mean of the image. - * \param [out] stdDev The output scalar of \ref VX_TYPE_FLOAT32 for the computed standard deviation of the image. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MeanStddev(vx_graph graph, vx_image pSrc, vx_scalar mean, vx_scalar stdDev); - - /*! \brief [Graph] Creates a RPP Median Filter function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] kernelSize The input array in \ref VX_TYPE_UINT32 format containing the kernel size data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MedianFilterbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Min function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MinbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Min Max Location function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [out] min The output of type \ref VX_TYPE_UINT8 for the computed min. - * \param [out] max The output of type \ref VX_TYPE_UINT8 for the computed max. - * \param [out] minLoc The output of type \ref VX_TYPE_UINT32 for the index of the computed min. - * \param [out] maxLoc The output of type \ref VX_TYPE_UINT32 for the index of the computed max. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MinMaxLoc(vx_graph graph, vx_image pSrc, vx_scalar min, vx_scalar max, vx_scalar minLoc, vx_scalar maxLoc); - - /*! \brief [Graph] Creates a RPP Multiply function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MultiplybatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP NoisebatchPD function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] noiseProbability input array of batch size in VX_TYPE_FLOAT32 containing the noise probability value. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_NoisebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array noiseProbability, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP NonLinearFilterbatchPD function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] kernelSize The input array in \ref VX_TYPE_UINT32 format containing the kernel size data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors pre - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_NonLinearFilterbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP NonMaxSupressionbatchPD function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] kernelSize The input array in \ref VX_TYPE_UINT32 format containing the kernel size data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors pre - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_NonMaxSupressionbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP NOP function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [out] pDst The output image data. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_NopbatchPD(vx_graph graph, vx_image pSrc, vx_image pDst); - - /*! - * \brief [Graph] Creates a RPP Phase function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_PhasebatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Pixelate function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_PixelatebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Rain function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] rainValue input array of batch size in VX_TYPE_FLOAT32 containing the rain value. - * \param [in] rainWidth input array of batch size in VX_TYPE_UINT32 containing the rain width. - * \param [in] rainHeight input array of batch size in VX_TYPE_UINT32 containing the rain height. - * \param [in] rainTransperancy input array of batch size in VX_TYPE_FLOAT32 containing the rain transparency. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RainbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array rainValue, vx_array rainWidth, vx_array rainHeight, vx_array rainTransperancy, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Random Crop Letter Box function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] dstImgWidth The input array of batch size in unsigned int containing the width data for output image. - * \param [in] dstImgHeight The input array of batch size in unsigned int containing the height data for output image. - * \param [in] x1 The input array of batch size in VX_TYPE_UINT32 containing the start x-position for each image. - * \param [in] y1 The input array of batch size in VX_TYPE_UINT32t containing the start y-position for each image. - * \param [in] x2 The input array of batch size in VX_TYPE_FLOAT containing the end x-position for each image. - * \param [in] y2 The input array of batch size in VX_TYPE_FLOAT containing the end y-position for each image. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RandomCropLetterBoxbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array x1, vx_array y1, vx_array x2, vx_array y2, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Shadow function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] x1 The input array of batch size in VX_TYPE_UINT32 containing the start x-position for each image. - * \param [in] y1 The input array of batch size in VX_TYPE_UINT32t containing the start y-position for each image. - * \param [in] x2 The input array of batch size in VX_TYPE_FLOAT containing the end x-position for each image. - * \param [in] y2 The input array of batch size in VX_TYPE_FLOAT containing the end y-position for each image. - * \param [in] numberOfShadows The input array of batch size in VX_TYPE_UINT32 containing number-of-shadows. - * \param [in] maxSizeX The input array of batch size in VX_TYPE_UINT32 containing max-shadow-width. - * \param [in] maxSizeY The input array of batch size in VX_TYPE_UINT32 containing max-shadow-height. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RandomShadowbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array x1, vx_array y1, vx_array x2, vx_array y2, vx_array numberOfShadows, vx_array maxSizeX, vx_array maxSizeY, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Remap function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] rowRemap The input array of batch size in VX_TYPE_UINT32 containing the row numbers for every pixel in the input. - * \param [in] colRemap The input array of batch size in VX_TYPE_UINT32 containing the column numbers for every pixel in the input. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_remap(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array rowRemap, vx_array colRemap, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Resize function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] dstImgWidth The input array of batch size in unsigned int containing the width data for output image. - * \param [in] dstImgHeight The input array of batch size in unsigned int containing the height data for output image. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ResizebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Resize Crop function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] dstImgWidth The input array of batch size in unsigned int containing the width data for output image. - * \param [in] dstImgHeight The input array of batch size in unsigned int containing the height data for output image. - * \param [in] x1 The input array of batch size in VX_TYPE_UINT32 containing the start x-position for each image. - * \param [in] y1 The input array of batch size in VX_TYPE_UINT32t containing the start y-position for each image. - * \param [in] x2 The input array of batch size in VX_TYPE_FLOAT containing the end x-position for each image. - * \param [in] y2 The input array of batch size in VX_TYPE_FLOAT containing the end y-position for each image. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ResizeCropbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array x1, vx_array y1, vx_array x2, vx_array y2, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Resize Crop Mirror function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] dstImgWidth The input array of batch size in unsigned int containing the width data for output image. - * \param [in] dstImgHeight The input array of batch size in unsigned int containing the height data for output image. - * \param [in] x1 The input array of batch size in VX_TYPE_UINT32 containing the start x-position for each image. - * \param [in] y1 The input array of batch size in VX_TYPE_UINT32t containing the start y-position for each image. - * \param [in] x2 The input array of batch size in VX_TYPE_FLOAT containing the end x-position for each image. - * \param [in] y2 The input array of batch size in VX_TYPE_FLOAT containing the end y-position for each image. - * \param [in] mirrorFlag The input array of batch size in VX_TYPE_FLOAT containing the mirror flag (true/false) for each image. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ResizeCropMirrorPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array x1, vx_array y1, vx_array x2, vx_array y2, vx_array mirrorFlag, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Resize Mirror Normalize function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] dstImgWidth The input array of batch size in unsigned int containing the width data for output image. - * \param [in] dstImgHeight The input array of batch size in unsigned int containing the height data for output image. - * \param [in] mean The input array of batch size in VX_TYPE_FLOAT containing the mean value. - * \param [in] std_dev The input array of batch size in VX_TYPE_FLOAT containing the standard deviation value. - * \param [in] chnShift An input scalar value of type VX_TYPE_UINT32 containing the channel shift value. - * \param [in] flip The input array of batch size in VX_TYPE_FLOAT containing the flip value. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ResizeMirrorNormalizeTensor(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array mean, vx_array std_dev, vx_array flip, vx_scalar chnShift, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Rotate function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] dstImgWidth The input array of batch size in unsigned int containing the width data for output image. - * \param [in] dstImgHeight The input array of batch size in unsigned int containing the height data for output image. - * \param [in] angle input array of batch size in VX_TYPE_FLOAT32 containing the angle of rotation. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RotatebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array angle, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Saturation function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] saturationFactor input array of batch size in VX_TYPE_FLOAT32 containing the saturation factor. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SaturationbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array saturationFactor, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Scale function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] dstImgWidth The input array of batch size in unsigned int containing the width data for output image. - * \param [in] dstImgHeight The input array of batch size in unsigned int containing the height data for output image. - * \param [in] percentage input array of batch size in VX_TYPE_FLOAT32 containing the scaling percentage value. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ScalebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array percentage, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Snow function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] snowValue input array of batch size in VX_TYPE_FLOAT32 containing the snow value. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SnowbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array snowValue, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Sobel function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] sobelType input array of batch size in VX_TYPE_UINT32 containing the sobel type value. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SobelbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array sobelType, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Subtract function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SubtractbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Tensor Add function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [out] pDst The output image data. - * \param [in] tensorDimensions The input scalar in \ref VX_TYPE_UINT32 to set number of dimensions in tensor. - * \param [in] tensorDimensionValues The input array in \ref VX_TYPE_UINT8 of size tensorDimensions length containing size of each dimension. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_TensorAdd(vx_graph graph, vx_array pSrc1, vx_array pSrc2, vx_array pDst, vx_scalar tensorDimensions, vx_array tensorDimensionValues); - - /*! - * \brief [Graph] Creates a RPP Tensor Lookup function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [out] pDst The output image data. - * \param [in] lutPtr The input array in \ref VX_TYPE_UINT8 of batch size containing look-up table for each image. - * \param [in] tensorDimensions The input scalar in \ref VX_TYPE_UINT32 to set number of dimensions in tensor. - * \param [in] tensorDimensionValues The input array in \ref VX_TYPE_UINT8 of size tensorDimensions length containing size of each dimension. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_TensorLookup(vx_graph graph, vx_array pSrc, vx_array pDst, vx_array lutPtr, vx_scalar tensorDimensions, vx_array tensorDimensionValues); - - /*! - * \brief [Graph] Creates a RPP Tensor Matrix Multiply function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [out] pDst The output image data. - * \param [in] tensorDimensionValues1 The input array in \ref VX_TYPE_UINT8 of containing dimensions of first tensor. - * \param [in] tensorDimensionValues2 The input array in \ref VX_TYPE_UINT8 of containing dimensions of second tensor. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_TensorMatrixMultiply(vx_graph graph, vx_array pSrc1, vx_array pSrc2, vx_array pDst, vx_array tensorDimensionValues1, vx_array tensorDimensionValues2); - - /*! - * \brief [Graph] Creates a RPP Tensor Multiply function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [out] pDst The output image data. - * \param [in] tensorDimensions The input scalar in \ref VX_TYPE_UINT32 to set number of dimensions in tensor. - * \param [in] tensorDimensionValues The input array in \ref VX_TYPE_UINT8 of size tensorDimensions length containing size of each dimension. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_TensorMultiply(vx_graph graph, vx_array pSrc1, vx_array pSrc2, vx_array pDst, vx_scalar tensorDimensions, vx_array tensorDimensionValues); - - /*! - * \brief [Graph] Creates a RPP Tensor Subtract function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc1 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] pSrc2 The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [out] pDst The output image data. - * \param [in] tensorDimensions The input scalar in \ref VX_TYPE_UINT32 to set number of dimensions in tensor. - * \param [in] tensorDimensionValues The input array in \ref VX_TYPE_UINT8 of size tensorDimensions length containing size of each dimension. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_TensorSubtract(vx_graph graph, vx_array pSrc1, vx_array pSrc2, vx_array pDst, vx_scalar tensorDimensions, vx_array tensorDimensionValues); - - /*! - * \brief [Graph] Creates a RPP Threshold function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] min The input array in unsigned char format containing the min data. - * \param [in] max The input array in unsigned char format containing the max data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ThresholdingbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array min, vx_array max, vx_uint32 nbatchSize); - - /*! \brief [Graph] Creates a RPP Max function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] stdDev The input array in VX_TYPE_FLOAT32 format containing the standard deviation data. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_VignettebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array stdDev, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Warp Affine function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] dstImgWidth The input array of batch size in unsigned int containing the width data for output image. - * \param [in] dstImgHeight The input array of batch size in unsigned int containing the height data for output image. - * \param [in] affine The input array of batch size in float containing values defining the affine-transformation matrix. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_WarpAffinebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array affine, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Warp Perspective function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] dstImgWidth The input array of batch size in unsigned int containing the width data for output image. - * \param [in] dstImgHeight The input array of batch size in unsigned int containing the height data for output image. - * \param [in] perspective The input array of batch size in float containing values defining the perspective-transformation matrix. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_WarpPerspectivebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array perspective, vx_uint32 nbatchSize); - - /*! - * \brief [Graph] Creates a RPP Sequence Rearrange function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [out] pDst The output image data. - * \param [in] newOrder The input array of batch size in \ref VX_TYPE_UINT32 containing the new order for the output. - * \param [in] newSequenceLength The input scalar in \ref VX_TYPE_UINT32 containing the new sequence length. - * \param [in] sequenceLength The input scalar in \ref VX_TYPE_UINT32 containing the original sequence length. - * \param [in] sequenceCount The input scalar in \ref VX_TYPE_UINT32 containing the sequence count. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SequenceRearrangebatchPD(vx_graph graph, vx_image pSrc, vx_image pDst, vx_array newOrder, vx_uint32 newSequenceLength, vx_uint32 sequenceLength, vx_uint32 sequenceCount); - - /*! - * \brief [Graph] Creates a RPP Resize Tensor function node. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input image in \ref VX_DF_IMAGE_U8 or \ref VX_DF_IMAGE_RGB format data. - * \param [in] srcImgWidth The input array of batch size in unsigned int containing the image width data. - * \param [in] srcImgHeight The input array of batch size in unsigned int containing the image height data. - * \param [out] pDst The output image data. - * \param [in] dstImgWidth The input array of batch size in unsigned int containing the width data for output image. - * \param [in] dstImgHeight The input array of batch size in unsigned int containing the height data for output image. - * \param [in] interpolation_type The resize interpolation type in \ref VX_TYPE_INT32 format containing the type of interpolation. - * \param [in] nbatchSize The input scalar in \ref VX_TYPE_UINT32 to set batch size. - * \return vx_node. - * \returns A node reference \ref vx_node. Any possible errors preventing a - * successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Resizetensor(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_int32 interpolation_type, vx_uint32 nbatchSize); - // Tensor Augmentations /*! \brief [Graph] Creates a Brightness function node. * \ingroup group_amd_rpp @@ -1863,13 +557,15 @@ extern "C" * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 format data. * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format. * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 format data. - * \param [in] pSnowValue The input array in \ref VX_TYPE_FLOAT32 format containing the snow value data. + * \param [in] pBrightnessCoefficient The input array in \ref VX_TYPE_FLOAT32 format containing the brightness coefficient (per image). Valid range: (1, 4]. + * \param [in] pSnowThreshold The input array in \ref VX_TYPE_FLOAT32 format containing the snow threshold (per image). Valid range: (0, 1]. + * \param [in] pDarkMode The input array in \ref VX_TYPE_INT32 format containing the dark mode enable/disable flag (per image). Valid values: 0/1. * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor. * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor. * \param [in] roiType The type of roi \ref VX_TYPE_INT32 denotes whether source roi is of XYWH/LTRB type. * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus. */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtRppSnow(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pSnowValue, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType); + SHARED_PUBLIC vx_node VX_API_CALL vxExtRppSnow(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pBrightnessCoefficient, vx_array pSnowThreshold, vx_array pDarkMode, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType); /*! \brief [Graph] Creates a Pixelate function node. * \ingroup group_amd_rpp diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp_version.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp_version.h index 3f3b414ce..ca7a6ccd8 100644 --- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp_version.h +++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp_version.h @@ -38,8 +38,8 @@ SOFTWARE. extern "C" { #endif #define VX_EXT_RPP_VERSION_MAJOR 3 -#define VX_EXT_RPP_VERSION_MINOR 1 -#define VX_EXT_RPP_VERSION_PATCH 7 +#define VX_EXT_RPP_VERSION_MINOR 2 +#define VX_EXT_RPP_VERSION_PATCH 0 #define VX_EXT_RPP_CHECK_VERSION(major, minor, patch) \ ((VX_EXT_RPP_VERSION_MAJOR > (major)) || \ diff --git a/amd_openvx_extensions/amd_rpp/source/image/AbsoluteDifferencebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/AbsoluteDifferencebatchPD.cpp deleted file mode 100644 index 9f9d10d33..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/AbsoluteDifferencebatchPD.cpp +++ /dev/null @@ -1,276 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct AbsoluteDifferencebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshAbsoluteDifferencebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, AbsoluteDifferencebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateAbsoluteDifferencebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AbsoluteDifferencebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AbsoluteDifferencebatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processAbsoluteDifferencebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - AbsoluteDifferencebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshAbsoluteDifferencebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_absolute_difference_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_absolute_difference_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshAbsoluteDifferencebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_absolute_difference_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_absolute_difference_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshAbsoluteDifferencebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_absolute_difference_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_absolute_difference_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeAbsoluteDifferencebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AbsoluteDifferencebatchPDLocalData *data = new AbsoluteDifferencebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshAbsoluteDifferencebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeAbsoluteDifferencebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AbsoluteDifferencebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status AbsoluteDifferencebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AbsoluteDifferencebatchPD", - VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPD, - processAbsoluteDifferencebatchPD, - 7, - validateAbsoluteDifferencebatchPD, - initializeAbsoluteDifferencebatchPD, - uninitializeAbsoluteDifferencebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/AccumulateSquaredbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/AccumulateSquaredbatchPD.cpp deleted file mode 100644 index a743f87ea..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/AccumulateSquaredbatchPD.cpp +++ /dev/null @@ -1,253 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct AccumulateSquaredbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; -#if ENABLE_OPENCL - cl_mem cl_pSrc; -#elif ENABLE_HIP - void *hip_pSrc; -#endif -}; - -static vx_status VX_CALLBACK refreshAccumulateSquaredbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, AccumulateSquaredbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateAccumulateSquaredbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateSquaredbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processAccumulateSquaredbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - AccumulateSquaredbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshAccumulateSquaredbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_accumulate_squared_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_accumulate_squared_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshAccumulateSquaredbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_accumulate_squared_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_accumulate_squared_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshAccumulateSquaredbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_accumulate_squared_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_accumulate_squared_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeAccumulateSquaredbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AccumulateSquaredbatchPDLocalData *data = new AccumulateSquaredbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshAccumulateSquaredbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeAccumulateSquaredbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AccumulateSquaredbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status AccumulateSquaredbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulateSquaredbatchPD", - VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPD, - processAccumulateSquaredbatchPD, - 5, - validateAccumulateSquaredbatchPD, - initializeAccumulateSquaredbatchPD, - uninitializeAccumulateSquaredbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/AccumulateWeightedbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/AccumulateWeightedbatchPD.cpp deleted file mode 100644 index a0c69cff9..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/AccumulateWeightedbatchPD.cpp +++ /dev/null @@ -1,273 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct AccumulateWeightedbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - vx_float32 *alpha; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; -#endif -}; - -static vx_status VX_CALLBACK refreshAccumulateWeightedbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, AccumulateWeightedbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateAccumulateWeightedbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateWeightedbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateWeightedbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processAccumulateWeightedbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - AccumulateWeightedbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshAccumulateWeightedbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_accumulate_weighted_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, data->alpha, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_accumulate_weighted_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, data->alpha, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshAccumulateWeightedbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_accumulate_weighted_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, data->alpha, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_accumulate_weighted_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, data->alpha, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshAccumulateWeightedbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_accumulate_weighted_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->alpha, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_accumulate_weighted_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->alpha, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeAccumulateWeightedbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AccumulateWeightedbatchPDLocalData *data = new AccumulateWeightedbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - refreshAccumulateWeightedbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeAccumulateWeightedbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AccumulateWeightedbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - free(data->alpha); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status AccumulateWeightedbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulateWeightedbatchPD", - VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPD, - processAccumulateWeightedbatchPD, - 7, - validateAccumulateWeightedbatchPD, - initializeAccumulateWeightedbatchPD, - uninitializeAccumulateWeightedbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/AccumulatebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/AccumulatebatchPD.cpp deleted file mode 100644 index 08a0d995f..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/AccumulatebatchPD.cpp +++ /dev/null @@ -1,268 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct AccumulatebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc1; - RppPtr_t pSrc2; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; -#endif -}; - -static vx_status VX_CALLBACK refreshAccumulatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, AccumulatebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateAccumulatebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulatebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulatebatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processAccumulatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - AccumulatebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshAccumulatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_accumulate_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_accumulate_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshAccumulatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_accumulate_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_accumulate_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshAccumulatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_accumulate_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_accumulate_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeAccumulatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AccumulatebatchPDLocalData *data = new AccumulatebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshAccumulatebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeAccumulatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AccumulatebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status AccumulatebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulatebatchPD", - VX_KERNEL_RPP_ACCUMULATEBATCHPD, - processAccumulatebatchPD, - 6, - validateAccumulatebatchPD, - initializeAccumulatebatchPD, - uninitializeAccumulatebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/AddbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/AddbatchPD.cpp deleted file mode 100644 index 8c20d5f86..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/AddbatchPD.cpp +++ /dev/null @@ -1,273 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct AddbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshAddbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, AddbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateAddbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AddbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AddbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processAddbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - AddbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshAddbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_add_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_add_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshAddbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_add_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_add_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshAddbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_add_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_add_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeAddbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AddbatchPDLocalData *data = new AddbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshAddbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeAddbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AddbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status AddbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AddbatchPD", - VX_KERNEL_RPP_ADDBATCHPD, - processAddbatchPD, - 7, - validateAddbatchPD, - initializeAddbatchPD, - uninitializeAddbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/BitwiseANDbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/BitwiseANDbatchPD.cpp deleted file mode 100644 index 2e124292d..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/BitwiseANDbatchPD.cpp +++ /dev/null @@ -1,276 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct BitwiseANDbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBitwiseANDbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, BitwiseANDbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBitwiseANDbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseANDbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseANDbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBitwiseANDbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BitwiseANDbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshBitwiseANDbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_bitwise_AND_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_bitwise_AND_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshBitwiseANDbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_bitwise_AND_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_bitwise_AND_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshBitwiseANDbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_bitwise_AND_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_bitwise_AND_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBitwiseANDbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseANDbatchPDLocalData *data = new BitwiseANDbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshBitwiseANDbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBitwiseANDbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseANDbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status BitwiseANDbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BitwiseANDbatchPD", - VX_KERNEL_RPP_BITWISEANDBATCHPD, - processBitwiseANDbatchPD, - 7, - validateBitwiseANDbatchPD, - initializeBitwiseANDbatchPD, - uninitializeBitwiseANDbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/BitwiseNOTbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/BitwiseNOTbatchPD.cpp deleted file mode 100644 index 6b963771c..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/BitwiseNOTbatchPD.cpp +++ /dev/null @@ -1,259 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct BitwiseNOTbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBitwiseNOTbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, BitwiseNOTbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBitwiseNOTbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseNOTbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBitwiseNOTbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BitwiseNOTbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshBitwiseNOTbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_bitwise_NOT_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_bitwise_NOT_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshBitwiseNOTbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_bitwise_NOT_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_bitwise_NOT_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshBitwiseNOTbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_bitwise_NOT_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_bitwise_NOT_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBitwiseNOTbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseNOTbatchPDLocalData *data = new BitwiseNOTbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBitwiseNOTbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBitwiseNOTbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseNOTbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status BitwiseNOTbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BitwiseNOTbatchPD", - VX_KERNEL_RPP_BITWISENOTBATCHPD, - processBitwiseNOTbatchPD, - 6, - validateBitwiseNOTbatchPD, - initializeBitwiseNOTbatchPD, - uninitializeBitwiseNOTbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/BlendbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/BlendbatchPD.cpp deleted file mode 100644 index a18cc2f5f..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/BlendbatchPD.cpp +++ /dev/null @@ -1,280 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct BlendbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; - vx_float32 *alpha; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBlendbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, BlendbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_float32), data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBlendbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BlendbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BlendbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBlendbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BlendbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshBlendbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_blend_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->alpha, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_blend_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->alpha, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshBlendbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_blend_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->alpha, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_blend_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->alpha, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshBlendbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_blend_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->alpha, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_blend_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->alpha, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBlendbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlendbatchPDLocalData *data = new BlendbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - refreshBlendbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBlendbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlendbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->alpha); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status BlendbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BlendbatchPD", - VX_KERNEL_RPP_BLENDBATCHPD, - processBlendbatchPD, - 8, - validateBlendbatchPD, - initializeBlendbatchPD, - uninitializeBlendbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/BlurbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/BlurbatchPD.cpp deleted file mode 100644 index 45ead2f5d..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/BlurbatchPD.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct BlurbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBlurbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, BlurbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBlurbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BlurbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBlurbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BlurbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshBlurbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_blur_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_blur_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshBlurbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_blur_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_blur_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshBlurbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_blur_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_blur_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBlurbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlurbatchPDLocalData *data = new BlurbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshBlurbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBlurbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlurbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->kernelSize); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status BlurbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BlurbatchPD", - VX_KERNEL_RPP_BLURBATCHPD, - processBlurbatchPD, - 7, - validateBlurbatchPD, - initializeBlurbatchPD, - uninitializeBlurbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/BoxFilterbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/BoxFilterbatchPD.cpp deleted file mode 100644 index 015e666a2..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/BoxFilterbatchPD.cpp +++ /dev/null @@ -1,267 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct BoxFilterbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBoxFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, BoxFilterbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBoxFilterbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BoxFilterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBoxFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BoxFilterbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshBoxFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_box_filter_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_box_filter_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshBoxFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_box_filter_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_box_filter_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshBoxFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_box_filter_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_box_filter_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBoxFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BoxFilterbatchPDLocalData *data = new BoxFilterbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshBoxFilterbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBoxFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BoxFilterbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->kernelSize); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status BoxFilterbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BoxFilterbatchPD", - VX_KERNEL_RPP_BOXFILTERBATCHPD, - processBoxFilterbatchPD, - 7, - validateBoxFilterbatchPD, - initializeBoxFilterbatchPD, - uninitializeBoxFilterbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/BrightnessbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/BrightnessbatchPD.cpp deleted file mode 100644 index e8d2df22f..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/BrightnessbatchPD.cpp +++ /dev/null @@ -1,269 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct BrightnessbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *alpha; - vx_float32 *beta; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBrightnessbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, BrightnessbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_float32), data->beta, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBrightnessbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BrightnessbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBrightnessbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BrightnessbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshBrightnessbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_brightness_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->alpha, data->beta, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_brightness_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->alpha, data->beta, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshBrightnessbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_brightness_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->alpha, data->beta, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_brightness_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->alpha, data->beta, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshBrightnessbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_brightness_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->alpha, data->beta, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_brightness_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->alpha, data->beta, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBrightnessbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BrightnessbatchPDLocalData *data = new BrightnessbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - data->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->beta = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshBrightnessbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBrightnessbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BrightnessbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - free(data->alpha); - free(data->beta); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - return VX_SUCCESS; -} - -vx_status BrightnessbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BrightnessbatchPD", - VX_KERNEL_RPP_BRIGHTNESSBATCHPD, - processBrightnessbatchPD, - 8, - validateBrightnessbatchPD, - initializeBrightnessbatchPD, - uninitializeBrightnessbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/CannyEdgeDetector.cpp b/amd_openvx_extensions/amd_rpp/source/image/CannyEdgeDetector.cpp deleted file mode 100644 index f16bce930..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/CannyEdgeDetector.cpp +++ /dev/null @@ -1,268 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct CannyEdgeDetectorLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp8u *max; - Rpp8u *min; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshCannyEdgeDetector(vx_node node, const vx_reference *parameters, vx_uint32 num, CannyEdgeDetectorLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp8u), data->max, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp8u), data->min, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateCannyEdgeDetector(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: CannyEdgeDetector: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processCannyEdgeDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - CannyEdgeDetectorLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshCannyEdgeDetector(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_canny_edge_detector_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->max, data->min, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_canny_edge_detector_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->max, data->min, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshCannyEdgeDetector(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_canny_edge_detector_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->max, data->min, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_canny_edge_detector_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->max, data->min, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshCannyEdgeDetector(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_canny_edge_detector_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->max, data->min, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_canny_edge_detector_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->max, data->min, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeCannyEdgeDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CannyEdgeDetectorLocalData *data = new CannyEdgeDetectorLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - data->max = (Rpp8u *)malloc(sizeof(Rpp8u) * data->nbatchSize); - data->min = (Rpp8u *)malloc(sizeof(Rpp8u) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshCannyEdgeDetector(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeCannyEdgeDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CannyEdgeDetectorLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - free(data->max); - free(data->min); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - return VX_SUCCESS; -} - -vx_status CannyEdgeDetector_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CannyEdgeDetector", - VX_KERNEL_RPP_CANNYEDGEDETECTOR, - processCannyEdgeDetector, - 8, - validateCannyEdgeDetector, - initializeCannyEdgeDetector, - uninitializeCannyEdgeDetector); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/ChannelCombinebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/ChannelCombinebatchPD.cpp deleted file mode 100644 index aa66867b6..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/ChannelCombinebatchPD.cpp +++ /dev/null @@ -1,286 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ChannelCombinebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pSrc3; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pSrc3; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pSrc3; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshChannelCombinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ChannelCombinebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc3, sizeof(data->cl_pSrc3))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[5], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc3, sizeof(data->hip_pSrc3))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[5], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc3, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[5], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateChannelCombinebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelCombinebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelCombinebatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 4); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelCombinebatchPD: image: #4 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 5); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[5], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[5], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[5], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processChannelCombinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ChannelCombinebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - // #if ENABLE_OPENCL - // refreshChannelCombinebatchPD(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_channel_combine_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc1),static_cast(data->cl_pSrc2),static_cast(data->cl_pSrc3),data->srcDimensions,data->maxSrcDimensions,static_cast(data->cl_pDst),data->nbatchSize,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_channel_combine_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc1),static_cast(data->cl_pSrc2),static_cast(data->cl_pSrc3),data->srcDimensions,data->maxSrcDimensions,static_cast(data->cl_pDst),data->nbatchSize,data->rppHandle); - // } - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #elif ENABLE_HIP - // refreshChannelCombinebatchPD(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_channel_combine_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc1),static_cast(data->hip_pSrc2),static_cast(data->hip_pSrc3),data->srcDimensions,data->maxSrcDimensions,static_cast(data->hip_pDst),data->nbatchSize,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_channel_combine_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc1),static_cast(data->hip_pSrc2),static_cast(data->hip_pSrc3),data->srcDimensions,data->maxSrcDimensions,static_cast(data->hip_pDst),data->nbatchSize,data->rppHandle); - // } - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #endif - return VX_ERROR_NOT_IMPLEMENTED; - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshChannelCombinebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_channel_combine_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->pSrc3, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_channel_combine_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->pSrc3, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeChannelCombinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ChannelCombinebatchPDLocalData *data = new ChannelCombinebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshChannelCombinebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeChannelCombinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ChannelCombinebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status ChannelCombinebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ChannelCombinebatchPD", - VX_KERNEL_RPP_CHANNELCOMBINEBATCHPD, - processChannelCombinebatchPD, - 8, - validateChannelCombinebatchPD, - initializeChannelCombinebatchPD, - uninitializeChannelCombinebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/ChannelExtractbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/ChannelExtractbatchPD.cpp deleted file mode 100644 index ee69fb12d..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/ChannelExtractbatchPD.cpp +++ /dev/null @@ -1,261 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ChannelExtractbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *extractChannelNumber; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshChannelExtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ChannelExtractbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->extractChannelNumber, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateChannelExtractbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelExtractbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processChannelExtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ChannelExtractbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - // #if ENABLE_OPENCL - // refreshChannelExtractbatchPD(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_channel_extract_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc),data->srcDimensions,data->maxSrcDimensions,static_cast(data->cl_pDst),data->extractChannelNumber,data->nbatchSize,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_channel_extract_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc),data->srcDimensions,data->maxSrcDimensions,static_cast(data->cl_pDst),data->extractChannelNumber,data->nbatchSize,data->rppHandle); - // } - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #elif ENABLE_HIP - // refreshChannelExtractbatchPD(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_channel_extract_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc),data->srcDimensions,data->maxSrcDimensions,static_cast(data->hip_pDst),data->extractChannelNumber,data->nbatchSize,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_channel_extract_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc),data->srcDimensions,data->maxSrcDimensions,static_cast(data->hip_pDst),data->extractChannelNumber,data->nbatchSize,data->rppHandle); - // } - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #endif - return VX_ERROR_NOT_IMPLEMENTED; - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshChannelExtractbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_channel_extract_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->extractChannelNumber, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_channel_extract_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->extractChannelNumber, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeChannelExtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ChannelExtractbatchPDLocalData *data = new ChannelExtractbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->extractChannelNumber = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshChannelExtractbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeChannelExtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ChannelExtractbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->extractChannelNumber); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status ChannelExtractbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ChannelExtractbatchPD", - VX_KERNEL_RPP_CHANNELEXTRACTBATCHPD, - processChannelExtractbatchPD, - 7, - validateChannelExtractbatchPD, - initializeChannelExtractbatchPD, - uninitializeChannelExtractbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/ColorTemperaturebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/ColorTemperaturebatchPD.cpp deleted file mode 100644 index fb28fc86b..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/ColorTemperaturebatchPD.cpp +++ /dev/null @@ -1,264 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ColorTemperaturebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_int32 *adjustmentValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshColorTemperaturebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ColorTemperaturebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_int32), data->adjustmentValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateColorTemperaturebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ColorTemperaturebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processColorTemperaturebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ColorTemperaturebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshColorTemperaturebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_color_temperature_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->adjustmentValue, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_color_temperature_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->adjustmentValue, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshColorTemperaturebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_color_temperature_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->adjustmentValue, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_color_temperature_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->adjustmentValue, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshColorTemperaturebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_color_temperature_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->adjustmentValue, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_color_temperature_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->adjustmentValue, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeColorTemperaturebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ColorTemperaturebatchPDLocalData *data = new ColorTemperaturebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->adjustmentValue = (vx_int32 *)malloc(sizeof(vx_int32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshColorTemperaturebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeColorTemperaturebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ColorTemperaturebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->adjustmentValue); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status ColorTemperaturebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ColorTemperaturebatchPD", - VX_KERNEL_RPP_COLORTEMPERATUREBATCHPD, - processColorTemperaturebatchPD, - 7, - validateColorTemperaturebatchPD, - initializeColorTemperaturebatchPD, - uninitializeColorTemperaturebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/ColorTwistbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/ColorTwistbatchPD.cpp deleted file mode 100644 index adfa3c40d..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/ColorTwistbatchPD.cpp +++ /dev/null @@ -1,270 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ColorTwistbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *alpha; - vx_float32 *beta; - vx_float32 *hue; - vx_float32 *sat; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshColorTwistbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ColorTwistbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_float32), data->beta, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_float32), data->hue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_float32), data->sat, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateColorTwistbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ColorTwistbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processColorTwistbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ColorTwistbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - vx_int32 output_format_toggle = 0; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshColorTwistbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_color_twist_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->alpha, data->beta, data->hue, data->sat, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshColorTwistbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_color_twist_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->alpha, data->beta, data->hue, data->sat, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshColorTwistbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_color_twist_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->alpha, data->beta, data->hue, data->sat, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeColorTwistbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ColorTwistbatchPDLocalData *data = new ColorTwistbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); - data->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->beta = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->hue = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->sat = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - - refreshColorTwistbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeColorTwistbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ColorTwistbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->alpha); - free(data->beta); - free(data->hue); - free(data->sat); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status ColorTwistbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ColorTwistbatchPD", - VX_KERNEL_RPP_COLORTWISTBATCHPD, - processColorTwistbatchPD, - 10, - validateColorTwistbatchPD, - initializeColorTwistbatchPD, - uninitializeColorTwistbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - unsigned short idx = 0; // For Index - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/ContrastbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/ContrastbatchPD.cpp deleted file mode 100644 index 56b50e10a..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/ContrastbatchPD.cpp +++ /dev/null @@ -1,269 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ContrastbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *min; - vx_uint32 *max; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshContrastbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ContrastbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->min, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_uint32), data->max, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateContrastbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ContrastbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processContrastbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ContrastbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshContrastbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_contrast_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->min, data->max, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_contrast_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->min, data->max, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshContrastbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_contrast_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->min, data->max, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_contrast_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->min, data->max, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshContrastbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_contrast_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->min, data->max, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_contrast_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->min, data->max, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeContrastbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ContrastbatchPDLocalData *data = new ContrastbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - data->min = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->max = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshContrastbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeContrastbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ContrastbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->min); - free(data->max); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status ContrastbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ContrastbatchPD", - VX_KERNEL_RPP_CONTRASTBATCHPD, - processContrastbatchPD, - 8, - validateContrastbatchPD, - initializeContrastbatchPD, - uninitializeContrastbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/CopybatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/CopybatchPD.cpp deleted file mode 100644 index 4510923c2..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/CopybatchPD.cpp +++ /dev/null @@ -1,223 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct CopybatchPDLocalData -{ - - vxRppHandle *handle; - RppiSize dimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u device_type; - -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshCopybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, CopybatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->dimensions.height, sizeof(data->dimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->dimensions.width, sizeof(data->dimensions.width))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateCopybatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - // check scalar alpha and beta type - vx_status status = VX_SUCCESS; - vx_parameter param = vxGetParameterByIndex(node, 0); - - vx_image image; - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryParameter(param, VX_PARAMETER_ATTRIBUTE_REF, &image, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - status = VX_ERROR_INVALID_VALUE; - - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_FORMAT, &df_image, sizeof(df_image))); - - vx_uint32 height, width; - STATUS_ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_HEIGHT, &height, sizeof(height))); - - STATUS_ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_WIDTH, &width, sizeof(width))); - vxReleaseImage(&image); - - return status; -} - -static vx_status VX_CALLBACK processCopybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CopybatchPDLocalData *data = NULL; - vx_status return_status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - unsigned size = data->dimensions.height * data->dimensions.width; - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshCopybatchPD(node, parameters, num, data); - cl_command_queue handle = data->handle->cmdq; - if (df_image == VX_DF_IMAGE_U8) - { - clEnqueueCopyBuffer(handle, data->cl_pSrc, data->cl_pDst, 0, 0, size, 0, NULL, NULL); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - clEnqueueCopyBuffer(handle, data->cl_pSrc, data->cl_pDst, 0, 0, size * 3, 0, NULL, NULL); - } - return_status = VX_SUCCESS; -#elif ENABLE_HIP - refreshCopybatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - CHECK_HIP_RETURN_STATUS(hipMemcpy(data->hip_pDst, data->hip_pSrc, size, hipMemcpyDeviceToDevice)); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - CHECK_HIP_RETURN_STATUS(hipMemcpy(data->hip_pDst, data->hip_pSrc, size * 3, hipMemcpyDeviceToDevice)); - } -#endif - } - else if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshCopybatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - memcpy(data->pDst, data->pSrc, size); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - memcpy(data->pDst, data->pSrc, size * 3); - } - return_status = VX_SUCCESS; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeCopybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CopybatchPDLocalData *data = new CopybatchPDLocalData; - memset(data, 0, sizeof(*data)); - - refreshCopybatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[2], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeCopybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status CopybatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CopybatchPD", - VX_KERNEL_RPP_COPYBATCHPD, - processCopybatchPD, - 3, - validateCopybatchPD, - initializeCopybatchPD, - uninitializeCopybatchPD); - - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/CropMirrorNormalizePD.cpp b/amd_openvx_extensions/amd_rpp/source/image/CropMirrorNormalizePD.cpp deleted file mode 100644 index dd5081a87..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/CropMirrorNormalizePD.cpp +++ /dev/null @@ -1,309 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct CropMirrorNormalizebatchPDLocalData -{ - vxRppHandle * handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *start_x; - vx_uint32 *start_y; - vx_float32 *mean; - vx_float32 *std_dev; - vx_uint32 *mirror; - vx_uint32 chnShift; //NHWC to NCHW - Rpp32u *dstBatch_width; - Rpp32u *dstBatch_height; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshCropMirrorNormalizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, CropMirrorNormalizebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->start_x, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_uint32), data->start_y, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(vx_float32), data->mean, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(vx_float32), data->std_dev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[10], 0, data->nbatchSize, sizeof(vx_uint32), data->mirror, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->chnShift)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - data->dstDimensions[i].width = data->dstBatch_width[i]; - data->dstDimensions[i].height = data->dstBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateCropMirrorNormalizebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #12 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[13], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #13 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: CropMirrorNormalizebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processCropMirrorNormalizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - CropMirrorNormalizebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshCropMirrorNormalizebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_crop_mirror_normalize_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_crop_mirror_normalize_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshCropMirrorNormalizebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_crop_mirror_normalize_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_crop_mirror_normalize_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshCropMirrorNormalizebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_crop_mirror_normalize_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_crop_mirror_normalize_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeCropMirrorNormalizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CropMirrorNormalizebatchPDLocalData *data = new CropMirrorNormalizebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[13], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[12], &data->nbatchSize)); - data->start_x = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->start_y = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->mean = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->std_dev = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->mirror = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshCropMirrorNormalizebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeCropMirrorNormalizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CropMirrorNormalizebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->start_x); - free(data->start_y); - free(data->mean); - free(data->std_dev); - free(data->mirror); - free(data->srcDimensions); - free(data->dstDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->dstBatch_width); - free(data->dstBatch_height); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status CropMirrorNormalizePD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CropMirrorNormalizebatchPD", - VX_KERNEL_RPP_CROPMIRRORNORMALIZEBATCHPD, - processCropMirrorNormalizebatchPD, - 14, - validateCropMirrorNormalizebatchPD, - initializeCropMirrorNormalizebatchPD, - uninitializeCropMirrorNormalizebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 13, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/CropPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/CropPD.cpp deleted file mode 100644 index 0acbbfccc..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/CropPD.cpp +++ /dev/null @@ -1,291 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct CropPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *start_x; - vx_uint32 *start_y; - Rpp32u *dstBatch_width; - Rpp32u *dstBatch_height; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshCropPD(vx_node node, const vx_reference *parameters, vx_uint32 num, CropPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->start_x, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_uint32), data->start_y, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - data->dstDimensions[i].width = data->dstBatch_width[i]; - data->dstDimensions[i].height = data->dstBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateCropPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: CropPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processCropPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - CropPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - - vx_int32 output_format_toggle = 0; - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshCropPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_crop_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_crop_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshCropPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_crop_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_crop_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshCropPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_crop_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_crop_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeCropPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CropPDLocalData *data = new CropPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); - data->start_x = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->start_y = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshCropPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeCropPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CropPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->start_x); - free(data->start_y); - free(data->srcDimensions); - free(data->dstDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->dstBatch_width); - free(data->dstBatch_height); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status CropPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CropPD", - VX_KERNEL_RPP_CROPPD, - processCropPD, - 10, - validateCropPD, - initializeCropPD, - uninitializeCropPD); - - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/CustomConvolutionbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/CustomConvolutionbatchPD.cpp deleted file mode 100644 index ffad893b2..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/CustomConvolutionbatchPD.cpp +++ /dev/null @@ -1,274 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct CustomConvolutionbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_array *kernel; - size_t kernel_arr_size; - vx_uint32 *kernelWidth; - vx_uint32 *kernelHeight; - RppiSize *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshCustomConvolutionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, CustomConvolutionbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->kernel_arr_size, sizeof(vx_array), data->kernel, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - data->kernelSize[i].width = data->kernelWidth[i]; - data->kernelSize[i].height = data->kernelHeight[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateCustomConvolutionbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: CustomConvolutionbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processCustomConvolutionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - CustomConvolutionbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - // #if ENABLE_OPENCL - // refreshCustomConvolutionbatchPD(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_custom_convolution_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc),data->srcDimensions,data->maxSrcDimensions,static_cast(data->cl_pDst),data->kernel,data->kernelSize,data->nbatchSize,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_custom_convolution_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc),data->srcDimensions,data->maxSrcDimensions,static_cast(data->cl_pDst),data->kernel,data->kernelSize,data->nbatchSize,data->rppHandle); - // } - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #elif ENABLE_HIP - // refreshCustomConvolutionbatchPD(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_custom_convolution_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc),data->srcDimensions,data->maxSrcDimensions,static_cast(data->hip_pDst),data->kernel,data->kernelSize,data->nbatchSize,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_custom_convolution_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc),data->srcDimensions,data->maxSrcDimensions,static_cast(data->hip_pDst),data->kernel,data->kernelSize,data->nbatchSize,data->rppHandle); - // } - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #endif - return VX_ERROR_NOT_IMPLEMENTED; - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshCustomConvolutionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_custom_convolution_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernel, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_custom_convolution_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernel, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeCustomConvolutionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CustomConvolutionbatchPDLocalData *data = new CustomConvolutionbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &data->kernel_arr_size, sizeof(data->kernel_arr_size))); - data->kernel = (vx_array *)malloc(sizeof(vx_array) * data->kernel_arr_size); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->kernelWidth = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->kernelHeight = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - refreshCustomConvolutionbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeCustomConvolutionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CustomConvolutionbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->kernelWidth); - free(data->kernelHeight); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status CustomConvolutionbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CustomConvolutionbatchPD", - VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPD, - processCustomConvolutionbatchPD, - 9, - validateCustomConvolutionbatchPD, - initializeCustomConvolutionbatchPD, - uninitializeCustomConvolutionbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/DataObjectCopybatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/DataObjectCopybatchPD.cpp deleted file mode 100644 index a458b41c7..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/DataObjectCopybatchPD.cpp +++ /dev/null @@ -1,261 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct DataObjectCopybatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshDataObjectCopybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, DataObjectCopybatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateDataObjectCopybatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: DataObjectCopybatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processDataObjectCopybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - DataObjectCopybatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshDataObjectCopybatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_data_object_copy_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_data_object_copy_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshDataObjectCopybatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_data_object_copy_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_data_object_copy_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshDataObjectCopybatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_data_object_copy_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_data_object_copy_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeDataObjectCopybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DataObjectCopybatchPDLocalData *data = new DataObjectCopybatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshDataObjectCopybatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeDataObjectCopybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DataObjectCopybatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status DataObjectCopybatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.DataObjectCopybatchPD", - VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPD, - processDataObjectCopybatchPD, - 6, - validateDataObjectCopybatchPD, - initializeDataObjectCopybatchPD, - uninitializeDataObjectCopybatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/DilatebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/DilatebatchPD.cpp deleted file mode 100644 index 81a8af519..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/DilatebatchPD.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct DilatebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshDilatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, DilatebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateDilatebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: DilatebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processDilatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - DilatebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshDilatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_dilate_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_dilate_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshDilatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_dilate_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_dilate_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshDilatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_dilate_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_dilate_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeDilatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DilatebatchPDLocalData *data = new DilatebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshDilatebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeDilatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DilatebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->kernelSize); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status DilatebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.DilatebatchPD", - VX_KERNEL_RPP_DILATEBATCHPD, - processDilatebatchPD, - 7, - validateDilatebatchPD, - initializeDilatebatchPD, - uninitializeDilatebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/ErodebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/ErodebatchPD.cpp deleted file mode 100644 index 98489955f..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/ErodebatchPD.cpp +++ /dev/null @@ -1,266 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ErodebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshErodebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ErodebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateErodebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ErodebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processErodebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ErodebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshErodebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_erode_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_erode_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshErodebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_erode_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_erode_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshErodebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_erode_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_erode_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeErodebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ErodebatchPDLocalData *data = new ErodebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshErodebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeErodebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ErodebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->kernelSize); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status ErodebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ErodebatchPD", - VX_KERNEL_RPP_ERODEBATCHPD, - processErodebatchPD, - 7, - validateErodebatchPD, - initializeErodebatchPD, - uninitializeErodebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/ExclusiveORbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/ExclusiveORbatchPD.cpp deleted file mode 100644 index 7ce695191..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/ExclusiveORbatchPD.cpp +++ /dev/null @@ -1,276 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ExclusiveORbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshExclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ExclusiveORbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateExclusiveORbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExclusiveORbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExclusiveORbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processExclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ExclusiveORbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshExclusiveORbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_exclusive_OR_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_exclusive_OR_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshExclusiveORbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_exclusive_OR_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_exclusive_OR_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshExclusiveORbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_exclusive_OR_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_exclusive_OR_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeExclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExclusiveORbatchPDLocalData *data = new ExclusiveORbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshExclusiveORbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeExclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExclusiveORbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status ExclusiveORbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ExclusiveORbatchPD", - VX_KERNEL_RPP_EXCLUSIVEORBATCHPD, - processExclusiveORbatchPD, - 7, - validateExclusiveORbatchPD, - initializeExclusiveORbatchPD, - uninitializeExclusiveORbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/ExposurebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/ExposurebatchPD.cpp deleted file mode 100644 index 8da78b752..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/ExposurebatchPD.cpp +++ /dev/null @@ -1,264 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ExposurebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *exposureValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshExposurebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ExposurebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->exposureValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateExposurebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExposurebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processExposurebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ExposurebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshExposurebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_exposure_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->exposureValue, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_exposure_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->exposureValue, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshExposurebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_exposure_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->exposureValue, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_exposure_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->exposureValue, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshExposurebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_exposure_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->exposureValue, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_exposure_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->exposureValue, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeExposurebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExposurebatchPDLocalData *data = new ExposurebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->exposureValue = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - refreshExposurebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeExposurebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExposurebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->exposureValue); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status ExposurebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ExposurebatchPD", - VX_KERNEL_RPP_EXPOSUREBATCHPD, - processExposurebatchPD, - 7, - validateExposurebatchPD, - initializeExposurebatchPD, - uninitializeExposurebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/FastCornerDetector.cpp b/amd_openvx_extensions/amd_rpp/source/image/FastCornerDetector.cpp deleted file mode 100644 index 7f9497a8c..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/FastCornerDetector.cpp +++ /dev/null @@ -1,270 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct FastCornerDetectorLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u *noOfPixels; - Rpp8u *threshold; - Rpp32u *nonMaxKernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshFastCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num, FastCornerDetectorLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->noOfPixels, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp8u), data->threshold, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u), data->nonMaxKernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateFastCornerDetector(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FastCornerDetector: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processFastCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FastCornerDetectorLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - // #if ENABLE_OPENCL - // refreshFastCornerDetector(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_fast_corner_detector_u8_pln1_gpu(static_cast(data->cl_pSrc),data->srcDimensions,static_cast(data->cl_pDst),data->noOfPixels,data->threshold,data->nonMaxKernelSize,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_fast_corner_detector_u8_pkd3_gpu(static_cast(data->cl_pSrc),data->srcDimensions,static_cast(data->cl_pDst),data->noOfPixels,data->threshold,data->nonMaxKernelSize,data->rppHandle); - // } - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #elif ENABLE_HIP - // refreshFastCornerDetector(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_fast_corner_detector_u8_pln1_gpu(static_cast(data->hip_pSrc),data->srcDimensions,static_cast(data->hip_pDst),data->noOfPixels,data->threshold,data->nonMaxKernelSize,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_fast_corner_detector_u8_pkd3_gpu(static_cast(data->hip_pSrc),data->srcDimensions,static_cast(data->hip_pDst),data->noOfPixels,data->threshold,data->nonMaxKernelSize,data->rppHandle); - // } - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #endif - return VX_ERROR_NOT_IMPLEMENTED; - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshFastCornerDetector(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_fast_corner_detector_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->noOfPixels, data->threshold, data->nonMaxKernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_fast_corner_detector_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->noOfPixels, data->threshold, data->nonMaxKernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeFastCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FastCornerDetectorLocalData *data = new FastCornerDetectorLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - data->noOfPixels = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->threshold = (Rpp8u *)malloc(sizeof(Rpp8u) * data->nbatchSize); - data->nonMaxKernelSize = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshFastCornerDetector(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeFastCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FastCornerDetectorLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - free(data->noOfPixels); - free(data->threshold); - free(data->nonMaxKernelSize); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - return VX_SUCCESS; -} - -vx_status FastCornerDetector_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FastCornerDetector", - VX_KERNEL_RPP_FASTCORNERDETECTOR, - processFastCornerDetector, - 9, - validateFastCornerDetector, - initializeFastCornerDetector, - uninitializeFastCornerDetector); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/FisheyebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/FisheyebatchPD.cpp deleted file mode 100644 index 06c22c50d..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/FisheyebatchPD.cpp +++ /dev/null @@ -1,259 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct FisheyebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshFisheyebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, FisheyebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateFisheyebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FisheyebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processFisheyebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FisheyebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshFisheyebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_fisheye_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_fisheye_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshFisheyebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_fisheye_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_fisheye_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshFisheyebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_fisheye_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_fisheye_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeFisheyebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FisheyebatchPDLocalData *data = new FisheyebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshFisheyebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeFisheyebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FisheyebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status FisheyebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FisheyebatchPD", - VX_KERNEL_RPP_FISHEYEBATCHPD, - processFisheyebatchPD, - 6, - validateFisheyebatchPD, - initializeFisheyebatchPD, - uninitializeFisheyebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/FlipbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/FlipbatchPD.cpp deleted file mode 100644 index 7939cf65c..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/FlipbatchPD.cpp +++ /dev/null @@ -1,264 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct FlipbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *flipAxis; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshFlipbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, FlipbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->flipAxis, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateFlipbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FlipbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processFlipbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FlipbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshFlipbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_flip_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->flipAxis, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_flip_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->flipAxis, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshFlipbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_flip_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->flipAxis, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_flip_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->flipAxis, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshFlipbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_flip_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->flipAxis, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_flip_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->flipAxis, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeFlipbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FlipbatchPDLocalData *data = new FlipbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->flipAxis = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - refreshFlipbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeFlipbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FlipbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->flipAxis); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status FlipbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FlipbatchPD", - VX_KERNEL_RPP_FLIPBATCHPD, - processFlipbatchPD, - 7, - validateFlipbatchPD, - initializeFlipbatchPD, - uninitializeFlipbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/FogbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/FogbatchPD.cpp deleted file mode 100644 index 93db0ed97..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/FogbatchPD.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct FogbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *fogValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshFogbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, FogbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->fogValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateFogbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FogbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processFogbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FogbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshFogbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_fog_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->fogValue, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_fog_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->fogValue, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshFogbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_fog_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->fogValue, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_fog_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->fogValue, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshFogbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_fog_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->fogValue, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_fog_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->fogValue, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeFogbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FogbatchPDLocalData *data = new FogbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->fogValue = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - refreshFogbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeFogbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FogbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->fogValue); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status FogbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FogbatchPD", - VX_KERNEL_RPP_FOGBATCHPD, - processFogbatchPD, - 7, - validateFogbatchPD, - initializeFogbatchPD, - uninitializeFogbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/GammaCorrectionbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/GammaCorrectionbatchPD.cpp deleted file mode 100644 index ecfe82e04..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/GammaCorrectionbatchPD.cpp +++ /dev/null @@ -1,264 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct GammaCorrectionbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *gamma; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshGammaCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, GammaCorrectionbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->gamma, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateGammaCorrectionbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GammaCorrectionbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processGammaCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - GammaCorrectionbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshGammaCorrectionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_gamma_correction_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->gamma, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_gamma_correction_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->gamma, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshGammaCorrectionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_gamma_correction_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->gamma, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_gamma_correction_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->gamma, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshGammaCorrectionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_gamma_correction_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->gamma, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_gamma_correction_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->gamma, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeGammaCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GammaCorrectionbatchPDLocalData *data = new GammaCorrectionbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->gamma = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - refreshGammaCorrectionbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeGammaCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GammaCorrectionbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->gamma); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status GammaCorrectionbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GammaCorrectionbatchPD", - VX_KERNEL_RPP_GAMMACORRECTIONBATCHPD, - processGammaCorrectionbatchPD, - 7, - validateGammaCorrectionbatchPD, - initializeGammaCorrectionbatchPD, - uninitializeGammaCorrectionbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/GaussianFilterbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/GaussianFilterbatchPD.cpp deleted file mode 100644 index 8bb6dd94e..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/GaussianFilterbatchPD.cpp +++ /dev/null @@ -1,270 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct GaussianFilterbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *stdDev; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshGaussianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, GaussianFilterbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->stdDev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateGaussianFilterbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GaussianFilterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processGaussianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - GaussianFilterbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshGaussianFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_gaussian_filter_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_gaussian_filter_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshGaussianFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_gaussian_filter_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_gaussian_filter_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshGaussianFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_gaussian_filter_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_gaussian_filter_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeGaussianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GaussianFilterbatchPDLocalData *data = new GaussianFilterbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->stdDev = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshGaussianFilterbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeGaussianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GaussianFilterbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->stdDev); - free(data->kernelSize); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status GaussianFilterbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GaussianFilterbatchPD", - VX_KERNEL_RPP_GAUSSIANFILTERBATCHPD, - processGaussianFilterbatchPD, - 8, - validateGaussianFilterbatchPD, - initializeGaussianFilterbatchPD, - uninitializeGaussianFilterbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/GaussianImagePyramidbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/GaussianImagePyramidbatchPD.cpp deleted file mode 100644 index 067458ef0..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/GaussianImagePyramidbatchPD.cpp +++ /dev/null @@ -1,270 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct GaussianImagePyramidbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *stdDev; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshGaussianImagePyramidbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, GaussianImagePyramidbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->stdDev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateGaussianImagePyramidbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GaussianImagePyramidbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processGaussianImagePyramidbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - GaussianImagePyramidbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshGaussianImagePyramidbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_gaussian_image_pyramid_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_gaussian_image_pyramid_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshGaussianImagePyramidbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_gaussian_image_pyramid_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_gaussian_image_pyramid_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshGaussianImagePyramidbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_gaussian_image_pyramid_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_gaussian_image_pyramid_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeGaussianImagePyramidbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GaussianImagePyramidbatchPDLocalData *data = new GaussianImagePyramidbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->stdDev = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshGaussianImagePyramidbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeGaussianImagePyramidbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GaussianImagePyramidbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->stdDev); - free(data->kernelSize); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status GaussianImagePyramidbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GaussianImagePyramidbatchPD", - VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMIDBATCHPD, - processGaussianImagePyramidbatchPD, - 8, - validateGaussianImagePyramidbatchPD, - initializeGaussianImagePyramidbatchPD, - uninitializeGaussianImagePyramidbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/HarrisCornerDetector.cpp b/amd_openvx_extensions/amd_rpp/source/image/HarrisCornerDetector.cpp deleted file mode 100644 index f9493e74a..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/HarrisCornerDetector.cpp +++ /dev/null @@ -1,273 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct HarrisCornerDetectorLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u *gaussianKernelSize; - Rpp32f *stdDev; - Rpp32u *kernelSize; - Rpp32f *kValue; - Rpp32f *threshold; - Rpp32u *nonMaxKernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshHarrisCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num, HarrisCornerDetectorLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->gaussianKernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32f), data->stdDev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32f), data->kValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32f), data->threshold, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u), data->nonMaxKernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateHarrisCornerDetector(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HarrisCornerDetector: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processHarrisCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HarrisCornerDetectorLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshHarrisCornerDetector(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_harris_corner_detector_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->gaussianKernelSize, data->stdDev, data->kernelSize, data->kValue, data->threshold, data->nonMaxKernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_harris_corner_detector_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->gaussianKernelSize, data->stdDev, data->kernelSize, data->kValue, data->threshold, data->nonMaxKernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshHarrisCornerDetector(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_harris_corner_detector_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->gaussianKernelSize, data->stdDev, data->kernelSize, data->kValue, data->threshold, data->nonMaxKernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_harris_corner_detector_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->gaussianKernelSize, data->stdDev, data->kernelSize, data->kValue, data->threshold, data->nonMaxKernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeHarrisCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HarrisCornerDetectorLocalData *data = new HarrisCornerDetectorLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - data->gaussianKernelSize = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->stdDev = (Rpp32f *)malloc(sizeof(Rpp32f) * data->nbatchSize); - data->kernelSize = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->kValue = (Rpp32f *)malloc(sizeof(Rpp32f) * data->nbatchSize); - data->threshold = (Rpp32f *)malloc(sizeof(Rpp32f) * data->nbatchSize); - data->nonMaxKernelSize = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshHarrisCornerDetector(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeHarrisCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HarrisCornerDetectorLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - free(data->gaussianKernelSize); - free(data->stdDev); - free(data->kernelSize); - free(data->kValue); - free(data->threshold); - free(data->nonMaxKernelSize); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status HarrisCornerDetector_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HarrisCornerDetector", - VX_KERNEL_RPP_HARRISCORNERDETECTOR, - processHarrisCornerDetector, - 12, - validateHarrisCornerDetector, - initializeHarrisCornerDetector, - uninitializeHarrisCornerDetector); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/Histogram.cpp b/amd_openvx_extensions/amd_rpp/source/image/Histogram.cpp deleted file mode 100644 index 363e05671..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/Histogram.cpp +++ /dev/null @@ -1,235 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct HistogramLocalData -{ - vxRppHandle *handle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - Rpp32u *outputHistogram; - Rpp32u bins; -#if ENABLE_OPENCL - cl_mem cl_pSrc; -#elif ENABLE_HIP - void *hip_pSrc; -#endif -}; - -static vx_status VX_CALLBACK refreshHistogram(vx_node node, const vx_reference *parameters, vx_uint32 num, HistogramLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->outputHistogram = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp32u), data->outputHistogram, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->bins)); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateHistogram(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Histogram: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - vxReleaseImage(&input); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processHistogram(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HistogramLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -// #if ENABLE_OPENCL -// refreshHistogram(node, parameters, num, data); -// if (df_image == VX_DF_IMAGE_U8) -// { -// // rpp_status = rppi_histogram_u8_pln1_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->outputHistogram, data->bins, data->rppHandle); -// } -// else if (df_image == VX_DF_IMAGE_RGB) -// { -// // rpp_status = rppi_histogram_u8_pkd3_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->outputHistogram, data->bins, data->rppHandle); -// } -// size_t arr_size; -// STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); -// STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp32u), data->outputHistogram, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); -// return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -// #elif ENABLE_HIP -// refreshHistogram(node, parameters, num, data); -// if (df_image == VX_DF_IMAGE_U8) -// { -// // rpp_status = rppi_histogram_u8_pln1_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->outputHistogram, data->bins, data->rppHandle); -// } -// else if (df_image == VX_DF_IMAGE_RGB) -// { -// // rpp_status = rppi_histogram_u8_pkd3_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->outputHistogram, data->bins, data->rppHandle); -// } -// size_t arr_size; -// STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); -// STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp32u), data->outputHistogram, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); -// return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -// #endif - return VX_ERROR_NOT_IMPLEMENTED; - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshHistogram(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_histogram_u8_pln1_host(data->pSrc, data->srcDimensions, data->outputHistogram, data->bins, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_histogram_u8_pkd3_host(data->pSrc, data->srcDimensions, data->outputHistogram, data->bins, data->handle->rppHandle); - } - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp32u), data->outputHistogram, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeHistogram(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramLocalData *data = new HistogramLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshHistogram(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, 1, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeHistogram(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status Histogram_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Histogram", - VX_KERNEL_RPP_HISTOGRAM, - processHistogram, - 4, - validateHistogram, - initializeHistogram, - uninitializeHistogram); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/HistogramBalancebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/HistogramBalancebatchPD.cpp deleted file mode 100644 index 5c1696b0b..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/HistogramBalancebatchPD.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct HistogramBalancebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshHistogramBalancebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, HistogramBalancebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateHistogramBalancebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HistogramBalancebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processHistogramBalancebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HistogramBalancebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - // #if ENABLE_OPENCL - // refreshHistogramBalancebatchPD(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_histogram_balance_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc),data->srcDimensions,data->maxSrcDimensions,static_cast(data->cl_pDst),data->nbatchSize,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_balance_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc),data->srcDimensions,data->maxSrcDimensions,static_cast(data->cl_pDst),data->nbatchSize,data->rppHandle); - // } - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #endif - return VX_ERROR_NOT_IMPLEMENTED; - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshHistogramBalancebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_histogram_balance_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_histogram_balance_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeHistogramBalancebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramBalancebatchPDLocalData *data = new HistogramBalancebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshHistogramBalancebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeHistogramBalancebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramBalancebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status HistogramBalancebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HistogramBalancebatchPD", - VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPD, - processHistogramBalancebatchPD, - 6, - validateHistogramBalancebatchPD, - initializeHistogramBalancebatchPD, - uninitializeHistogramBalancebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/HistogramEqualizebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/HistogramEqualizebatchPD.cpp deleted file mode 100644 index a1e94e62f..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/HistogramEqualizebatchPD.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct HistogramEqualizebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshHistogramEqualizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, HistogramEqualizebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateHistogramEqualizebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HistogramEqualizebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processHistogramEqualizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HistogramEqualizebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - // #if ENABLE_OPENCL - // refreshHistogramEqualizebatchPD(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_histogram_equalization_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc),data->srcDimensions,data->maxSrcDimensions,static_cast(data->cl_pDst),data->nbatchSize,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_equalization_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc),data->srcDimensions,data->maxSrcDimensions,static_cast(data->cl_pDst),data->nbatchSize,data->rppHandle); - // } - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #elif ENABLE_HIP - // refreshHistogramEqualizebatchPD(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_histogram_equalization_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc),data->srcDimensions,data->maxSrcDimensions,static_cast(data->hip_pDst),data->nbatchSize,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_equalization_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc),data->srcDimensions,data->maxSrcDimensions,static_cast(data->hip_pDst),data->nbatchSize,data->rppHandle); - // } - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #endif - return VX_ERROR_NOT_IMPLEMENTED; - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshHistogramEqualizebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_histogram_equalization_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_histogram_equalization_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeHistogramEqualizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramEqualizebatchPDLocalData *data = new HistogramEqualizebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshHistogramEqualizebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeHistogramEqualizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramEqualizebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status HistogramEqualizebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HistogramEqualizebatchPD", - VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPD, - processHistogramEqualizebatchPD, - 6, - validateHistogramEqualizebatchPD, - initializeHistogramEqualizebatchPD, - uninitializeHistogramEqualizebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/HuebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/HuebatchPD.cpp deleted file mode 100644 index b23531af0..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/HuebatchPD.cpp +++ /dev/null @@ -1,264 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct HuebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *hueShift; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshHuebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, HuebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->hueShift, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateHuebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HuebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processHuebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HuebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshHuebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - return VX_ERROR_NOT_SUPPORTED; - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_hueRGB_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->hueShift, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshHuebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - return VX_ERROR_NOT_SUPPORTED; - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_hueRGB_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->hueShift, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshHuebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - return VX_ERROR_NOT_SUPPORTED; - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_hueRGB_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->hueShift, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeHuebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HuebatchPDLocalData *data = new HuebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->hueShift = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - refreshHuebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeHuebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HuebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->hueShift); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status HuebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HuebatchPD", - VX_KERNEL_RPP_HUEBATCHPD, - processHuebatchPD, - 7, - validateHuebatchPD, - initializeHuebatchPD, - uninitializeHuebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/InclusiveORbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/InclusiveORbatchPD.cpp deleted file mode 100644 index ed0d99e4d..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/InclusiveORbatchPD.cpp +++ /dev/null @@ -1,275 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct InclusiveORbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshInclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, InclusiveORbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateInclusiveORbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: InclusiveORbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: InclusiveORbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processInclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - InclusiveORbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshInclusiveORbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_inclusive_OR_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_inclusive_OR_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshInclusiveORbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_inclusive_OR_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_inclusive_OR_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshInclusiveORbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_inclusive_OR_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_inclusive_OR_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeInclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - InclusiveORbatchPDLocalData *data = new InclusiveORbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshInclusiveORbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeInclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - InclusiveORbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status InclusiveORbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.InclusiveORbatchPD", - VX_KERNEL_RPP_INCLUSIVEORBATCHPD, - processInclusiveORbatchPD, - 7, - validateInclusiveORbatchPD, - initializeInclusiveORbatchPD, - uninitializeInclusiveORbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/JitterbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/JitterbatchPD.cpp deleted file mode 100644 index f39aa259e..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/JitterbatchPD.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct JitterbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshJitterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, JitterbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateJitterbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: JitterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processJitterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - JitterbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshJitterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_jitter_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_jitter_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshJitterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_jitter_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_jitter_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshJitterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_jitter_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_jitter_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeJitterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - JitterbatchPDLocalData *data = new JitterbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshJitterbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeJitterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - JitterbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->kernelSize); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status JitterbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.JitterbatchPD", - VX_KERNEL_RPP_JITTERBATCHPD, - processJitterbatchPD, - 7, - validateJitterbatchPD, - initializeJitterbatchPD, - uninitializeJitterbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/LaplacianImagePyramid.cpp b/amd_openvx_extensions/amd_rpp/source/image/LaplacianImagePyramid.cpp deleted file mode 100644 index f46e70b4f..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/LaplacianImagePyramid.cpp +++ /dev/null @@ -1,268 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct LaplacianImagePyramidLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f *stdDev; - Rpp32u *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshLaplacianImagePyramid(vx_node node, const vx_reference *parameters, vx_uint32 num, LaplacianImagePyramidLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32f), data->stdDev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateLaplacianImagePyramid(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LaplacianImagePyramid: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processLaplacianImagePyramid(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LaplacianImagePyramidLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshLaplacianImagePyramid(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_laplacian_image_pyramid_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_laplacian_image_pyramid_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshLaplacianImagePyramid(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_laplacian_image_pyramid_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_laplacian_image_pyramid_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshLaplacianImagePyramid(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_laplacian_image_pyramid_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_laplacian_image_pyramid_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeLaplacianImagePyramid(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LaplacianImagePyramidLocalData *data = new LaplacianImagePyramidLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - data->stdDev = (Rpp32f *)malloc(sizeof(Rpp32f) * data->nbatchSize); - data->kernelSize = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshLaplacianImagePyramid(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeLaplacianImagePyramid(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LaplacianImagePyramidLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - free(data->stdDev); - free(data->kernelSize); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - return VX_SUCCESS; -} - -vx_status LaplacianImagePyramid_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LaplacianImagePyramid", - VX_KERNEL_RPP_LAPLACIANIMAGEPYRAMID, - processLaplacianImagePyramid, - 8, - validateLaplacianImagePyramid, - initializeLaplacianImagePyramid, - uninitializeLaplacianImagePyramid); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/LensCorrectionbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/LensCorrectionbatchPD.cpp deleted file mode 100644 index ceeb92924..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/LensCorrectionbatchPD.cpp +++ /dev/null @@ -1,269 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct LensCorrectionbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *strength; - vx_float32 *zoom; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshLensCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, LensCorrectionbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->strength, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_float32), data->zoom, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateLensCorrectionbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LensCorrectionbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processLensCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LensCorrectionbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshLensCorrectionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_lens_correction_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->strength, data->zoom, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_lens_correction_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->strength, data->zoom, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshLensCorrectionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_lens_correction_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->strength, data->zoom, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_lens_correction_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->strength, data->zoom, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshLensCorrectionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_lens_correction_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->strength, data->zoom, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_lens_correction_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->strength, data->zoom, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeLensCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LensCorrectionbatchPDLocalData *data = new LensCorrectionbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - data->strength = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->zoom = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshLensCorrectionbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeLensCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LensCorrectionbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - free(data->strength); - free(data->zoom); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status LensCorrectionbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LensCorrectionbatchPD", - VX_KERNEL_RPP_LENSCORRECTIONBATCHPD, - processLensCorrectionbatchPD, - 8, - validateLensCorrectionbatchPD, - initializeLensCorrectionbatchPD, - uninitializeLensCorrectionbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/LocalBinaryPatternbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/LocalBinaryPatternbatchPD.cpp deleted file mode 100644 index fc2c49183..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/LocalBinaryPatternbatchPD.cpp +++ /dev/null @@ -1,260 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct LocalBinaryPatternbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshLocalBinaryPatternbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, LocalBinaryPatternbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateLocalBinaryPatternbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LocalBinaryPatternbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processLocalBinaryPatternbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LocalBinaryPatternbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshLocalBinaryPatternbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_local_binary_pattern_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_local_binary_pattern_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshLocalBinaryPatternbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_local_binary_pattern_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_local_binary_pattern_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshLocalBinaryPatternbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_local_binary_pattern_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_local_binary_pattern_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeLocalBinaryPatternbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LocalBinaryPatternbatchPDLocalData *data = new LocalBinaryPatternbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshLocalBinaryPatternbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeLocalBinaryPatternbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LocalBinaryPatternbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status LocalBinaryPatternbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LocalBinaryPatternbatchPD", - VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPD, - processLocalBinaryPatternbatchPD, - 6, - validateLocalBinaryPatternbatchPD, - initializeLocalBinaryPatternbatchPD, - uninitializeLocalBinaryPatternbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/LookUpTablebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/LookUpTablebatchPD.cpp deleted file mode 100644 index a76fd60d9..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/LookUpTablebatchPD.cpp +++ /dev/null @@ -1,267 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct LookUpTablebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp8u *lutPtr; - size_t arr_size; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshLookUpTablebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, LookUpTablebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->arr_size, sizeof(Rpp8u), data->lutPtr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateLookUpTablebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LookUpTablebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processLookUpTablebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LookUpTablebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshLookUpTablebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_look_up_table_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->lutPtr, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_look_up_table_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->lutPtr, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshLookUpTablebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_look_up_table_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->lutPtr, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_look_up_table_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->lutPtr, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshLookUpTablebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_look_up_table_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->lutPtr, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_look_up_table_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->lutPtr, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeLookUpTablebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LookUpTablebatchPDLocalData *data = new LookUpTablebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - data->arr_size = 256 * data->nbatchSize; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->lutPtr = (Rpp8u *)malloc(sizeof(Rpp8u) * data->arr_size); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshLookUpTablebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeLookUpTablebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LookUpTablebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->lutPtr); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status LookUpTablebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LookUpTablebatchPD", - VX_KERNEL_RPP_LOOKUPTABLEBATCHPD, - processLookUpTablebatchPD, - 7, - validateLookUpTablebatchPD, - initializeLookUpTablebatchPD, - uninitializeLookUpTablebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/MagnitudebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/MagnitudebatchPD.cpp deleted file mode 100644 index b903a7d93..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/MagnitudebatchPD.cpp +++ /dev/null @@ -1,275 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct MagnitudebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMagnitudebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, MagnitudebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMagnitudebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MagnitudebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MagnitudebatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMagnitudebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MagnitudebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshMagnitudebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_magnitude_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_magnitude_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshMagnitudebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_magnitude_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_magnitude_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshMagnitudebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_magnitude_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_magnitude_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMagnitudebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MagnitudebatchPDLocalData *data = new MagnitudebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshMagnitudebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMagnitudebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MagnitudebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status MagnitudebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MagnitudebatchPD", - VX_KERNEL_RPP_MAGNITUDEBATCHPD, - processMagnitudebatchPD, - 7, - validateMagnitudebatchPD, - initializeMagnitudebatchPD, - uninitializeMagnitudebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/MaxbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/MaxbatchPD.cpp deleted file mode 100644 index b800ee466..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/MaxbatchPD.cpp +++ /dev/null @@ -1,276 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct MaxbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMaxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, MaxbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMaxbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MaxbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MaxbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMaxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MaxbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle->cmdq; - refreshMaxbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_max_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_max_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshMaxbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_max_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_max_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshMaxbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_max_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_max_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMaxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MaxbatchPDLocalData *data = new MaxbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshMaxbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMaxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MaxbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status MaxbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MaxbatchPD", - VX_KERNEL_RPP_MAXBATCHPD, - processMaxbatchPD, - 7, - validateMaxbatchPD, - initializeMaxbatchPD, - uninitializeMaxbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/MeanStddev.cpp b/amd_openvx_extensions/amd_rpp/source/image/MeanStddev.cpp deleted file mode 100644 index 6c54ca3f6..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/MeanStddev.cpp +++ /dev/null @@ -1,227 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct MeanStddevLocalData -{ - vxRppHandle *handle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - Rpp32f mean; - Rpp32f stdDev; -#if ENABLE_OPENCL - cl_mem cl_pSrc; -#elif ENABLE_HIP - void *hip_pSrc; -#endif -}; - -static vx_status VX_CALLBACK refreshMeanStddev(vx_node node, const vx_reference *parameters, vx_uint32 num, MeanStddevLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[1], &data->mean)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->stdDev)); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMeanStddev(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[1], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_FLOAT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #1 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_FLOAT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MeanStddev: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - vxReleaseImage(&input); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMeanStddev(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MeanStddevLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - // #if ENABLE_OPENCL - // refreshMeanStddev(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_mean_stddev_u8_pln1_gpu(static_cast(data->cl_pSrc),data->srcDimensions,&data->mean,&data->stdDev,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_mean_stddev_u8_pkd3_gpu(static_cast(data->cl_pSrc),data->srcDimensions,&data->mean,&data->stdDev,data->rppHandle); - // } - // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->mean)); - // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->stdDev)); - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #elif ENABLE_HIP - // refreshMeanStddev(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_mean_stddev_u8_pln1_gpu(static_cast(data->hip_pSrc),data->srcDimensions,&data->mean,&data->stdDev,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_mean_stddev_u8_pkd3_gpu(static_cast(data->hip_pSrc),data->srcDimensions,&data->mean,&data->stdDev,data->rppHandle); - // } - // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->mean)); - // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->stdDev)); - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #endif - return VX_ERROR_NOT_IMPLEMENTED; - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshMeanStddev(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_mean_stddev_u8_pln1_host(data->pSrc, data->srcDimensions, &data->mean, &data->stdDev, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_mean_stddev_u8_pkd3_host(data->pSrc, data->srcDimensions, &data->mean, &data->stdDev, data->handle->rppHandle); - } - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->mean)); - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->stdDev)); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMeanStddev(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MeanStddevLocalData *data = new MeanStddevLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMeanStddev(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, 1, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMeanStddev(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MeanStddevLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status MeanStddev_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MeanStddev", - VX_KERNEL_RPP_MEANSTDDEV, - processMeanStddev, - 4, - validateMeanStddev, - initializeMeanStddev, - uninitializeMeanStddev); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/MedianFilterbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/MedianFilterbatchPD.cpp deleted file mode 100644 index 729374eac..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/MedianFilterbatchPD.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct MedianFilterbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMedianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, MedianFilterbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMedianFilterbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MedianFilterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMedianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MedianFilterbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshMedianFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_median_filter_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_median_filter_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshMedianFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_median_filter_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_median_filter_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshMedianFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_median_filter_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_median_filter_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMedianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MedianFilterbatchPDLocalData *data = new MedianFilterbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshMedianFilterbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMedianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MedianFilterbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->kernelSize); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status MedianFilterbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MedianFilterbatchPD", - VX_KERNEL_RPP_MEDIANFILTERBATCHPD, - processMedianFilterbatchPD, - 7, - validateMedianFilterbatchPD, - initializeMedianFilterbatchPD, - uninitializeMedianFilterbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/MinMaxLoc.cpp b/amd_openvx_extensions/amd_rpp/source/image/MinMaxLoc.cpp deleted file mode 100644 index 04a82af46..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/MinMaxLoc.cpp +++ /dev/null @@ -1,245 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct MinMaxLocLocalData -{ - vxRppHandle *handle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - Rpp8u min; - Rpp8u max; - Rpp32u minLoc; - Rpp32u maxLoc; -#if ENABLE_OPENCL - cl_mem cl_pSrc; -#elif ENABLE_HIP - void *hip_pSrc; -#endif -}; - -static vx_status VX_CALLBACK refreshMinMaxLoc(vx_node node, const vx_reference *parameters, vx_uint32 num, MinMaxLocLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[1], &data->min)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->max)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->minLoc)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->maxLoc)); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMinMaxLoc(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[1], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT8) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #1 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT8) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MinMaxLoc: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - vxReleaseImage(&input); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMinMaxLoc(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MinMaxLocLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - // #if ENABLE_OPENCL - // refreshMinMaxLoc(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_min_max_loc_u8_pln1_gpu(static_cast(data->cl_pSrc),data->srcDimensions,&data->min,&data->max,&data->minLoc,&data->maxLoc,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_min_max_loc_u8_pkd3_gpu(static_cast(data->cl_pSrc),data->srcDimensions,&data->min,&data->max,&data->minLoc,&data->maxLoc,data->rppHandle); - // } - // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->min)); - // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->max)); - // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[3], &data->minLoc)); - // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[4], &data->maxLoc)); - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #elif ENABLE_HIP - // refreshMinMaxLoc(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_min_max_loc_u8_pln1_gpu(static_cast(data->hip_pSrc),data->srcDimensions,&data->min,&data->max,&data->minLoc,&data->maxLoc,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_min_max_loc_u8_pkd3_gpu(static_cast(data->hip_pSrc),data->srcDimensions,&data->min,&data->max,&data->minLoc,&data->maxLoc,data->rppHandle); - // } - // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->min)); - // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->max)); - // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[3], &data->minLoc)); - // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[4], &data->maxLoc)); - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #endif - return VX_ERROR_NOT_IMPLEMENTED; - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshMinMaxLoc(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_min_max_loc_u8_pln1_host(data->pSrc, data->srcDimensions, &data->min, &data->max, &data->minLoc, &data->maxLoc, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_min_max_loc_u8_pkd3_host(data->pSrc, data->srcDimensions, &data->min, &data->max, &data->minLoc, &data->maxLoc, data->handle->rppHandle); - } - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->min)); - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->max)); - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[3], &data->minLoc)); - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[4], &data->maxLoc)); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMinMaxLoc(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MinMaxLocLocalData *data = new MinMaxLocLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMinMaxLoc(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, 1, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMinMaxLoc(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MinMaxLocLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status MinMaxLoc_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MinMaxLoc", - VX_KERNEL_RPP_MINMAXLOC, - processMinMaxLoc, - 6, - validateMinMaxLoc, - initializeMinMaxLoc, - uninitializeMinMaxLoc); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/MinbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/MinbatchPD.cpp deleted file mode 100644 index 98965ce07..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/MinbatchPD.cpp +++ /dev/null @@ -1,275 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct MinbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMinbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, MinbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMinbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MinbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MinbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMinbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MinbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshMinbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_min_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_min_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshMinbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_min_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_min_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshMinbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_min_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_min_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMinbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MinbatchPDLocalData *data = new MinbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshMinbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMinbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MinbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status MinbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MinbatchPD", - VX_KERNEL_RPP_MINBATCHPD, - processMinbatchPD, - 7, - validateMinbatchPD, - initializeMinbatchPD, - uninitializeMinbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/MultiplybatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/MultiplybatchPD.cpp deleted file mode 100644 index 9f3237b62..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/MultiplybatchPD.cpp +++ /dev/null @@ -1,275 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct MultiplybatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMultiplybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, MultiplybatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMultiplybatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MultiplybatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MultiplybatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMultiplybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MultiplybatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshMultiplybatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_multiply_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_multiply_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshMultiplybatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_multiply_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_multiply_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshMultiplybatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_multiply_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_multiply_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMultiplybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MultiplybatchPDLocalData *data = new MultiplybatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshMultiplybatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMultiplybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MultiplybatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status MultiplybatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MultiplybatchPD", - VX_KERNEL_RPP_MULTIPLYBATCHPD, - processMultiplybatchPD, - 7, - validateMultiplybatchPD, - initializeMultiplybatchPD, - uninitializeMultiplybatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/NoisebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/NoisebatchPD.cpp deleted file mode 100644 index 1dd4e4ebf..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/NoisebatchPD.cpp +++ /dev/null @@ -1,266 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct NoisebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *noiseProbability; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshNoisebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, NoisebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->noiseProbability, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateNoisebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NoisebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processNoisebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - NoisebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshNoisebatchPD(node, parameters, num, data); - data->noiseProbability[0] = 0.01; - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_noise_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->noiseProbability, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_noise_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->noiseProbability, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshNoisebatchPD(node, parameters, num, data); - data->noiseProbability[0] = 0.01; - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_noise_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->noiseProbability, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_noise_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->noiseProbability, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshNoisebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_noise_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->noiseProbability, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_noise_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->noiseProbability, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeNoisebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NoisebatchPDLocalData *data = new NoisebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->noiseProbability = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - refreshNoisebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeNoisebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NoisebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->noiseProbability); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status NoisebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NoisebatchPD", - VX_KERNEL_RPP_NOISEBATCHPD, - processNoisebatchPD, - 7, - validateNoisebatchPD, - initializeNoisebatchPD, - uninitializeNoisebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/NonLinearFilterbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/NonLinearFilterbatchPD.cpp deleted file mode 100644 index 24422712b..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/NonLinearFilterbatchPD.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct NonLinearFilterbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshNonLinearFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, NonLinearFilterbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateNonLinearFilterbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NonLinearFilterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processNonLinearFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - NonLinearFilterbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshNonLinearFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_nonlinear_filter_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_nonlinear_filter_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshNonLinearFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_nonlinear_filter_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_nonlinear_filter_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshNonLinearFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_nonlinear_filter_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_nonlinear_filter_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeNonLinearFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonLinearFilterbatchPDLocalData *data = new NonLinearFilterbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshNonLinearFilterbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeNonLinearFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonLinearFilterbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->kernelSize); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status NonLinearFilterbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NonLinearFilterbatchPD", - VX_KERNEL_RPP_NONLINEARFILTERBATCHPD, - processNonLinearFilterbatchPD, - 7, - validateNonLinearFilterbatchPD, - initializeNonLinearFilterbatchPD, - uninitializeNonLinearFilterbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/NonMaxSupressionbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/NonMaxSupressionbatchPD.cpp deleted file mode 100644 index 7b5074d5b..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/NonMaxSupressionbatchPD.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct NonMaxSupressionbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshNonMaxSupressionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, NonMaxSupressionbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateNonMaxSupressionbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NonMaxSupressionbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processNonMaxSupressionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - NonMaxSupressionbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshNonMaxSupressionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_non_max_suppression_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_non_max_suppression_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshNonMaxSupressionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_non_max_suppression_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_non_max_suppression_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshNonMaxSupressionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_non_max_suppression_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_non_max_suppression_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeNonMaxSupressionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonMaxSupressionbatchPDLocalData *data = new NonMaxSupressionbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshNonMaxSupressionbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeNonMaxSupressionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonMaxSupressionbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->kernelSize); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status NonMaxSupressionbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NonMaxSupressionbatchPD", - VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPD, - processNonMaxSupressionbatchPD, - 7, - validateNonMaxSupressionbatchPD, - initializeNonMaxSupressionbatchPD, - uninitializeNonMaxSupressionbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/NopbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/NopbatchPD.cpp deleted file mode 100644 index 620db6b6e..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/NopbatchPD.cpp +++ /dev/null @@ -1,191 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct NopbatchPDLocalData -{ - vxRppHandle *handle; - RppiSize dimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u device_type; - -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif - -#if ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK validateNopbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - // check scalar alpha and beta type - vx_status status = VX_SUCCESS; - vx_parameter param = vxGetParameterByIndex(node, 0); - - vx_image image; - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryParameter(param, VX_PARAMETER_ATTRIBUTE_REF, &image, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - status = VX_ERROR_INVALID_VALUE; - - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_FORMAT, &df_image, sizeof(df_image))); - - vx_uint32 height, width; - STATUS_ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_HEIGHT, &height, sizeof(height))); - - STATUS_ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_WIDTH, &width, sizeof(width))); - vxReleaseImage(&image); - - return status; -} - -static vx_status VX_CALLBACK processNopbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NopbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->dimensions.height, sizeof(data->dimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->dimensions.width, sizeof(data->dimensions.width))); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - unsigned size = data->dimensions.height * data->dimensions.width; - } - else if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->dimensions.height, sizeof(data->dimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->dimensions.width, sizeof(data->dimensions.width))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - unsigned size = data->dimensions.height * data->dimensions.width; - } - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK initializeNopbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NopbatchPDLocalData *data = new NopbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->dimensions.height, sizeof(data->dimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->dimensions.width, sizeof(data->dimensions.width))); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[2], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); -#else - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); -#endif - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeNopbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status NopbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NopbatchPD", - VX_KERNEL_RPP_NOPBATCHPD, - processNopbatchPD, - 3, - validateNopbatchPD, - initializeNopbatchPD, - uninitializeNopbatchPD); - - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/PhasebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/PhasebatchPD.cpp deleted file mode 100644 index d3379215a..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/PhasebatchPD.cpp +++ /dev/null @@ -1,274 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct PhasebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshPhasebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, PhasebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validatePhasebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: PhasebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: PhasebatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processPhasebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - PhasebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshPhasebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_phase_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_phase_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshPhasebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_phase_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_phase_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshPhasebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_phase_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_phase_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializePhasebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PhasebatchPDLocalData *data = new PhasebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshPhasebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializePhasebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PhasebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status PhasebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.PhasebatchPD", - VX_KERNEL_RPP_PHASEBATCHPD, - processPhasebatchPD, - 7, - validatePhasebatchPD, - initializePhasebatchPD, - uninitializePhasebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/PixelatebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/PixelatebatchPD.cpp deleted file mode 100644 index 4f07dba61..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/PixelatebatchPD.cpp +++ /dev/null @@ -1,259 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct PixelatebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshPixelatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, PixelatebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validatePixelatebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: PixelatebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processPixelatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - PixelatebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshPixelatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_pixelate_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_pixelate_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshPixelatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_pixelate_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_pixelate_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshPixelatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_pixelate_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_pixelate_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializePixelatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PixelatebatchPDLocalData *data = new PixelatebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshPixelatebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializePixelatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PixelatebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status PixelatebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.PixelatebatchPD", - VX_KERNEL_RPP_PIXELATEBATCHPD, - processPixelatebatchPD, - 6, - validatePixelatebatchPD, - initializePixelatebatchPD, - uninitializePixelatebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/RainbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/RainbatchPD.cpp deleted file mode 100644 index 9c41f92b7..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/RainbatchPD.cpp +++ /dev/null @@ -1,279 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct RainbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *rainValue; - vx_uint32 *rainWidth; - vx_uint32 *rainHeight; - vx_float32 *rainTransperancy; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRainbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, RainbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->rainValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_uint32), data->rainWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->rainHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_float32), data->rainTransperancy, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateRainbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RainbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processRainbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RainbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshRainbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_rain_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->rainValue, data->rainWidth, data->rainHeight, data->rainTransperancy, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_rain_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->rainValue, data->rainWidth, data->rainHeight, data->rainTransperancy, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshRainbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_rain_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->rainValue, data->rainWidth, data->rainHeight, data->rainTransperancy, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_rain_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->rainValue, data->rainWidth, data->rainHeight, data->rainTransperancy, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshRainbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_rain_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->rainValue, data->rainWidth, data->rainHeight, data->rainTransperancy, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_rain_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->rainValue, data->rainWidth, data->rainHeight, data->rainTransperancy, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRainbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RainbatchPDLocalData *data = new RainbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); - data->rainValue = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->rainWidth = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->rainHeight = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->rainTransperancy = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshRainbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRainbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RainbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->rainHeight); - free(data->rainWidth); - free(data->rainTransperancy); - free(data->rainValue); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status RainbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RainbatchPD", - VX_KERNEL_RPP_RAINBATCHPD, - processRainbatchPD, - 10, - validateRainbatchPD, - initializeRainbatchPD, - uninitializeRainbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/RandomCropLetterBoxbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/RandomCropLetterBoxbatchPD.cpp deleted file mode 100644 index b8479d096..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/RandomCropLetterBoxbatchPD.cpp +++ /dev/null @@ -1,299 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct RandomCropLetterBoxbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - Rpp32u *dstBatch_width; - Rpp32u *dstBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *x1; - vx_uint32 *y1; - vx_uint32 *x2; - vx_uint32 *y2; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRandomCropLetterBoxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, RandomCropLetterBoxbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_uint32), data->y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(vx_uint32), data->x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(vx_uint32), data->y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - data->dstDimensions[i].width = data->dstBatch_width[i]; - data->dstDimensions[i].height = data->dstBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateRandomCropLetterBoxbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RandomCropLetterBoxbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processRandomCropLetterBoxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RandomCropLetterBoxbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshRandomCropLetterBoxbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_random_crop_letterbox_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->x1, data->y1, data->x2, data->y2, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_random_crop_letterbox_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->x1, data->y1, data->x2, data->y2, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshRandomCropLetterBoxbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_random_crop_letterbox_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->x1, data->y1, data->x2, data->y2, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_random_crop_letterbox_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->x1, data->y1, data->x2, data->y2, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshRandomCropLetterBoxbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_random_crop_letterbox_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->y1, data->x2, data->y2, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_random_crop_letterbox_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->y1, data->x2, data->y2, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRandomCropLetterBoxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomCropLetterBoxbatchPDLocalData *data = new RandomCropLetterBoxbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - data->x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshRandomCropLetterBoxbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRandomCropLetterBoxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomCropLetterBoxbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->dstDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->dstBatch_width); - free(data->dstBatch_height); - free(data->x1); - free(data->x2); - free(data->y1); - free(data->y2); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status RandomCropLetterBoxbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RandomCropLetterBoxbatchPD", - VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPD, - processRandomCropLetterBoxbatchPD, - 12, - validateRandomCropLetterBoxbatchPD, - initializeRandomCropLetterBoxbatchPD, - uninitializeRandomCropLetterBoxbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/RandomShadowbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/RandomShadowbatchPD.cpp deleted file mode 100644 index e40ace961..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/RandomShadowbatchPD.cpp +++ /dev/null @@ -1,295 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct RandomShadowbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *x1; - vx_uint32 *y1; - vx_uint32 *x2; - vx_uint32 *y2; - vx_uint32 *numberOfShadows; - vx_uint32 *maxSizeX; - vx_uint32 *maxSizeY; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRandomShadowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, RandomShadowbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_uint32), data->y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_uint32), data->y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(vx_uint32), data->numberOfShadows, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(vx_uint32), data->maxSizeX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[10], 0, data->nbatchSize, sizeof(vx_uint32), data->maxSizeY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateRandomShadowbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #12 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RandomShadowbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processRandomShadowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RandomShadowbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshRandomShadowbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_random_shadow_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->x1, data->y1, data->x2, data->y2, data->numberOfShadows, data->maxSizeX, data->maxSizeY, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_random_shadow_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->x1, data->y1, data->x2, data->y2, data->numberOfShadows, data->maxSizeX, data->maxSizeY, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshRandomShadowbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_random_shadow_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->x1, data->y1, data->x2, data->y2, data->numberOfShadows, data->maxSizeX, data->maxSizeY, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_random_shadow_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->x1, data->y1, data->x2, data->y2, data->numberOfShadows, data->maxSizeX, data->maxSizeY, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshRandomShadowbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_random_shadow_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->x1, data->y1, data->x2, data->y2, data->numberOfShadows, data->maxSizeX, data->maxSizeY, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_random_shadow_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->x1, data->y1, data->x2, data->y2, data->numberOfShadows, data->maxSizeX, data->maxSizeY, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRandomShadowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomShadowbatchPDLocalData *data = new RandomShadowbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->nbatchSize)); - data->x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->numberOfShadows = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->maxSizeX = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->maxSizeY = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshRandomShadowbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRandomShadowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomShadowbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->x1); - free(data->x2); - free(data->y1); - free(data->y2); - free(data->numberOfShadows); - free(data->maxSizeX); - free(data->maxSizeY); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status RandomShadowbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RandomShadowbatchPD", - VX_KERNEL_RPP_RANDOMSHADOWBATCHPD, - processRandomShadowbatchPD, - 13, - validateRandomShadowbatchPD, - initializeRandomShadowbatchPD, - uninitializeRandomShadowbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/Remap.cpp b/amd_openvx_extensions/amd_rpp/source/image/Remap.cpp deleted file mode 100644 index 9986ac4ce..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/Remap.cpp +++ /dev/null @@ -1,266 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct remapLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u *rowRemap; - Rpp32u *colRemap; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshremap(vx_node node, const vx_reference *parameters, vx_uint32 num, remapLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->rowRemap, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->colRemap, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateremap(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: remap: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processremap(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - remapLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - // #if ENABLE_OPENCL - // refreshremap(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_remap_u8_pln1_gpu(static_cast(data->cl_pSrc),data->srcDimensions,static_cast(data->cl_pDst),data->rowRemap,data->colRemap,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_remap_u8_pkd3_gpu(static_cast(data->cl_pSrc),data->srcDimensions,static_cast(data->cl_pDst),data->rowRemap,data->colRemap,data->rppHandle); - // } - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #elif ENABLE_HIP - // refreshremap(node, parameters, num, data); - // if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_remap_u8_pln1_gpu(static_cast(data->hip_pSrc),data->srcDimensions,static_cast(data->hip_pDst),data->rowRemap,data->colRemap,data->rppHandle); - // } - // else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_remap_u8_pkd3_gpu(static_cast(data->hip_pSrc),data->srcDimensions,static_cast(data->hip_pDst),data->rowRemap,data->colRemap,data->rppHandle); - // } - // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - // #endif - return VX_ERROR_NOT_IMPLEMENTED; - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshremap(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_remap_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->rowRemap, data->colRemap, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_remap_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->rowRemap, data->colRemap, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeremap(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - remapLocalData *data = new remapLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - data->rowRemap = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->colRemap = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshremap(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeremap(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - remapLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - free(data->rowRemap); - free(data->colRemap); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status remap_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.remap", - VX_KERNEL_RPP_REMAPBATCHPD, - processremap, - 8, - validateremap, - initializeremap, - uninitializeremap); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/ResizeCropMirrorPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/ResizeCropMirrorPD.cpp deleted file mode 100644 index 76a535f67..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/ResizeCropMirrorPD.cpp +++ /dev/null @@ -1,306 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ResizeCropMirrorPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - Rpp32u *dstBatch_width; - Rpp32u *dstBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *x1; - vx_uint32 *y1; - vx_uint32 *x2; - vx_uint32 *y2; - vx_uint32 *mirrorFlag; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshResizeCropMirrorPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizeCropMirrorPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_uint32), data->y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(vx_uint32), data->x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(vx_uint32), data->y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[10], 0, data->nbatchSize, sizeof(vx_uint32), data->mirrorFlag, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - data->dstDimensions[i].width = data->dstBatch_width[i]; - data->dstDimensions[i].height = data->dstBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateResizeCropMirrorPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ResizeCropMirrorPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processResizeCropMirrorPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ResizeCropMirrorPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - vx_int32 output_format_toggle = 0; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshResizeCropMirrorPD(node, parameters, num, data); - - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_crop_mirror_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_crop_mirror_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshResizeCropMirrorPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_crop_mirror_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_crop_mirror_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshResizeCropMirrorPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - - rpp_status = rppi_resize_crop_mirror_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_crop_mirror_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeResizeCropMirrorPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizeCropMirrorPDLocalData *data = new ResizeCropMirrorPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->nbatchSize)); - data->x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->mirrorFlag = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshResizeCropMirrorPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeResizeCropMirrorPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizeCropMirrorPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->dstDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->dstBatch_width); - free(data->dstBatch_height); - free(data->x1); - free(data->x2); - free(data->y1); - free(data->y2); - free(data->mirrorFlag); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status ResizeCropMirrorPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizeCropMirrorPD", - VX_KERNEL_RPP_RESIZECROPMIRRORPD, - processResizeCropMirrorPD, - 13, - validateResizeCropMirrorPD, - initializeResizeCropMirrorPD, - uninitializeResizeCropMirrorPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/ResizeCropbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/ResizeCropbatchPD.cpp deleted file mode 100644 index 094f89900..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/ResizeCropbatchPD.cpp +++ /dev/null @@ -1,299 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ResizeCropbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - Rpp32u *dstBatch_width; - Rpp32u *dstBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *x1; - vx_uint32 *y1; - vx_uint32 *x2; - vx_uint32 *y2; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshResizeCropbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizeCropbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_uint32), data->y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(vx_uint32), data->x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(vx_uint32), data->y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - data->dstDimensions[i].width = data->dstBatch_width[i]; - data->dstDimensions[i].height = data->dstBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateResizeCropbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ResizeCropbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processResizeCropbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ResizeCropbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - vx_int32 output_format_toggle = 0; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshResizeCropbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_crop_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_crop_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshResizeCropbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_crop_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_crop_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshResizeCropbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_crop_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_crop_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeResizeCropbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizeCropbatchPDLocalData *data = new ResizeCropbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - data->x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshResizeCropbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeResizeCropbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizeCropbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->dstDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->dstBatch_width); - free(data->dstBatch_height); - free(data->x1); - free(data->x2); - free(data->y1); - free(data->y2); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status ResizeCropbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizeCropbatchPD", - VX_KERNEL_RPP_RESIZECROPBATCHPD, - processResizeCropbatchPD, - 12, - validateResizeCropbatchPD, - initializeResizeCropbatchPD, - uninitializeResizeCropbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/ResizeMirrorNormalizeTensor.cpp b/amd_openvx_extensions/amd_rpp/source/image/ResizeMirrorNormalizeTensor.cpp deleted file mode 100644 index d318acbe3..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/ResizeMirrorNormalizeTensor.cpp +++ /dev/null @@ -1,317 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ResizeMirrorNormalizeTensorLocalData { - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize maxSrcDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *mean; - vx_float32 *std_dev; - vx_uint32 *mirror; - vx_uint32 chnShift; //NHWC to NCHW - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - Rpp32u *dstBatch_width; - Rpp32u *dstBatch_height; - RpptDescPtr srcDescPtr, dstDescPtr; - RpptROIPtr roiTensorPtrSrc; - RpptRoiType roiType; - RpptImagePatchPtr dstImgSize; - RpptDesc srcDesc, dstDesc; -#if ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; - RpptImagePatch *d_dstImgSize; - RpptROI *d_roiTensorPtrSrc; -#endif -}; - -static vx_status VX_CALLBACK refreshResizeMirrorNormalizeTensor(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizeMirrorNormalizeTensorLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize * 3, sizeof(vx_float32), data->mean, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize * 3, sizeof(vx_float32), data->std_dev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(vx_uint32), data->mirror, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->chnShift)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for(int i = 0; i < data->nbatchSize; i++) { - data->roiTensorPtrSrc[i].xywhROI.roiWidth = data->srcBatch_width[i]; - data->roiTensorPtrSrc[i].xywhROI.roiHeight = data->srcBatch_height[i]; - data->dstImgSize[i].width = data->dstBatch_width[i]; - data->dstImgSize[i].height = data->dstBatch_height[i]; - data->roiTensorPtrSrc[i].xywhROI.xy.x = 0; - data->roiTensorPtrSrc[i].xywhROI.xy.y = 0; - } -#if ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); - CHECK_HIP_RETURN_STATUS(hipMemcpy(data->d_dstImgSize, data->dstImgSize, data->nbatchSize * sizeof(RpptImagePatch), hipMemcpyHostToDevice)); - CHECK_HIP_RETURN_STATUS(hipMemcpy(data->d_roiTensorPtrSrc, data->roiTensorPtrSrc, data->nbatchSize * sizeof(RpptROI), hipMemcpyHostToDevice)); - } -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateResizeMirrorNormalizeTensor(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ResizeMirrorNormalizeTensor: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processResizeMirrorNormalizeTensor(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ResizeMirrorNormalizeTensorLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - -#if ENABLE_HIP - if (data->device_type == AGO_TARGET_AFFINITY_GPU) { - refreshResizeMirrorNormalizeTensor(node, parameters, num, data); - status = rppt_resize_mirror_normalize_gpu(static_cast(data->hip_pSrc), data->srcDescPtr, static_cast(data->hip_pDst), data->dstDescPtr, data->d_dstImgSize, RpptInterpolationType::BILINEAR, data->mean, data->std_dev, data->mirror, data->d_roiTensorPtrSrc, data->roiType, data->handle->rppHandle); - return_status = (status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - return return_status; - } -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshResizeMirrorNormalizeTensor(node, parameters, num, data); - status = rppt_resize_mirror_normalize_host(data->pSrc, data->srcDescPtr, data->pDst, data->dstDescPtr, data->dstImgSize, RpptInterpolationType::BILINEAR, data->mean, data->std_dev, data->mirror, data->roiTensorPtrSrc, data->roiType, data->handle->rppHandle); - return_status = (status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - return return_status; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeResizeMirrorNormalizeTensor(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizeMirrorNormalizeTensorLocalData * data = new ResizeMirrorNormalizeTensorLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - data->mean = static_cast(malloc(sizeof(vx_float32) * data->nbatchSize * 3)); - data->std_dev = static_cast(malloc(sizeof(vx_float32) * data->nbatchSize * 3)); - data->mirror = static_cast(malloc(sizeof(vx_uint32) * data->nbatchSize)); - data->srcBatch_width = static_cast(malloc(sizeof(Rpp32u) * data->nbatchSize)); - data->srcBatch_height = static_cast(malloc(sizeof(Rpp32u) * data->nbatchSize)); - data->dstBatch_width = static_cast(malloc(sizeof(Rpp32u) * data->nbatchSize)); - data->dstBatch_height = static_cast(malloc(sizeof(Rpp32u) * data->nbatchSize)); - data->dstImgSize = static_cast(malloc(sizeof(RpptImagePatch) * data->nbatchSize)); - - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - - // Check if it is a RGB or single channel U8 input - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - uint ip_channel = (df_image == VX_DF_IMAGE_RGB) ? 3 : 1; - - // Initializing tensor config parameters. - data->srcDescPtr = &data->srcDesc; - data->dstDescPtr = &data->dstDesc; - - data->srcDescPtr->dataType = RpptDataType::U8; - data->dstDescPtr->dataType = RpptDataType::U8; - - // Set numDims, offset, n/c/h/w values for src/dst - data->srcDescPtr->numDims = 4; - data->dstDescPtr->numDims = 4; - data->srcDescPtr->offsetInBytes = 0; - data->dstDescPtr->offsetInBytes = 0; - data->srcDescPtr->n = data->nbatchSize; - data->srcDescPtr->h = data->maxSrcDimensions.height; - data->srcDescPtr->w = data->maxSrcDimensions.width; - data->srcDescPtr->c = ip_channel; - data->dstDescPtr->n = data->nbatchSize; - data->dstDescPtr->h = data->maxDstDimensions.height; - data->dstDescPtr->w = data->maxDstDimensions.width; - data->dstDescPtr->c = ip_channel; - - data->srcDescPtr->layout = RpptLayout::NHWC; - data->dstDescPtr->layout = RpptLayout::NHWC; - - data->srcDescPtr->strides.nStride = ip_channel * data->srcDescPtr->w * data->srcDescPtr->h; - data->srcDescPtr->strides.hStride = ip_channel * data->srcDescPtr->w; - data->srcDescPtr->strides.wStride = ip_channel; - data->srcDescPtr->strides.cStride = 1; - - data->dstDescPtr->strides.nStride = ip_channel * data->dstDescPtr->w * data->dstDescPtr->h; - data->dstDescPtr->strides.hStride = ip_channel * data->dstDescPtr->w; - data->dstDescPtr->strides.wStride = ip_channel; - data->dstDescPtr->strides.cStride = 1; - - - // Initialize ROI tensors for src/dst - data->roiTensorPtrSrc = static_cast(calloc(data->nbatchSize, sizeof(RpptROI))); - - // Set ROI tensors types for src/dst - data->roiType = RpptRoiType::XYWH; -#if ENABLE_HIP - CHECK_HIP_RETURN_STATUS(hipMalloc(&data->d_dstImgSize, data->nbatchSize * sizeof(RpptImagePatch))); - CHECK_HIP_RETURN_STATUS(hipMalloc(&data->d_roiTensorPtrSrc, data->nbatchSize * sizeof(RpptROI))); -#endif - refreshResizeMirrorNormalizeTensor(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeResizeMirrorNormalizeTensor(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizeMirrorNormalizeTensorLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - -#if ENABLE_HIP - CHECK_HIP_RETURN_STATUS(hipFree(data->d_dstImgSize)); - CHECK_HIP_RETURN_STATUS(hipFree(data->d_roiTensorPtrSrc)); -#endif - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->mean); - free(data->std_dev); - free(data->mirror); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->dstBatch_width); - free(data->dstBatch_height); - free(data->roiTensorPtrSrc); - free(data->dstImgSize); - delete(data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph - -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - return VX_SUCCESS; -} - -vx_status ResizeMirrorNormalizeTensor_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizeMirrorNormalizeTensor", - VX_KERNEL_RPP_RESIZEMIRRORNORMALIZETENSOR, - processResizeMirrorNormalizeTensor, - 12, - validateResizeMirrorNormalizeTensor, - initializeResizeMirrorNormalizeTensor, - uninitializeResizeMirrorNormalizeTensor); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable Device buffer access since the kernel_f callback uses Device buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/ResizebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/ResizebatchPD.cpp deleted file mode 100644 index ec7ad0155..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/ResizebatchPD.cpp +++ /dev/null @@ -1,279 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ResizebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - Rpp32u *dstBatch_width; - Rpp32u *dstBatch_height; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshResizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - data->dstDimensions[i].width = data->dstBatch_width[i]; - data->dstDimensions[i].height = data->dstBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateResizebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ResizebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processResizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ResizebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vx_int32 output_format_toggle = 0; - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshResizebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshResizebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshResizebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeResizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizebatchPDLocalData *data = new ResizebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshResizebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeResizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->dstDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->dstBatch_width); - free(data->dstBatch_height); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph - -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - - // hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - return VX_SUCCESS; -} - -vx_status ResizebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizebatchPD", - VX_KERNEL_RPP_RESIZEBATCHPD, - processResizebatchPD, - 8, - validateResizebatchPD, - initializeResizebatchPD, - uninitializeResizebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/Resizetensor.cpp b/amd_openvx_extensions/amd_rpp/source/image/Resizetensor.cpp deleted file mode 100644 index d5036d90c..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/Resizetensor.cpp +++ /dev/null @@ -1,331 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ResizetensorLocalData -{ - vxRppHandle * handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - Rpp32u *dstBatch_width; - Rpp32u *dstBatch_height; - RpptDescPtr srcDescPtr, dstDescPtr; - RpptROIPtr roiTensorPtrSrc; - RpptRoiType roiType; - RpptImagePatchPtr dstImgSize; - RpptDesc srcDesc, dstDesc; - RpptInterpolationType interpolation_type; -#if ENABLE_HIP - void *pSrc_dev; - void *pDst_dev; - RpptImagePatch *dstImgSize_dev; - RpptROI *roiTensorPtrSrc_dev; -#endif -}; - -static vx_status VX_CALLBACK refreshResizetensor(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizetensorLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->roiTensorPtrSrc[i].xywhROI.roiWidth = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->roiTensorPtrSrc[i].xywhROI.roiHeight = data->srcBatch_height[i]; - data->dstDimensions[i].width = data->dstImgSize[i].width = data->dstBatch_width[i]; - data->dstDimensions[i].height = data->dstImgSize[i].height = data->dstBatch_height[i]; - data->roiTensorPtrSrc[i].xywhROI.xy.x = 0; - data->roiTensorPtrSrc[i].xywhROI.xy.y = 0; - } -#if ENABLE_HIP - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->pSrc_dev, sizeof(data->pSrc_dev))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->pDst_dev, sizeof(data->pDst_dev))); - CHECK_HIP_RETURN_STATUS(hipMemcpy(data->dstImgSize_dev, data->dstImgSize, data->nbatchSize * sizeof(RpptImagePatch), hipMemcpyHostToDevice)); - CHECK_HIP_RETURN_STATUS(hipMemcpy(data->roiTensorPtrSrc_dev, data->roiTensorPtrSrc, data->nbatchSize * sizeof(RpptROI), hipMemcpyHostToDevice)); - } -#endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateResizetensor(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_INT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Resizetensor: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processResizetensor(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ResizetensorLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_HIP - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - refreshResizetensor(node, parameters, num, data); - rpp_status = rppt_resize_gpu(data->pSrc_dev, data->srcDescPtr, data->pDst_dev, data->dstDescPtr, data->dstImgSize_dev, data->interpolation_type, data->roiTensorPtrSrc_dev, data->roiType, data->handle->rppHandle); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } -#endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshResizetensor(node, parameters, num, data); - rpp_status = rppt_resize_host(data->pSrc, data->srcDescPtr, data->pDst, data->dstDescPtr, data->dstImgSize, data->interpolation_type, data->roiTensorPtrSrc, data->roiType, data->handle->rppHandle); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeResizetensor(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizetensorLocalData *data = new ResizetensorLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - int interpolation_type; - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &interpolation_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstImgSize = (RpptImagePatch *)malloc(sizeof(RpptImagePatch) * data->nbatchSize); - - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - - // Check if it is a RGB or single channel U8 input - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - uint ip_channel = (df_image == VX_DF_IMAGE_RGB) ? 3 : 1; - - // Set the interpolartion type - data->interpolation_type = (RpptInterpolationType)interpolation_type; - - // Initializing tensor config parameters. - data->srcDescPtr = &data->srcDesc; - data->dstDescPtr = &data->dstDesc; - - data->srcDescPtr->dataType = RpptDataType::U8; - data->dstDescPtr->dataType = RpptDataType::U8; - - // Set numDims, offset, n/c/h/w values for src/dst - data->srcDescPtr->numDims = 4; - data->dstDescPtr->numDims = 4; - data->srcDescPtr->offsetInBytes = 0; - data->dstDescPtr->offsetInBytes = 0; - data->srcDescPtr->n = data->nbatchSize; - data->srcDescPtr->h = data->maxSrcDimensions.height; - data->srcDescPtr->w = data->maxSrcDimensions.width; - data->srcDescPtr->c = ip_channel; - data->dstDescPtr->n = data->nbatchSize; - data->dstDescPtr->h = data->maxDstDimensions.height; - data->dstDescPtr->w = data->maxDstDimensions.width; - data->dstDescPtr->c = ip_channel; - // Set layout and n/c/h/w strides for src/dst - if(df_image == VX_DF_IMAGE_U8) // For PLN1 images - { - data->srcDescPtr->layout = RpptLayout::NCHW; - data->dstDescPtr->layout = RpptLayout::NCHW; - data->srcDescPtr->strides.nStride = ip_channel * data->srcDescPtr->w * data->srcDescPtr->h; - data->srcDescPtr->strides.cStride = data->srcDescPtr->w * data->srcDescPtr->h; - data->srcDescPtr->strides.hStride = data->srcDescPtr->w; - data->srcDescPtr->strides.wStride = 1; - data->dstDescPtr->strides.nStride = ip_channel * data->dstDescPtr->w * data->dstDescPtr->h; - data->dstDescPtr->strides.cStride = data->dstDescPtr->w * data->dstDescPtr->h; - data->dstDescPtr->strides.hStride = data->dstDescPtr->w; - data->dstDescPtr->strides.wStride = 1; - } - else // For RGB (NHWC/NCHW) images - { - data->srcDescPtr->layout = RpptLayout::NHWC; - data->dstDescPtr->layout = RpptLayout::NHWC; - data->srcDescPtr->strides.nStride = ip_channel * data->srcDescPtr->w * data->srcDescPtr->h; - data->srcDescPtr->strides.hStride = ip_channel * data->srcDescPtr->w; - data->srcDescPtr->strides.wStride = ip_channel; - data->srcDescPtr->strides.cStride = 1; - data->dstDescPtr->strides.nStride = ip_channel * data->dstDescPtr->w * data->dstDescPtr->h; - data->dstDescPtr->strides.hStride = ip_channel * data->dstDescPtr->w; - data->dstDescPtr->strides.wStride = ip_channel; - data->dstDescPtr->strides.cStride = 1; - } - - // Initialize ROI tensors for src/dst - data->roiTensorPtrSrc = static_cast(calloc(data->nbatchSize, sizeof(RpptROI))); - - // Set ROI tensors types for src/dst - data->roiType = RpptRoiType::XYWH; -#if ENABLE_HIP - CHECK_HIP_RETURN_STATUS(hipMalloc(&data->dstImgSize_dev, data->nbatchSize * sizeof(RpptImagePatch))); - CHECK_HIP_RETURN_STATUS(hipMalloc(&data->roiTensorPtrSrc_dev, data->nbatchSize * sizeof(RpptROI))); -#endif - refreshResizetensor(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->srcDescPtr->n, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeResizetensor(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizetensorLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_HIP - CHECK_HIP_RETURN_STATUS(hipFree(data->dstImgSize_dev)); - CHECK_HIP_RETURN_STATUS(hipFree(data->roiTensorPtrSrc_dev)); -#endif - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->dstDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->dstBatch_width); - free(data->dstBatch_height); - free(data->roiTensorPtrSrc); - free(data->dstImgSize); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph - -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - return VX_SUCCESS; -} - -vx_status Resizetensor_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Resizetensor", - VX_KERNEL_RPP_RESIZETENSOR, - processResizetensor, - 9, - validateResizetensor, - initializeResizetensor, - uninitializeResizetensor); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_HIP - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/RotatebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/RotatebatchPD.cpp deleted file mode 100644 index e4e9c22f1..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/RotatebatchPD.cpp +++ /dev/null @@ -1,284 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct RotatebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - Rpp32u *dstBatch_width; - Rpp32u *dstBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *angle; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRotatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, RotatebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_float32), data->angle, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - data->dstDimensions[i].width = data->dstBatch_width[i]; - data->dstDimensions[i].height = data->dstBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateRotatebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RotatebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processRotatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RotatebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - vx_int32 output_format_toggle = 0; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshRotatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_rotate_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_rotate_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshRotatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_rotate_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_rotate_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshRotatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_rotate_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_rotate_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRotatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RotatebatchPDLocalData *data = new RotatebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - data->angle = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshRotatebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRotatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RotatebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->dstDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->dstBatch_width); - free(data->dstBatch_height); - free(data->angle); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status RotatebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RotatebatchPD", - VX_KERNEL_RPP_ROTATEBATCHPD, - processRotatebatchPD, - 9, - validateRotatebatchPD, - initializeRotatebatchPD, - uninitializeRotatebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/SaturationbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/SaturationbatchPD.cpp deleted file mode 100644 index 5e2f783ab..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/SaturationbatchPD.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct SaturationbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *saturationFactor; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSaturationbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, SaturationbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->saturationFactor, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSaturationbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SaturationbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSaturationbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SaturationbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshSaturationbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - return VX_ERROR_NOT_SUPPORTED; - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_saturationRGB_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->saturationFactor, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshSaturationbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - return VX_ERROR_NOT_SUPPORTED; - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_saturationRGB_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->saturationFactor, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshSaturationbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - return VX_ERROR_NOT_SUPPORTED; - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_saturationRGB_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->saturationFactor, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSaturationbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SaturationbatchPDLocalData *data = new SaturationbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->saturationFactor = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - refreshSaturationbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSaturationbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SaturationbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->saturationFactor); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status SaturationbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SaturationbatchPD", - VX_KERNEL_RPP_SATURATIONBATCHPD, - processSaturationbatchPD, - 7, - validateSaturationbatchPD, - initializeSaturationbatchPD, - uninitializeSaturationbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/ScalebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/ScalebatchPD.cpp deleted file mode 100644 index 65d47b86a..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/ScalebatchPD.cpp +++ /dev/null @@ -1,285 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ScalebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - Rpp32u *dstBatch_width; - Rpp32u *dstBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *percentage; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshScalebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ScalebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_float32), data->percentage, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - data->dstDimensions[i].width = data->dstBatch_width[i]; - data->dstDimensions[i].height = data->dstBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateScalebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ScalebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processScalebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ScalebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshScalebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_scale_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->percentage, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_scale_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->percentage, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshScalebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_scale_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->percentage, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_scale_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->percentage, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshScalebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_scale_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->percentage, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_scale_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->percentage, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeScalebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ScalebatchPDLocalData *data = new ScalebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - data->percentage = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshScalebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeScalebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ScalebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->dstDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->dstBatch_width); - free(data->dstBatch_height); - free(data->percentage); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status ScalebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ScalebatchPD", - VX_KERNEL_RPP_SCALEBATCHPD, - processScalebatchPD, - 9, - validateScalebatchPD, - initializeScalebatchPD, - uninitializeScalebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/SequenceRearrangebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/SequenceRearrangebatchPD.cpp deleted file mode 100644 index 7124d1c8d..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/SequenceRearrangebatchPD.cpp +++ /dev/null @@ -1,294 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct SequenceRearrangebatchPDLocalData -{ - vxRppHandle *handle; - RppiSize dimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u device_type; - vx_uint32 new_sequence_length; - vx_uint32 sequence_length; - vx_uint32 sequence_count; - vx_uint32 *new_order; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK validateSequenceRearrangebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - vx_parameter param = vxGetParameterByIndex(node, 1); - vx_image image; - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryParameter(param, VX_PARAMETER_ATTRIBUTE_REF, &image, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - status = VX_ERROR_INVALID_VALUE; - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_FORMAT, &df_image, sizeof(df_image))); - vx_uint32 height, width; - STATUS_ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_WIDTH, &width, sizeof(width))); - vxReleaseImage(&image); - return status; -} - -static vx_status VX_CALLBACK processSequenceRearrangebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SequenceRearrangebatchPDLocalData *data = NULL; - vx_status return_status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->dimensions.height, sizeof(data->dimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->dimensions.width, sizeof(data->dimensions.width))); -#if ENABLE_OPENCL - cl_command_queue handle = data->handle->cmdq; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); - unsigned size = data->dimensions.height * data->dimensions.width; - if (df_image == VX_DF_IMAGE_U8) - { - unsigned elem_size = (size / (data->sequence_length * data->sequence_count)); - for (int sequence_cnt = 0; sequence_cnt < data->sequence_count; sequence_cnt++) - { - unsigned src_sequence_start_address = sequence_cnt * elem_size * data->sequence_length; - unsigned dst_sequence_start_address = sequence_cnt * elem_size * data->new_sequence_length; - for (unsigned dst_index = 0; dst_index < (data->new_sequence_length); dst_index++) - { - unsigned src_index = data->new_order[dst_index]; - if (src_index > data->sequence_length) - ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequence_length - 1); - auto dst_offset = dst_sequence_start_address + (dst_index * elem_size); - auto src_offset = src_sequence_start_address + (src_index * elem_size); - if (clEnqueueCopyBuffer(handle, data->cl_pSrc, data->cl_pDst, src_offset, dst_offset, elem_size, 0, NULL, NULL) != CL_SUCCESS) - return VX_FAILURE; - } - } - } - else if (df_image == VX_DF_IMAGE_RGB) - { - unsigned elem_size = (size / (data->sequence_length * data->sequence_count)) * 3; - for (int sequence_cnt = 0; sequence_cnt < data->sequence_count; sequence_cnt++) - { - unsigned src_sequence_start_address = sequence_cnt * elem_size * data->sequence_length; - unsigned dst_sequence_start_address = sequence_cnt * elem_size * data->new_sequence_length; - for (unsigned dst_index = 0; dst_index < (data->new_sequence_length); dst_index++) - { - unsigned src_index = data->new_order[dst_index]; - if (src_index > data->sequence_length) - ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequence_length - 1); - auto dst_offset = dst_sequence_start_address + (dst_index * elem_size); - auto src_offset = src_sequence_start_address + (src_index * elem_size); - if (clEnqueueCopyBuffer(handle, data->cl_pSrc, data->cl_pDst, src_offset, dst_offset, elem_size, 0, NULL, NULL) != CL_SUCCESS) - return VX_FAILURE; - } - } - } - return_status = VX_SUCCESS; -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); - unsigned size = data->dimensions.height * data->dimensions.width; - if (df_image == VX_DF_IMAGE_U8) - { - unsigned elem_size = (size / (data->sequence_length * data->sequence_count)); - for (int sequence_cnt = 0; sequence_cnt < data->sequence_count; sequence_cnt++) - { - unsigned src_sequence_start_address = sequence_cnt * elem_size * data->sequence_length; - unsigned dst_sequence_start_address = sequence_cnt * elem_size * data->new_sequence_length; - for (unsigned dst_index = 0; dst_index < (data->new_sequence_length); dst_index++) - { - unsigned src_index = data->new_order[dst_index]; - if (src_index > data->sequence_length) - ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequence_length - 1); - auto dst_address = (unsigned char *)data->hip_pDst + dst_sequence_start_address + (dst_index * elem_size); - auto src_address = (unsigned char *)data->hip_pSrc + src_sequence_start_address + (src_index * elem_size); - hipError_t status = hipMemcpyDtoD(dst_address, src_address, elem_size); - if (status != hipSuccess) - return VX_FAILURE; - } - } - } - else if (df_image == VX_DF_IMAGE_RGB) - { - unsigned elem_size = (size / (data->sequence_length * data->sequence_count)) * 3; - for (int sequence_cnt = 0; sequence_cnt < data->sequence_count; sequence_cnt++) - { - unsigned src_sequence_start_address = sequence_cnt * elem_size * data->sequence_length; - unsigned dst_sequence_start_address = sequence_cnt * elem_size * data->new_sequence_length; - for (unsigned dst_index = 0; dst_index < (data->new_sequence_length); dst_index++) - { - unsigned src_index = data->new_order[dst_index]; - if (src_index > data->sequence_length) - ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequence_length - 1); - auto dst_address = (unsigned char *)data->hip_pDst + dst_sequence_start_address + (dst_index * elem_size); - auto src_address = (unsigned char *)data->hip_pSrc + src_sequence_start_address + (src_index * elem_size); - hipError_t status = hipMemcpyDtoD(dst_address, src_address, elem_size); - if (status != hipSuccess) - return VX_FAILURE; - } - } - } - return_status = VX_SUCCESS; -#endif - } - else if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->dimensions.height, sizeof(data->dimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->dimensions.width, sizeof(data->dimensions.width))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - unsigned size = data->dimensions.height * data->dimensions.width; - if (df_image == VX_DF_IMAGE_U8) - { - unsigned elem_size = (size / (data->sequence_length * data->sequence_count)); - for (int sequence_cnt = 0; sequence_cnt < data->sequence_count; sequence_cnt++) - { - unsigned src_sequence_start_address = sequence_cnt * elem_size * data->sequence_length; - unsigned dst_sequence_start_address = sequence_cnt * elem_size * data->new_sequence_length; - for (unsigned dst_index = 0; dst_index < (data->new_sequence_length); dst_index++) - { - unsigned src_index = data->new_order[dst_index]; - if (src_index > data->sequence_length) - ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequence_length - 1); - auto dst_address = (unsigned char *)data->pDst + dst_sequence_start_address + (dst_index * elem_size); - auto src_address = (unsigned char *)data->pSrc + src_sequence_start_address + (src_index * elem_size); - memcpy(dst_address, src_address, elem_size); - } - } - } - else if (df_image == VX_DF_IMAGE_RGB) - { - unsigned elem_size = (size / (data->sequence_length * data->sequence_count)) * 3; - for (int sequence_cnt = 0; sequence_cnt < data->sequence_count; sequence_cnt++) - { - unsigned src_sequence_start_address = sequence_cnt * elem_size * data->sequence_length; - unsigned dst_sequence_start_address = sequence_cnt * elem_size * data->new_sequence_length; - for (unsigned dst_index = 0; dst_index < (data->new_sequence_length); dst_index++) - { - unsigned src_index = data->new_order[dst_index]; - if (src_index > data->sequence_length) - ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequence_length - 1); - auto dst_address = (unsigned char *)data->pDst + dst_sequence_start_address + (dst_index * elem_size); - auto src_address = (unsigned char *)data->pSrc + src_sequence_start_address + (src_index * elem_size); - memcpy(dst_address, src_address, elem_size); - } - } - } - return_status = VX_SUCCESS; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSequenceRearrangebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SequenceRearrangebatchPDLocalData *data = new SequenceRearrangebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->dimensions.height, sizeof(data->dimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->dimensions.width, sizeof(data->dimensions.width))); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->new_sequence_length, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->sequence_length, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->sequence_count, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - data->new_order = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->new_sequence_length); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->new_sequence_length, sizeof(vx_uint32), data->new_order, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); -#else - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, data->pSrc, sizeof(data->pSrc))); -#endif - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSequenceRearrangebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - return VX_SUCCESS; -} - -vx_status SequenceRearrangebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SequenceRearrangebatchPD", - VX_KERNEL_RPP_SEQUENCEREARRANGEBATCHPD, - processSequenceRearrangebatchPD, - 7, - validateSequenceRearrangebatchPD, - initializeSequenceRearrangebatchPD, - uninitializeSequenceRearrangebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/SnowbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/SnowbatchPD.cpp deleted file mode 100644 index a7910031e..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/SnowbatchPD.cpp +++ /dev/null @@ -1,264 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct SnowbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *snowValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSnowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, SnowbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->snowValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSnowbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SnowbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSnowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SnowbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshSnowbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_snow_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->snowValue, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_snow_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->snowValue, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshSnowbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_snow_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->snowValue, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_snow_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->snowValue, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshSnowbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_snow_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->snowValue, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_snow_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->snowValue, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSnowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SnowbatchPDLocalData *data = new SnowbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->snowValue = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - refreshSnowbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSnowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SnowbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)) - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->snowValue); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status SnowbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SnowbatchPD", - VX_KERNEL_RPP_SNOWBATCHPD, - processSnowbatchPD, - 7, - validateSnowbatchPD, - initializeSnowbatchPD, - uninitializeSnowbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/SobelbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/SobelbatchPD.cpp deleted file mode 100644 index ea66646bf..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/SobelbatchPD.cpp +++ /dev/null @@ -1,249 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct SobelbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *sobelType; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSobelbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, SobelbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->sobelType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSobelbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SobelbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSobelbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SobelbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshSobelbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_sobel_filter_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->sobelType, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_sobel_filter_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->sobelType, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshSobelbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_sobel_filter_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->sobelType, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_sobel_filter_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->sobelType, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSobelbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SobelbatchPDLocalData *data = new SobelbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->sobelType = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshSobelbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSobelbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SobelbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->sobelType); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status SobelbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SobelbatchPD", - VX_KERNEL_RPP_SOBELBATCHPD, - processSobelbatchPD, - 7, - validateSobelbatchPD, - initializeSobelbatchPD, - uninitializeSobelbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/SubtractbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/SubtractbatchPD.cpp deleted file mode 100644 index 12970b396..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/SubtractbatchPD.cpp +++ /dev/null @@ -1,275 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct SubtractbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSubtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, SubtractbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSubtractbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SubtractbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node, 1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SubtractbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSubtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SubtractbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshSubtractbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_subtract_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_subtract_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshSubtractbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_subtract_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_subtract_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshSubtractbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_subtract_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_subtract_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSubtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SubtractbatchPDLocalData *data = new SubtractbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshSubtractbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSubtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SubtractbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcBatch_height); - free(data->srcBatch_width); - free(data->srcDimensions); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status SubtractbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SubtractbatchPD", - VX_KERNEL_RPP_SUBTRACTBATCHPD, - processSubtractbatchPD, - 7, - validateSubtractbatchPD, - initializeSubtractbatchPD, - uninitializeSubtractbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/TensorAdd.cpp b/amd_openvx_extensions/amd_rpp/source/image/TensorAdd.cpp deleted file mode 100644 index 6398a43e4..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/TensorAdd.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct TensorAddLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp8u *pSrc1; - Rpp8u *pSrc2; - Rpp8u *pDst; - Rpp32u tensorDimensions; - Rpp32u *tensorDimensionsValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshTensorAdd(vx_node node, const vx_reference *parameters, vx_uint32 num, TensorAddLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc1 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[0], 0, arr_size, sizeof(Rpp8u), data->pSrc1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc2 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - data->pDst = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp8u), data->pSrc2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->tensorDimensions)); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->tensorDimensionsValue = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(Rpp32u), data->tensorDimensionsValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - cl_context theContext; - cl_command_queue theQueue; - theQueue = data->handle->cmdq; - clGetCommandQueueInfo(theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - cl_int err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc1, CL_TRUE, 0, - bytes, data->pSrc1, 0, NULL, NULL); - if (err) - return VX_FAILURE; - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc2, CL_TRUE, 0, - bytes, data->pSrc2, 0, NULL, NULL); - if (err) - return VX_FAILURE; -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - hipError_t err; - err = hipMemcpy(data->hip_pSrc1, data->pSrc1, bytes, hipMemcpyHostToDevice); - if (err != hipSuccess) - return VX_FAILURE; - err = hipMemcpy(data->hip_pSrc2, data->pSrc2, bytes, hipMemcpyHostToDevice); - if (err != hipSuccess) - return VX_FAILURE; -#endif - } - - return status; -} - -static vx_status VX_CALLBACK validateTensorAdd(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - return status; -} - -static vx_status VX_CALLBACK processTensorAdd(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - TensorAddLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - size_t arr_size; - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshTensorAdd(node, parameters, num, data); - rpp_status = rppi_tensor_add_u8_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), static_cast(data->cl_pDst), data->tensorDimensions, data->tensorDimensionsValue, data->handle->rppHandle); - cl_command_queue theQueue; - theQueue = data->handle->cmdq; - cl_int err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - clEnqueueReadBuffer(theQueue, data->cl_pDst, CL_TRUE, 0, bytes, data->pDst, 0, NULL, NULL); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshTensorAdd(node, parameters, num, data); - rpp_status = rppi_tensor_add_u8_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), static_cast(data->hip_pDst), data->tensorDimensions, data->tensorDimensionsValue, data->handle->rppHandle); - hipError_t err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - err = hipMemcpy(data->pDst, data->hip_pDst, bytes, hipMemcpyDeviceToHost); - if (err != hipSuccess) - return VX_FAILURE; - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshTensorAdd(node, parameters, num, data); - rpp_status = rppi_tensor_add_u8_host(data->pSrc1, data->pSrc2, data->pDst, data->tensorDimensions, data->tensorDimensionsValue, data->handle->rppHandle); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u), data->pDst, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); - return return_status; -} - -static vx_status VX_CALLBACK initializeTensorAdd(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - TensorAddLocalData *data = new TensorAddLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - cl_context theContext; // theContext - cl_command_queue theQueue; // command theQueue - theQueue = data->handle->cmdq; - clGetCommandQueueInfo(theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - data->cl_pSrc1 = clCreateBuffer(theContext, CL_MEM_READ_ONLY, bytes, NULL, NULL); - data->cl_pSrc2 = clCreateBuffer(theContext, CL_MEM_READ_ONLY, bytes, NULL, NULL); - data->cl_pDst = clCreateBuffer(theContext, CL_MEM_WRITE_ONLY, bytes, NULL, NULL); -#elif ENABLE_HIP - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - hipError_t status; - status = hipMalloc(&data->hip_pSrc1, bytes); - if (status != hipSuccess) - return VX_FAILURE; - status = hipMalloc(&data->hip_pSrc2, bytes); - if (status != hipSuccess) - return VX_FAILURE; - status = hipMalloc(&data->hip_pDst, bytes); - if (status != hipSuccess) - return VX_FAILURE; -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshTensorAdd(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, 1, data->device_type)); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeTensorAdd(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - TensorAddLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status TensorAdd_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.TensorAdd", - VX_KERNEL_RPP_TENSORADD, - processTensorAdd, - 6, - validateTensorAdd, - initializeTensorAdd, - uninitializeTensorAdd); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/TensorLookup.cpp b/amd_openvx_extensions/amd_rpp/source/image/TensorLookup.cpp deleted file mode 100644 index 59f92fcc7..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/TensorLookup.cpp +++ /dev/null @@ -1,223 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct TensorLookupLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp8u *pSrc; - Rpp8u *luPtr; - Rpp8u *pDst; - Rpp32u tensorDimensions; - Rpp32u *tensorDimensionsValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshTensorLookup(vx_node node, const vx_reference *parameters, vx_uint32 num, TensorLookupLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - // Input - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[0], 0, arr_size, sizeof(Rpp8u), data->pSrc, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - //Output - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pDst = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->luPtr = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u), data->luPtr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->tensorDimensions)); - // tensor dim values - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->tensorDimensionsValue = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(Rpp32u), data->tensorDimensionsValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - cl_context theContext; - cl_command_queue theQueue; - theQueue = data->handle->cmdq; - clGetCommandQueueInfo(theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - cl_int err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc, CL_TRUE, 0, - bytes, data->pSrc, 0, NULL, NULL); -#endif - } - - return status; -} - -static vx_status VX_CALLBACK validateTensorLookup(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - return status; -} - -static vx_status VX_CALLBACK processTensorLookup(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - TensorLookupLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - size_t arr_size; - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - // #if ENABLE_OPENCL - // cl_command_queue handle = data->handle->cmdq; - // refreshTensorLookup(node, parameters, num, data); - // rpp_status = rppi_tensor_look_up_table_u8_gpu(static_cast(data->cl_pSrc),static_cast(data->cl_pDst), data->tensorDimensions, data->tensorDimensionsValue,data->luPtr,data->rppHandle); - // cl_command_queue theQueue; - // theQueue = data->handle->cmdq; - // cl_int err; - // STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - // size_t bytes = arr_size * sizeof(Rpp8u); - // clEnqueueReadBuffer(theQueue, data->cl_pDst, CL_TRUE, 0, bytes, data->pDst, 0, NULL, NULL ); - // #endif - return VX_ERROR_NOT_IMPLEMENTED; - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshTensorLookup(node, parameters, num, data); - rpp_status = rppi_tensor_look_up_table_u8_host(data->pSrc, data->pDst, data->luPtr, data->tensorDimensions, data->tensorDimensionsValue); - } - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp8u), data->pDst, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); - return return_status; -} - -static vx_status VX_CALLBACK initializeTensorLookup(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - TensorLookupLocalData *data = new TensorLookupLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle->cmdq, sizeof(data->handle->cmdq))); - cl_context theContext; // theContext - cl_command_queue theQueue; // command theQueue - theQueue = data->handle->cmdq; - clGetCommandQueueInfo(theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - data->cl_pSrc = clCreateBuffer(theContext, CL_MEM_READ_ONLY, bytes, NULL, NULL); - data->cl_pDst = clCreateBuffer(theContext, CL_MEM_WRITE_ONLY, bytes, NULL, NULL); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshTensorLookup(node, parameters, num, data); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeTensorLookup(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - TensorLookupLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status TensorLookup_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.TensorLookup", - VX_KERNEL_RPP_TENSORLOOKUP, - processTensorLookup, - 6, - validateTensorLookup, - initializeTensorLookup, - uninitializeTensorLookup); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/TensorMatrixMultiply.cpp b/amd_openvx_extensions/amd_rpp/source/image/TensorMatrixMultiply.cpp deleted file mode 100644 index 3eb7c7f61..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/TensorMatrixMultiply.cpp +++ /dev/null @@ -1,269 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct TensorMatrixMultiplyLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp8u *pSrc1; - Rpp8u *pSrc2; - Rpp8u *pDst; - Rpp32u *tensorDimensionsValue1; - Rpp32u *tensorDimensionsValue2; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshTensorMatrixMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num, TensorMatrixMultiplyLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc1 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[0], 0, arr_size, sizeof(Rpp8u), data->pSrc1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc2 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp8u), data->pSrc2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pDst = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[3], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->tensorDimensionsValue1 = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, arr_size, sizeof(Rpp32u), data->tensorDimensionsValue1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->tensorDimensionsValue2 = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(Rpp32u), data->tensorDimensionsValue2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - cl_context theContext; - cl_command_queue theQueue; - theQueue = data->handle->cmdq; - clGetCommandQueueInfo(theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - cl_int err; - size_t bytes; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - bytes = arr_size * sizeof(Rpp8u); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc1, CL_TRUE, 0, - bytes, data->pSrc1, 0, NULL, NULL); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - bytes = arr_size * sizeof(Rpp8u); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc2, CL_TRUE, 0, - bytes, data->pSrc2, 0, NULL, NULL); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - hipError_t err; - err = hipMemcpy(data->hip_pSrc1, data->pSrc1, bytes, hipMemcpyHostToDevice); - if (err != hipSuccess) - return VX_FAILURE; - err = hipMemcpy(data->hip_pSrc2, data->pSrc2, bytes, hipMemcpyHostToDevice); - if (err != hipSuccess) - return VX_FAILURE; -#endif - } - - return status; -} - -static vx_status VX_CALLBACK validateTensorMatrixMultiply(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - return status; -} - -static vx_status VX_CALLBACK processTensorMatrixMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - TensorMatrixMultiplyLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - size_t arr_size; - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshTensorMatrixMultiply(node, parameters, num, data); - rpp_status = rppi_tensor_matrix_multiply_u8_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), static_cast(data->cl_pDst), data->tensorDimensionsValue1, data->tensorDimensionsValue2, data->handle->rppHandle); - cl_command_queue theQueue; - theQueue = data->handle->cmdq; - cl_int err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - clEnqueueReadBuffer(theQueue, data->cl_pDst, CL_TRUE, 0, bytes, data->pDst, 0, NULL, NULL); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshTensorMatrixMultiply(node, parameters, num, data); - rpp_status = rppi_tensor_matrix_multiply_u8_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), static_cast(data->hip_pDst), data->tensorDimensionsValue1, data->tensorDimensionsValue2, data->handle->rppHandle); - hipError_t err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - err = hipMemcpy(data->pDst, data->hip_pDst, bytes, hipMemcpyDeviceToHost); - if (err != hipSuccess) - return VX_FAILURE; - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshTensorMatrixMultiply(node, parameters, num, data); - rpp_status = rppi_tensor_matrix_multiply_u8_host(data->pSrc1, data->pSrc2, data->pDst, data->tensorDimensionsValue1, data->tensorDimensionsValue2, data->handle->rppHandle); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u), data->pDst, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); - return return_status; -} - -static vx_status VX_CALLBACK initializeTensorMatrixMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - TensorMatrixMultiplyLocalData *data = new TensorMatrixMultiplyLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - cl_context theContext; // theContext - cl_command_queue theQueue; // command theQueue - theQueue = data->handle->cmdq; - clGetCommandQueueInfo(theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - size_t arr_size; - size_t bytes; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - bytes = arr_size * sizeof(Rpp8u); - data->cl_pSrc1 = clCreateBuffer(theContext, CL_MEM_READ_ONLY, bytes, NULL, NULL); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - bytes = arr_size * sizeof(Rpp8u); - data->cl_pSrc2 = clCreateBuffer(theContext, CL_MEM_READ_ONLY, bytes, NULL, NULL); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - bytes = arr_size * sizeof(Rpp8u); - data->cl_pDst = clCreateBuffer(theContext, CL_MEM_WRITE_ONLY, bytes, NULL, NULL); -#elif ENABLE_HIP - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - hipError_t status; - status = hipMalloc(&data->hip_pSrc1, bytes); - if (status != hipSuccess) - return VX_FAILURE; - status = hipMalloc(&data->hip_pSrc2, bytes); - if (status != hipSuccess) - return VX_FAILURE; - status = hipMalloc(&data->hip_pDst, bytes); - if (status != hipSuccess) - return VX_FAILURE; -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshTensorMatrixMultiply(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, 1, data->device_type)); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeTensorMatrixMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - TensorMatrixMultiplyLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status TensorMatrixMultiply_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.TensorMatrixMultiply", - VX_KERNEL_RPP_TENSORMATRIXMULTIPLY, - processTensorMatrixMultiply, - 6, - validateTensorMatrixMultiply, - initializeTensorMatrixMultiply, - uninitializeTensorMatrixMultiply); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/TensorMultiply.cpp b/amd_openvx_extensions/amd_rpp/source/image/TensorMultiply.cpp deleted file mode 100644 index 4a848ba3a..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/TensorMultiply.cpp +++ /dev/null @@ -1,261 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct TensorMultiplyLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp8u *pSrc1; - Rpp8u *pSrc2; - Rpp8u *pDst; - Rpp32u tensorDimensions; - Rpp32u *tensorDimensionsValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshTensorMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num, TensorMultiplyLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc1 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[0], 0, arr_size, sizeof(Rpp8u), data->pSrc1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc2 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - data->pDst = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp8u), data->pSrc2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->tensorDimensions)); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->tensorDimensionsValue = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(Rpp32u), data->tensorDimensionsValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - cl_context theContext; - cl_command_queue theQueue; - theQueue = data->handle->cmdq; - clGetCommandQueueInfo(theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - cl_int err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc1, CL_TRUE, 0, - bytes, data->pSrc1, 0, NULL, NULL); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc2, CL_TRUE, 0, - bytes, data->pSrc2, 0, NULL, NULL); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - hipError_t err; - err = hipMemcpy(data->hip_pSrc1, data->pSrc1, bytes, hipMemcpyHostToDevice); - if (err != hipSuccess) - return VX_FAILURE; - err = hipMemcpy(data->hip_pSrc2, data->pSrc2, bytes, hipMemcpyHostToDevice); - if (err != hipSuccess) - return VX_FAILURE; -#endif - } - - return status; -} - -static vx_status VX_CALLBACK validateTensorMultiply(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - return status; -} - -static vx_status VX_CALLBACK processTensorMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - TensorMultiplyLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - size_t arr_size; - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshTensorMultiply(node, parameters, num, data); - rpp_status = rppi_tensor_multiply_u8_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), static_cast(data->cl_pDst), data->tensorDimensions, data->tensorDimensionsValue, data->handle->rppHandle); - cl_command_queue theQueue; - theQueue = data->handle->cmdq; - cl_int err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - clEnqueueReadBuffer(theQueue, data->cl_pDst, CL_TRUE, 0, bytes, data->pDst, 0, NULL, NULL); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshTensorMultiply(node, parameters, num, data); - rpp_status = rppi_tensor_multiply_u8_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), static_cast(data->hip_pDst), data->tensorDimensions, data->tensorDimensionsValue, data->handle->rppHandle); - hipError_t err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - err = hipMemcpy(data->pDst, data->hip_pDst, bytes, hipMemcpyDeviceToHost); - if (err != hipSuccess) - return VX_FAILURE; - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshTensorMultiply(node, parameters, num, data); - rpp_status = rppi_tensor_multiply_u8_host(data->pSrc1, data->pSrc2, data->pDst, data->tensorDimensions, data->tensorDimensionsValue, data->handle->rppHandle); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u), data->pDst, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); - return return_status; -} - -static vx_status VX_CALLBACK initializeTensorMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - TensorMultiplyLocalData *data = new TensorMultiplyLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - cl_context theContext; // theContext - cl_command_queue theQueue; // command theQueue - theQueue = data->handle->cmdq; - clGetCommandQueueInfo(theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - data->cl_pSrc1 = clCreateBuffer(theContext, CL_MEM_READ_ONLY, bytes, NULL, NULL); - data->cl_pSrc2 = clCreateBuffer(theContext, CL_MEM_READ_ONLY, bytes, NULL, NULL); - data->cl_pDst = clCreateBuffer(theContext, CL_MEM_WRITE_ONLY, bytes, NULL, NULL); -#elif ENABLE_HIP - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - hipError_t status; - status = hipMalloc(&data->hip_pSrc1, bytes); - if (status != hipSuccess) - return VX_FAILURE; - status = hipMalloc(&data->hip_pSrc2, bytes); - if (status != hipSuccess) - return VX_FAILURE; - status = hipMalloc(&data->hip_pDst, bytes); - if (status != hipSuccess) - return VX_FAILURE; -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshTensorMultiply(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, 1, data->device_type)); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeTensorMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - TensorMultiplyLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status TensorMultiply_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.TensorMultiply", - VX_KERNEL_RPP_TENSORMULTIPLY, - processTensorMultiply, - 6, - validateTensorMultiply, - initializeTensorMultiply, - uninitializeTensorMultiply); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/TensorSubtract.cpp b/amd_openvx_extensions/amd_rpp/source/image/TensorSubtract.cpp deleted file mode 100644 index 5b3802adb..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/TensorSubtract.cpp +++ /dev/null @@ -1,261 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct TensorSubtractLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp8u *pSrc1; - Rpp8u *pSrc2; - Rpp8u *pDst; - Rpp32u tensorDimensions; - Rpp32u *tensorDimensionsValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshTensorSubtract(vx_node node, const vx_reference *parameters, vx_uint32 num, TensorSubtractLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc1 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[0], 0, arr_size, sizeof(Rpp8u), data->pSrc1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc2 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - data->pDst = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp8u), data->pSrc2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->tensorDimensions)); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->tensorDimensionsValue = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(Rpp32u), data->tensorDimensionsValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - cl_context theContext; - cl_command_queue theQueue; - theQueue = data->handle->cmdq; - clGetCommandQueueInfo(theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - cl_int err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc1, CL_TRUE, 0, - bytes, data->pSrc1, 0, NULL, NULL); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc2, CL_TRUE, 0, - bytes, data->pSrc2, 0, NULL, NULL); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - hipError_t err; - err = hipMemcpy(data->hip_pSrc1, data->pSrc1, bytes, hipMemcpyHostToDevice); - if (err != hipSuccess) - return VX_FAILURE; - err = hipMemcpy(data->hip_pSrc2, data->pSrc2, bytes, hipMemcpyHostToDevice); - if (err != hipSuccess) - return VX_FAILURE; -#endif - } - - return status; -} - -static vx_status VX_CALLBACK validateTensorSubtract(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - return status; -} - -static vx_status VX_CALLBACK processTensorSubtract(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - TensorSubtractLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - size_t arr_size; - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshTensorSubtract(node, parameters, num, data); - rpp_status = rppi_tensor_subtract_u8_gpu(static_cast(data->cl_pSrc1), static_cast(data->cl_pSrc2), static_cast(data->cl_pDst), data->tensorDimensions, data->tensorDimensionsValue, data->handle->rppHandle); - cl_command_queue theQueue; - theQueue = data->handle->cmdq; - cl_int err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - clEnqueueReadBuffer(theQueue, data->cl_pDst, CL_TRUE, 0, bytes, data->pDst, 0, NULL, NULL); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshTensorSubtract(node, parameters, num, data); - rpp_status = rppi_tensor_subtract_u8_gpu(static_cast(data->hip_pSrc1), static_cast(data->hip_pSrc2), static_cast(data->hip_pDst), data->tensorDimensions, data->tensorDimensionsValue, data->handle->rppHandle); - hipError_t err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - err = hipMemcpy(data->pDst, data->hip_pDst, bytes, hipMemcpyDeviceToHost); - if (err != hipSuccess) - return VX_FAILURE; - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshTensorSubtract(node, parameters, num, data); - rpp_status = rppi_tensor_subtract_u8_host(data->pSrc1, data->pSrc2, data->pDst, data->tensorDimensions, data->tensorDimensionsValue, data->handle->rppHandle); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u), data->pDst, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); - return return_status; -} - -static vx_status VX_CALLBACK initializeTensorSubtract(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - TensorSubtractLocalData *data = new TensorSubtractLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - cl_context theContext; // theContext - cl_command_queue theQueue; // command theQueue - theQueue = data->handle->cmdq; - clGetCommandQueueInfo(theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - data->cl_pSrc1 = clCreateBuffer(theContext, CL_MEM_READ_ONLY, bytes, NULL, NULL); - data->cl_pSrc2 = clCreateBuffer(theContext, CL_MEM_READ_ONLY, bytes, NULL, NULL); - data->cl_pDst = clCreateBuffer(theContext, CL_MEM_WRITE_ONLY, bytes, NULL, NULL); -#elif ENABLE_HIP - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - hipError_t status; - status = hipMalloc(&data->hip_pSrc1, bytes); - if (status != hipSuccess) - return VX_FAILURE; - status = hipMalloc(&data->hip_pSrc2, bytes); - if (status != hipSuccess) - return VX_FAILURE; - status = hipMalloc(&data->hip_pDst, bytes); - if (status != hipSuccess) - return VX_FAILURE; -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshTensorSubtract(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, 1, data->device_type)); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeTensorSubtract(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - TensorSubtractLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status TensorSubtract_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.TensorSubtract", - VX_KERNEL_RPP_TENSORSUBTRACT, - processTensorSubtract, - 6, - validateTensorSubtract, - initializeTensorSubtract, - uninitializeTensorSubtract); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/ThresholdingbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/ThresholdingbatchPD.cpp deleted file mode 100644 index 1938f6cac..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/ThresholdingbatchPD.cpp +++ /dev/null @@ -1,271 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ThresholdingbatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint8 *min; - vx_uint8 *max; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshThresholdingbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ThresholdingbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint8), data->min, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_uint8), data->max, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateThresholdingbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ThresholdingbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processThresholdingbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ThresholdingbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshThresholdingbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_thresholding_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->min, data->max, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_thresholding_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->min, data->max, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshThresholdingbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_thresholding_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->min, data->max, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_thresholding_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->min, data->max, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshThresholdingbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_thresholding_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->min, data->max, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_thresholding_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->min, data->max, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeThresholdingbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ThresholdingbatchPDLocalData *data = new ThresholdingbatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - data->min = (vx_uint8 *)malloc(sizeof(vx_uint8) * data->nbatchSize); - data->max = (vx_uint8 *)malloc(sizeof(vx_uint8) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshThresholdingbatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeThresholdingbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ThresholdingbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->min); - free(data->max); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status ThresholdingbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ThresholdingbatchPD", - VX_KERNEL_RPP_THRESHOLDINGBATCHPD, - processThresholdingbatchPD, - 8, - validateThresholdingbatchPD, - initializeThresholdingbatchPD, - uninitializeThresholdingbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/VignettebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/VignettebatchPD.cpp deleted file mode 100644 index 55c9a0564..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/VignettebatchPD.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct VignettebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *stdDev; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshVignettebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, VignettebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->stdDev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateVignettebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: VignettebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processVignettebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - VignettebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshVignettebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_vignette_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->stdDev, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_vignette_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->stdDev, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshVignettebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_vignette_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->stdDev, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_vignette_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->stdDev, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshVignettebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_vignette_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->stdDev, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_vignette_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->stdDev, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeVignettebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - VignettebatchPDLocalData *data = new VignettebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->stdDev = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - refreshVignettebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeVignettebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - VignettebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->stdDev); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status VignettebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.VignettebatchPD", - VX_KERNEL_RPP_VIGNETTEBATCHPD, - processVignettebatchPD, - 7, - validateVignettebatchPD, - initializeVignettebatchPD, - uninitializeVignettebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/WarpAffinebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/WarpAffinebatchPD.cpp deleted file mode 100644 index 1cc19bb60..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/WarpAffinebatchPD.cpp +++ /dev/null @@ -1,285 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct WarpAffinebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - Rpp32u *dstBatch_width; - Rpp32u *dstBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f *affine; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshWarpAffinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, WarpAffinebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, 6 * data->nbatchSize, sizeof(vx_float32), data->affine, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - data->dstDimensions[i].width = data->dstBatch_width[i]; - data->dstDimensions[i].height = data->dstBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateWarpAffinebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: WarpAffinebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processWarpAffinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - WarpAffinebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - vx_int32 output_format_toggle = 0; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshWarpAffinebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_warp_affine_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->affine, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_warp_affine_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->affine, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshWarpAffinebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_warp_affine_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->affine, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_warp_affine_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->affine, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshWarpAffinebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_warp_affine_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->affine, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_warp_affine_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->affine, output_format_toggle, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeWarpAffinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpAffinebatchPDLocalData *data = new WarpAffinebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - data->affine = (vx_float32 *)malloc(sizeof(vx_float32) * 6 * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshWarpAffinebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeWarpAffinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpAffinebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->dstDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->dstBatch_width); - free(data->dstBatch_height); - free(data->affine); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status WarpAffinebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.WarpAffinebatchPD", - VX_KERNEL_RPP_WARPAFFINEBATCHPD, - processWarpAffinebatchPD, - 9, - validateWarpAffinebatchPD, - initializeWarpAffinebatchPD, - uninitializeWarpAffinebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/image/WarpPerspectivebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/image/WarpPerspectivebatchPD.cpp deleted file mode 100644 index 59a97dcdb..000000000 --- a/amd_openvx_extensions/amd_rpp/source/image/WarpPerspectivebatchPD.cpp +++ /dev/null @@ -1,285 +0,0 @@ -/* -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct WarpPerspectivebatchPDLocalData -{ - vxRppHandle *handle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - Rpp32u *dstBatch_width; - Rpp32u *dstBatch_height; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f *perspective; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshWarpPerspectivebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, WarpPerspectivebatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, 9 * data->nbatchSize, sizeof(vx_float32), data->perspective, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - data->dstDimensions[i].width = data->dstBatch_width[i]; - data->dstDimensions[i].height = data->dstBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateWarpPerspectivebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: WarpPerspectivebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processWarpPerspectivebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - WarpPerspectivebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { -#if ENABLE_OPENCL - refreshWarpPerspectivebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_warp_perspective_u8_pln1_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->perspective, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_warp_perspective_u8_pkd3_batchPD_gpu(static_cast(data->cl_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->cl_pDst), data->dstDimensions, data->maxDstDimensions, data->perspective, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#elif ENABLE_HIP - refreshWarpPerspectivebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_warp_perspective_u8_pln1_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->perspective, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_warp_perspective_u8_pkd3_batchPD_gpu(static_cast(data->hip_pSrc), data->srcDimensions, data->maxSrcDimensions, static_cast(data->hip_pDst), data->dstDimensions, data->maxDstDimensions, data->perspective, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshWarpPerspectivebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_warp_perspective_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->perspective, data->nbatchSize, data->handle->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_warp_perspective_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->perspective, data->nbatchSize, data->handle->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; -} - -static vx_status VX_CALLBACK initializeWarpPerspectivebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpPerspectivebatchPDLocalData *data = new WarpPerspectivebatchPDLocalData; - memset(data, 0, sizeof(*data)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->perspective = (vx_float32 *)malloc(sizeof(vx_float32) * 9 * data->nbatchSize); - refreshWarpPerspectivebatchPD(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->nbatchSize, data->device_type)); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeWarpPerspectivebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpPerspectivebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - free(data->srcDimensions); - free(data->dstDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->dstBatch_width); - free(data->dstBatch_height); - free(data->perspective); - delete (data); - return VX_SUCCESS; -} - -//! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph -static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) -) -{ - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes -#if ENABLE_OPENCL - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; -} - -vx_status WarpPerspectivebatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.WarpPerspectivebatchPD", - VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPD, - processWarpPerspectivebatchPD, - 9, - validateWarpPerspectivebatchPD, - initializeWarpPerspectivebatchPD, - uninitializeWarpPerspectivebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp index 092406ea2..543481efc 100644 --- a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp +++ b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp @@ -44,91 +44,6 @@ vx_status get_kernels_to_publish() vx_status status = VX_SUCCESS; Kernel_List = new Kernellist(MAX_KERNELS); -#if RPP_LEGACY_SUPPORT - STATUS_ERROR_CHECK(ADD_KERNEL(BrightnessbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(GammaCorrectionbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(BlendbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(BlurbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(ContrastbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(PixelatebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(JitterbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(SnowbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(NoisebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(RandomShadowbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(FogbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(RainbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(RandomCropLetterBoxbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(ExposurebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(HistogramBalancebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(AbsoluteDifferencebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(AccumulateWeightedbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(AccumulatebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(AddbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(SubtractbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(MagnitudebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(MultiplybatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(PhasebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(AccumulateSquaredbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(BitwiseANDbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(BitwiseNOTbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(ExclusiveORbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(InclusiveORbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(Histogram_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(ThresholdingbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(MaxbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(MinbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(MinMaxLoc_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(HistogramEqualizebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(MeanStddev_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(FlipbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(ResizebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(ResizeCropbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(RotatebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(WarpAffinebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(FisheyebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(LensCorrectionbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(ScalebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(WarpPerspectivebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(DilatebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(ErodebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(HuebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(SaturationbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(ColorTemperaturebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(VignettebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(ChannelExtractbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(ChannelCombinebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(LookUpTablebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(BoxFilterbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(SobelbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(MedianFilterbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(CustomConvolutionbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(NonMaxSupressionbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(GaussianFilterbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(NonLinearFilterbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(LocalBinaryPatternbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(DataObjectCopybatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(GaussianImagePyramidbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(LaplacianImagePyramid_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(CannyEdgeDetector_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(HarrisCornerDetector_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(FastCornerDetector_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(remap_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(TensorAdd_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(TensorSubtract_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(TensorMultiply_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(TensorMatrixMultiply_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(TensorLookup_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(ColorTwistbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(CropMirrorNormalizePD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(CropPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(ResizeCropMirrorPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(CopybatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(NopbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(ResizeMirrorNormalizeTensor_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(SequenceRearrangebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(Resizetensor_Register)); -#endif - //tensor STATUS_ERROR_CHECK(ADD_KERNEL(Blend_Register)); STATUS_ERROR_CHECK(ADD_KERNEL(Blur_Register)); diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp index e497493a7..67d32b126 100644 --- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp +++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp @@ -34,1850 +34,6 @@ vx_uint32 getGraphAffinity(vx_graph graph) return affinity.device_type; } -#if RPP_LEGACY_SUPPORT -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_BrightnessbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array alpha, vx_array beta, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)alpha, - (vx_reference)beta, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_BRIGHTNESSBATCHPD, params, 8); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_GammaCorrectionbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array gamma, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)gamma, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_GAMMACORRECTIONBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_BlendbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array alpha, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)alpha, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_BLENDBATCHPD, params, 8); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_BlurbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)kernelSize, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_BLURBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ContrastbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array min, vx_array max, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)min, - (vx_reference)max, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_CONTRASTBATCHPD, params, 8); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_PixelatebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_PIXELATEBATCHPD, params, 6); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_JitterbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)kernelSize, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_JITTERBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_SnowbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array snowValue, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)snowValue, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_SNOWBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_NoisebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array noiseProbability, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)noiseProbability, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_NOISEBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_RandomShadowbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array x1, vx_array y1, vx_array x2, vx_array y2, vx_array numberOfShadows, vx_array maxSizeX, vx_array maxSizeY, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)x1, - (vx_reference)y1, - (vx_reference)x2, - (vx_reference)y2, - (vx_reference)numberOfShadows, - (vx_reference)maxSizeX, - (vx_reference)maxSizeY, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_RANDOMSHADOWBATCHPD, params, 13); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_FogbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array fogValue, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)fogValue, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_FOGBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_RainbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array rainValue, vx_array rainWidth, vx_array rainHeight, vx_array rainTransperancy, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)rainValue, - (vx_reference)rainWidth, - (vx_reference)rainHeight, - (vx_reference)rainTransperancy, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_RAINBATCHPD, params, 10); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_RandomCropLetterBoxbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array x1, vx_array y1, vx_array x2, vx_array y2, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)dstImgWidth, - (vx_reference)dstImgHeight, - (vx_reference)x1, - (vx_reference)y1, - (vx_reference)x2, - (vx_reference)y2, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPD, params, 12); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ExposurebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array exposureValue, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)exposureValue, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_EXPOSUREBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_HistogramBalancebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPD, params, 6); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_AbsoluteDifferencebatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_AccumulateWeightedbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_array alpha, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)alpha, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_AccumulatebatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_ACCUMULATEBATCHPD, params, 6); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_AddbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_ADDBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_SubtractbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_SUBTRACTBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_MagnitudebatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_MAGNITUDEBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_MultiplybatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_MULTIPLYBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_PhasebatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_PHASEBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_AccumulateSquaredbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPD, params, 5); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_BitwiseANDbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_BITWISEANDBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_BitwiseNOTbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_BITWISENOTBATCHPD, params, 6); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ExclusiveORbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_EXCLUSIVEORBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_InclusiveORbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_INCLUSIVEORBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_Histogram(vx_graph graph, vx_image pSrc, vx_array outputHistogram, vx_scalar bins) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)outputHistogram, - (vx_reference)bins, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_HISTOGRAM, params, 4); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ThresholdingbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array min, vx_array max, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)min, - (vx_reference)max, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_THRESHOLDINGBATCHPD, params, 8); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_MaxbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_MAXBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_MinbatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_MINBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_MinMaxLoc(vx_graph graph, vx_image pSrc, vx_scalar min, vx_scalar max, vx_scalar minLoc, vx_scalar maxLoc) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)min, - (vx_reference)max, - (vx_reference)minLoc, - (vx_reference)maxLoc, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_MINMAXLOC, params, 6); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_HistogramEqualizebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPD, params, 6); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_MeanStddev(vx_graph graph, vx_image pSrc, vx_scalar mean, vx_scalar stdDev) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)mean, - (vx_reference)stdDev, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_MEANSTDDEV, params, 4); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_FlipbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array flipAxis, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)flipAxis, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_FLIPBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ResizebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)dstImgWidth, - (vx_reference)dstImgHeight, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_RESIZEBATCHPD, params, 8); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_Resizetensor(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_int32 interpolation_type, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_scalar INTERPOLATION_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_INT32, &interpolation_type); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)dstImgWidth, - (vx_reference)dstImgHeight, - (vx_reference)INTERPOLATION_TYPE, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_RESIZETENSOR, params, 9); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ResizeCropbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array x1, vx_array y1, vx_array x2, vx_array y2, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)dstImgWidth, - (vx_reference)dstImgHeight, - (vx_reference)x1, - (vx_reference)y1, - (vx_reference)x2, - (vx_reference)y2, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_RESIZECROPBATCHPD, params, 12); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_RotatebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array angle, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)dstImgWidth, - (vx_reference)dstImgHeight, - (vx_reference)angle, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_ROTATEBATCHPD, params, 9); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_WarpAffinebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array affine, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)dstImgWidth, - (vx_reference)dstImgHeight, - (vx_reference)affine, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_WARPAFFINEBATCHPD, params, 9); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_FisheyebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_FISHEYEBATCHPD, params, 6); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_LensCorrectionbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array strength, vx_array zoom, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)strength, - (vx_reference)zoom, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_LENSCORRECTIONBATCHPD, params, 8); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ScalebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array percentage, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)dstImgWidth, - (vx_reference)dstImgHeight, - (vx_reference)percentage, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_SCALEBATCHPD, params, 9); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_WarpPerspectivebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array perspective, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)dstImgWidth, - (vx_reference)dstImgHeight, - (vx_reference)perspective, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPD, params, 9); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_DilatebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)kernelSize, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_DILATEBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ErodebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)kernelSize, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_ERODEBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_HuebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array hueShift, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)hueShift, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_HUEBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_SaturationbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array saturationFactor, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)saturationFactor, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_SATURATIONBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ColorTemperaturebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array adjustmentValue, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)adjustmentValue, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_COLORTEMPERATUREBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_VignettebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array stdDev, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)stdDev, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_VIGNETTEBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ChannelExtractbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array extractChannelNumber, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)extractChannelNumber, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_CHANNELEXTRACTBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ChannelCombinebatchPD(vx_graph graph, vx_image pSrc1, vx_image pSrc2, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pSrc3, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pSrc3, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_CHANNELCOMBINEBATCHPD, params, 8); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_LookUpTablebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array lutPtr, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)lutPtr, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_LOOKUPTABLEBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_BoxFilterbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)kernelSize, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_BOXFILTERBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_SobelbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array sobelType, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)sobelType, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_SOBELBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_MedianFilterbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)kernelSize, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_MEDIANFILTERBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_CustomConvolutionbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernel, vx_array kernelWidth, vx_array kernelHeight, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)kernel, - (vx_reference)kernelWidth, - (vx_reference)kernelHeight, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPD, params, 9); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_NonMaxSupressionbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)kernelSize, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_GaussianFilterbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array stdDev, vx_array kernelSize, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)stdDev, - (vx_reference)kernelSize, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_GAUSSIANFILTERBATCHPD, params, 8); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_NonLinearFilterbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array kernelSize, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)kernelSize, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_NONLINEARFILTERBATCHPD, params, 7); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_LocalBinaryPatternbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPD, params, 6); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_DataObjectCopybatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPD, params, 6); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_GaussianImagePyramidbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array stdDev, vx_array kernelSize, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)stdDev, - (vx_reference)kernelSize, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMIDBATCHPD, params, 8); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_LaplacianImagePyramid(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_scalar stdDev, vx_scalar kernelSize, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)stdDev, - (vx_reference)kernelSize, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_LAPLACIANIMAGEPYRAMID, params, 8); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_CannyEdgeDetector(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array max, vx_array min, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)max, - (vx_reference)min, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_CANNYEDGEDETECTOR, params, 8); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_HarrisCornerDetector(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight,vx_image pDst, vx_array gaussianKernelSize, vx_array stdDev, vx_array kernelSize, vx_array kValue, vx_array threshold, vx_array nonMaxKernelSize, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)gaussianKernelSize, - (vx_reference)stdDev, - (vx_reference)kernelSize, - (vx_reference)kValue, - (vx_reference)threshold, - (vx_reference)nonMaxKernelSize, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_HARRISCORNERDETECTOR, params, 12); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_FastCornerDetector(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array noOfPixels, vx_array threshold, vx_array nonMaxKernelSize, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)noOfPixels, - (vx_reference)threshold, - (vx_reference)nonMaxKernelSize, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_FASTCORNERDETECTOR, params, 9); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_remap(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array rowRemap, vx_array colRemap, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)rowRemap, - (vx_reference)colRemap, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_REMAPBATCHPD, params, 8); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_TensorAdd(vx_graph graph, vx_array pSrc1, vx_array pSrc2, vx_array pDst, vx_scalar tensorDimensions, vx_array tensorDimensionValues) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)pDst, - (vx_reference)tensorDimensions, - (vx_reference)tensorDimensionValues, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_TENSORADD, params, 6); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_TensorSubtract(vx_graph graph, vx_array pSrc1, vx_array pSrc2, vx_array pDst, vx_scalar tensorDimensions, vx_array tensorDimensionValues) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)pDst, - (vx_reference)tensorDimensions, - (vx_reference)tensorDimensionValues, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_TENSORSUBTRACT, params, 6); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_TensorMultiply(vx_graph graph, vx_array pSrc1, vx_array pSrc2, vx_array pDst, vx_scalar tensorDimensions, vx_array tensorDimensionValues) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)pDst, - (vx_reference)tensorDimensions, - (vx_reference)tensorDimensionValues, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_TENSORMULTIPLY, params, 6); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_TensorMatrixMultiply(vx_graph graph, vx_array pSrc1, vx_array pSrc2, vx_array pDst, vx_array tensorDimensionValues1, vx_array tensorDimensionValues2) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_reference params[] = { - (vx_reference)pSrc1, - (vx_reference)pSrc2, - (vx_reference)pDst, - (vx_reference)tensorDimensionValues1, - (vx_reference)tensorDimensionValues2, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_TENSORMATRIXMULTIPLY, params, 6); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_TensorLookup(vx_graph graph, vx_array pSrc, vx_array pDst, vx_array lutPtr, vx_scalar tensorDimensions, vx_array tensorDimensionValues) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)pDst, - (vx_reference)lutPtr, - (vx_reference)tensorDimensions, - (vx_reference)tensorDimensionValues, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_TENSORLOOKUP, params, 6); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ColorTwistbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array alpha, vx_array beta, vx_array hue, vx_array sat, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)alpha, - (vx_reference)beta, - (vx_reference)hue, - (vx_reference)sat, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_COLORTWISTBATCHPD, params, 10); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_CropMirrorNormalizebatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array x1, vx_array y1, vx_array mean, vx_array std_dev, vx_array flip, vx_scalar chnShift, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)dstImgWidth, - (vx_reference)dstImgHeight, - (vx_reference)x1, - (vx_reference)y1, - (vx_reference)mean, - (vx_reference)std_dev, - (vx_reference)flip, - (vx_reference)chnShift, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_CROPMIRRORNORMALIZEBATCHPD, params, 14); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_CropPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array x1, vx_array y1, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)dstImgWidth, - (vx_reference)dstImgHeight, - (vx_reference)x1, - (vx_reference)y1, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_CROPPD, params, 10); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ResizeCropMirrorPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array x1, vx_array y1, vx_array x2, vx_array y2, vx_array mirror, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)srcImgWidth, - (vx_reference)srcImgHeight, - (vx_reference)pDst, - (vx_reference)dstImgWidth, - (vx_reference)dstImgHeight, - (vx_reference)x1, - (vx_reference)x2, - (vx_reference)y1, - (vx_reference)y2, - (vx_reference)mirror, - (vx_reference)NBATCHSIZE, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_RESIZECROPMIRRORPD, params, 13); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ResizeMirrorNormalizeTensor(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array mean, vx_array std_dev, vx_array flip, vx_scalar chnShift, vx_uint32 nbatchSize) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if(vxGetStatus((vx_reference)context) == VX_SUCCESS) { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize); - vx_reference params[] = { - (vx_reference) pSrc, - (vx_reference) srcImgWidth, - (vx_reference) srcImgHeight, - (vx_reference) pDst, - (vx_reference) dstImgWidth, - (vx_reference) dstImgHeight, - (vx_reference) mean, - (vx_reference) std_dev, - (vx_reference) flip, - (vx_reference) chnShift, - (vx_reference) NBATCHSIZE, - (vx_reference) DEV_TYPE - }; - node = createNode(graph, VX_KERNEL_RPP_RESIZEMIRRORNORMALIZETENSOR, params, 12); - } - return node; -} - -VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_CopybatchPD(vx_graph graph, vx_image pSrc, vx_image pDst) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)pDst, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_COPYBATCHPD, params, 3); - } - return node; -} - -//Creating node for Pixelate effect -VX_API_CALL vx_node VX_API_CALL vxExtrppNode_NopbatchPD(vx_graph graph, vx_image pSrc, vx_image pDst) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_reference params[] = { - (vx_reference)pSrc, - (vx_reference)pDst, - (vx_reference)DEV_TYPE}; - node = createNode(graph, VX_KERNEL_RPP_NOPBATCHPD, params, 3); - } - return node; -} - -VX_API_CALL vx_node VX_API_CALL vxExtrppNode_SequenceRearrangebatchPD(vx_graph graph, vx_image pSrc, vx_image pDst, vx_array pNewOrder, vx_uint32 newSequenceLength, vx_uint32 sequenceLength, vx_uint32 sequenceCount) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if(vxGetStatus((vx_reference)context) == VX_SUCCESS) { - vx_uint32 devType = getGraphAffinity(graph); - vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType); - vx_scalar newSequenceLengthScalar = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &newSequenceLength); - vx_scalar sequenceLengthScalar = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &sequenceLength); - vx_scalar sequenceCountScalar = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &sequenceCount); - vx_reference params[] = { - (vx_reference) pSrc, - (vx_reference) pDst, - (vx_reference) pNewOrder, - (vx_reference) newSequenceLengthScalar, - (vx_reference) sequenceLengthScalar, - (vx_reference) sequenceCountScalar, - (vx_reference) deviceType - }; - node = createNode(graph, VX_KERNEL_RPP_SEQUENCEREARRANGEBATCHPD, params, 7); - } - return node; -} -#endif - //tensor VX_API_ENTRY vx_node VX_API_CALL vxExtRppBrightness(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pAlpha, vx_array pBeta, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) { @@ -2571,7 +727,7 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppSaturation(vx_graph graph, vx_tensor pS return node; } -VX_API_ENTRY vx_node VX_API_CALL vxExtRppSnow(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pSnowValue, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) { +VX_API_ENTRY vx_node VX_API_CALL vxExtRppSnow(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pBrightnessCoefficient, vx_array pSnowThreshold, vx_array pDarkMode, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) { vx_node node = NULL; vx_context context = vxGetContext((vx_reference)graph); if (vxGetStatus((vx_reference)context) == VX_SUCCESS) { @@ -2581,12 +737,14 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppSnow(vx_graph graph, vx_tensor pSrc, vx (vx_reference)pSrc, (vx_reference)pSrcRoi, (vx_reference)pDst, - (vx_reference)pSnowValue, + (vx_reference)pBrightnessCoefficient, + (vx_reference)pSnowThreshold, + (vx_reference)pDarkMode, (vx_reference)inputLayout, (vx_reference)outputLayout, (vx_reference)roiType, (vx_reference)deviceType}; - node = createNode(graph, VX_KERNEL_RPP_SNOW, params, 8); + node = createNode(graph, VX_KERNEL_RPP_SNOW, params, 10); } return node; } diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/FishEye.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/FishEye.cpp index d56c0a03f..28ebb957a 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/FishEye.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/FishEye.cpp @@ -35,8 +35,6 @@ struct FishEyeLocalData { vxTensorLayout outputLayout; size_t inputTensorDims[RPP_MAX_TENSOR_DIMS]; size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS]; - RppiSize *pSrcDimensions; - RppiSize maxSrcDimensions; }; static vx_status VX_CALLBACK refreshFishEye(vx_node node, const vx_reference *parameters, vx_uint32 num, FishEyeLocalData *data) { @@ -57,17 +55,12 @@ static vx_status VX_CALLBACK refreshFishEye(vx_node node, const vx_reference *pa STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); } data->pSrcRoi = reinterpret_cast(roi_tensor_ptr); - // Fill width and height array with ROI data required by RPP batchPD kernels - for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { - data->pSrcDimensions[i].width = data->pSrcRoi[i].xywhROI.roiWidth; - data->pSrcDimensions[i].height = data->pSrcRoi[i].xywhROI.roiHeight; - } if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) { unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F' for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) { unsigned index = n * num_of_frames; for (int f = 0; f < num_of_frames; f++) { - data->pSrcDimensions[index + f] = data->pSrcDimensions[n]; + data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI; } } } @@ -122,19 +115,11 @@ static vx_status VX_CALLBACK processFishEye(vx_node node, const vx_reference *pa #if ENABLE_OPENCL return_status = VX_ERROR_NOT_IMPLEMENTED; #elif ENABLE_HIP - if (data->pSrcDesc->c == 1) { - rpp_status = rppi_fisheye_u8_pln1_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSrcDesc->n, data->handle->rppHandle); - } else { - rpp_status = rppi_fisheye_u8_pkd3_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSrcDesc->n, data->handle->rppHandle); - } + rpp_status = rppt_fisheye_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pSrcRoi, data->roiType, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { - if (data->pSrcDesc->c == 1) { - rpp_status = rppi_fisheye_u8_pln1_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSrcDesc->n, data->handle->rppHandle); - } else { - rpp_status = rppi_fisheye_u8_pkd3_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSrcDesc->n, data->handle->rppHandle); - } + rpp_status = rppt_fisheye_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pSrcRoi, data->roiType, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; } return return_status; @@ -172,9 +157,6 @@ static vx_status VX_CALLBACK initializeFishEye(vx_node node, const vx_reference data->pDstDesc->offsetInBytes = 0; fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims); - data->maxSrcDimensions.height = data->pSrcDesc->h; - data->maxSrcDimensions.width = data->pSrcDesc->w; - data->pSrcDimensions = new RppiSize[data->pSrcDesc->n]; refreshFishEye(node, parameters, num, data); STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType)); STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); @@ -184,7 +166,6 @@ static vx_status VX_CALLBACK initializeFishEye(vx_node node, const vx_reference static vx_status VX_CALLBACK uninitializeFishEye(vx_node node, const vx_reference *parameters, vx_uint32 num) { FishEyeLocalData *data; STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - delete[] data->pSrcDimensions; delete data->pSrcDesc; delete data->pDstDesc; STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType)); diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Hue.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Hue.cpp index 09c69e763..abb5c7df1 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/Hue.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/Hue.cpp @@ -36,8 +36,6 @@ struct HueLocalData { vxTensorLayout outputLayout; size_t inputTensorDims[RPP_MAX_TENSOR_DIMS]; size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS]; - RppiSize *pSrcDimensions; - RppiSize maxSrcDimensions; }; static vx_status VX_CALLBACK refreshHue(vx_node node, const vx_reference *parameters, vx_uint32 num, HueLocalData *data) { @@ -59,18 +57,13 @@ static vx_status VX_CALLBACK refreshHue(vx_node node, const vx_reference *parame STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); } data->pSrcRoi = reinterpret_cast(roi_tensor_ptr); - // Fill width and height array with ROI data required by RPP batchPD kernels - for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { - data->pSrcDimensions[i].width = data->pSrcRoi[i].xywhROI.roiWidth; - data->pSrcDimensions[i].height = data->pSrcRoi[i].xywhROI.roiHeight; - } if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) { unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F' for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) { unsigned index = n * num_of_frames; for (unsigned f = 0; f < num_of_frames; f++) { data->pHueShift[index + f] = data->pHueShift[n]; - data->pSrcDimensions[index + f] = data->pSrcDimensions[n]; + data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI; } } } @@ -128,11 +121,11 @@ static vx_status VX_CALLBACK processHue(vx_node node, const vx_reference *parame #if ENABLE_OPENCL return VX_ERROR_NOT_IMPLEMENTED; #elif ENABLE_HIP - rpp_status = rppi_hueRGB_u8_pkd3_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pHueShift, data->pSrcDesc->n, data->handle->rppHandle); + rpp_status = rppt_hue_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pHueShift, data->pSrcRoi, data->roiType, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { - rpp_status = rppi_hueRGB_u8_pkd3_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pHueShift, data->pSrcDesc->n, data->handle->rppHandle); + rpp_status = rppt_hue_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pHueShift, data->pSrcRoi, data->roiType, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; } return return_status; @@ -170,10 +163,13 @@ static vx_status VX_CALLBACK initializeHue(vx_node node, const vx_reference *par data->pDstDesc->offsetInBytes = 0; fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims); - data->maxSrcDimensions.height = data->pSrcDesc->h; - data->maxSrcDimensions.width = data->pSrcDesc->w; - data->pSrcDimensions = new RppiSize[data->pSrcDesc->n]; - data->pHueShift = new vx_float32[data->pSrcDesc->n]; + if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + data->pHueShift = new vx_float32[data->pSrcDesc->n]; + } else if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { +#if ENABLE_HIP + CHECK_HIP_RETURN_STATUS(hipHostMalloc(&data->pHueShift, data->pSrcDesc->n * sizeof(vx_float32))); +#endif + } refreshHue(node, parameters, num, data); STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType)); STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); @@ -183,8 +179,13 @@ static vx_status VX_CALLBACK initializeHue(vx_node node, const vx_reference *par static vx_status VX_CALLBACK uninitializeHue(vx_node node, const vx_reference *parameters, vx_uint32 num) { HueLocalData *data; STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - delete[] data->pHueShift; - delete[] data->pSrcDimensions; + if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + delete[] data->pHueShift; + } else if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { +#if ENABLE_HIP + CHECK_HIP_RETURN_STATUS(hipHostFree(data->pHueShift)); +#endif + } delete data->pSrcDesc; delete data->pDstDesc; STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType)); diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Saturation.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Saturation.cpp index e51cd84ff..620f6f048 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/Saturation.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/Saturation.cpp @@ -36,8 +36,6 @@ struct SaturationLocalData { vxTensorLayout outputLayout; size_t inputTensorDims[RPP_MAX_TENSOR_DIMS]; size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS]; - RppiSize *pSrcDimensions; - RppiSize maxSrcDimensions; }; static vx_status VX_CALLBACK refreshSaturation(vx_node node, const vx_reference *parameters, vx_uint32 num, SaturationLocalData *data) { @@ -59,18 +57,13 @@ static vx_status VX_CALLBACK refreshSaturation(vx_node node, const vx_reference STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); } data->pSrcRoi = reinterpret_cast(roi_tensor_ptr); - // Fill width and height array with ROI data required by RPP batchPD kernels - for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { - data->pSrcDimensions[i].width = data->pSrcRoi[i].xywhROI.roiWidth; - data->pSrcDimensions[i].height = data->pSrcRoi[i].xywhROI.roiHeight; - } if (data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW) { unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F' for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) { unsigned index = n * num_of_frames; for (unsigned f = 0; f < num_of_frames; f++) { data->pSaturationFactor[index + f] = data->pSaturationFactor[n]; - data->pSrcDimensions[index + f] = data->pSrcDimensions[n]; + data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI; } } } @@ -128,11 +121,11 @@ static vx_status VX_CALLBACK processSaturation(vx_node node, const vx_reference #if ENABLE_OPENCL return_status = VX_ERROR_NOT_IMPLEMENTED; #elif ENABLE_HIP - rpp_status = rppi_saturationRGB_u8_pkd3_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSaturationFactor, data->inputTensorDims[0], data->handle->rppHandle); + rpp_status = rppt_saturation_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pSaturationFactor, data->pSrcRoi, data->roiType, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { - rpp_status = rppi_saturationRGB_u8_pkd3_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSaturationFactor, data->inputTensorDims[0], data->handle->rppHandle); + rpp_status = rppt_saturation_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pSaturationFactor, data->pSrcRoi, data->roiType, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; } return return_status; @@ -170,11 +163,13 @@ static vx_status VX_CALLBACK initializeSaturation(vx_node node, const vx_referen data->pDstDesc->offsetInBytes = 0; fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims); - data->pSaturationFactor = new vx_float32[data->pSrcDesc->n]; - data->pSrcDimensions = new RppiSize[data->pSrcDesc->n]; - - data->maxSrcDimensions.height = data->pSrcDesc->h; - data->maxSrcDimensions.width = data->pSrcDesc->w; + if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + data->pSaturationFactor = new vx_float32[data->pSrcDesc->n]; + } else if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { +#if ENABLE_HIP + CHECK_HIP_RETURN_STATUS(hipHostMalloc(&data->pSaturationFactor, data->pSrcDesc->n * sizeof(vx_float32))); +#endif + } refreshSaturation(node, parameters, num, data); STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType)); STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); @@ -184,8 +179,13 @@ static vx_status VX_CALLBACK initializeSaturation(vx_node node, const vx_referen static vx_status VX_CALLBACK uninitializeSaturation(vx_node node, const vx_reference *parameters, vx_uint32 num) { SaturationLocalData *data; STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - delete[] data->pSaturationFactor; - delete[] data->pSrcDimensions; + if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + delete[] data->pSaturationFactor; + } else if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { +#if ENABLE_HIP + CHECK_HIP_RETURN_STATUS(hipHostFree(data->pSaturationFactor)); +#endif + } delete data->pSrcDesc; delete data->pDstDesc; STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType)); diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Snow.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Snow.cpp index a7e9e77f2..99ecd8e4c 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/Snow.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/Snow.cpp @@ -27,7 +27,9 @@ struct SnowLocalData { vx_uint32 deviceType; RppPtr_t pSrc; RppPtr_t pDst; - vx_float32 *pSnowValue; + vx_float32 *pBrightnessCoefficient; + vx_float32 *pSnowThreshold; + vx_int32 *pDarkMode; RpptDescPtr pSrcDesc; RpptDescPtr pDstDesc; RpptROI *pSrcRoi; @@ -36,13 +38,13 @@ struct SnowLocalData { vxTensorLayout outputLayout; size_t inputTensorDims[RPP_MAX_TENSOR_DIMS]; size_t ouputTensorDims[RPP_MAX_TENSOR_DIMS]; - RppiSize *pSrcDimensions; - RppiSize maxSrcDimensions; }; static vx_status VX_CALLBACK refreshSnow(vx_node node, const vx_reference *parameters, vx_uint32 num, SnowLocalData *data) { vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pSnowValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pBrightnessCoefficient, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->inputTensorDims[0], sizeof(vx_float32), data->pSnowThreshold, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->inputTensorDims[0], sizeof(vx_int32), data->pDarkMode, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); void *roi_tensor_ptr; if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { @@ -59,17 +61,14 @@ static vx_status VX_CALLBACK refreshSnow(vx_node node, const vx_reference *param STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); } data->pSrcRoi = reinterpret_cast(roi_tensor_ptr); - // Fill width and height array with ROI data required by RPP batchPD kernels - for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { - data->pSrcDimensions[i].width = data->pSrcRoi[i].xywhROI.roiWidth; - data->pSrcDimensions[i].height = data->pSrcRoi[i].xywhROI.roiHeight; - } - if ((data->inputLayout == 2 || data->inputLayout == 3)) { // For NFCHW and NFHWC formats + if ((data->inputLayout == vxTensorLayout::VX_NFHWC || data->inputLayout == vxTensorLayout::VX_NFCHW)) { // For NFCHW and NFHWC formats unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F' for (int n = data->inputTensorDims[0] - 1; n >= 0; n--) { unsigned index = n * num_of_frames; for (unsigned f = 0; f < num_of_frames; f++) { - data->pSnowValue[index + f] = data->pSnowValue[n]; + data->pBrightnessCoefficient[index + f] = data->pBrightnessCoefficient[n]; + data->pSnowThreshold[index + f] = data->pSnowThreshold[n]; + data->pDarkMode[index + f] = data->pDarkMode[n]; data->pSrcRoi[index + f].xywhROI = data->pSrcRoi[n].xywhROI; } } @@ -80,18 +79,18 @@ static vx_status VX_CALLBACK refreshSnow(vx_node node, const vx_reference *param static vx_status VX_CALLBACK validateSnow(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { vx_status status = VX_SUCCESS; vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_INT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_INT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); if (scalar_type != VX_TYPE_INT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) + if (scalar_type != VX_TYPE_INT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_INT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #9 type=%d (must be size)\n", scalar_type); // Check for input tensor size_t num_tensor_dims; @@ -125,19 +124,11 @@ static vx_status VX_CALLBACK processSnow(vx_node node, const vx_reference *param #if ENABLE_OPENCL return_status = VX_ERROR_NOT_IMPLEMENTED; #elif ENABLE_HIP - if (data->pSrcDesc->c == 1) { - rpp_status = rppi_snow_u8_pln1_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSnowValue, data->pSrcDesc->n, data->handle->rppHandle); - } else { - rpp_status = rppi_snow_u8_pkd3_batchPD_gpu(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSnowValue, data->pSrcDesc->n, data->handle->rppHandle); - } + rpp_status = rppt_snow_gpu(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pBrightnessCoefficient, data->pSnowThreshold, data->pDarkMode, data->pSrcRoi, data->roiType, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { - if (data->pSrcDesc->c == 1) { - rpp_status = rppi_snow_u8_pln1_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSnowValue, data->pSrcDesc->n, data->handle->rppHandle); - } else { - rpp_status = rppi_snow_u8_pkd3_batchPD_host(data->pSrc, data->pSrcDimensions, data->maxSrcDimensions, data->pDst, data->pSnowValue, data->pSrcDesc->n, data->handle->rppHandle); - } + rpp_status = rppt_snow_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pBrightnessCoefficient, data->pSnowThreshold, data->pDarkMode, data->pSrcRoi, data->roiType, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; } return return_status; @@ -149,10 +140,10 @@ static vx_status VX_CALLBACK initializeSnow(vx_node node, const vx_reference *pa vx_enum input_tensor_dtype, output_tensor_dtype; vx_int32 roi_type, input_layout, output_layout; - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &output_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &roi_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); data->roiType = static_cast(roi_type); data->inputLayout = static_cast(input_layout); data->outputLayout = static_cast(output_layout); @@ -175,10 +166,17 @@ static vx_status VX_CALLBACK initializeSnow(vx_node node, const vx_reference *pa data->pDstDesc->offsetInBytes = 0; fillDescriptionPtrfromDims(data->pDstDesc, data->outputLayout, data->ouputTensorDims); - data->pSnowValue = new vx_float32[data->pSrcDesc->n]; - data->pSrcDimensions = new RppiSize[data->pSrcDesc->n]; - data->maxSrcDimensions.height = data->pSrcDesc->h; - data->maxSrcDimensions.width = data->pSrcDesc->w; + if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + data->pBrightnessCoefficient = new vx_float32[data->pSrcDesc->n]; + data->pSnowThreshold = new vx_float32[data->pSrcDesc->n]; + data->pDarkMode = new vx_int32[data->pSrcDesc->n]; + } else if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { +#if ENABLE_HIP + CHECK_HIP_RETURN_STATUS(hipHostMalloc(&data->pBrightnessCoefficient, data->pSrcDesc->n * sizeof(vx_float32))); + CHECK_HIP_RETURN_STATUS(hipHostMalloc(&data->pSnowThreshold, data->pSrcDesc->n * sizeof(vx_float32))); + CHECK_HIP_RETURN_STATUS(hipHostMalloc(&data->pDarkMode, data->pSrcDesc->n * sizeof(vx_int32))); +#endif + } refreshSnow(node, parameters, num, data); STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType)); STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); @@ -188,8 +186,17 @@ static vx_status VX_CALLBACK initializeSnow(vx_node node, const vx_reference *pa static vx_status VX_CALLBACK uninitializeSnow(vx_node node, const vx_reference *parameters, vx_uint32 num) { SnowLocalData *data; STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - delete[] data->pSnowValue; - delete[] data->pSrcDimensions; + if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + delete[] data->pBrightnessCoefficient; + delete[] data->pSnowThreshold; + delete[] data->pDarkMode; + } else if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { +#if ENABLE_HIP + CHECK_HIP_RETURN_STATUS(hipHostFree(data->pBrightnessCoefficient)); + CHECK_HIP_RETURN_STATUS(hipHostFree(data->pSnowThreshold)); + CHECK_HIP_RETURN_STATUS(hipHostFree(data->pDarkMode)); +#endif + } delete data->pSrcDesc; delete data->pDstDesc; STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType)); @@ -220,7 +227,7 @@ vx_status Snow_Register(vx_context context) { vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Snow", VX_KERNEL_RPP_SNOW, processSnow, - 8, + 10, validateSnow, initializeSnow, uninitializeSnow); @@ -241,11 +248,13 @@ vx_status Snow_Register(vx_context context) { PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); // brightnessCoefficient + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); // snowThreshold + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); // darkMode + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); // inputLayout + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); // outputLayout + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); // roiType + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); // deviceType PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); } if (status != VX_SUCCESS) {