@@ -94,13 +94,14 @@ namespace cv { namespace cudev {
9494 __host__ UniqueTexture (const size_t sizeInBytes, T* data, const bool normalizedCoords = false , const cudaTextureFilterMode filterMode = cudaFilterModePoint,
9595 const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType)
9696 {
97- create (1 , static_cast < int >( sizeInBytes/ sizeof (T)) , data, sizeInBytes , normalizedCoords, filterMode, addressMode, readMode);
97+ create (sizeInBytes, data, normalizedCoords, filterMode, addressMode, readMode);
9898 }
9999
100100 __host__ ~UniqueTexture () {
101101 if (tex != cudaTextureObject_t ()) {
102102 try {
103103 CV_CUDEV_SAFE_CALL (cudaDestroyTextureObject (tex));
104+ CV_CUDEV_SAFE_CALL (cudaFree (internalSrc));
104105 }
105106 catch (const cv::Exception& ex) {
106107 std::ostringstream os;
@@ -132,39 +133,62 @@ namespace cv { namespace cudev {
132133 __host__ explicit operator bool () const noexcept { return tex != cudaTextureObject_t (); }
133134
134135 private:
136+ __host__ void createTextureObject (cudaResourceDesc texRes, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
137+ const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
138+ {
139+ cudaTextureDesc texDescr;
140+ std::memset (&texDescr, 0 , sizeof (texDescr));
141+ texDescr.normalizedCoords = normalizedCoords;
142+ texDescr.filterMode = filterMode;
143+ texDescr.addressMode [0 ] = addressMode;
144+ texDescr.addressMode [1 ] = addressMode;
145+ texDescr.addressMode [2 ] = addressMode;
146+ texDescr.readMode = readMode;
147+ CV_CUDEV_SAFE_CALL (cudaCreateTextureObject (&tex, &texRes, &texDescr, 0 ));
148+ }
149+
150+ template <class T1 >
151+ __host__ void create (const size_t sizeInBytes, T1* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
152+ const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
153+ {
154+ cudaResourceDesc texRes;
155+ std::memset (&texRes, 0 , sizeof (texRes));
156+ texRes.resType = cudaResourceTypeLinear;
157+ texRes.res .linear .devPtr = data;
158+ texRes.res .linear .sizeInBytes = sizeInBytes;
159+ texRes.res .linear .desc = cudaCreateChannelDesc<T1>();
160+ createTextureObject (texRes, normalizedCoords, filterMode, addressMode, readMode);
161+ }
162+
163+ __host__ void create (const size_t sizeInBytes, uint64* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
164+ const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
165+ {
166+ create<uint2>(sizeInBytes, (uint2*)data, normalizedCoords, filterMode, addressMode, readMode);
167+ }
135168
136169 template <class T1 >
137170 __host__ void create (const int rows, const int cols, T1* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
138171 const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
139172 {
140173 cudaResourceDesc texRes;
141174 std::memset (&texRes, 0 , sizeof (texRes));
142- if (rows == 1 ) {
143- CV_Assert (rows == 1 && cols*sizeof (T) == step);
144- texRes.resType = cudaResourceTypeLinear;
145- texRes.res .linear .devPtr = data;
146- texRes.res .linear .sizeInBytes = step;
147- texRes.res .linear .desc = cudaCreateChannelDesc<T1>();
175+ texRes.resType = cudaResourceTypePitch2D;
176+ texRes.res .pitch2D .height = rows;
177+ texRes.res .pitch2D .width = cols;
178+ // temporary fix for single row/columns until TexturePtr is reworked
179+ if (rows == 1 || cols == 1 ) {
180+ size_t dStep = 0 ;
181+ CV_CUDEV_SAFE_CALL (cudaMallocPitch (&internalSrc, &dStep, cols * sizeof (T1), rows));
182+ CV_CUDEV_SAFE_CALL (cudaMemcpy2D (internalSrc, dStep, data, step, cols * sizeof (T1), rows, cudaMemcpyDeviceToDevice));
183+ texRes.res .pitch2D .devPtr = internalSrc;
184+ texRes.res .pitch2D .pitchInBytes = dStep;
148185 }
149186 else {
150- texRes.resType = cudaResourceTypePitch2D;
151187 texRes.res .pitch2D .devPtr = data;
152- texRes.res .pitch2D .height = rows;
153- texRes.res .pitch2D .width = cols;
154188 texRes.res .pitch2D .pitchInBytes = step;
155- texRes.res .pitch2D .desc = cudaCreateChannelDesc<T1>();
156189 }
157-
158- cudaTextureDesc texDescr;
159- std::memset (&texDescr, 0 , sizeof (texDescr));
160- texDescr.normalizedCoords = normalizedCoords;
161- texDescr.filterMode = filterMode;
162- texDescr.addressMode [0 ] = addressMode;
163- texDescr.addressMode [1 ] = addressMode;
164- texDescr.addressMode [2 ] = addressMode;
165- texDescr.readMode = readMode;
166-
167- CV_CUDEV_SAFE_CALL (cudaCreateTextureObject (&tex, &texRes, &texDescr, 0 ));
190+ texRes.res .pitch2D .desc = cudaCreateChannelDesc<T1>();
191+ createTextureObject (texRes, normalizedCoords, filterMode, addressMode, readMode);
168192 }
169193
170194 __host__ void create (const int rows, const int cols, uint64* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
@@ -175,6 +199,7 @@ namespace cv { namespace cudev {
175199
176200 private:
177201 cudaTextureObject_t tex;
202+ T* internalSrc = 0 ;
178203 };
179204
180205 /* * @brief sharable smart CUDA texture object
@@ -250,9 +275,9 @@ namespace cv { namespace cudev {
250275 {
251276 }
252277
253- __host__ TextureOff (PtrStepSz<T> src, const int yoff = 0 , const int xoff = 0 , const bool normalizedCoords = false , const cudaTextureFilterMode filterMode = cudaFilterModePoint,
278+ __host__ TextureOff (PtrStepSz<T> src, const int yoff_ = 0 , const int xoff_ = 0 , const bool normalizedCoords = false , const cudaTextureFilterMode filterMode = cudaFilterModePoint,
254279 const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType) :
255- TextureOff(src.rows, src.cols, src.data, src.step, yoff, xoff , normalizedCoords, filterMode, addressMode, readMode)
280+ TextureOff(src.rows, src.cols, src.data, src.step, yoff_, xoff_ , normalizedCoords, filterMode, addressMode, readMode)
256281 {
257282 }
258283
0 commit comments