diff --git a/Multi_Frame_Flow/external_packages/channelnorm_package/src/ChannelNorm_kernel.cu b/Multi_Frame_Flow/external_packages/channelnorm_package/src/ChannelNorm_kernel.cu index 9976af9..cbf4df3 100644 --- a/Multi_Frame_Flow/external_packages/channelnorm_package/src/ChannelNorm_kernel.cu +++ b/Multi_Frame_Flow/external_packages/channelnorm_package/src/ChannelNorm_kernel.cu @@ -1,8 +1,8 @@ #include #include -#define CUDA_NUM_THREADS 512 -#define THREADS_PER_BLOCK 64 +#define CUDA_NUM_THREADS 512 +#define THREADS_PER_BLOCK 64 #define DIM0(TENSOR) ((TENSOR).x) #define DIM1(TENSOR) ((TENSOR).y) @@ -82,7 +82,7 @@ __global__ void kernel_ChannelNorm_backward_input1(const int n, const float* inp void ChannelNorm_kernel_forward(THCState* state, THCudaTensor* input1, THCudaTensor* output, int norm_deg) { int n = 0; - + const long4 input1_size = make_long4(input1->size[0], input1->size[1], input1->size[2], input1->size[3]); const long4 input1_stride = make_long4(input1->stride[0], input1->stride[1], input1->stride[2], input1->stride[3]); @@ -90,8 +90,8 @@ void ChannelNorm_kernel_forward(THCState* state, THCudaTensor* input1, THCudaTen const long4 output_stride = make_long4(output->stride[0], output->stride[1], output->stride[2], output->stride[3]); n = THCudaTensor_nElement(state, output); - kernel_ChannelNorm_updateOutput<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, THCState_getCurrentStream(state) >>>( - n, THCudaTensor_data(state, input1), input1_size, input1_stride, THCudaTensor_data(state, output), output_size, output_stride, + kernel_ChannelNorm_updateOutput<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, c10::cuda::getCurrentCUDAStream() >>>( + n, THCudaTensor_data(state, input1), input1_size, input1_stride, THCudaTensor_data(state, output), output_size, output_stride, norm_deg); THCudaCheck(cudaGetLastError()); @@ -113,7 +113,7 @@ void ChannelNorm_kernel_backward(THCState* state, THCudaTensor* input1, THCudaTe const long4 gradInput1_stride = make_long4(gradInput1->stride[0], gradInput1->stride[1], gradInput1->stride[2], gradInput1->stride[3]); n = THCudaTensor_nElement(state, gradInput1); - kernel_ChannelNorm_backward_input1<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, THCState_getCurrentStream(state) >>>( + kernel_ChannelNorm_backward_input1<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, c10::cuda::getCurrentCUDAStream() >>>( n, THCudaTensor_data(state, input1), input1_size, input1_stride, THCudaTensor_data(state, output), output_size, output_stride, THCudaTensor_data(state, gradOutput), gradOutput_size, gradOutput_stride, THCudaTensor_data(state, gradInput1), gradInput1_size, gradInput1_stride, norm_deg diff --git a/Multi_Frame_Flow/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr1d_cuda.c b/Multi_Frame_Flow/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr1d_cuda.c index 8d30005..f7a3d69 100644 --- a/Multi_Frame_Flow/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr1d_cuda.c +++ b/Multi_Frame_Flow/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr1d_cuda.c @@ -15,7 +15,7 @@ int corr1d_cuda_forward(THCudaTensor *input1, int stride1, int stride2, int corr_type_multiply - //single_direction=0 + //single_direction=0 ) { @@ -44,7 +44,7 @@ int corr1d_cuda_forward(THCudaTensor *input1, int x_shift = -neighborhood_grid_radius_; // Number of output channels amounts to displacement combinations in X direction only!! - int nOutputPlane = neighborhood_grid_width_;//Same, because 1D X-correlation + int nOutputPlane = neighborhood_grid_width_;//Same, because 1D X-correlation // Inputs float * input1_data = THCudaTensor_data(state, input1); @@ -64,7 +64,7 @@ int corr1d_cuda_forward(THCudaTensor *input1, float * rbot1_data = THCudaTensor_data(state, rbot1); float * rbot2_data = THCudaTensor_data(state, rbot2); - cudaStream_t stream = THCState_getCurrentStream(state); + cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); int pwidthheight = paddedbottomwidth * paddedbottomheight; @@ -145,7 +145,7 @@ int corr1d_cuda_backward(THCudaTensor *input1, int pwidthheight = paddedbottomwidth * paddedbottomheight; - cudaStream_t stream = THCState_getCurrentStream(state); + cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); blob_rearrange_ongpu_1d(input1_data,rbot1_data,batchSize,nInputPlane,nInputCols,nInputRows,inputWidthHeight,pad_size,pwidthheight,stream); diff --git a/Multi_Frame_Flow/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr_cuda.c b/Multi_Frame_Flow/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr_cuda.c index de1cd2a..c104194 100644 --- a/Multi_Frame_Flow/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr_cuda.c +++ b/Multi_Frame_Flow/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr_cuda.c @@ -62,7 +62,7 @@ int corr_cuda_forward(THCudaTensor *input1, float * rbot1_data = THCudaTensor_data(state, rbot1); float * rbot2_data = THCudaTensor_data(state, rbot2); - cudaStream_t stream = THCState_getCurrentStream(state); + cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); int pwidthheight = paddedbottomwidth * paddedbottomheight; @@ -141,7 +141,7 @@ int corr_cuda_backward(THCudaTensor *input1, int pwidthheight = paddedbottomwidth * paddedbottomheight; - cudaStream_t stream = THCState_getCurrentStream(state); + cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); blob_rearrange_ongpu(input1_data,rbot1_data,batchSize,nInputPlane,nInputCols,nInputRows,inputWidthHeight,pad_size,pwidthheight,stream); diff --git a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/build.py b/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/build.py deleted file mode 100755 index fab5993..0000000 --- a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/build.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -import torch -from torch.utils.ffi import create_extension - - -sources = ['correlation_package/src/corr.c'] -headers = ['correlation_package/src/corr.h'] - -sources += ['correlation_package/src/corr1d.c'] -headers += ['correlation_package/src/corr1d.h'] - -defines = [] -with_cuda = False - -if torch.cuda.is_available(): - print('Including CUDA code.') - sources += ['correlation_package/src/corr_cuda.c'] - headers += ['correlation_package/src/corr_cuda.h'] - - sources += ['correlation_package/src/corr1d_cuda.c'] - headers += ['correlation_package/src/corr1d_cuda.h'] - - defines += [('WITH_CUDA', None)] - with_cuda = True - -this_file = os.path.dirname(os.path.realpath(__file__)) -extra_objects = ['correlation_package/src/corr_cuda_kernel.cu.o'] -extra_objects += ['correlation_package/src/corr1d_cuda_kernel.cu.o'] -extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] - -ffi = create_extension( - 'correlation_package._ext.corr', - package=True, - headers=headers, - sources=sources, - define_macros=defines, - relative_to=__file__, - with_cuda=with_cuda, - extra_objects=extra_objects, -) - -if __name__ == '__main__': - ffi.build() diff --git a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/_ext/corr/__init__.py b/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/_ext/corr/__init__.py index a2b8205..e69de29 100755 --- a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/_ext/corr/__init__.py +++ b/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/_ext/corr/__init__.py @@ -1,12 +0,0 @@ - -from torch.utils.ffi import _wrap_function -from ._corr import lib as _lib, ffi as _ffi - -__all__ = [] -def _import_symbols(locals): - for symbol in dir(_lib): - fn = getattr(_lib, symbol) - locals[symbol] = _wrap_function(fn, _ffi) - __all__.append(symbol) - -_import_symbols(locals()) diff --git a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/functions/corr.py b/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/functions/corr.py index 04172ad..4e78239 100755 --- a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/functions/corr.py +++ b/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/functions/corr.py @@ -13,10 +13,11 @@ def __init__(self, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, st self.stride2 = stride2 self.corr_multiply = corr_multiply + @staticmethod def forward(self, input1, input2): self.save_for_backward(input1, input2) - + rbot1 = input1.new() rbot2 = input2.new() output = input1.new() @@ -33,6 +34,7 @@ def forward(self, input1, input2): return output + @staticmethod def backward(self, grad_output): input1, input2 = self.saved_tensors @@ -71,6 +73,7 @@ def __init__(self, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, st self.stride2 = stride2 self.corr_multiply = corr_multiply + @staticmethod def forward(self, input1, input2): self.save_for_backward(input1, input2) @@ -91,6 +94,7 @@ def forward(self, input1, input2): return output + @staticmethod def backward(self, grad_output): input1, input2 = self.saved_tensors diff --git a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/modules/corr.py b/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/modules/corr.py index 99611d9..e4992d4 100755 --- a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/modules/corr.py +++ b/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/modules/corr.py @@ -16,6 +16,7 @@ def __init__(self, pad_size=None, kernel_size=None, max_displacement=None, def reset_params(self): return + @staticmethod def forward(self, input1, input2): return correlation(self.pad_size, self.kernel_size, self.max_displacement, self.stride1, self.stride2, self.corr_multiply)(input1, input2) @@ -40,6 +41,7 @@ def __init__(self, pad_size=None, kernel_size=None, max_displacement=None, def reset_params(self): return + @staticmethod def forward(self, input1, input2): return correlation1d(self.pad_size, self.kernel_size, self.max_displacement, self.stride1, self.stride2, self.corr_multiply)(input1, input2) diff --git a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr1d_cuda.c b/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr1d_cuda.c index 8d30005..e5e4620 100755 --- a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr1d_cuda.c +++ b/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr1d_cuda.c @@ -15,17 +15,17 @@ int corr1d_cuda_forward(THCudaTensor *input1, int stride1, int stride2, int corr_type_multiply - //single_direction=0 + //single_direction=0 ) { // TODO: Shapechecks - int batchSize = input1->size[0]; + int batchSize = input1->size(0); - long nInputPlane = input1->size[1]; - long nInputRows = input1->size[2]; - long nInputCols = input1->size[3]; + long nInputPlane = input1->size(1); + long nInputRows = input1->size(2); + long nInputCols = input1->size(3); long inputWidthHeight = nInputRows * nInputCols; long kernel_radius_ = (kernel_size - 1) / 2; @@ -44,7 +44,7 @@ int corr1d_cuda_forward(THCudaTensor *input1, int x_shift = -neighborhood_grid_radius_; // Number of output channels amounts to displacement combinations in X direction only!! - int nOutputPlane = neighborhood_grid_width_;//Same, because 1D X-correlation + int nOutputPlane = neighborhood_grid_width_;//Same, because 1D X-correlation // Inputs float * input1_data = THCudaTensor_data(state, input1); @@ -64,7 +64,7 @@ int corr1d_cuda_forward(THCudaTensor *input1, float * rbot1_data = THCudaTensor_data(state, rbot1); float * rbot2_data = THCudaTensor_data(state, rbot2); - cudaStream_t stream = THCState_getCurrentStream(state); + cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); int pwidthheight = paddedbottomwidth * paddedbottomheight; @@ -103,10 +103,10 @@ int corr1d_cuda_backward(THCudaTensor *input1, float * input1_data = THCudaTensor_data(state, input1); float * input2_data = THCudaTensor_data(state, input2); - long nInputCols = input1->size[3]; - long nInputRows = input1->size[2]; - long nInputPlane = input1->size[1]; - long batchSize = input1->size[0]; + long nInputCols = input1->size(3); + long nInputRows = input1->size(2); + long nInputPlane = input1->size(1); + long batchSize = input1->size(0); // THCudaTensor_resizeAs(state, gradInput1, input1); // THCudaTensor_resizeAs(state, gradInput2, input2); @@ -145,7 +145,7 @@ int corr1d_cuda_backward(THCudaTensor *input1, int pwidthheight = paddedbottomwidth * paddedbottomheight; - cudaStream_t stream = THCState_getCurrentStream(state); + cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); blob_rearrange_ongpu_1d(input1_data,rbot1_data,batchSize,nInputPlane,nInputCols,nInputRows,inputWidthHeight,pad_size,pwidthheight,stream); diff --git a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr_cuda.c b/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr_cuda.c index de1cd2a..1b9325d 100755 --- a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr_cuda.c +++ b/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr_cuda.c @@ -20,11 +20,11 @@ int corr_cuda_forward(THCudaTensor *input1, // TODO: Shapechecks - int batchSize = input1->size[0]; + int batchSize = input1->size(0); - long nInputPlane = input1->size[1]; - long nInputRows = input1->size[2]; - long nInputCols = input1->size[3]; + long nInputPlane = input1->size(1); + long nInputRows = input1->size(2); + long nInputCols = input1->size(3); long inputWidthHeight = nInputRows * nInputCols; long kernel_radius_ = (kernel_size - 1) / 2; @@ -62,7 +62,7 @@ int corr_cuda_forward(THCudaTensor *input1, float * rbot1_data = THCudaTensor_data(state, rbot1); float * rbot2_data = THCudaTensor_data(state, rbot2); - cudaStream_t stream = THCState_getCurrentStream(state); + cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); int pwidthheight = paddedbottomwidth * paddedbottomheight; @@ -100,10 +100,10 @@ int corr_cuda_backward(THCudaTensor *input1, float * input1_data = THCudaTensor_data(state, input1); float * input2_data = THCudaTensor_data(state, input2); - long nInputCols = input1->size[3]; - long nInputRows = input1->size[2]; - long nInputPlane = input1->size[1]; - long batchSize = input1->size[0]; + long nInputCols = input1->size(3); + long nInputRows = input1->size(2); + long nInputPlane = input1->size(1); + long batchSize = input1->size(0); // THCudaTensor_resizeAs(state, gradInput1, input1); // THCudaTensor_resizeAs(state, gradInput2, input2); @@ -141,7 +141,7 @@ int corr_cuda_backward(THCudaTensor *input1, int pwidthheight = paddedbottomwidth * paddedbottomheight; - cudaStream_t stream = THCState_getCurrentStream(state); + cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); blob_rearrange_ongpu(input1_data,rbot1_data,batchSize,nInputPlane,nInputCols,nInputRows,inputWidthHeight,pad_size,pwidthheight,stream); diff --git a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/setup.py b/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/setup.py index 6a02058..86b64f1 100755 --- a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/setup.py +++ b/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/setup.py @@ -4,8 +4,23 @@ from setuptools import setup, find_packages +import torch +from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension + this_file = os.path.dirname(__file__) + +corr_source = ['correlation_package/src/corr.c', 'correlation_package/src/corr1d.c'] +corr_includes = ['correlation_package/src/'] + +if torch.cuda.is_available(): + print('Including CUDA code.') + ext_fnct = CUDAExtension + corr_source += ['correlation_package/src/corr_cuda.c', 'correlation_package/src/corr1d_cuda.c'] + corr_source += ['correlation_package/src/corr_cuda_kernel.cu', 'correlation_package/src/corr1d_cuda_kernel.cu'] +else: + ext_fnct = CppExtension + setup( name="correlation_package", version="0.1", @@ -13,15 +28,16 @@ url="https://github.com/jbarker-nvidia/pytorch-correlation", author="Jon Barker", author_email="jbarker@nvidia.com", - # Require cffi - install_requires=["cffi>=1.0.0"], - setup_requires=["cffi>=1.0.0"], # Exclude the build files. packages=find_packages(exclude=["build"]), # Package where to put the extensions. Has to be a prefix of build.py ext_package="", # Extensions to compile - cffi_modules=[ - os.path.join(this_file, "build.py:ffi") + ext_modules=[ + ext_fnct( + 'correlation_package._ext.corr', + corr_source, include_dirs=corr_includes, + extra_compile_args={'cxx': ['-std=c++14']},), ], + cmdclass={'build_ext': BuildExtension} ) diff --git a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/test/test.py b/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/test/test.py index 60c5e68..214d0da 100755 --- a/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/test/test.py +++ b/PyTorch/external_packages/correlation-pytorch-master/correlation-pytorch/test/test.py @@ -99,7 +99,7 @@ def test_correlation1d_0(): model2 = correlation1d(1, 1, 1, 1, 1, 1) - y2 = model2(A_, B_) + y2 = model2.apply(A_, B_) print(y2) # should be 1x3x2x2 return @@ -113,7 +113,7 @@ def test_correlation1d(): #import pdb; pdb.set_trace() model = correlation1d(20, 1, 20, 1, 1, 1) - y = model(A_, B_) + y = model.apply(A_, B_) print(y.size()) print('Functional interface test passed') diff --git a/PyTorch/external_packages/correlation-pytorch-master/make_cuda.sh b/PyTorch/external_packages/correlation-pytorch-master/make_cuda.sh index b2294e4..65bfb62 100755 --- a/PyTorch/external_packages/correlation-pytorch-master/make_cuda.sh +++ b/PyTorch/external_packages/correlation-pytorch-master/make_cuda.sh @@ -1,13 +1,7 @@ #!/usr/bin/env bash -CUDA_PATH=/usr/local/cuda-8.0 - cd correlation-pytorch/correlation_package/src echo "Compiling correlation layer kernels by nvcc..." -# TODO (JEB): Check which arches we need -nvcc -c -o corr_cuda_kernel.cu.o corr_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 -nvcc -c -o corr1d_cuda_kernel.cu.o corr1d_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 - cd ../../ -python setup.py build install +python setup.py build install --user diff --git a/PyTorch/external_packages/correlation-pytorch-master/readme.MD b/PyTorch/external_packages/correlation-pytorch-master/readme.MD index 6a6c83c..723cffc 100755 --- a/PyTorch/external_packages/correlation-pytorch-master/readme.MD +++ b/PyTorch/external_packages/correlation-pytorch-master/readme.MD @@ -2,13 +2,8 @@ This repository contains a custom pytorch package that adds a module and functio To install: -1. Run `pip install cffi` - -2. Run `make_cuda.sh` - -3. Run `python setup.py build install` - -4. (optional) Run `python test/test.py` +1. Run `make_cuda.sh` +2. (optional) Run `python test/test.py` #### Acknowledgement diff --git a/PyTorch/script_pwc.py b/PyTorch/script_pwc.py index 85fe32f..d5a1936 100755 --- a/PyTorch/script_pwc.py +++ b/PyTorch/script_pwc.py @@ -4,14 +4,14 @@ import numpy as np from math import ceil from torch.autograd import Variable -from scipy.ndimage import imread +from imageio import imread import models """ Contact: Deqing Sun (deqings@nvidia.com); Zhile Ren (jrenzhile@gmail.com) """ def writeFlowFile(filename,uv): """ - According to the matlab code of Deqing Sun and c++ source code of Daniel Scharstein + According to the matlab code of Deqing Sun and c++ source code of Daniel Scharstein Contact: dqsun@cs.brown.edu Contact: schar@middlebury.edu """ @@ -56,12 +56,12 @@ def writeFlowFile(filename,uv): for _i, _inputs in enumerate(im_all): im_all[_i] = im_all[_i][:, :, ::-1] im_all[_i] = 1.0 * im_all[_i]/255.0 - + im_all[_i] = np.transpose(im_all[_i], (2, 0, 1)) im_all[_i] = torch.from_numpy(im_all[_i]) - im_all[_i] = im_all[_i].expand(1, im_all[_i].size()[0], im_all[_i].size()[1], im_all[_i].size()[2]) + im_all[_i] = im_all[_i].expand(1, im_all[_i].size()[0], im_all[_i].size()[1], im_all[_i].size()[2]) im_all[_i] = im_all[_i].float() - + im_all = torch.autograd.Variable(torch.cat(im_all,1).cuda(), volatile=True) net = models.pwc_dc_net(pwc_model_fn) @@ -72,8 +72,8 @@ def writeFlowFile(filename,uv): flo = flo[0] * 20.0 flo = flo.cpu().data.numpy() -# scale the flow back to the input size -flo = np.swapaxes(np.swapaxes(flo, 0, 1), 1, 2) # +# scale the flow back to the input size +flo = np.swapaxes(np.swapaxes(flo, 0, 1), 1, 2) # u_ = cv2.resize(flo[:,:,0],(W,H)) v_ = cv2.resize(flo[:,:,1],(W,H)) u_ *= W/ float(W_)