Skip to content

Commit 7ab8666

Browse files
committed
Add ROI layer
1 parent 56f76d6 commit 7ab8666

File tree

16 files changed

+923
-1
lines changed

16 files changed

+923
-1
lines changed
Lines changed: 0 additions & 1 deletion
This file was deleted.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
.ipynb_checkpoints/
2+
build/
3+
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Hint from TensorLayer
2+
- From `https://github.com/deepsense-ai/roi-pooling`
3+
- Date 31 Aug 2017
4+
- Remember to modify the `CUDA_LIB` in Makefile before `python setup.py install`.
5+
- Make sure `roi_pooling_example.py` and `test_roi_layer.py` are runnable.
6+
7+
8+
----
9+
10+
11+
## RoI pooling in TensorFlow
12+
13+
This repo contains the implementation of **Region of Interest pooling** as a custom TensorFlow operation. The CUDA code responsible for the computations was largely taken from the original [Caffe implementation by Ross Girshick](https://github.com/rbgirshick/fast-rcnn).
14+
15+
For more information about RoI pooling you can check out [Region of interest pooling explained](https://deepsense.io/region-of-interest-pooling-explained/) at our [deepsense.io](https://deepsense.io/) blog.
16+
17+
![Region of Interest Pooling animation](roi_pooling_animation.gif)
18+
19+
20+
## Requirements
21+
22+
To compile and use `roi_pooling` layer you need to have:
23+
24+
* [CUDA](https://developer.nvidia.com/cuda-toolkit) (tested with 8.0)
25+
* [TensorFlow](https://www.tensorflow.org/) (tested with 0.12.0 and 1.0.0)
26+
27+
Only official TensorFlow releases are currently supported. If you're using a custom built TensorFlow compiled with a different GCC version (e.g. 5.X) you may need to modify the makefile to [enable the new ABI version](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html).
28+
29+
30+
## Install
31+
32+
Since the operation is compiled from C++/CUDA sources, install it from a source checkout:
33+
34+
```bash
35+
$ git clone git@github.com:deepsense-io/roi-pooling.git
36+
$ cd roi-pooling
37+
$ python setup.py install
38+
```
39+
40+
Right now we provide only a GPU implementation (there is no CPU implementation at this time).
41+
42+
43+
## Usage
44+
45+
After successful installation you can use the operation like this:
46+
47+
```python
48+
from roi_pooling.roi_pooling_ops import roi_pooling
49+
50+
# here obtain feature map and regions of interest
51+
rpooling = roi_pooling(feature_map, rois, 7, 7)
52+
# continue the model
53+
```
54+
55+
Working example in Jupyter Notebook: [examples/roi_pooling_minimal_example.ipynb](https://github.com/deepsense-io/roi-pooling/blob/master/examples/roi_pooling_minimal_example.ipynb)
56+

tensorlayer/third_party/roi_pooling/examples/__init__.py

Whitespace-only changes.
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"* blog post: [Region of interest pooling explained - deepsense.io](https://deepsense.io/region-of-interest-pooling-explained/)\n",
8+
"* repository: [deepsense-io/roi-pooling](https://github.com/deepsense-io/roi-pooling)"
9+
]
10+
},
11+
{
12+
"cell_type": "code",
13+
"execution_count": 1,
14+
"metadata": {
15+
"collapsed": true
16+
},
17+
"outputs": [],
18+
"source": [
19+
"from __future__ import print_function\n",
20+
"\n",
21+
"import tensorflow as tf\n",
22+
"import numpy as np\n",
23+
"\n",
24+
"from roi_pooling.roi_pooling_ops import roi_pooling"
25+
]
26+
},
27+
{
28+
"cell_type": "code",
29+
"execution_count": 2,
30+
"metadata": {
31+
"collapsed": true
32+
},
33+
"outputs": [],
34+
"source": [
35+
"# 4x4 feature map with only 1 channel\n",
36+
"input_value = [[\n",
37+
" [[1], [2], [4], [4]],\n",
38+
" [[3], [4], [1], [2]],\n",
39+
" [[6], [2], [1], [7]],\n",
40+
" [[1], [3], [2], [8]]\n",
41+
"]]\n",
42+
"input_value = np.asarray(input_value, dtype='float32')"
43+
]
44+
},
45+
{
46+
"cell_type": "code",
47+
"execution_count": 3,
48+
"metadata": {
49+
"collapsed": true
50+
},
51+
"outputs": [],
52+
"source": [
53+
"# regions of interest as lists of:\n",
54+
"# feature map index, upper left, bottom right coordinates\n",
55+
"rois_value = [\n",
56+
" [0, 0, 0, 1, 3],\n",
57+
" [0, 2, 2, 3, 3],\n",
58+
" [0, 1, 0, 3, 2]\n",
59+
"]\n",
60+
"rois_value = np.asarray(rois_value, dtype='int32')\n",
61+
"\n",
62+
"# in this case we have 3 RoI pooling operations:\n",
63+
"# * channel 0, rectangular region (0, 0) to (1, 3)\n",
64+
"# xx..\n",
65+
"# xx..\n",
66+
"# xx..\n",
67+
"# xx..\n",
68+
"#\n",
69+
"# * channel 0, rectangular region (2, 2) to (3, 3)\n",
70+
"# ....\n",
71+
"# ....\n",
72+
"# ..xx\n",
73+
"# ..xx\n",
74+
"# * channel 0, rectangular region (1, 0) to (3, 2)\n",
75+
"# ....\n",
76+
"# xxx.\n",
77+
"# xxx.\n",
78+
"# xxx."
79+
]
80+
},
81+
{
82+
"cell_type": "code",
83+
"execution_count": 4,
84+
"metadata": {
85+
"collapsed": false
86+
},
87+
"outputs": [
88+
{
89+
"name": "stdout",
90+
"output_type": "stream",
91+
"text": [
92+
"[[[[ 3. 4.]\n",
93+
" [ 6. 3.]]]\n",
94+
"\n",
95+
"\n",
96+
" [[[ 1. 7.]\n",
97+
" [ 2. 8.]]]\n",
98+
"\n",
99+
"\n",
100+
" [[[ 4. 4.]\n",
101+
" [ 4. 7.]]]]\n"
102+
]
103+
}
104+
],
105+
"source": [
106+
"input_featuremap = tf.placeholder(tf.float32)\n",
107+
"rois = tf.placeholder(tf.int32)\n",
108+
"input_const = tf.constant(input_value, tf.float32)\n",
109+
"rois_const = tf.constant(rois_value, tf.int32)\n",
110+
"y = roi_pooling(input_const, rois_const, pool_height=2, pool_width=2)\n",
111+
"\n",
112+
"with tf.Session('') as sess:\n",
113+
" y_output = sess.run(y, feed_dict={input_featuremap: input_value, rois: rois_value})\n",
114+
" print(y_output)"
115+
]
116+
},
117+
{
118+
"cell_type": "code",
119+
"execution_count": null,
120+
"metadata": {
121+
"collapsed": true
122+
},
123+
"outputs": [],
124+
"source": []
125+
}
126+
],
127+
"metadata": {
128+
"kernelspec": {
129+
"display_name": "Python 2",
130+
"language": "python",
131+
"name": "python2"
132+
},
133+
"language_info": {
134+
"codemirror_mode": {
135+
"name": "ipython",
136+
"version": 2
137+
},
138+
"file_extension": ".py",
139+
"mimetype": "text/x-python",
140+
"name": "python",
141+
"nbconvert_exporter": "python",
142+
"pygments_lexer": "ipython2",
143+
"version": "2.7.12"
144+
}
145+
},
146+
"nbformat": 4,
147+
"nbformat_minor": 2
148+
}
Binary file not shown.
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Builds the RoI pooling TensorFlow op as a shared library (roi_pooling.so).

# TensorFlow header directory, queried from the Python interpreter on PATH.
TF_INC = $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
# Directory that contains libcudart; adjust to match the local CUDA install.
CUDA_LIB = /usr/local/cuda-8.0/lib64

# These targets do not name files, so they must run even if a file with the
# same name happens to exist.
.PHONY: all build test clean

all: clean build test

build: roi_pooling.so

# NOTE: recipes use $^ (all prerequisites) instead of $? (only prerequisites
# newer than the target). With $?, touching just roi_pooling.cc would relink
# roi_pooling.so without roi_pooling.cu.o and fail with undefined symbols.
roi_pooling.cu.o: roi_pooling.cu.cc
	nvcc -std=c++11 -c -o $@ $^ -I $(TF_INC) -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -D _GLIBCXX_USE_CXX11_ABI=0

roi_pooling.so: roi_pooling.cc roi_pooling.cu.o
	g++ -std=c++11 -shared -o $@ $^ -I $(TF_INC) -fPIC -lcudart -L$(CUDA_LIB) -D _GLIBCXX_USE_CXX11_ABI=0

test: build
	python roi_pooling_test.py

clean:
	rm -f *.o *.so *.pyc *.npy

tensorlayer/third_party/roi_pooling/roi_pooling/__init__.py

Whitespace-only changes.
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
#include "tensorflow/core/framework/op.h"
2+
#include "tensorflow/core/framework/op_kernel.h"
3+
#include <cstdio>
4+
#include <iostream>
5+
#include <typeinfo>
6+
7+
using namespace tensorflow;
8+
using namespace std;
9+
10+
REGISTER_OP("RoiPooling")
11+
.Input("input: float32")
12+
.Input("rois: int32")
13+
.Attr("pool_height: int")
14+
.Attr("pool_width: int")
15+
.Output("output: float32")
16+
.Output("argmax_output: int32");
17+
18+
19+
#define Dtype float
20+
21+
void RoiPoolingKernelLauncher(const float* input, const int* rois, int n_rois, int channels, int height, int width,
22+
int pooled_height, int pooled_width, Dtype* output, int* argmax_output);
23+
24+
// IMPORTANT(maciek): need info about storage of the data in memory, assumed something but need the docs confirming it
25+
26+
class RoiPoolingOp : public OpKernel {
27+
private:
28+
int pool_height_, pool_width_;
29+
public:
30+
explicit RoiPoolingOp(OpKernelConstruction* context) : OpKernel(context) {
31+
OP_REQUIRES_OK(context,
32+
context->GetAttr("pool_height", &pool_height_));
33+
34+
OP_REQUIRES_OK(context,
35+
context->GetAttr("pool_width", &pool_width_));
36+
}
37+
38+
39+
void Compute(OpKernelContext* context) override {
40+
// Grab the input tensor
41+
const Tensor& input_tensor = context->input(0);
42+
const Tensor& rois_tensor = context->input(1);
43+
44+
auto input = input_tensor.flat<float>();
45+
auto rois = rois_tensor.flat<int32>();
46+
47+
// Create an output tensor
48+
Tensor* output_tensor = NULL;
49+
Tensor* argmax_output_tensor = NULL;
50+
51+
auto input_shape = input_tensor.shape();
52+
auto rois_shape = rois_tensor.shape();
53+
54+
int n_rois = rois_shape.dim_size(0);
55+
int height = input_shape.dim_size(1);
56+
int width = input_shape.dim_size(2);
57+
int channels = input_shape.dim_size(3);
58+
59+
TensorShape output_shape = TensorShape({static_cast<int64>(n_rois),
60+
static_cast<int64>(channels),
61+
static_cast<int64>(pool_height_),
62+
static_cast<int64>(pool_width_)});
63+
64+
OP_REQUIRES_OK(context, context->allocate_output(0, output_shape,
65+
&output_tensor));
66+
67+
OP_REQUIRES_OK(context, context->allocate_output(1, output_shape,
68+
&argmax_output_tensor));
69+
70+
auto output = output_tensor->template flat<float>();
71+
auto argmax_output = argmax_output_tensor->template flat<int32>();
72+
73+
RoiPoolingKernelLauncher(input.data(), rois.data(),
74+
n_rois, channels,
75+
height, width,
76+
pool_height_, pool_width_,
77+
output.data(), argmax_output.data());
78+
}
79+
};
80+
81+
REGISTER_KERNEL_BUILDER(Name("RoiPooling").Device(DEVICE_GPU), RoiPoolingOp);
82+
83+
///////////// RoiPoolingGrad
84+
85+
86+
REGISTER_OP("RoiPoolingGrad")
87+
.Input("orig_input: float32")
88+
.Input("orig_rois: int32")
89+
.Input("orig_output: float32")
90+
.Input("orig_argmax_output: int32")
91+
.Input("orig_output_grad: float32")
92+
.Attr("pool_height: int")
93+
.Attr("pool_width: int")
94+
.Output("output: float32")
95+
.Doc(R"doc(
96+
region of interest pooling grad
97+
)doc");
98+
99+
#define Dtype float
100+
void RoiPoolingGradKernelLauncher(const Dtype* orig_input, const int* orig_rois,
101+
int mb_size,
102+
int n_rois, int channels, int height, int width,
103+
int pooled_height, int pooled_width,
104+
const Dtype* orig_output, const int* orig_argmax_output,
105+
const Dtype* orig_output_grad,
106+
Dtype* output);
107+
108+
// IMPORTANT(maciek): need info about storage of the data in memory, assumed something but need the docs confirming it
109+
110+
class RoiPoolingGradOp : public OpKernel {
111+
private:
112+
int pool_height_, pool_width_;
113+
public:
114+
explicit RoiPoolingGradOp(OpKernelConstruction* context) : OpKernel(context) {
115+
OP_REQUIRES_OK(context,
116+
context->GetAttr("pool_height", &pool_height_));
117+
118+
OP_REQUIRES_OK(context,
119+
context->GetAttr("pool_width", &pool_width_));
120+
}
121+
122+
123+
void Compute(OpKernelContext* context) override {
124+
// Grab the input tensor
125+
const Tensor& orig_input_tensor = context->input(0);
126+
const Tensor& orig_rois_tensor = context->input(1);
127+
const Tensor& orig_output_tensor = context->input(2);
128+
const Tensor& orig_argmax_output_tensor = context->input(3);
129+
const Tensor& orig_output_grad_tensor = context->input(4);
130+
131+
auto orig_input = orig_input_tensor.flat<float>();
132+
auto orig_rois = orig_rois_tensor.flat<int32>();
133+
auto orig_output = orig_output_tensor.flat<float>();
134+
auto orig_argmax_output = orig_argmax_output_tensor.flat<int32>();
135+
auto orig_output_grad = orig_output_grad_tensor.flat<float>();
136+
137+
// Create an output tensor
138+
Tensor* output_tensor = NULL;
139+
auto orig_input_shape = orig_input_tensor.shape();
140+
auto orig_rois_shape = orig_rois_tensor.shape();
141+
auto grads_shape = orig_input_shape;
142+
143+
int mb_size = orig_input_shape.dim_size(0);
144+
int n_rois = orig_rois_shape.dim_size(0);
145+
int height = orig_input_shape.dim_size(1);
146+
int width = orig_input_shape.dim_size(2);
147+
int channels = orig_input_shape.dim_size(3);
148+
149+
OP_REQUIRES_OK(context, context->allocate_output(0, grads_shape,
150+
&output_tensor));
151+
152+
auto output = output_tensor->template flat<float>();
153+
154+
// Call the cuda kernel launcher
155+
RoiPoolingGradKernelLauncher(orig_input.data(), orig_rois.data(),
156+
mb_size, n_rois, channels, height, width, pool_height_, pool_width_,
157+
orig_output.data(), orig_argmax_output.data(), orig_output_grad.data(), output.data());
158+
}
159+
};
160+
161+
162+
REGISTER_KERNEL_BUILDER(Name("RoiPoolingGrad").Device(DEVICE_GPU), RoiPoolingGradOp);

0 commit comments

Comments
 (0)