
Commit e8b6c05

Slice layer (#3055)
* Added tensor slice layer
* Added interface description for slice layer
* Add doc for copy_tensor overload
1 parent ef3d636 commit e8b6c05

File tree

9 files changed: +707 lines added, 0 lines deleted

dlib/cuda/cpu_dlib.cpp

Lines changed: 70 additions & 0 deletions
@@ -3105,6 +3105,76 @@ namespace dlib
        }
    }

    // ------------------------------------------------------------------------------------

    void copy_tensor(
        bool add_to,
        tensor& dest,
        size_t dk, size_t dnr, size_t dnc,
        const tensor& src,
        size_t sk, size_t snr, size_t snc,
        size_t k, size_t nr, size_t nc
    )
    {
        size_t dest_stride_sample = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
        size_t dest_stride_k      = static_cast<size_t>(dest.nc() * dest.nr());
        size_t dest_stride_nr     = static_cast<size_t>(dest.nc());

        size_t src_stride_sample = static_cast<size_t>(src.nc() * src.nr() * src.k());
        size_t src_stride_k      = static_cast<size_t>(src.nc() * src.nr());
        size_t src_stride_nr     = static_cast<size_t>(src.nc());

        DLIB_CASSERT(dest.num_samples() == src.num_samples(), "All sources should fit into dest tensor size");
        DLIB_CASSERT(dest.k() - dk >= k &&
                     dest.nr() - dnr >= nr &&
                     dest.nc() - dnc >= nc, "Not enough space in dest tensor");
        DLIB_CASSERT(src.k() - sk >= k &&
                     src.nr() - snr >= nr &&
                     src.nc() - snc >= nc, "Not enough space in src tensor");

        float* dest_p = dest.host() + dk * dest_stride_k
                                    + dnr * dest_stride_nr
                                    + dnc;

        const float* src_p = src.host() + sk * src_stride_k
                                        + snr * src_stride_nr
                                        + snc;

        for (long i = 0; i < src.num_samples(); ++i)
        {
            float* dest_channel_p = dest_p;
            const float* src_channel_p = src_p;

            for (long j = 0; j < k; ++j)
            {
                float* dest_row_p = dest_channel_p;
                const float* src_row_p = src_channel_p;

                for (long r = 0; r < nr; ++r)
                {
                    if (add_to)
                    {
                        for (size_t c = 0; c < nc; ++c)
                            dest_row_p[c] += src_row_p[c];
                    }
                    else
                    {
                        ::memcpy(dest_row_p, src_row_p, nc * sizeof(float));
                    }

                    dest_row_p += dest_stride_nr;
                    src_row_p += src_stride_nr;
                }

                dest_channel_p += dest_stride_k;
                src_channel_p += src_stride_k;
            }

            dest_p += dest_stride_sample;
            src_p += src_stride_sample;
        }
    }

    // ------------------------------------------------------------------------------------

    void transpose(
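
For illustration only (this sketch is not part of the commit), the same stride arithmetic can be exercised on plain row-major arrays, ignoring the sample dimension: copy a k x nr x nc block from one offset of a source volume into another offset of a destination volume.

#include <cstddef>
#include <cstring>
#include <iostream>
#include <vector>

int main()
{
    // Source and destination volumes, both row-major with layout [K][NR][NC].
    const std::size_t SK = 3, SNR = 4, SNC = 5;
    const std::size_t DK = 3, DNR = 4, DNC = 5;
    std::vector<float> src(SK*SNR*SNC), dest(DK*DNR*DNC, 0.0f);
    for (std::size_t i = 0; i < src.size(); ++i)
        src[i] = static_cast<float>(i);

    // Copy a 2x2x3 block from offset (k=1, r=1, c=1) in src to (k=0, r=2, c=0) in dest.
    const std::size_t k = 2, nr = 2, nc = 3;
    const std::size_t sk = 1, snr = 1, snc = 1;
    const std::size_t dk = 0, dnr = 2, dnc = 0;

    // Point at the top-left corner of each block, then step by the *full* row and
    // channel strides of the enclosing volumes, exactly like the CPU code above.
    const float* sp = src.data()  + sk*SNR*SNC + snr*SNC + snc;
    float*       dp = dest.data() + dk*DNR*DNC + dnr*DNC + dnc;
    for (std::size_t j = 0; j < k; ++j)
        for (std::size_t r = 0; r < nr; ++r)
            std::memcpy(dp + j*DNR*DNC + r*DNC, sp + j*SNR*SNC + r*SNC, nc*sizeof(float));

    // Element (k=0, r=2, c=0) of dest now equals element (k=1, r=1, c=1) of src (value 26).
    std::cout << dest[0*DNR*DNC + 2*DNC + 0] << std::endl;
    return 0;
}

The commit's CPU implementation does exactly this once per sample, using memcpy when add_to is false and an element-wise += when it is true.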

dlib/cuda/cpu_dlib.h

Lines changed: 11 additions & 0 deletions
@@ -692,6 +692,17 @@ namespace dlib
        size_t count_k
    );

    // -----------------------------------------------------------------------------------

    void copy_tensor(
        bool add_to,
        tensor& dest,
        size_t dk, size_t dnr, size_t dnc,
        const tensor& src,
        size_t sk, size_t snr, size_t snc,
        size_t k, size_t nr, size_t nc
    );

    // -----------------------------------------------------------------------------------

    void transpose(

dlib/cuda/cuda_dlib.cu

Lines changed: 71 additions & 0 deletions
@@ -2623,6 +2623,77 @@ namespace dlib
        }
    }

    __global__ void _cuda_copy_strided_tensor_add_to (float* dest, const float* src,
                                                      size_t ns, size_t nk, size_t nr, size_t nc,
                                                      size_t dk, size_t dr, size_t dc,
                                                      size_t sk, size_t sr, size_t sc)
    {
        for (auto i : grid_stride_range(0, ns*nk*nr*nc))
        {
            size_t n,k,r,c;
            unpack_idx(i, nk,nr,nc, n,k,r,c);
            dest[pack_idx(dk,dr,dc, n,k,r,c)] += src[pack_idx(sk,sr,sc, n,k,r,c)];
        }
    }

    __global__ void _cuda_copy_strided_tensor (float* dest, const float* src,
                                               size_t ns, size_t nk, size_t nr, size_t nc,
                                               size_t dk, size_t dr, size_t dc,
                                               size_t sk, size_t sr, size_t sc)
    {
        for (auto i : grid_stride_range(0, ns*nk*nr*nc))
        {
            size_t n,k,r,c;
            unpack_idx(i, nk,nr,nc, n,k,r,c);
            dest[pack_idx(dk,dr,dc, n,k,r,c)] = src[pack_idx(sk,sr,sc, n,k,r,c)];
        }
    }

    void copy_tensor(
        bool add_to,
        tensor& dest,
        size_t dk, size_t dnr, size_t dnc,
        const tensor& src,
        size_t sk, size_t snr, size_t snc,
        size_t k, size_t nr, size_t nc
    )
    {
        DLIB_CASSERT(dest.num_samples() == src.num_samples(), "All sources should fit into dest tensor size");
        DLIB_CASSERT(dest.k() - dk >= k &&
                     dest.nr() - dnr >= nr &&
                     dest.nc() - dnc >= nc, "Not enough space in dest tensor");
        DLIB_CASSERT(src.k() - sk >= k &&
                     src.nr() - snr >= nr &&
                     src.nc() - snc >= nc, "Not enough space in src tensor");

        float* dest_p = dest.device() + dk * static_cast<size_t>(dest.nc() * dest.nr())
                                      + dnr * static_cast<size_t>(dest.nc())
                                      + dnc;

        const float* src_p = src.device() + sk * static_cast<size_t>(src.nc() * src.nr())
                                          + snr * static_cast<size_t>(src.nc())
                                          + snc;

        if (add_to)
        {
            launch_kernel(_cuda_copy_strided_tensor_add_to, max_jobs(dest.size()),
                          dest_p, src_p, dest.num_samples(),
                          k, nr, nc,
                          dest.k(), dest.nr(), dest.nc(),
                          src.k(), src.nr(), src.nc());
        }
        else
        {
            launch_kernel(_cuda_copy_strided_tensor, max_jobs(dest.size()),
                          dest_p, src_p, dest.num_samples(),
                          k, nr, nc,
                          dest.k(), dest.nr(), dest.nc(),
                          src.k(), src.nr(), src.nc());
        }
    }

    // ----------------------------------------------------------------------------------------

    __global__ void _cuda_transpose(size_t dsize, size_t dk, size_t dnr, size_t dnc, float* d,
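
The kernels above rely on dlib's unpack_idx/pack_idx device helpers to turn a flat grid-stride index into (sample, k, row, column) coordinates and then back into an offset using a tensor's own dimensions. As an illustration only, with hypothetical host-side stand-ins rather than the library's definitions, the round trip can be checked like this:

#include <cstddef>
#include <iostream>

// Hypothetical stand-ins, written only to make the stride arithmetic explicit;
// they are not dlib's device helpers.
static void unpack_index(std::size_t i, std::size_t nk, std::size_t nr, std::size_t nc,
                         std::size_t& n, std::size_t& k, std::size_t& r, std::size_t& c)
{
    c = i % nc;  i /= nc;
    r = i % nr;  i /= nr;
    k = i % nk;  i /= nk;
    n = i;
}

static std::size_t pack_index(std::size_t nk, std::size_t nr, std::size_t nc,
                              std::size_t n, std::size_t k, std::size_t r, std::size_t c)
{
    return ((n*nk + k)*nr + r)*nc + c;
}

int main()
{
    // Walk a 2-sample 3x4x5 block the way the grid-stride loop would, and check
    // that unpacking an index and re-packing it with the same dims is the identity.
    const std::size_t ns = 2, nk = 3, nr = 4, nc = 5;
    for (std::size_t i = 0; i < ns*nk*nr*nc; ++i)
    {
        std::size_t n, k, r, c;
        unpack_index(i, nk, nr, nc, n, k, r, c);
        if (pack_index(nk, nr, nc, n, k, r, c) != i)
        {
            std::cout << "mismatch at " << i << std::endl;
            return 1;
        }
    }
    std::cout << "round trip OK" << std::endl;
    return 0;
}

In the kernels, the re-pack uses the destination's or source's full k(), nr(), and nc() as the dimensions, so indexing relative to the pre-shifted dest_p and src_p pointers lands on the right sub-block.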

dlib/cuda/cuda_dlib.h

Lines changed: 11 additions & 0 deletions
@@ -589,6 +589,17 @@ namespace dlib
        size_t count_k
    );

    // ----------------------------------------------------------------------------------------

    void copy_tensor(
        bool add_to,
        tensor& dest,
        size_t dk, size_t dnr, size_t dnc,
        const tensor& src,
        size_t sk, size_t snr, size_t snc,
        size_t k, size_t nr, size_t nc
    );

    // ----------------------------------------------------------------------------------------

    void transpose(

dlib/cuda/tensor_tools.cpp

Lines changed: 18 additions & 0 deletions
@@ -1333,6 +1333,24 @@ namespace dlib { namespace tt
#endif
    }

    // ----------------------------------------------------------------------------------------

    void copy_tensor(
        bool add_to,
        tensor& dest,
        size_t dk, size_t dnr, size_t dnc,
        const tensor& src,
        size_t sk, size_t snr, size_t snc,
        size_t k, size_t nr, size_t nc
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::copy_tensor(add_to, dest, dk, dnr, dnc, src, sk, snr, snc, k, nr, nc);
#else
        cpu::copy_tensor(add_to, dest, dk, dnr, dnc, src, sk, snr, snc, k, nr, nc);
#endif
    }

    // ----------------------------------------------------------------------------------------

    void inv::

dlib/cuda/tensor_tools.h

Lines changed: 32 additions & 0 deletions
@@ -2334,6 +2334,38 @@ namespace dlib { namespace tt
            i.e., copies content of each sample from src in to corresponding place of sample at dest.
    !*/

    // ----------------------------------------------------------------------------------------

    void copy_tensor(
        bool add_to,
        tensor& dest,
        size_t dk, size_t dnr, size_t dnc,
        const tensor& src,
        size_t sk, size_t snr, size_t snc,
        size_t k, size_t nr, size_t nc
    );
    /*!
        requires
            - dest.num_samples() == src.num_samples()
            - dest.k() - dk >= k
            - dest.nr() - dnr >= nr
            - dest.nc() - dnc >= nc
            - src.k() - sk >= k
            - src.nr() - snr >= nr
            - src.nc() - snc >= nc
            - is_same_object(dest,src) == false
            - The memory areas of src and dest do not overlap.
        ensures
            - if (add_to) then
                - performs: dest[i, j + dk, r + dnr, c + dnc] += src[i, j + sk, r + snr, c + snc],
                  where j in [0..k), r in [0..nr), and c in [0..nc),
                  i.e., adds the content of each sample of src into the corresponding place in dest.
            - else
                - performs: dest[i, j + dk, r + dnr, c + dnc] = src[i, j + sk, r + snr, c + snc],
                  where j in [0..k), r in [0..nr), and c in [0..nc),
                  i.e., copies the content of each sample of src into the corresponding place in dest.
    !*/

    // ----------------------------------------------------------------------------------------

    void transpose(
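
A minimal usage sketch of the documented overload (not part of the commit; it assumes a normal dlib build with dlib/dnn.h available): copy a 2-channel 8x8 window that starts at channel 1, row 4, column 4 of src into the top-left corner of dest.

#include <dlib/dnn.h>
#include <iostream>

int main()
{
    dlib::resizable_tensor src(1, 3, 16, 16);   // 1 sample, 3 channels, 16x16
    dlib::resizable_tensor dest(1, 2, 8, 8);    // 1 sample, 2 channels, 8x8

    // Fill src with a recognizable pattern.
    for (size_t i = 0; i < src.size(); ++i)
        src.host()[i] = static_cast<float>(i);

    // add_to == false: plain copy of the sub-block.
    // Arguments: dest, (dk,dnr,dnc) = (0,0,0), src, (sk,snr,snc) = (1,4,4), block (k,nr,nc) = (2,8,8).
    dlib::tt::copy_tensor(false, dest, 0, 0, 0, src, 1, 4, 4, 2, 8, 8);

    // dest at (sample 0, k=0, r=0, c=0) now holds src at (sample 0, k=1, r=4, c=4).
    std::cout << dest.host()[0] << " == "
              << src.host()[((0*3 + 1)*16 + 4)*16 + 4] << std::endl;
    return 0;
}

The add_to == true form accumulates instead of overwriting, which is what the slice layer's backward pass uses to route gradients back into the right sub-block.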

dlib/dnn/layers.h

Lines changed: 125 additions & 0 deletions
@@ -4631,6 +4631,131 @@ namespace dlib
        >
    using extract = add_layer<extract_<offset,k,nr,nc>, SUBNET>;

    // ----------------------------------------------------------------------------------------

    template <
        long _offset_k,
        long _offset_nr,
        long _offset_nc,
        long _k,
        long _nr,
        long _nc
        >
    class slice_
    {
        static_assert(_offset_k >= 0, "The channel offset must be >= 0.");
        static_assert(_offset_nr >= 0, "The row offset must be >= 0.");
        static_assert(_offset_nc >= 0, "The column offset must be >= 0.");
        static_assert(_k > 0, "The number of channels must be > 0.");
        static_assert(_nr > 0, "The number of rows must be > 0.");
        static_assert(_nc > 0, "The number of columns must be > 0.");
    public:
        slice_(
        )
        {
        }

        template <typename SUBNET>
        void setup (const SUBNET& sub)
        {
            DLIB_CASSERT((long)sub.get_output().size() >= sub.get_output().num_samples()*(_offset_k+_offset_nr+_offset_nc+_k*_nr*_nc),
                "The tensor we are trying to slice from the input tensor is too big to fit into the input tensor.");
        }

        template <typename SUBNET>
        void forward(const SUBNET& sub, resizable_tensor& output)
        {
            output.set_size(sub.get_output().num_samples(), _k, _nr, _nc);
            tt::copy_tensor(false, output, 0, 0, 0, sub.get_output(), _offset_k, _offset_nr, _offset_nc, _k, _nr, _nc);
        }

        template <typename SUBNET>
        void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
        {
            tt::copy_tensor(true, sub.get_gradient_input(), _offset_k, _offset_nr, _offset_nc, gradient_input, 0, 0, 0, _k, _nr, _nc);
        }

        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

        friend void serialize(const slice_& /*item*/, std::ostream& out)
        {
            serialize("slice_", out);
            serialize(_offset_k, out);
            serialize(_offset_nr, out);
            serialize(_offset_nc, out);
            serialize(_k, out);
            serialize(_nr, out);
            serialize(_nc, out);
        }

        friend void deserialize(slice_& /*item*/, std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "slice_")
                throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::slice_.");

            long offset_k;
            long offset_nr;
            long offset_nc;
            long k;
            long nr;
            long nc;
            deserialize(offset_k, in);
            deserialize(offset_nr, in);
            deserialize(offset_nc, in);
            deserialize(k, in);
            deserialize(nr, in);
            deserialize(nc, in);

            if (offset_k != _offset_k) throw serialization_error("Wrong offset_k found while deserializing dlib::slice_");
            if (offset_nr != _offset_nr) throw serialization_error("Wrong offset_nr found while deserializing dlib::slice_");
            if (offset_nc != _offset_nc) throw serialization_error("Wrong offset_nc found while deserializing dlib::slice_");
            if (k != _k) throw serialization_error("Wrong k found while deserializing dlib::slice_");
            if (nr != _nr) throw serialization_error("Wrong nr found while deserializing dlib::slice_");
            if (nc != _nc) throw serialization_error("Wrong nc found while deserializing dlib::slice_");
        }

        friend std::ostream& operator<<(std::ostream& out, const slice_& /*item*/)
        {
            out << "slice\t ("
                << "offset_k="<<_offset_k
                << ", offset_nr="<<_offset_nr
                << ", offset_nc="<<_offset_nc
                << ", k="<<_k
                << ", nr="<<_nr
                << ", nc="<<_nc
                << ")";
            return out;
        }

        friend void to_xml(const slice_& /*item*/, std::ostream& out)
        {
            out << "<slice";
            out << " offset_k='"<<_offset_k<<"'";
            out << " offset_nr='"<<_offset_nr<<"'";
            out << " offset_nc='"<<_offset_nc<<"'";
            out << " k='"<<_k<<"'";
            out << " nr='"<<_nr<<"'";
            out << " nc='"<<_nc<<"'";
            out << "/>\n";
        }

    private:
        resizable_tensor params; // unused
    };

    template <
        long offset_k,
        long offset_nr,
        long offset_nc,
        long k,
        long nr,
        long nc,
        typename SUBNET
        >
    using slice = add_layer<slice_<offset_k,offset_nr,offset_nc,k,nr,nc>, SUBNET>;

    // ----------------------------------------------------------------------------------------

    template <long long row_stride = 2, long long col_stride = 2>
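
A minimal sketch (not part of the commit) of wiring the new layer into a network. The surrounding layers and the 32-channel activation shape are assumptions for illustration, and it presumes the input images are large enough that the convolution output has at least 16 channels, 28 rows, and 28 columns.

#include <dlib/dnn.h>
#include <iostream>

using namespace dlib;

// slice<offset_k, offset_nr, offset_nc, k, nr, nc, SUBNET>
using net_type =
    loss_multiclass_log<
    fc<10,
    relu<
    slice<0, 0, 0, 16, 28, 28,          // keep channels 0..15 of a 28x28 map
    con<32, 3, 3, 1, 1,
    input<matrix<unsigned char>>
    >>>>>;

int main()
{
    net_type net;
    // Printing the network shows the slice layer with its template parameters.
    std::cout << net << std::endl;
    return 0;
}

Because the offsets and sizes are template parameters, the sliced shape is fixed at compile time; forward() copies the sub-block out with add_to == false, and backward() adds the incoming gradient back into the corresponding region of the input's gradient.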
