@@ -1620,122 +1620,175 @@ namespace dlib
 
 namespace ttimpl
 {
-    void softmax (
-        const long num_locations,
-        const long num_channels,
-        tensor& dest,
-        const tensor& src
-    )
-    {
-        DLIB_ASSERT(num_channels*num_locations == src.nr()*src.nc()*src.k());
-        DLIB_CASSERT(have_same_dimensions(dest,src));
-        const auto d = dest.host();
-        const auto s = src.host();
+    void softmax (
+        const long num_locations,
+        const long num_channels,
+        tensor& dest,
+        const tensor& src,
+        operation_mode mode = operation_mode::CHANNEL_WISE
+    )
+    {
+        DLIB_ASSERT(num_channels * num_locations == src.nr() * src.nc() * src.k());
+        DLIB_CASSERT(have_same_dimensions(dest, src));
+        const auto d = dest.host();
+        const auto s = src.host();
 
-        // Note that we subtract out the max values in each channel before applying
-        // exp() to avoid numeric overflow in the subsequent computations. Doing this
-        // doesn't change the resulting output, it just makes it more numerically
-        // stable.
-        for (long n = 0; n < src.num_samples(); ++n)
-        {
-            auto ss = s + num_locations*num_channels*n;
-            auto dd = d + num_locations*num_channels*n;
-            for (long i = 0; i < num_locations; ++i)
+        for (long n = 0; n < src.num_samples(); ++n)
         {
-                float max_val = -std::numeric_limits<float>::infinity();
-                for (long k = 0; k < num_channels; ++k)
-                    max_val = std::max(max_val, ss[k*num_locations]);
+            auto ss = s + num_locations * num_channels * n;
+            auto dd = d + num_locations * num_channels * n;
 
-                for (long k = 0; k < num_channels; ++k)
-                    dd[k*num_locations] = std::exp(ss[k*num_locations]-max_val);
+            if (mode == operation_mode::CHANNEL_WISE)
+            {
+                for (long i = 0; i < num_locations; ++i)
+                {
+                    float max_val = -std::numeric_limits<float>::infinity();
+                    for (long k = 0; k < num_channels; ++k)
+                        max_val = std::max(max_val, ss[k * num_locations]);
 
-                ++ss;
-                ++dd;
-            }
-        }
+                    float sum = 0.0f;
+                    for (long k = 0; k < num_channels; ++k)
+                    {
+                        dd[k * num_locations] = std::exp(ss[k * num_locations] - max_val);
+                        sum += dd[k * num_locations];
+                    }
+                    for (long k = 0; k < num_channels; ++k)
+                        dd[k * num_locations] /= sum;
 
-        // Now normalize each channel so they sum to 1.
-        for (long n = 0; n < src.num_samples(); ++n)
-        {
-            const auto dd = d + num_locations*num_channels*n;
-            for (long i = 0; i < num_locations; ++i)
-            {
-                const auto ddd = dd+i;
+                    ++ss;
+                    ++dd;
+                }
+            }
+            else if (mode == operation_mode::PLANE_WISE)
+            {
+                for (long k = 0; k < num_channels; ++k)
+                {
+                    auto s_channel = ss + k * num_locations;
+                    auto d_channel = dd + k * num_locations;
+                    for (long r = 0; r < src.nr(); ++r)
+                    {
+                        float max_val = -std::numeric_limits<float>::infinity();
+                        for (long c = 0, idx = r * src.nc(); c < src.nc(); ++c, ++idx)
+                            max_val = std::max(max_val, s_channel[idx]);
 
-                float temp = 0;
-                for (long k = 0; k < num_channels; ++k)
-                    temp += ddd[k*num_locations];
-                for (long k = 0; k < num_channels; ++k)
-                    ddd[k*num_locations] /= temp;
+                        if (max_val == -std::numeric_limits<float>::infinity())
+                        {
+                            for (long c = 0, idx = r * src.nc(); c < src.nc(); ++c, ++idx)
+                                d_channel[idx] = 0.0f;
+                        }
+                        else
+                        {
+                            float sum = 0.0f;
+                            for (long c = 0, idx = r * src.nc(); c < src.nc(); ++c, ++idx)
+                            {
+                                d_channel[idx] = std::exp(s_channel[idx] - max_val);
+                                sum += d_channel[idx];
+                            }
+                            for (long c = 0, idx = r * src.nc(); c < src.nc(); ++c, ++idx)
+                                d_channel[idx] /= sum;
+                        }
+                    }
+                }
+            }
         }
     }
-    }
 
-    void softmax_gradient (
-        const long num_locations,
-        const long num_channels,
-        tensor& grad,
-        const tensor& dest,
-        const tensor& gradient_input
-    )
-    {
-        DLIB_ASSERT(num_channels*num_locations == grad.nr()*grad.nc()*grad.k());
-        DLIB_CASSERT(have_same_dimensions(grad,dest));
-        DLIB_CASSERT(have_same_dimensions(grad,gradient_input));
-        const auto d = dest.host();
-        const auto g = grad.host();
-        const auto in = gradient_input.host();
-
-
-        for (long n = 0; n < grad.num_samples(); ++n)
+    void softmax_gradient (
+        const long num_locations,
+        const long num_channels,
+        tensor& grad,
+        const tensor& dest,
+        const tensor& gradient_input,
+        operation_mode mode = operation_mode::CHANNEL_WISE
+    )
     {
-            const auto d2 = d + num_locations*num_channels*n;
-            const auto g2 = g + num_locations*num_channels*n;
-            const auto in2 = in + num_locations*num_channels*n;
-            for (long i = 0; i < num_locations; ++i)
+        DLIB_ASSERT(num_channels * num_locations == grad.nr() * grad.nc() * grad.k());
+        DLIB_CASSERT(have_same_dimensions(grad, dest));
+        DLIB_CASSERT(have_same_dimensions(grad, gradient_input));
+
+        const auto d = dest.host();
+        const auto g = grad.host();
+        const auto in = gradient_input.host();
+        for (long n = 0; n < grad.num_samples(); ++n)
         {
-                const auto d3 = d2+i;
-                const auto g3 = g2+i;
-                const auto in3 = in2+i;
+            const auto d2 = d + num_locations * num_channels * n;
+            const auto g2 = g + num_locations * num_channels * n;
+            const auto in2 = in + num_locations * num_channels * n;
 
-                float temp = 0;
-                for (long k = 0; k < num_channels; ++k)
-                    temp += -d3[k*num_locations]*in3[k*num_locations];
-                if (is_same_object(gradient_input, grad))
+            if (mode == operation_mode::CHANNEL_WISE)
             {
-                    for (long k = 0; k < num_channels; ++k)
-                        g3[k*num_locations] = d3[k*num_locations]*(temp+in3[k*num_locations]);
+                for (long i = 0; i < num_locations; ++i)
+                {
+                    const auto d3 = d2 + i;
+                    const auto g3 = g2 + i;
+                    const auto in3 = in2 + i;
+                    float sum = 0.0f;
+                    for (long k = 0; k < num_channels; ++k)
+                        sum += -d3[k * num_locations] * in3[k * num_locations];
+                    if (is_same_object(gradient_input, grad))
+                    {
+                        for (long k = 0; k < num_channels; ++k)
+                            g3[k * num_locations] = d3[k * num_locations] * (sum + in3[k * num_locations]);
+                    }
+                    else
+                    {
+                        for (long k = 0; k < num_channels; ++k)
+                            g3[k * num_locations] += d3[k * num_locations] * (sum + in3[k * num_locations]);
+                    }
+                }
             }
-                else
+            else if (mode == operation_mode::PLANE_WISE)
             {
                 for (long k = 0; k < num_channels; ++k)
-                        g3[k*num_locations] += d3[k*num_locations]*(temp+in3[k*num_locations]);
+                {
+                    const auto d_channel = d2 + k * num_locations;
+                    const auto g_channel = g2 + k * num_locations;
+                    const auto in_channel = in2 + k * num_locations;
+                    for (long r = 0; r < grad.nr(); ++r)
+                    {
+                        float sum = 0.0f;
+                        for (long c = 0, idx = r * grad.nc(); c < grad.nc(); ++c, ++idx)
+                            sum += -d_channel[idx] * in_channel[idx];
+                        if (is_same_object(gradient_input, grad))
+                        {
+                            for (long c = 0, idx = r * grad.nc(); c < grad.nc(); ++c, ++idx)
+                                g_channel[idx] = d_channel[idx] * (sum + in_channel[idx]);
+                        }
+                        else
+                        {
+                            for (long c = 0, idx = r * grad.nc(); c < grad.nc(); ++c, ++idx)
+                                g_channel[idx] += d_channel[idx] * (sum + in_channel[idx]);
+                        }
+                    }
+                }
             }
         }
     }
 }
-}
 
 // ----------------------------------------------------------------------------------------
 
-    void softmax (
+    void softmax (
         tensor& dest,
-        const tensor& src
+        const tensor& src,
+        operation_mode mode
     )
     {
-        DLIB_CASSERT(have_same_dimensions(dest,src));
-        ttimpl::softmax(src.nr()*src.nc(), src.k(), dest, src);
+        DLIB_CASSERT(have_same_dimensions(dest, src));
+        DLIB_CASSERT(mode == operation_mode::CHANNEL_WISE || mode == operation_mode::PLANE_WISE, "Invalid softmax mode");
+        ttimpl::softmax(src.nr() * src.nc(), src.k(), dest, src, mode);
     }
 
-    void softmax_gradient (
+    void softmax_gradient (
         tensor& grad,
         const tensor& dest,
-        const tensor& gradient_input
+        const tensor& gradient_input,
+        operation_mode mode
     )
     {
-        DLIB_CASSERT(have_same_dimensions(grad,dest));
-        DLIB_CASSERT(have_same_dimensions(grad,gradient_input));
-        ttimpl::softmax_gradient(grad.nr()*grad.nc(), grad.k(), grad, dest, gradient_input);
+        DLIB_CASSERT(have_same_dimensions(grad, dest));
+        DLIB_CASSERT(have_same_dimensions(grad, gradient_input));
+        ttimpl::softmax_gradient(grad.nr() * grad.nc(), grad.k(), grad, dest, gradient_input, mode);
     }
 
 // ------------------------------------------------------------------------------------
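A quick usage sketch for reviewers, contrasting the two modes. This assumes the public `tt::softmax` wrapper forwards the same trailing `operation_mode` argument as the `cpu::softmax` shown in this hunk, and that `operation_mode` is visible through the usual `dlib/dnn.h` include; the tensor shape is an arbitrary illustration.

```cpp
#include <dlib/dnn.h>

int main()
{
    using namespace dlib;

    // One sample, 3 channels, and a 2x4 plane per channel.
    resizable_tensor src(1, 3, 2, 4), dest;
    dest.copy_size(src);
    tt::tensor_rand rnd;
    rnd.fill_gaussian(src);

    // CHANNEL_WISE (the pre-existing behavior): at every spatial location
    // (r,c), the 3 values across channels are normalized to sum to 1.
    tt::softmax(dest, src, operation_mode::CHANNEL_WISE);

    // PLANE_WISE (new): every row of every channel plane is normalized
    // independently, so the 4 values in each row sum to 1.
    tt::softmax(dest, src, operation_mode::PLANE_WISE);
}
```

Also worth noting: the `-infinity` guard in the PLANE_WISE branch zero-fills any row whose inputs are all `-inf` (e.g. a fully masked attention row), rather than letting the normalization produce `0/0 = NaN`.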
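Both gradient branches implement the same per-group identity: with `y = softmax(x)` and upstream gradient `gin`, the backward pass is `g[k] = y[k] * (gin[k] - sum_j y[j]*gin[j])`; the `sum` variable accumulates the negated dot product. A self-contained finite-difference check of that identity, independent of dlib:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Softmax of a small vector, with the usual max-subtraction for stability.
static std::vector<float> softmax(std::vector<float> v)
{
    const float m = *std::max_element(v.begin(), v.end());
    float sum = 0;
    for (auto& t : v) { t = std::exp(t - m); sum += t; }
    for (auto& t : v) t /= sum;
    return v;
}

int main()
{
    const std::vector<float> x   = {0.5f, -1.0f, 2.0f};  // logits
    const std::vector<float> gin = {0.1f,  0.2f, 0.3f};  // upstream gradient

    const auto y = softmax(x);
    float dot = 0;
    for (size_t j = 0; j < y.size(); ++j) dot += y[j]*gin[j];

    for (size_t k = 0; k < x.size(); ++k)
    {
        // Analytic form used by softmax_gradient() above.
        const float analytic = y[k]*(gin[k] - dot);

        // Central difference of f(x) = dot(softmax(x), gin) w.r.t. x[k].
        const float eps = 1e-3f;
        auto xp = x, xm = x;
        xp[k] += eps; xm[k] -= eps;
        const auto yp = softmax(xp), ym = softmax(xm);
        float fp = 0, fm = 0;
        for (size_t j = 0; j < x.size(); ++j) { fp += yp[j]*gin[j]; fm += ym[j]*gin[j]; }

        std::printf("k=%zu analytic=% .6f numeric=% .6f\n",
                    k, analytic, (fp - fm)/(2*eps));
    }
}
```

The only difference between the two branches is which slice of the tensor forms a softmax group: CHANNEL_WISE groups the `k` values at a fixed location `(r,c)`, strided by `num_locations`, while PLANE_WISE groups the `nc()` contiguous values of one row of one channel plane.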