Skip to content
This repository was archived by the owner on Aug 11, 2025. It is now read-only.

Commit d3a3c3b

Browse files
cesarsouza authored and migueldeicaza committed
Adding support for tf.sigmoid_cross_entropy_with_logits and tf.where (#128)
* GH-127: Add support for tf.sigmoid_cross_entropy_with_logits and tf.where * Adding a note stating that part of the original TF implementation has been left behind since it wasn't needed for TFSharp.
1 parent 60299b7 commit d3a3c3b

File tree

2 files changed

+101
-0
lines changed

2 files changed

+101
-0
lines changed

TensorFlowSharp/OperationsExtras.cs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,77 @@ public TFOutput ClipByAverageNorm (TFOutput x, TFOutput clip_norm, string operNa
488488
}
489489
}
490490

491+
/// <summary>
/// Computes sigmoid cross entropy given `logits`.
/// </summary>
///
/// <remarks>
/// Measures the probability error in discrete classification tasks in which each
/// class is independent and not mutually exclusive. For instance, one could
/// perform multilabel classification where a picture can contain both an elephant
/// and a dog at the same time.
/// </remarks>
///
public TFOutput SigmoidCrossEntropyWithLogits (TFOutput labels, TFOutput logits, string operName = null)
{
	// Ported from: https://github.com/tensorflow/tensorflow/blob/r1.3/tensorflow/python/ops/nn_impl.py#L100

	var scopeName = this.MakeName ("logistic_loss", operName);
	using (var newScope = this.WithScope (scopeName)) {
		// Note: unlike the original Python implementation, no convert_to_tensor
		// calls or shape-merging validation are needed here: the TensorFlowSharp
		// API guarantees by design that both labels and logits arrive as TFOutput.

		// The logistic loss formula is
		//   x - x * z + log(1 + exp(-x))
		// and for x < 0 the numerically stable equivalent is
		//   -x * z + log(1 + exp(x)).
		// The two cases collapse into the single expression
		//   max(x, 0) - x * z + log(1 + exp(-abs(x)))
		// where max and abs are built from Where so that gradients at
		// zero remain well defined.
		TFOutput zeros = this.ZerosLike (logits);
		TFOutput isNonNegative = this.GreaterEqual (logits, zeros);

		// max(x, 0) and -abs(x), expressed as element-wise selections.
		TFOutput reluLogits = this.Where (isNonNegative, logits, zeros);
		TFOutput negAbsLogits = this.Where (isNonNegative, this.Neg (logits), logits);

		TFOutput linearTerm = this.Sub (reluLogits, this.Mul (logits, labels));
		TFOutput softplusTerm = this.Log1p (this.Exp (negAbsLogits));
		return this.Add (linearTerm, softplusTerm, operName: operName);
	}
}
540+
541+
/// <summary>
/// Return elements from x or y depending on condition.
/// </summary>
///
/// <param name="condition">Output of type `bool`.</param>
/// <param name="x">Output providing values where condition is true, or null.</param>
/// <param name="y">Output providing values where condition is false, or null.</param>
/// <param name="name">Optional op name.</param>
///
/// <returns>
/// The output with values selected according to condition. When both x and y
/// are null, the result of the single-input Where op applied to condition.
/// </returns>
///
/// <exception cref="ArgumentException">
/// Thrown when exactly one of x and y is null: callers must supply both
/// branches or neither.
/// </exception>
///
public TFOutput Where (TFOutput condition, TFOutput? x, TFOutput? y, string name = null)
{
	// https://github.com/tensorflow/tensorflow/blob/d4ce3b4681b3a550c095b2cd18a79494d1cc4039/tensorflow/python/ops/array_ops.py#L2342
	if (x == null && y == null)
		return this.Where (input: condition, operName: name);
	else if (x != null && y != null)
		return this.Select (condition: condition, t: x.Value, e: y.Value, operName: name);
	// Mixed usage is invalid; the message uses C# "null" terminology rather
	// than the Python "None" wording of the original implementation.
	throw new ArgumentException ("x and y must both be non-null or both be null.");
}
561+
491562
/// <summary>
492563
/// Stacks a list of rank-`R` tensors into one rank-`(R+1)` tensor.
493564
/// </summary>

tests/TensorFlowSharp.Tests.CSharp/MathTests.cs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,5 +71,35 @@ public void Should_ReduceMean (double [,] input, int? axis, object expected)
7171
}
7272
}
7373
}
74+
75+
// Test rows: { labels, logits, expected losses }; a null expected value
// marks a case whose mismatched shapes must make the session throw.
private static IEnumerable<object []> sigmoidCrossEntropyData ()
{
	var rows = new [] {
		new object [] { new [] { 1.0, 0.0, 1.0, 1.0 }, new [] { 1.0, 0.0, 1.0, 1.0 }, new [] { 0.31326168751822281, 0.69314718055994529, 0.31326168751822281, 0.31326168751822281 } },
		new object [] { new [] { 1.0, 0.0, 1.0, 1.0 }, new [] { -0.2, 4.2, 0.0, 0.0 }, new [] { 0.79813886938159184, 4.2148842546719187, 0.69314718055994529, 0.69314718055994529 } },
		new object [] { new [] { 1.0, 0.0 }, new [] { -2.1, -2, -4, 3.0 }, null },
	};

	foreach (var row in rows)
		yield return row;
}
81+
82+
[Theory]
[MemberData (nameof (sigmoidCrossEntropyData))]
public void Should_SigmoidCrossEntropyWithLogits (double [] labels, double [] logits, double [] expected)
{
	using (var graph = new TFGraph ())
	using (var session = new TFSession (graph)) {
		var labelsPlaceholder = graph.Placeholder (TFDataType.Double, new TFShape (2, 2));
		var logitsPlaceholder = graph.Placeholder (TFDataType.Double, new TFShape (2, 2));

		TFOutput loss = graph.SigmoidCrossEntropyWithLogits (labelsPlaceholder, logitsPlaceholder);

		if (expected == null) {
			// Mismatched label/logit shapes must surface as a TFException when run.
			Assert.Throws<TFException> (() => session.Run (new [] { labelsPlaceholder, logitsPlaceholder }, new TFTensor [] { labels, logits }, new [] { loss }));
		} else {
			TFTensor [] output = session.Run (new [] { labelsPlaceholder, logitsPlaceholder }, new TFTensor [] { labels, logits }, new [] { loss });

			var actual = (double [])output [0].GetValue ();
			TestUtils.MatrixEqual (expected, actual, precision: 8);
		}
	}
}
103+
74104
}
75105
}

0 commit comments

Comments
 (0)