Skip to content

Commit fc30fb4

Browse files
feat(cross entropy): introduce (#14)
1 parent 4daaca5 commit fc30fb4

File tree

8 files changed

+159
-10
lines changed

8 files changed

+159
-10
lines changed

examples/cifar-10.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import { readFileSync } from "node:fs";
2+
23
import { Model } from "../src/core/mod.ts";
34
import { Dense, ReLU, Softmax } from "../src/layers/mod.ts";
45
import { Adam } from "../src/optimizes/mod.ts";
5-
import { MeanSquaredError } from "../src/losses/mod.ts";
6+
import { CrossEntropyLoss } from "../src/losses/mod.ts";
67

78
const CIFAR_IMAGE_HEIGHT = 32;
89
const CIFAR_IMAGE_WIDTH = 32;
@@ -200,7 +201,7 @@ model.addLayer(new Softmax());
200201
// 3. Compile the Model
201202
model.compile(
202203
new Adam(0.001), // Adam optimizer
203-
new MeanSquaredError(), // Using MSE as it's available, though CrossEntropy is typical for classification
204+
new CrossEntropyLoss(), // Cross-entropy loss for multi-class classification
204205
["accuracy"], // Metric
205206
);
206207

examples/cifar-100.ts

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import { readFileSync } from "node:fs";
2+
23
import { Model } from "../src/core/mod.ts";
34
import { Dense, ReLU, Softmax } from "../src/layers/mod.ts";
45
import { Adam } from "../src/optimizes/mod.ts";
5-
import { MeanSquaredError } from "../src/losses/mod.ts";
6+
import { CrossEntropyLoss } from "../src/losses/mod.ts";
67

78
const CIFAR100_IMAGE_HEIGHT = 32;
89
const CIFAR100_IMAGE_WIDTH = 32;
@@ -199,11 +200,7 @@ model.addLayer(new Dense(128, CIFAR100_NUM_FINE_CLASSES)); // Hidden layer to ou
199200
model.addLayer(new Softmax());
200201

201202
// 3. Compile the Model
202-
model.compile(
203-
new Adam(0.001),
204-
new MeanSquaredError(), // CrossEntropyLoss would be more appropriate for multi-class
205-
["accuracy"],
206-
);
203+
model.compile(new Adam(0.001), new CrossEntropyLoss(), ["accuracy"]);
207204

208205
// 4. Train the Model
209206
console.log("Starting model training...");

examples/mnist.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { readFileSync } from "node:fs";
22
import { Model } from "../src/core/mod.ts";
33
import { Dense, ReLU, Softmax } from "../src/layers/mod.ts";
44
import { Adam } from "../src/optimizes/mod.ts";
5-
import { MeanSquaredError } from "../src/losses/mod.ts";
5+
import { CrossEntropyLoss } from "../src/losses/mod.ts";
66

77
const MNIST_IMAGE_MAGIC_NUMBER = 2051;
88
const MNIST_LABEL_MAGIC_NUMBER = 2049;
@@ -174,7 +174,7 @@ model.addLayer(new Softmax()); // Softmax for multi-class probability output
174174
// 3. Compile the Model
175175
model.compile(
176176
new Adam(0.001), // Adam optimizer
177-
new MeanSquaredError(), // Using MSE as it's available. CrossEntropyLoss is often preferred for classification.
177+
new CrossEntropyLoss(), // Cross-entropy loss for multi-class classification
178178
["accuracy"], // Metric
179179
);
180180

src/losses/binary_cross_entropy.test.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import assert from "node:assert/strict";
2+
import { describe, it } from "node:test";
3+
import { BinaryCrossEntropyLoss } from "./binary_cross_entropy.ts";
4+
5+
describe("BinaryCrossEntropyLoss", () => {
6+
it("should calculate the binary cross-entropy loss correctly", () => {
7+
const predictions = [0.9, 0.2, 0.8];
8+
const targets = [1, 0, 1];
9+
const binaryCrossEntropy = new BinaryCrossEntropyLoss();
10+
const loss = binaryCrossEntropy.calculate(predictions, targets);
11+
assert.strictEqual(loss, 0.18388253942874858);
12+
});
13+
14+
it("should throw an error for different length arrays", () => {
15+
const predictions = [0.9, 0.2];
16+
const targets = [1, 0, 1];
17+
const binaryCrossEntropy = new BinaryCrossEntropyLoss();
18+
assert.throws(
19+
() => {
20+
binaryCrossEntropy.calculate(predictions, targets);
21+
},
22+
{
23+
message: "Predictions and targets must have the same length.",
24+
},
25+
);
26+
});
27+
28+
it("should return 0 for empty arrays", () => {
29+
const predictions: number[] = [];
30+
const targets: number[] = [];
31+
const binaryCrossEntropy = new BinaryCrossEntropyLoss();
32+
const loss = binaryCrossEntropy.calculate(predictions, targets);
33+
assert.strictEqual(loss, 0);
34+
});
35+
});

src/losses/binary_cross_entropy.ts

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/**
2+
* BinaryCrossEntropyLoss calculates the binary cross-entropy loss between predictions and target values.
3+
* This loss is commonly used for binary classification tasks.
4+
*
5+
* Formula:
6+
* `L = -Σ(y_true * log(y_pred) + (1 - y_true) * log(1 - y_pred))`
7+
*
8+
* @example
9+
* ```typescript
10+
* const binaryCrossEntropy = new BinaryCrossEntropyLoss();
11+
* const predictions = [0.9, 0.2, 0.8];
12+
* const targets = [1, 0, 1];
13+
* const loss = binaryCrossEntropy.calculate(predictions, targets);
14+
* console.log("Binary CrossEntropy Loss:", loss); // Output: ~0.1839
15+
* ```
16+
*/
17+
export class BinaryCrossEntropyLoss {
18+
/**
19+
* Calculates the binary cross-entropy loss.
20+
* @param predictions An array of predicted probabilities (values between 0 and 1).
21+
* @param targets An array of binary target values (0 or 1).
22+
* @returns The calculated binary cross-entropy loss.
23+
* @throws Error if the predictions and targets arrays do not have the same length.
24+
*/
25+
calculate(predictions: number[], targets: number[]): number {
26+
if (predictions.length !== targets.length) {
27+
throw new Error("Predictions and targets must have the same length.");
28+
}
29+
30+
if (predictions.length === 0 || targets.length === 0) {
31+
return 0; // Return 0 for empty arrays
32+
}
33+
34+
const epsilon = 1e-12; // To avoid log(0)
35+
let loss = 0;
36+
37+
for (let i = 0; i < predictions.length; i++) {
38+
const yTrue = targets[i];
39+
const yPred = Math.min(Math.max(predictions[i], epsilon), 1 - epsilon); // Clamp predictions to avoid log(0)
40+
loss -= yTrue * Math.log(yPred) + (1 - yTrue) * Math.log(1 - yPred);
41+
}
42+
43+
return loss / predictions.length; // Average loss
44+
}
45+
}

src/losses/cross_entropy.test.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import assert from "node:assert/strict";
2+
import { describe, it } from "node:test";
3+
import { CrossEntropyLoss } from "./cross_entropy.ts";
4+
5+
describe("CrossEntropyLoss", () => {
6+
it("should calculate the correct loss for given predictions and targets", () => {
7+
const crossEntropy = new CrossEntropyLoss();
8+
const predictions = [0.7, 0.2, 0.1];
9+
const targets = [1, 0, 0];
10+
const loss = crossEntropy.calculate(predictions, targets);
11+
assert.strictEqual(loss, 0.1188916479791013);
12+
});
13+
14+
it("should throw an error if predictions and targets have different lengths", () => {
15+
const crossEntropy = new CrossEntropyLoss();
16+
const predictions = [0.7, 0.2];
17+
const targets = [1, 0, 0];
18+
assert.throws(
19+
() => {
20+
crossEntropy.calculate(predictions, targets);
21+
},
22+
{
23+
message: "Predictions and targets must have the same length.",
24+
},
25+
);
26+
});
27+
});

src/losses/cross_entropy.ts

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/**
2+
* CrossEntropyLoss calculates the cross-entropy loss between predictions and target values.
3+
* This loss is commonly used for classification tasks.
4+
*
5+
* Formula:
6+
* `L = -Σ(y_true * log(y_pred))`
7+
*
8+
* @example
9+
* ```typescript
10+
* const crossEntropy = new CrossEntropyLoss();
11+
* const predictions = [0.7, 0.2, 0.1];
12+
* const targets = [1, 0, 0];
13+
* const loss = crossEntropy.calculate(predictions, targets);
14+
* console.log("CrossEntropy Loss:", loss); // Output: ~0.3567
15+
* ```
16+
*/
17+
export class CrossEntropyLoss {
18+
/**
19+
* Calculates the cross-entropy loss.
20+
* @param predictions An array of predicted probabilities (must sum to 1).
21+
* @param targets An array of one-hot encoded target values.
22+
* @returns The calculated cross-entropy loss, summed across all samples.
23+
* @throws Error if the predictions and targets arrays do not have the same length.
24+
*/
25+
calculate(predictions: number[], targets: number[]): number {
26+
if (predictions.length !== targets.length) {
27+
throw new Error("Predictions and targets must have the same length.");
28+
}
29+
30+
let loss = 0;
31+
for (let i = 0; i < predictions.length; i++) {
32+
if (targets[i] === 1) {
33+
// Avoid log(0) by adding a small epsilon
34+
const epsilon = 1e-12;
35+
loss -= Math.log(predictions[i] + epsilon);
36+
}
37+
}
38+
39+
// Normalize the loss by the number of samples
40+
return loss / predictions.length;
41+
}
42+
}

src/losses/mod.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
export * from "./mse.ts";
2+
export * from "./cross_entropy.ts";
3+
export * from "./binary_cross_entropy.ts";

0 commit comments

Comments
 (0)