From e621502ba37e95d9002c1582e5a08b42e97d4742 Mon Sep 17 00:00:00 2001 From: yabetancourt Date: Mon, 8 Dec 2025 16:28:15 +0100 Subject: [PATCH] BAEL-9501 Cosine similarity implementation --- algorithms-modules/algorithms-numeric/pom.xml | 12 +++ .../CosineSimilarityUnitTest.java | 80 +++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 algorithms-modules/algorithms-numeric/src/test/java/com/baeldung/algorithms/cosinesimilarity/CosineSimilarityUnitTest.java diff --git a/algorithms-modules/algorithms-numeric/pom.xml b/algorithms-modules/algorithms-numeric/pom.xml index 444a3ccd19e7..968ce788a1b6 100644 --- a/algorithms-modules/algorithms-numeric/pom.xml +++ b/algorithms-modules/algorithms-numeric/pom.xml @@ -24,10 +24,22 @@ jmh-generator-annprocess ${jmh.version} + + org.nd4j + nd4j-api + ${nd4j.version} + + + org.nd4j + nd4j-native + ${nd4j.version} + runtime + 1.35 + 1.0.0-M2.1 \ No newline at end of file diff --git a/algorithms-modules/algorithms-numeric/src/test/java/com/baeldung/algorithms/cosinesimilarity/CosineSimilarityUnitTest.java b/algorithms-modules/algorithms-numeric/src/test/java/com/baeldung/algorithms/cosinesimilarity/CosineSimilarityUnitTest.java new file mode 100644 index 000000000000..ec7fe65a50bb --- /dev/null +++ b/algorithms-modules/algorithms-numeric/src/test/java/com/baeldung/algorithms/cosinesimilarity/CosineSimilarityUnitTest.java @@ -0,0 +1,80 @@ +package com.baeldung.algorithms.cosinesimilarity; + +import org.junit.jupiter.api.Test; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.impl.reduce3.CosineSimilarity; +import org.nd4j.linalg.factory.Nd4j; + +import java.util.Arrays; +import java.util.stream.IntStream; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class CosineSimilarityUnitTest { + + static final double[] VECTOR_A = {3, 4}; + static final double[] VECTOR_B = {5, 12}; + static final double EXPECTED_SIMILARITY = 0.9692307692307692; + + static double calculateCosineSimilarity(double[] vectorA, double[] vectorB) { + if (vectorA == null || vectorB == null || vectorA.length != vectorB.length || vectorA.length == 0) { + throw new IllegalArgumentException("Vectors must be non-null, non-empty, and of the same length."); + } + double dotProduct = 0.0; + double magnitudeA = 0.0; + double magnitudeB = 0.0; + for (int i = 0; i < vectorA.length; i++) { + dotProduct += vectorA[i] * vectorB[i]; + magnitudeA += vectorA[i] * vectorA[i]; + magnitudeB += vectorB[i] * vectorB[i]; + } + double finalMagnitudeA = Math.sqrt(magnitudeA); + double finalMagnitudeB = Math.sqrt(magnitudeB); + if (finalMagnitudeA == 0.0 || finalMagnitudeB == 0.0) { + return 0.0; + } + return dotProduct / (finalMagnitudeA * finalMagnitudeB); + } + + public static double calculateCosineSimilarityWithStreams(double[] vectorA, double[] vectorB) { + if (vectorA == null || vectorB == null || vectorA.length != vectorB.length || vectorA.length == 0) { + throw new IllegalArgumentException("Vectors must be non-null, non-empty, and of the same length."); + } + + double dotProduct = IntStream.range(0, vectorA.length).mapToDouble(i -> vectorA[i] * vectorB[i]).sum(); + double magnitudeA = Arrays.stream(vectorA).map(v -> v * v).sum(); + double magnitudeB = IntStream.range(0, vectorA.length).mapToDouble(i -> vectorB[i] * vectorB[i]).sum(); + double finalMagnitudeA = Math.sqrt(magnitudeA); + double finalMagnitudeB = Math.sqrt(magnitudeB); + if (finalMagnitudeA == 0.0 || finalMagnitudeB == 0.0) { + return 0.0; + } + + return dotProduct / (finalMagnitudeA * finalMagnitudeB); + } + + @Test + void givenTwoHighlySimilarVectors_whenCalculatedNatively_thenReturnsHighSimilarityScore() { + double actualSimilarity = calculateCosineSimilarity(VECTOR_A, VECTOR_B); + assertEquals(EXPECTED_SIMILARITY, actualSimilarity, 1e-15); + } + + @Test + void givenTwoHighlySimilarVectors_whenCalculatedNativelyWithStreams_thenReturnsHighSimilarityScore() { + double actualSimilarity = calculateCosineSimilarityWithStreams(VECTOR_A, VECTOR_B); + assertEquals(EXPECTED_SIMILARITY, actualSimilarity, 1e-15); + } + + @Test + void givenTwoHighlySimilarVectors_whenCalculatedNativelyWithCommonsMath_thenReturnsHighSimilarityScore() { + + INDArray vec1 = Nd4j.create(VECTOR_A); + INDArray vec2 = Nd4j.create(VECTOR_B); + + CosineSimilarity cosSim = new CosineSimilarity(vec1, vec2); + double actualSimilarity = Nd4j.getExecutioner().exec(cosSim).getDouble(0); + + assertEquals(EXPECTED_SIMILARITY, actualSimilarity, 1e-15); + } + +} \ No newline at end of file