diff --git a/Algorithms.Tests/MachineLearning/KNearestNeighborsTests.cs b/Algorithms.Tests/MachineLearning/KNearestNeighborsTests.cs
new file mode 100644
index 00000000..047e8860
--- /dev/null
+++ b/Algorithms.Tests/MachineLearning/KNearestNeighborsTests.cs
@@ -0,0 +1,86 @@
+using NUnit.Framework;
+using Algorithms.MachineLearning;
+using System;
+
+namespace Algorithms.Tests.MachineLearning;
+
+[TestFixture]
+public class KNearestNeighborsTests
+{
+    [Test]
+    public void Constructor_InvalidK_ThrowsException()
+    {
+        Assert.Throws<ArgumentOutOfRangeException>(() => new KNearestNeighbors<string>(0));
+    }
+
+    [Test]
+    public void AddSample_NullFeatures_ThrowsException()
+    {
+        var knn = new KNearestNeighbors<string>(3);
+        double[]? features = null;
+        Assert.Throws<ArgumentNullException>(() => knn.AddSample(features!, "A"));
+    }
+
+    [Test]
+    public void Predict_NoTrainingData_ThrowsException()
+    {
+        var knn = new KNearestNeighbors<string>(1);
+        Assert.Throws<InvalidOperationException>(() => knn.Predict(new[] { 1.0 }));
+    }
+
+    [Test]
+    public void Predict_NullFeatures_ThrowsException()
+    {
+        var knn = new KNearestNeighbors<string>(1);
+        knn.AddSample(new[] { 1.0 }, "A");
+        double[]? features = null;
+        Assert.Throws<ArgumentNullException>(() => knn.Predict(features!));
+    }
+
+    [Test]
+    public void EuclideanDistance_DifferentLengths_ThrowsException()
+    {
+        Assert.Throws<ArgumentException>(() => KNearestNeighbors<string>.EuclideanDistance(new[] { 1.0 }, new[] { 1.0, 2.0 }));
+    }
+
+    [Test]
+    public void EuclideanDistance_CorrectResult()
+    {
+        double[] a = { 1.0, 2.0 };
+        double[] b = { 4.0, 6.0 };
+        double expected = 5.0;
+        double actual = KNearestNeighbors<string>.EuclideanDistance(a, b);
+        Assert.That(actual, Is.EqualTo(expected).Within(1e-9));
+    }
+
+    [Test]
+    public void Predict_SingleNeighbor_CorrectLabel()
+    {
+        var knn = new KNearestNeighbors<string>(1);
+        knn.AddSample(new[] { 1.0, 2.0 }, "A");
+        knn.AddSample(new[] { 3.0, 4.0 }, "B");
+        var label = knn.Predict(new[] { 1.1, 2.1 });
+        Assert.That(label, Is.EqualTo("A"));
+    }
+
+    [Test]
+    public void Predict_MajorityVote_CorrectLabel()
+    {
+        var knn = new KNearestNeighbors<string>(3);
+        knn.AddSample(new[] { 0.0, 0.0 }, "A");
+        knn.AddSample(new[] { 0.1, 0.1 }, "A");
+        knn.AddSample(new[] { 1.0, 1.0 }, "B");
+        var label = knn.Predict(new[] { 0.05, 0.05 });
+        Assert.That(label, Is.EqualTo("A"));
+    }
+
+    [Test]
+    public void Predict_TieBreaker_ReturnsConsistentLabel()
+    {
+        var knn = new KNearestNeighbors<string>(2);
+        knn.AddSample(new[] { 0.0, 0.0 }, "A");
+        knn.AddSample(new[] { 1.0, 1.0 }, "B");
+        var label = knn.Predict(new[] { 0.5, 0.5 });
+        Assert.That(label, Is.EqualTo("A"));
+    }
+}
diff --git a/Algorithms/MachineLearning/KNearestNeighbors.cs b/Algorithms/MachineLearning/KNearestNeighbors.cs
new file mode 100644
index 00000000..fde78326
--- /dev/null
+++ b/Algorithms/MachineLearning/KNearestNeighbors.cs
@@ -0,0 +1,108 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Algorithms.MachineLearning;
+
+/// <summary>
+/// K-Nearest Neighbors (KNN) classifier implementation.
+/// This algorithm classifies data points based on the majority label of their k nearest neighbors.
+/// </summary>
+/// <typeparam name="TLabel">
+/// The type of the label used for classification. This can be any type that represents the class or category of a sample.
+/// </typeparam>
+public class KNearestNeighbors<TLabel>
+{
+    private readonly List<(double[] Features, TLabel Label)> trainingData = new();
+    private readonly int k;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="KNearestNeighbors{TLabel}"/> classifier.
+    /// </summary>
+    /// <param name="k">Number of neighbors to consider for classification.</param>
+    /// <exception cref="ArgumentOutOfRangeException">Thrown if k is less than 1.</exception>
+    public KNearestNeighbors(int k)
+    {
+        if (k < 1)
+        {
+            throw new ArgumentOutOfRangeException(nameof(k), "k must be at least 1.");
+        }
+
+        this.k = k;
+    }
+
+    /// <summary>
+    /// Calculates the Euclidean distance between two feature vectors.
+    /// </summary>
+    /// <param name="a">First feature vector.</param>
+    /// <param name="b">Second feature vector.</param>
+    /// <returns>Euclidean distance.</returns>
+    /// <exception cref="ArgumentException">Thrown if vectors are of different lengths.</exception>
+    public static double EuclideanDistance(double[] a, double[] b)
+    {
+        if (a.Length != b.Length)
+        {
+            throw new ArgumentException("Feature vectors must be of the same length.");
+        }
+
+        double sum = 0;
+        for (int i = 0; i < a.Length; i++)
+        {
+            double diff = a[i] - b[i];
+            sum += diff * diff;
+        }
+
+        return Math.Sqrt(sum);
+    }
+
+    /// <summary>
+    /// Adds a training sample to the classifier.
+    /// </summary>
+    /// <param name="features">Feature vector of the sample.</param>
+    /// <param name="label">Label of the sample.</param>
+    public void AddSample(double[] features, TLabel label)
+    {
+        if (features == null)
+        {
+            throw new ArgumentNullException(nameof(features));
+        }
+
+        trainingData.Add((features, label));
+    }
+
+    /// <summary>
+    /// Predicts the label for a given feature vector using the KNN algorithm.
+    /// </summary>
+    /// <param name="features">Feature vector to classify.</param>
+    /// <returns>Predicted label.</returns>
+    /// <exception cref="InvalidOperationException">Thrown if there is no training data.</exception>
+    public TLabel Predict(double[] features)
+    {
+        if (trainingData.Count == 0)
+        {
+            throw new InvalidOperationException("No training data available.");
+        }
+
+        if (features == null)
+        {
+            throw new ArgumentNullException(nameof(features));
+        }
+
+        // Compute distances to all training samples and keep the k nearest
+        var distances = trainingData
+            .Select(td => (Label: td.Label, Distance: EuclideanDistance(features, td.Features)))
+            .OrderBy(x => x.Distance)
+            .Take(k)
+            .ToList();
+
+        // Majority vote. GroupBy and OrderByDescending are stable in LINQ to Objects,
+        // so ties deterministically resolve to the label seen first in the training data.
+        // (Breaking ties via GetHashCode would be non-deterministic across processes,
+        // since string hash codes are randomized per run on modern .NET.)
+        var labelCounts = distances
+            .GroupBy(x => x.Label)
+            .Select(g => new { Label = g.Key, Count = g.Count() })
+            .OrderByDescending(x => x.Count)
+            .ToList();
+
+        return labelCounts.First().Label;
+    }
+}
diff --git a/README.md b/README.md
index 88a3787e..db32a559 100644
--- a/README.md
+++ b/README.md
@@ -108,6 +108,7 @@ find more than one implementation for the same objective but using different alg
     * [CollaborativeFiltering](./Algorithms/RecommenderSystem/CollaborativeFiltering)
   * [Machine Learning](./Algorithms/MachineLearning)
     * [Linear Regression](./Algorithms/MachineLearning/LinearRegression.cs)
+    * [K-Nearest Neighbors](./Algorithms/MachineLearning/KNearestNeighbors.cs)
     * [Logistic Regression](./Algorithms/MachineLearning/LogisticRegression.cs)
   * [Searches](./Algorithms/Search)
     * [A-Star](./Algorithms/Search/AStar/)
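
For reviewers, a minimal usage sketch of the API this diff adds (hypothetical driver code, not part of the change set; the coordinates and "red"/"blue" labels are made up for illustration):

    using System;
    using Algorithms.MachineLearning;

    // Train a 3-NN classifier on a few 2-D points with string labels.
    var knn = new KNearestNeighbors<string>(3);
    knn.AddSample(new[] { 1.0, 1.0 }, "red");
    knn.AddSample(new[] { 1.2, 0.9 }, "red");
    knn.AddSample(new[] { 8.0, 8.5 }, "blue");
    knn.AddSample(new[] { 7.9, 8.1 }, "blue");

    // The three nearest neighbors of (1.1, 1.0) are the two "red" samples
    // plus one "blue" sample, so the majority vote returns "red".
    Console.WriteLine(knn.Predict(new[] { 1.1, 1.0 })); // red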