Skip to content

Commit 83bba97

Browse files
committed
added k_nearest_neighbors
1 parent ed7a42e commit 83bba97

File tree

3 files changed

+354
-0
lines changed

3 files changed

+354
-0
lines changed

DIRECTORY.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@
163163
* [K Means](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/k_means.rs)
164164
* [Linear Regression](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/linear_regression.rs)
165165
* [Logistic Regression](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/logistic_regression.rs)
166+
* [K Nearest Neighbors](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/k_nearest_neighbors.rs)
166167
* Loss Function
167168
* [Average Margin Ranking Loss](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/loss_function/average_margin_ranking_loss.rs)
168169
* [Hinge Loss](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/loss_function/hinge_loss.rs)
Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
//! K-Nearest Neighbors (KNN) algorithm implementation
2+
//!
3+
//! KNN is a supervised machine learning algorithm used for classification and regression.
4+
//! It predicts the class/value of a data point based on the k nearest neighbors in the feature space.
5+
//!
6+
//! # Examples
7+
//!
8+
//! ```
9+
//! use the_algorithms_rust::machine_learning::{DataPoint, KNearestNeighbors};
10+
//!
11+
//! let mut knn = KNearestNeighbors::new(3);
12+
//!
13+
//! let training_data = vec![
14+
//! DataPoint::new(vec![1.0, 1.0], "A".to_string()),
15+
//! DataPoint::new(vec![2.0, 2.0], "A".to_string()),
16+
//! DataPoint::new(vec![5.0, 5.0], "B".to_string()),
17+
//! ];
18+
//!
19+
//! knn.fit(training_data);
20+
//!
21+
//! let prediction = knn.predict(&vec![1.5, 1.5]);
22+
//! assert_eq!(prediction, Some("A".to_string()));
23+
//! ```
24+
25+
use std::collections::HashMap;
26+
27+
/// A labeled training sample: a feature vector paired with its class label.
#[derive(Debug, Clone, PartialEq)]
pub struct DataPoint {
    /// Feature values describing this sample.
    pub features: Vec<f64>,
    /// Class label assigned to this sample.
    pub label: String,
}
33+
34+
impl DataPoint {
35+
/// Creates a new DataPoint
36+
///
37+
/// # Arguments
38+
///
39+
/// * `features` - Feature vector for the data point
40+
/// * `label` - Class label for the data point
41+
///
42+
/// # Examples
43+
///
44+
/// ```
45+
/// use the_algorithms_rust::machine_learning::DataPoint;
46+
///
47+
/// let point = DataPoint::new(vec![1.0, 2.0], "A".to_string());
48+
/// ```
49+
pub fn new(features: Vec<f64>, label: String) -> Self {
50+
DataPoint { features, label }
51+
}
52+
}
53+
54+
/// K-Nearest Neighbors classifier
55+
///
56+
/// # Examples
57+
///
58+
/// ```
59+
/// use the_algorithms_rust::machine_learning::{DataPoint, KNearestNeighbors};
60+
///
61+
/// let mut knn = KNearestNeighbors::new(3);
62+
/// ```
63+
#[derive(Debug)]
64+
pub struct KNearestNeighbors {
65+
k: usize,
66+
training_data: Vec<DataPoint>,
67+
}
68+
69+
impl KNearestNeighbors {
70+
/// Creates a new KNN classifier with k neighbors
71+
///
72+
/// # Arguments
73+
///
74+
/// * `k` - Number of nearest neighbors to consider
75+
///
76+
/// # Panics
77+
///
78+
/// Panics if k is 0
79+
///
80+
/// # Examples
81+
///
82+
/// ```
83+
/// use the_algorithms_rust::machine_learning::KNearestNeighbors;
84+
///
85+
/// let knn = KNearestNeighbors::new(3);
86+
/// ```
87+
pub fn new(k: usize) -> Self {
88+
assert!(k > 0, "k must be greater than 0");
89+
KNearestNeighbors {
90+
k,
91+
training_data: Vec::new(),
92+
}
93+
}
94+
95+
/// Trains the KNN model with training data
96+
///
97+
/// # Arguments
98+
///
99+
/// * `training_data` - Vector of labeled data points
100+
///
101+
/// # Examples
102+
///
103+
/// ```
104+
/// use the_algorithms_rust::machine_learning::{DataPoint, KNearestNeighbors};
105+
///
106+
/// let mut knn = KNearestNeighbors::new(3);
107+
/// let data = vec![DataPoint::new(vec![1.0, 2.0], "A".to_string())];
108+
/// knn.fit(data);
109+
/// ```
110+
pub fn fit(&mut self, training_data: Vec<DataPoint>) {
111+
self.training_data = training_data;
112+
}
113+
114+
/// Calculates Euclidean distance between two feature vectors
115+
///
116+
/// # Panics
117+
///
118+
/// Panics if feature vectors have different lengths
119+
fn euclidean_distance(&self, a: &[f64], b: &[f64]) -> f64 {
120+
assert_eq!(
121+
a.len(),
122+
b.len(),
123+
"Feature vectors must have the same length"
124+
);
125+
a.iter()
126+
.zip(b.iter())
127+
.map(|(x, y)| (x - y).powi(2))
128+
.sum::<f64>()
129+
.sqrt()
130+
}
131+
132+
/// Predicts the label for a given data point
133+
///
134+
/// Returns `None` if training data is empty
135+
///
136+
/// # Arguments
137+
///
138+
/// * `features` - Feature vector to classify
139+
///
140+
/// # Examples
141+
///
142+
/// ```
143+
/// use the_algorithms_rust::machine_learning::{DataPoint, KNearestNeighbors};
144+
///
145+
/// let mut knn = KNearestNeighbors::new(1);
146+
/// knn.fit(vec![DataPoint::new(vec![1.0, 1.0], "A".to_string())]);
147+
/// let result = knn.predict(&vec![1.5, 1.5]);
148+
/// assert_eq!(result, Some("A".to_string()));
149+
/// ```
150+
pub fn predict(&self, features: &[f64]) -> Option<String> {
151+
if self.training_data.is_empty() {
152+
return None;
153+
}
154+
155+
// Calculate distances to all training points
156+
let mut distances: Vec<(f64, &DataPoint)> = self
157+
.training_data
158+
.iter()
159+
.map(|point| (self.euclidean_distance(features, &point.features), point))
160+
.collect();
161+
162+
// Sort by distance
163+
distances.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
164+
165+
// Take k nearest neighbors
166+
let k_nearest = &distances[..self.k.min(distances.len())];
167+
168+
// Count votes for each label
169+
let mut votes: HashMap<String, usize> = HashMap::new();
170+
for (_, point) in k_nearest {
171+
*votes.entry(point.label.clone()).or_insert(0) += 1;
172+
}
173+
174+
// Return the label with the most votes
175+
votes
176+
.into_iter()
177+
.max_by_key(|(_, count)| *count)
178+
.map(|(label, _)| label)
179+
}
180+
181+
/// Predicts labels for multiple data points
182+
///
183+
/// # Arguments
184+
///
185+
/// * `features_batch` - Slice of feature vectors to classify
186+
///
187+
/// # Examples
188+
///
189+
/// ```
190+
/// use the_algorithms_rust::machine_learning::{DataPoint, KNearestNeighbors};
191+
///
192+
/// let mut knn = KNearestNeighbors::new(1);
193+
/// knn.fit(vec![DataPoint::new(vec![1.0, 1.0], "A".to_string())]);
194+
/// let results = knn.predict_batch(&[vec![1.5, 1.5], vec![1.2, 1.2]]);
195+
/// ```
196+
pub fn predict_batch(&self, features_batch: &[Vec<f64>]) -> Vec<Option<String>> {
197+
features_batch
198+
.iter()
199+
.map(|features| self.predict(features))
200+
.collect()
201+
}
202+
203+
/// Calculates accuracy on test data
204+
///
205+
/// Returns accuracy as a value between 0.0 and 1.0
206+
///
207+
/// # Arguments
208+
///
209+
/// * `test_data` - Test data points with known labels
210+
///
211+
/// # Examples
212+
///
213+
/// ```
214+
/// use the_algorithms_rust::machine_learning::{DataPoint, KNearestNeighbors};
215+
///
216+
/// let mut knn = KNearestNeighbors::new(1);
217+
/// knn.fit(vec![DataPoint::new(vec![1.0, 1.0], "A".to_string())]);
218+
/// let test_data = vec![DataPoint::new(vec![1.1, 1.1], "A".to_string())];
219+
/// let accuracy = knn.score(&test_data);
220+
/// assert!(accuracy > 0.0);
221+
/// ```
222+
pub fn score(&self, test_data: &[DataPoint]) -> f64 {
223+
if test_data.is_empty() {
224+
return 0.0;
225+
}
226+
227+
let correct = test_data
228+
.iter()
229+
.filter(|point| {
230+
if let Some(predicted) = self.predict(&point.features) {
231+
predicted == point.label
232+
} else {
233+
false
234+
}
235+
})
236+
.count();
237+
238+
correct as f64 / test_data.len() as f64
239+
}
240+
}
241+
242+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_knn_simple_classification() {
        let mut knn = KNearestNeighbors::new(3);

        let training_data = vec![
            DataPoint::new(vec![1.0, 1.0], "A".to_string()),
            DataPoint::new(vec![1.5, 1.5], "A".to_string()),
            DataPoint::new(vec![2.0, 2.0], "A".to_string()),
            DataPoint::new(vec![5.0, 5.0], "B".to_string()),
            DataPoint::new(vec![5.5, 5.5], "B".to_string()),
            DataPoint::new(vec![6.0, 6.0], "B".to_string()),
        ];

        knn.fit(training_data);

        // Slices are enough here; `&vec![…]` would allocate needlessly
        // (clippy::useless_vec).
        assert_eq!(knn.predict(&[1.2, 1.2]).unwrap(), "A");
        assert_eq!(knn.predict(&[5.2, 5.2]).unwrap(), "B");
    }

    #[test]
    fn test_euclidean_distance() {
        let knn = KNearestNeighbors::new(1);
        // 3-4-5 right triangle.
        let distance = knn.euclidean_distance(&[0.0, 0.0], &[3.0, 4.0]);
        assert!((distance - 5.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_knn_with_k_equals_one() {
        let mut knn = KNearestNeighbors::new(1);

        let training_data = vec![
            DataPoint::new(vec![1.0, 1.0], "A".to_string()),
            DataPoint::new(vec![10.0, 10.0], "B".to_string()),
        ];

        knn.fit(training_data);

        assert_eq!(knn.predict(&[1.5, 1.5]).unwrap(), "A");
        assert_eq!(knn.predict(&[9.5, 9.5]).unwrap(), "B");
    }

    #[test]
    fn test_knn_accuracy() {
        let mut knn = KNearestNeighbors::new(3);

        let training_data = vec![
            DataPoint::new(vec![1.0, 1.0], "A".to_string()),
            DataPoint::new(vec![1.5, 1.5], "A".to_string()),
            DataPoint::new(vec![2.0, 2.0], "A".to_string()),
            DataPoint::new(vec![5.0, 5.0], "B".to_string()),
            DataPoint::new(vec![5.5, 5.5], "B".to_string()),
            DataPoint::new(vec![6.0, 6.0], "B".to_string()),
        ];

        knn.fit(training_data);

        let test_data = vec![
            DataPoint::new(vec![1.2, 1.2], "A".to_string()),
            DataPoint::new(vec![5.2, 5.2], "B".to_string()),
        ];

        let accuracy = knn.score(&test_data);
        assert!((accuracy - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_predict_batch() {
        let mut knn = KNearestNeighbors::new(3);

        let training_data = vec![
            DataPoint::new(vec![1.0, 1.0], "A".to_string()),
            DataPoint::new(vec![2.0, 2.0], "A".to_string()),
            DataPoint::new(vec![5.0, 5.0], "B".to_string()),
            DataPoint::new(vec![6.0, 6.0], "B".to_string()),
        ];

        knn.fit(training_data);

        let features_batch = vec![vec![1.5, 1.5], vec![5.5, 5.5]];
        let predictions = knn.predict_batch(&features_batch);

        assert_eq!(predictions[0].as_ref().unwrap(), "A");
        assert_eq!(predictions[1].as_ref().unwrap(), "B");
    }

    #[test]
    #[should_panic(expected = "k must be greater than 0")]
    fn test_knn_zero_k() {
        KNearestNeighbors::new(0);
    }

    #[test]
    fn test_empty_training_data() {
        let knn = KNearestNeighbors::new(3);
        assert!(knn.predict(&[1.0, 1.0]).is_none());
    }

    #[test]
    #[should_panic(expected = "Feature vectors must have the same length")]
    fn test_mismatched_feature_lengths() {
        let knn = KNearestNeighbors::new(1);
        knn.euclidean_distance(&[1.0, 2.0], &[1.0]);
    }
}

src/machine_learning/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ mod linear_regression;
44
mod logistic_regression;
55
mod loss_function;
66
mod optimization;
7+
mod k_nearest_neighbors;
8+
79

810
pub use self::cholesky::cholesky;
911
pub use self::k_means::k_means;
@@ -18,3 +20,5 @@ pub use self::loss_function::mse_loss;
1820
pub use self::loss_function::neg_log_likelihood;
1921
pub use self::optimization::gradient_descent;
2022
pub use self::optimization::Adam;
23+
pub use self::k_nearest_neighbors::{DataPoint, KNearestNeighbors};
24+

0 commit comments

Comments
 (0)