86 changes: 86 additions & 0 deletions Kmeans_Implentation_CSRT
@@ -0,0 +1,86 @@
import random
import math
import matplotlib.pyplot as plt

def euclidean_distance(point1, point2):
    """Compute the Euclidean distance between two points."""
    return math.sqrt(sum((p1 - p2) ** 2 for p1, p2 in zip(point1, point2)))

def initialize_clusters(data, k):
    """Randomly pick k data points as the initial cluster centers."""
    centers = random.sample(data, k)
    return centers

def assign_clusters(data, centers):
    """Assign each data point to the nearest cluster center."""
    clusters = [[] for _ in range(len(centers))]
    distances = [[] for _ in range(len(centers))]
    for point in data:
        # Track the index of the closest center directly; looking it up afterwards
        # with centers.index() is slower and breaks if two centers are identical.
        nearest_index = 0
        nearest_distance = float('inf')
        for j, center in enumerate(centers):
            distance = euclidean_distance(point, center)
            if distance < nearest_distance:
                nearest_index = j
                nearest_distance = distance
        clusters[nearest_index].append(point)
        distances[nearest_index].append(nearest_distance)
    return clusters, distances

def recalculate_centers(clusters):
    """Recalculate each cluster center as the mean of its assigned points."""
    new_centers = []
    for cluster in clusters:
        if cluster:
            new_centers.append([sum(feature) / len(cluster) for feature in zip(*cluster)])
        else:
            # Re-seed an empty cluster from a random point of a non-empty cluster;
            # indexing into an arbitrary cluster can fail when that cluster is empty too.
            non_empty = [c for c in clusters if c]
            new_centers.append(list(random.choice(random.choice(non_empty))))
    return new_centers

def k_means_clustering(data, k, max_iterations=100, tolerance=1e-5):
    """Run k-means and return the final clusters and cluster centers."""
    centers = initialize_clusters(data, k)
    clusters = []
    for _ in range(max_iterations):
        clusters, _ = assign_clusters(data, centers)
        old_centers = centers
        centers = recalculate_centers(clusters)
        # Stop once every center has moved by less than the tolerance.
        if all(euclidean_distance(old, new) < tolerance
               for old, new in zip(old_centers, centers)):
            break
    return clusters, centers

def generate_data(n, k, min_val, max_val):
    """Generate a 2D dataset of roughly n points grouped around k random centers."""
    data = []
    centers = []
    for _ in range(k):
        center = [random.uniform(min_val, max_val) for _ in range(2)]
        centers.append(center)
        for _ in range(n // k):
            point = list(center)
            point[0] += random.uniform(-0.5, 0.5)
            point[1] += random.uniform(-0.5, 0.5)
            data.append(point)
    return data, centers

def plot_clusters(data, centers, clusters):
    """Plot the clustered points and mark each cluster center with an 'x'."""
    plt.figure(figsize=(8, 6))
    colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k']
    for i, cluster in enumerate(clusters):
        for point in cluster:
            # Cycle through the color list so more than seven clusters still plot.
            plt.scatter(point[0], point[1], color=colors[i % len(colors)], alpha=0.5)
        plt.scatter(centers[i][0], centers[i][1], marker='x', color='black', s=100)
    plt.title('K-Means Clustering')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.show()

def test_k_means_clustering():
    """Run k-means on synthetic data and plot the learned clusters and centers."""
    data, _ = generate_data(100, 3, 0, 10)
    clusters, centers = k_means_clustering(data, 3)
    plot_clusters(data, centers, clusters)

if __name__ == "__main__":
    test_k_means_clustering()
44 changes: 44 additions & 0 deletions knn_classifier.cpp
@@ -0,0 +1,44 @@
#include <iostream>
#include <vector>
#include <cmath>
#include <algorithm>
#include <utility>
using namespace std;

// Structure to represent a data point
struct DataPoint {
    vector<double> features;
    int label;
};

// Function to calculate the Euclidean distance between two feature vectors
double euclideanDistance(const vector<double>& p1, const vector<double>& p2) {
    double distance = 0.0;
    for (size_t i = 0; i < p1.size(); ++i) {
        distance += pow(p1[i] - p2[i], 2);
    }
    return sqrt(distance);
}
// KNN classifier function
int knnClassifier(const vector<DataPoint>& trainingData, const vector<double>& inputFeatures, int k) {
    // Vector to store distances from the input to each training point along with its index
    vector<pair<double, int>> distances;

    // Calculate the distance from the input to each training data point
    for (size_t i = 0; i < trainingData.size(); ++i) {
        double dist = euclideanDistance(trainingData[i].features, inputFeatures);
        distances.push_back(make_pair(dist, static_cast<int>(i)));
    }

    // Sort distances in ascending order
    sort(distances.begin(), distances.end());

    // Never look at more neighbors than there are training points
    k = min(k, static_cast<int>(distances.size()));

    // Count votes for each class among the k nearest neighbors
    vector<int> classVotes(10, 0); // Assuming labels are in the range 0 to 9
    for (int i = 0; i < k; ++i) {
        int index = distances[i].second;
        int label = trainingData[index].label;
        classVotes[label]++;
    }

    // Return the class with the highest vote
    return static_cast<int>(distance(classVotes.begin(), max_element(classVotes.begin(), classVotes.end())));
}