Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions dbscan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import random
import matplotlib.pyplot as plt
from collections import deque

# Sentinel labels stored in the per-point ``clusters`` list. Real cluster ids
# start at 1 (see ``dbscan``), so neither sentinel collides with a cluster.
UNCLASSIFIED = -1  # initial label given to every point by ``dbscan``
NOISE = 0  # set by ``expand_cluster`` when a point has fewer than ``min_pts`` neighbours

def region_query(dataset, point_index, eps):
    """Return the indices of the points lying within ``eps`` of a given point.

    The point at ``point_index`` is never part of its own result, so the
    returned list only holds *other* points. The distance test is inclusive
    (``<= eps``) and indices come back in ascending order.
    """
    return [
        idx
        for idx, candidate in enumerate(dataset)
        if idx != point_index
        and euclidean_distance(candidate, dataset[point_index]) <= eps
    ]

def euclidean_distance(point1, point2):
    """Return the Euclidean distance between two points of equal dimension.

    Works for any number of coordinates, not just 2-D points. For 2-D input
    the arithmetic is the same as ``(dx ** 2 + dy ** 2) ** 0.5``.

    Args:
        point1: Sequence of numeric coordinates.
        point2: Sequence of numeric coordinates, same length as ``point1``.

    Returns:
        The straight-line distance as a float.

    Raises:
        ValueError: If the two points do not have the same number of
            coordinates.
    """
    if len(point1) != len(point2):
        raise ValueError(
            "points must have the same dimension: "
            f"{len(point1)} != {len(point2)}"
        )
    # Sum of squared per-axis differences, then the square root.
    squared = sum((a - b) ** 2 for a, b in zip(point1, point2))
    return squared ** 0.5

def expand_cluster(dataset, point_index, cluster_id, eps, min_pts, clusters):
    """Try to grow cluster ``cluster_id`` outward from the point at ``point_index``.

    ``clusters`` is updated in place. If the starting point has fewer than
    ``min_pts`` neighbours it is labelled ``NOISE`` and ``False`` is returned.
    Otherwise the point and everything reachable from it through points with
    at least ``min_pts`` neighbours is labelled ``cluster_id`` and ``True``
    is returned. Points previously labelled ``NOISE`` can be absorbed into
    the cluster; points already belonging to a cluster are left alone.
    """
    pending = deque(region_query(dataset, point_index, eps))

    # Not enough neighbours: the starting point cannot seed a cluster.
    if len(pending) < min_pts:
        clusters[point_index] = NOISE
        return False

    clusters[point_index] = cluster_id
    while pending:
        candidate = pending.popleft()
        label = clusters[candidate]

        # Only points seen for the first time are expanded; a dense enough
        # neighbourhood is queued so the cluster keeps growing through it.
        if label == UNCLASSIFIED:
            reachable = region_query(dataset, candidate, eps)
            if len(reachable) >= min_pts:
                pending.extend(reachable)

        # Fresh points and former noise both join the cluster.
        if label in (UNCLASSIFIED, NOISE):
            clusters[candidate] = cluster_id

    return True

def dbscan(dataset, eps, min_pts):
    """Cluster ``dataset`` and return one label per point.

    The returned list is parallel to ``dataset``. Cluster ids are consecutive
    integers starting at 1, handed out in the order clusters are discovered;
    points that end up in no cluster carry the ``NOISE`` label.
    """
    labels = [UNCLASSIFIED] * len(dataset)
    next_cluster_id = 1

    for index in range(len(dataset)):
        # Points labelled by an earlier expansion are not revisited.
        if labels[index] != UNCLASSIFIED:
            continue
        grew = expand_cluster(dataset, index, next_cluster_id, eps, min_pts, labels)
        if grew:
            next_cluster_id += 1

    return labels

def plot_clusters(dataset, clusters):
    """Show a scatter plot of ``dataset`` coloured by cluster label.

    Args:
        dataset: Sequence of points; only the first two coordinates of each
            point are plotted (x, then y).
        clusters: Labels parallel to ``dataset``. ``NOISE`` points are drawn
            in black; every other label is mapped onto a cycling palette.
    """
    # Black ('k') is deliberately left out of the palette: it is reserved for
    # noise, so no cluster can be drawn in the same colour as noise points.
    colors = ['b', 'g', 'r', 'c', 'm', 'y']

    # Collect coordinates per label so each label needs a single scatter
    # call instead of one call per point.
    grouped = {}
    for i, point in enumerate(dataset):
        xs, ys = grouped.setdefault(clusters[i], ([], []))
        xs.append(point[0])
        ys.append(point[1])

    for label, (xs, ys) in grouped.items():
        if label == NOISE:
            color = 'black'
        else:
            color = colors[label % len(colors)]
        plt.scatter(xs, ys, color=color)

    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title('DBSCAN Clustering')
    plt.show()

if __name__ == "__main__":
    # Demo: three uniformly filled rectangles, 100 points each, clustered
    # with DBSCAN and then plotted.
    # Each entry is (x_low, x_high, y_low, y_high).
    blob_bounds = [
        (0, 3, 0, 3),        # first cluster
        (7, 10, 0, 3),       # second cluster
        (3.5, 6.5, 6, 10),   # third cluster
    ]

    dataset = []
    for x_low, x_high, y_low, y_high in blob_bounds:
        # x is drawn before y for every point.
        dataset.extend(
            [random.uniform(x_low, x_high), random.uniform(y_low, y_high)]
            for _ in range(100)
        )

    eps = 1.0
    min_pts = 5

    clusters = dbscan(dataset, eps, min_pts)
    plot_clusters(dataset, clusters)
34 changes: 34 additions & 0 deletions least_squares_regression.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include <bits/stdc++.h>

using namespace std;


/**
 * Fit the ordinary least-squares line y = a + b*x to the samples
 * (x[i], y[i]) and print it to stdout as "y= a + b*x" with three decimals.
 *
 * Nothing is written to stdout when no line can be fitted; a diagnostic is
 * written to stderr instead. That happens when the inputs are empty, when
 * x and y differ in length, or when the slope denominator is zero (for
 * example when every x value is the same).
 */
void regression(std::vector<float> x, std::vector<float> y)
{
    const std::size_t n = x.size();
    if (n == 0 || y.size() != n)
    {
        std::fprintf(stderr, "regression: x and y must be non-empty and of equal length\n");
        return;
    }

    // The init argument fixes std::accumulate's accumulator type. An int 0
    // would truncate every partial sum, so a double 0.0 is used.
    const double sx = std::accumulate(x.begin(), x.end(), 0.0);
    const double sy = std::accumulate(y.begin(), y.end(), 0.0);

    double sx_sy = 0.0; // sum of x[i]*y[i]
    double sx_x = 0.0;  // sum of x[i]*x[i]
    for (std::size_t i = 0; i < n; ++i)
    {
        sx_sy += static_cast<double>(x[i]) * y[i];
        sx_x += static_cast<double>(x[i]) * x[i];
    }

    const double denom = n * sx_x - sx * sx;
    if (denom == 0.0)
    {
        std::fprintf(stderr, "regression: x values do not vary; slope is undefined\n");
        return;
    }

    const double b = (n * sx_sy - sx * sy) / denom; // slope
    const double a = sy / n - b * (sx / n);         // intercept: mean(y) - b*mean(x)

    std::printf("y= %.3f + %.3f*x", a, b);
}

// Entry point: fits a line to a small hard-coded sample and prints it.
int main(void)
{
    // Sample observations; xs[i] pairs with ys[i].
    std::vector<float> xs{5.0f, 7.0f, 12.0f, 16.0f, 20.0f};
    std::vector<float> ys{40.0f, 120.0f, 180.0f, 210.0f, 240.0f};

    regression(xs, ys);

    return 0;
}