CSRT-NTUA · ntua-el22813 · Mar 19, 2024 · Mar 19, 2024 · Mar 19, 2024 · Mar 22, 2024
diff --git a/dbscan.py b/dbscan.py
@@ -0,0 +1,84 @@
+import random
+import matplotlib.pyplot as plt
+from collections import deque
+
+# Label of a point that has not been assigned yet; dbscan() initialises every
+# entry of its label list to this value.
+UNCLASSIFIED = -1
+# Label of a point that belongs to no cluster. Real cluster ids start at 1, so
+# neither sentinel collides with them.
+NOISE = 0
+
+def region_query(dataset, point_index, eps):
+    """Return the indices of all other points within ``eps`` of one point.
+
+    Args:
+        dataset: Sequence of points accepted by ``euclidean_distance``.
+        point_index: Index in ``dataset`` of the point being queried.
+        eps: Neighbourhood radius; the comparison is inclusive (``<= eps``).
+
+    Returns:
+        A list of indices, in dataset order. The queried point itself is
+        never included.
+    """
+    return [
+        other_index
+        for other_index, candidate in enumerate(dataset)
+        if other_index != point_index
+        and euclidean_distance(candidate, dataset[point_index]) <= eps
+    ]
+
+def euclidean_distance(point1, point2):
+    """Return the straight-line distance between two 2-D points.
+
+    Only the first two coordinates (index 0 and index 1) of each point are
+    read; any further elements are ignored.
+    """
+    delta_x = point1[0] - point2[0]
+    delta_y = point1[1] - point2[1]
+    squared_length = delta_x ** 2 + delta_y ** 2
+    return squared_length ** 0.5
+
+def expand_cluster(dataset, point_index, cluster_id, eps, min_pts, clusters):
+    """Try to grow the cluster ``cluster_id`` starting from one point.
+
+    Args:
+        dataset: Sequence of points.
+        point_index: Index of the point the cluster is started from.
+        cluster_id: Label written to every point that joins the cluster.
+        eps: Neighbourhood radius passed to ``region_query``.
+        min_pts: Minimum number of neighbours (the point itself is not
+            counted) a point needs for its neighbourhood to be expanded.
+        clusters: Per-point label list; modified in place.
+
+    Returns:
+        ``False`` if the starting point has fewer than ``min_pts`` neighbours,
+        in which case it is labelled ``NOISE`` and nothing else changes.
+        ``True`` once the cluster has been fully expanded.
+    """
+    pending = deque(region_query(dataset, point_index, eps))
+    if len(pending) < min_pts:
+        clusters[point_index] = NOISE
+        return False
+
+    clusters[point_index] = cluster_id
+    while pending:
+        candidate = pending.popleft()
+        label = clusters[candidate]
+        if label == UNCLASSIFIED:
+            # Only points seen for the first time can extend the frontier.
+            reachable = region_query(dataset, candidate, eps)
+            if len(reachable) >= min_pts:
+                pending.extend(reachable)
+        # Unvisited points and former noise points both join this cluster;
+        # points that already carry a cluster id are left alone.
+        if label in (UNCLASSIFIED, NOISE):
+            clusters[candidate] = cluster_id
+    return True
+
+def dbscan(dataset, eps, min_pts):
+    """Label every point of ``dataset`` with a cluster id or ``NOISE``.
+
+    Args:
+        dataset: Sequence of points.
+        eps: Neighbourhood radius.
+        min_pts: Neighbour count threshold forwarded to ``expand_cluster``.
+
+    Returns:
+        A list with one label per point, aligned with ``dataset``. Cluster ids
+        are consecutive integers starting at 1; ``NOISE`` marks points that
+        ended up in no cluster.
+    """
+    labels = [UNCLASSIFIED] * len(dataset)
+    next_cluster_id = 1
+    for index in range(len(dataset)):
+        if labels[index] != UNCLASSIFIED:
+            continue
+        grew = expand_cluster(dataset, index, next_cluster_id, eps, min_pts, labels)
+        if grew:
+            # The id was consumed by a real cluster; move on to a fresh one.
+            next_cluster_id += 1
+    return labels
+
+def plot_clusters(dataset, clusters):
+    """Show a scatter plot of the points, coloured by their cluster label.
+
+    Args:
+        dataset: Sequence of points; ``point[0]`` is plotted as x and
+            ``point[1]`` as y.
+        clusters: Sequence of labels, one per point, aligned with ``dataset``.
+
+    Points labelled ``NOISE`` are drawn in black. Every other label picks a
+    colour from a fixed palette, cycling when there are more labels than
+    colours. The figure is displayed with ``plt.show()``.
+    """
+    # Black is reserved for noise, so it is deliberately absent from the
+    # palette; otherwise a cluster whose id maps onto it would be
+    # indistinguishable from noise.
+    colors = ['b', 'g', 'r', 'c', 'm', 'y']
+
+    # Collect coordinates per label so each label needs a single scatter call
+    # instead of one call per point.
+    grouped = {}
+    for i, point in enumerate(dataset):
+        xs, ys = grouped.setdefault(clusters[i], ([], []))
+        xs.append(point[0])
+        ys.append(point[1])
+
+    for label, (xs, ys) in grouped.items():
+        color = 'black' if label == NOISE else colors[label % len(colors)]
+        plt.scatter(xs, ys, color=color)
+
+    plt.xlabel('X')
+    plt.ylabel('Y')
+    plt.title('DBSCAN Clustering')
+    plt.show()
+
+if __name__ == "__main__":
+    # Demo: three rectangular blobs of 100 uniformly distributed points each,
+    # given as (x_low, x_high, y_low, y_high).
+    blobs = [
+        (0, 3, 0, 3),        # first cluster
+        (7, 10, 0, 3),       # second cluster
+        (3.5, 6.5, 6, 10),   # third cluster
+    ]
+
+    dataset = []
+    for x_low, x_high, y_low, y_high in blobs:
+        for _ in range(100):
+            # x is drawn before y, for every point.
+            rand_x = random.uniform(x_low, x_high)
+            rand_y = random.uniform(y_low, y_high)
+            dataset.append([rand_x, rand_y])
+
+    eps = 1.0
+    min_pts = 5
+
+    clusters = dbscan(dataset, eps, min_pts)
+    plot_clusters(dataset, clusters)
diff --git a/least_squares_regression.cpp b/least_squares_regression.cpp
@@ -0,0 +1,34 @@
+#include <bits/stdc++.h>
+
+using namespace std;
+
+
+// Fits the least-squares line y = a + b*x to the paired samples (x[i], y[i])
+// and prints it to stdout as "y= a + b*x" with three decimals.
+//
+// x, y: sample coordinates. They must be non-empty and of equal length, and x
+//       must not be constant (otherwise the slope is undefined).
+// On invalid input a message is written to stderr and nothing is printed to
+// stdout.
+void regression(const std::vector<float>& x, const std::vector<float>& y)
+{
+    const std::size_t n = x.size();
+    if (n == 0 || y.size() != n)
+    {
+        std::fprintf(stderr, "regression: x and y must be non-empty and of equal length\n");
+        return;
+    }
+
+    // The initial value must be floating point: std::accumulate uses its type
+    // for the running sum, so an int 0 would truncate every partial sum.
+    const double sx = std::accumulate(x.begin(), x.end(), 0.0);
+    const double sy = std::accumulate(y.begin(), y.end(), 0.0);
+
+    double sx_sy = 0.0;
+    double sx_x = 0.0;
+    for (std::size_t i = 0; i < n; i++)
+    {
+        sx_sy += static_cast<double>(x[i]) * y[i];
+        sx_x += static_cast<double>(x[i]) * x[i];
+    }
+
+    const double count = static_cast<double>(n);
+    const double denominator = count * sx_x - sx * sx;
+    if (denominator == 0.0)
+    {
+        // All x values coincide: the fitted line would be vertical.
+        std::fprintf(stderr, "regression: x values are constant, slope is undefined\n");
+        return;
+    }
+
+    const double b = (count * sx_sy - sx * sy) / denominator;
+    // Intercept from the means: a = mean(y) - b * mean(x).
+    const double a = sy / count - b * (sx / count);
+
+    std::printf("y= %.3f + %.3f*x", a, b);
+}
+
+// Entry point: runs the regression on a small hard-coded sample.
+int main()
+{
+    const std::vector<float> sample_x{ 5, 7, 12, 16, 20 };
+    const std::vector<float> sample_y{ 40, 120, 180, 210, 240 };
+
+    regression(sample_x, sample_y);
+    return 0;
+}