Skip to content
105 changes: 105 additions & 0 deletions quantum/quantum_kmeans_clustering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import cirq
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.preprocessing import MinMaxScaler


def generate_data(n_samples=100, n_features=2, n_clusters=2, random_state=42):
    """
    Generates a synthetic blob dataset and rescales it with ``MinMaxScaler``.

    :param n_samples: Total number of points to generate.
    :param n_features: Number of features (dimensions) of every point.
    :param n_clusters: Number of blob centres the points are drawn around.
    :param random_state: Seed forwarded to ``make_blobs``. The default of 42
        reproduces the value that used to be hard-coded here.
    :return: Tuple ``(data, labels)`` with the rescaled feature matrix and the
        blob index of every point as returned by ``make_blobs``.
    """
    data, labels = make_blobs(
        n_samples=n_samples,
        centers=n_clusters,
        n_features=n_features,
        random_state=random_state,
    )
    # Rescale each feature (MinMaxScaler's default range is [0, 1]) so that
    # point-to-point distances stay small before they are clipped to [0, 1]
    # by quantum_distance.
    return MinMaxScaler().fit_transform(data), labels

def quantum_distance(point1, point2, repetitions=1000):
    """
    Estimates a distance-like score between two points on a one-qubit circuit.

    The Euclidean distance ``d`` between the points is clipped to ``[0, 1]``
    and encoded as the rotation angle ``2 * arcsin(d)`` of an Ry gate applied
    to a qubit in state ``|0>``. The qubit is measured ``repetitions`` times and
    the fraction of ``1`` outcomes is returned, which estimates ``d ** 2``.
    Because of the clipping, all distances of 1 or more yield the same score,
    and because the value is sampled it varies slightly between calls.

    :param point1: First point as a numpy array.
    :param point2: Second point as a numpy array.
    :param repetitions: Number of measurement shots used for the estimate.
    :return: Quantum distance between the two points, a float in ``[0, 1]``.
    :raises ValueError: If ``repetitions`` is not positive.

    >>> point_a = np.array([1.0, 2.0])
    >>> point_b = np.array([1.5, 2.5])
    >>> result = quantum_distance(point_a, point_b)
    >>> assert isinstance(result, float)
    """
    if repetitions <= 0:
        raise ValueError("repetitions must be a positive integer")

    qubit = cirq.LineQubit(0)
    diff = np.clip(np.linalg.norm(point1 - point2), 0, 1)
    theta = 2 * np.arcsin(diff)

    circuit = cirq.Circuit(
        cirq.ry(theta)(qubit),
        cirq.measure(qubit, key="result"),
    )

    result = cirq.Simulator().run(circuit, repetitions=repetitions)
    # Divide by the same shot count that was simulated so the value stays a
    # proper frequency whatever `repetitions` is.
    ones = result.histogram(key="result").get(1, 0)
    return ones / repetitions


def initialize_centroids(data: np.ndarray, k: int, seed=None) -> np.ndarray:
    """
    Initializes centroids for k-means clustering.

    ``k`` distinct rows of ``data`` are picked uniformly at random, using a
    ``numpy.random.Generator`` instead of the legacy global ``np.random`` state.

    :param data: The dataset from which to initialize centroids.
    :param k: The number of centroids to initialize.
    :param seed: Optional seed for the random generator; pass a value to make
        the selection reproducible. ``None`` draws fresh entropy on every call.
    :return: An array of initialized centroids.
    :raises ValueError: If ``k`` is larger than the number of rows in ``data``
        (raised by ``Generator.choice`` when sampling without replacement).

    >>> data = np.array([[1, 2], [3, 4], [5, 6]])
    >>> centroids = initialize_centroids(data, 2)
    >>> assert centroids.shape == (2, 2)
    """
    rng = np.random.default_rng(seed)
    # replace=False guarantees the k chosen row indices are all different.
    return data[rng.choice(len(data), size=k, replace=False)]

Check failure on line 47 in quantum/quantum_kmeans_clustering.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (NPY002)

quantum/quantum_kmeans_clustering.py:47:17: NPY002 Replace legacy `np.random.choice` call with `np.random.Generator`

def assign_clusters(data, centroids):
    """
    Groups every point of ``data`` under its closest centroid.

    Closeness is measured with ``quantum_distance``; when several centroids
    tie, the one with the lowest index wins.

    :param data: Iterable of points.
    :param centroids: Sequence of centroid points.
    :return: A list with one list of points per centroid, in centroid order.
    """
    buckets = [[] for _ in centroids]
    for sample in data:
        # One distance evaluation per centroid, in centroid order.
        scores = [quantum_distance(sample, centroid) for centroid in centroids]
        nearest = scores.index(min(scores))
        buckets[nearest].append(sample)
    return buckets

def recompute_centroids(clusters, previous_centroids=None):
    """
    Computes the mean point of every cluster.

    :param clusters: List of clusters, each a list of points.
    :param previous_centroids: Optional centroids from the previous iteration,
        aligned with ``clusters``. When given, an empty cluster keeps its
        previous centroid so the result always has one row per cluster. When
        omitted, empty clusters are dropped from the result.
    :return: Array of centroids.

    >>> pts = [[np.array([0.0, 0.0]), np.array([2.0, 2.0])], []]
    >>> recompute_centroids(pts, np.array([[5.0, 5.0], [7.0, 7.0]])).tolist()
    [[1.0, 1.0], [7.0, 7.0]]
    """
    if previous_centroids is None:
        return np.array([np.mean(cluster, axis=0) for cluster in clusters if cluster])
    return np.array(
        [
            np.mean(cluster, axis=0) if cluster else previous
            for cluster, previous in zip(clusters, previous_centroids)
        ]
    )


def quantum_kmeans(data, k, max_iters=10):
    """
    Runs k-means clustering using ``quantum_distance`` for the assignments.

    :param data: The dataset to cluster.
    :param k: The number of clusters.
    :param max_iters: Maximum number of assign/recompute iterations.
    :return: Tuple ``(centroids, clusters)`` where ``centroids`` has ``k`` rows
        and ``clusters`` is the assignment made in the last iteration.
    """
    centroids = initialize_centroids(data, k)
    clusters = None

    for _ in range(max_iters):
        clusters = assign_clusters(data, centroids)
        # Pass the current centroids so a cluster that received no points
        # keeps its centroid; otherwise the array would shrink below k rows
        # and the comparison below would compare mismatched shapes.
        new_centroids = recompute_centroids(clusters, centroids)
        if np.allclose(new_centroids, centroids):
            break
        centroids = new_centroids

    if clusters is None:
        # max_iters <= 0: the loop never ran, so assign once against the
        # initial centroids instead of returning an unbound name.
        clusters = assign_clusters(data, centroids)

    return centroids, clusters


def main():
    """
    Runs the demo: generates a small dataset, clusters it with quantum
    k-means, plots the input next to the clustering and prints the centroids.
    """
    n_samples, n_clusters = 10, 2
    data, labels = generate_data(n_samples, n_clusters=n_clusters)

    plt.figure(figsize=(12, 5))

    plt.subplot(121)
    plt.scatter(data[:, 0], data[:, 1], c=labels)
    plt.title("Generated Data")

    final_centroids, final_clusters = quantum_kmeans(data, n_clusters)

    plt.subplot(122)
    for i, cluster in enumerate(final_clusters):
        if len(cluster) == 0:
            # An empty cluster converts to a 1-D array, and the column
            # indexing below would raise IndexError; there is nothing to plot.
            continue
        points = np.array(cluster)
        plt.scatter(points[:, 0], points[:, 1], label=f"Cluster {i+1}")
    plt.scatter(
        final_centroids[:, 0],
        final_centroids[:, 1],
        color="red",
        marker="x",
        s=200,
        linewidths=3,
        label="Centroids",
    )
    plt.title("Quantum k-Means Clustering with Cirq")
    plt.legend()

    plt.tight_layout()
    plt.show()

    print(f"Final Centroids:\n{final_centroids}")


# Only run the demo when executed as a script, so importing the module (for
# example to run its doctests) does not start a simulation or open a plot.
if __name__ == "__main__":
    main()

Check failure on line 105 in quantum/quantum_kmeans_clustering.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (W292)

quantum/quantum_kmeans_clustering.py:105:46: W292 No newline at end of file
Loading