SC-SGS
diff --git a/‎.figures/plssvm_bokeh.gif‎
2.61 MB b/‎.figures/plssvm_bokeh.gif‎
2.61 MB
diff --git a/‎README.md‎
Lines changed: 2 additions & 0 deletions b/‎README.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎examples/python/interactive/README.md‎
Lines changed: 37 additions & 0 deletions b/‎examples/python/interactive/README.md‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎examples/python/interactive/svc/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎examples/python/interactive/svc/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎examples/python/interactive/svc/classification_report.py‎
Lines changed: 58 additions & 0 deletions b/‎examples/python/interactive/svc/classification_report.py‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎examples/python/interactive/svc/confusion_matrix.py‎
Lines changed: 86 additions & 0 deletions b/‎examples/python/interactive/svc/confusion_matrix.py‎
Lines changed: 86 additions & 0 deletions
diff --git a/‎examples/python/interactive/svc/data_generation.py‎
Lines changed: 153 additions & 0 deletions b/‎examples/python/interactive/svc/data_generation.py‎
Lines changed: 153 additions & 0 deletions
@@ -84,6 +84,8 @@ The main highlights of our SVM implementations are:
 7. Multi-GPU support for **all** kernel functions and GPU backends for `fit` as well as `predict/score` (**note**: no multi-GPU support for the stdpar backend even if run on a GPU!).
 8. Python bindings as drop-in replacement for `sklearn.SVC` and `sklearn.SVR` (some features currently not implemented).
 
+To see the full power of Support Vector Machines, have a look at our live visualization examples in 
+[examples/python/interactive](examples/python/interactive/README.md).
 
 ## Getting Started
 
 
@@ -0,0 +1,37 @@
+# Interactive live comparison between `sklearn.svm` and PLSSVM
+
+This directory contains a bokeh application that can be used to compare the classification and regression
+implementations of `sklearn.svm` and PLSSVM side by side.
+It is possible to change all available hyperparameters, e.g., kernel function, decision function shape, or the
+respective kernel function parameters. Additionally, the number of classes and data points as well as the used dataset
+can be changed on the fly.
+
+![Example of our bokeh application visualization between sklearn.svm and PLSSVM.](https://github.com/SC-SGS/PLSSVM/raw/regression/.figures/plssvm_bokeh.gif)
+
+# Requirements
+
+In order to run our interactive comparison, the following packages must be installed:
+
+```bash
+pip install numpy pandas bokeh scikit-learn plssvm
+```
+
+# Running
+
+To start the bokeh server locally, it is sufficient to call (in the current directory):
+
+```bash
+bokeh serve svm.py
+```
+
+This will output something like:
+
+```bash
+2025-02-14 17:47:49,341 Starting Bokeh server version 3.6.3 (running on Tornado 6.4.2)
+2025-02-14 17:47:49,343 User authentication hooks NOT provided (default user enabled)
+2025-02-14 17:47:49,346 Bokeh app running at: http://localhost:5006/svm
+2025-02-14 17:47:49,346 Starting Bokeh server with process id: 184614
+```
+
+You then simply have to open the printed URL (in this example `http://localhost:5006/svm`) in a browser and enjoy our 
+live comparison between `sklearn.svm` and PLSSVM!
@@ -0,0 +1,2 @@
+# init package functions
+from .svc import create_svc_layout
@@ -0,0 +1,58 @@
+import pandas as pd
+import sklearn
+
+from bokeh.models import ColumnDataSource, DataTable, TableColumn, HTMLTemplateFormatter
+
+
+def classification_report_as_dataframe(y_true, y_pred):
+    """Compute the classification report using y_true and y_pred and convert it to a Pandas DataFrame usable in bokeh."""
+    # calculate the classification report
+    report_dict = sklearn.metrics.classification_report(y_true, y_pred, output_dict=True, zero_division=0)
+
+    # convert to DataFrame
+    df = pd.DataFrame(report_dict).transpose()
+
+    # convert numeric values and round to 3 decimal places
+    numeric_cols = ["precision", "recall", "f1-score"]
+    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors="coerce").round(3)
+    df["support"] = df["support"].astype(int)
+    df.at['accuracy', 'support'] = len(y_true)
+
+    # format text for display
+    df["precision"] = df["precision"].astype(str)
+    df["recall"] = df["recall"].astype(str)
+    df["f1-score"] = df["f1-score"].astype(str)
+    df.at['accuracy', 'precision'] = ""
+    df.at['accuracy', 'recall'] = ""
+    df.at['accuracy', 'f1-score'] = f"<b>{df.at['accuracy', 'f1-score']}</b>"
+
+    df = df.reset_index().rename(columns={"index": ""})
+
+    return df
+
+
+def update_classification_report_plot(source, y_true, y_pred):
+    """Update the already existing classification report table using y_true and y_pred."""
+    source.data = classification_report_as_dataframe(y_true, y_pred)
+
+
+def create_classification_report_plot(y_true, y_pred):
+    """Create a new classification report table using y_true and y_pred."""
+    # create the Pandas DataFrame representing a classification report
+    df = classification_report_as_dataframe(y_true, y_pred)
+
+    # convert DataFrame to ColumnDataSource
+    source = ColumnDataSource(df)
+
+    # define an HTML formatter; it is applied to every column so that HTML markup in cell values (e.g., <b>...</b>) is rendered
+    name_formatter = HTMLTemplateFormatter(template='<div><%= value %></div>')
+    # create table columns (hide "Class" header by setting title to "")
+    total_width = 300
+    main_column_width = 96  # based on largest string in column
+    minor_column_width = (total_width - main_column_width) // 4
+    columns = [TableColumn(field=col, title=col, width=main_column_width if col == "" else minor_column_width, formatter=name_formatter) for col in df.columns]
+
+    # Create DataTable
+    classification_table = DataTable(source=source, columns=columns, index_position=None, sizing_mode='fixed', width=total_width, height=200)
+
+    return classification_table, source
@@ -0,0 +1,86 @@
+import numpy as np
+import pandas as pd
+import sklearn
+
+from bokeh.plotting import figure
+from bokeh.models import ColumnDataSource, LinearColorMapper, LinearAxis
+from bokeh.palettes import Viridis256
+
+
+def confusion_matrix_as_dataframe(y_true, y_pred):
+    """Compute the confusion matrix using y_true and y_pred and convert it to a Pandas DataFrame usable in bokeh."""
+    # calculate the confusion matrix
+    confusion_matrix = sklearn.metrics.confusion_matrix(y_true, y_pred)
+
+    # get the unique class names
+    unique_classes = [str(i) for i in np.unique(np.vstack((y_true, y_pred)))]
+    y_true_range = sorted(unique_classes, reverse=True)
+    y_pred_range = sorted(unique_classes)
+
+    # convert to DataFrame for easy handling
+    df = pd.DataFrame(confusion_matrix, index=unique_classes, columns=unique_classes)
+    # reshape for bokeh
+    df = df.stack().reset_index(name='value')
+
+    # define a threshold for text color change
+    threshold = confusion_matrix.max() * 0.5
+
+    # assign white text for dark backgrounds and black text for bright backgrounds
+    df["text_color"] = ["white" if val < threshold else "black" for val in df["value"]]
+
+    # set alpha values -> per default everything is visible
+    df["alpha"] = [1] * len(df["value"])
+
+    return df, y_true_range, y_pred_range
+
+
+def update_confusion_matrix_plot(fig, source, y_true, y_pred):
+    """Update the already existing confusion matrix plot using y_true and y_pred."""
+    # create the Pandas DataFrame representing a confusion matrix
+    df, y_true_range, y_pred_range = confusion_matrix_as_dataframe(y_true, y_pred)
+
+    # check if the set of unique classes has changed
+    old_classes = fig.x_range.factors
+
+    if set(y_true_range) != set(old_classes):
+        # if classes changed, update axes ranges
+        fig.x_range.factors = y_pred_range
+        fig.y_range.factors = y_true_range
+
+    # replace entire data dictionary
+    source.data = df
+
+
+def create_confusion_matrix_plot(y_true, y_pred):
+    """Create a new confusion matrix plot using y_true and y_pred."""
+    # create the Pandas DataFrame representing a confusion matrix
+    df, y_true_range, y_pred_range = confusion_matrix_as_dataframe(y_true, y_pred)
+
+    # create a ColumnDataSource for dynamic updates
+    source = ColumnDataSource(df)
+
+    # create a color mapper
+    mapper = LinearColorMapper(palette=Viridis256)
+
+    # create figure
+    fig = figure(x_range=y_pred_range, y_range=y_true_range, toolbar_location=None, title="Confusion Matrix",
+                 x_axis_label="y_pred", y_axis_label="y_true", x_axis_location="above",
+                 sizing_mode='fixed', width=300, height=300)
+    # disable grid lines (only visible if alpha is 0 anyway)
+    fig.xgrid.grid_line_color = None
+    fig.ygrid.grid_line_color = None
+    # disable dragging of the plot
+    fig.toolbar.active_drag = None
+
+    # draw rectangles
+    fig.rect(x="level_1", y="level_0", width=1, height=1, source=source,
+             fill_color={'field': 'value', 'transform': mapper}, line_color="white", fill_alpha="alpha")
+
+    # add text labels
+    fig.text(x="level_1", y="level_0", text="value", source=source,
+             text_align="center", text_baseline="middle", text_color="text_color", text_font_size="10pt", text_alpha="alpha")
+
+    fig.axis.major_label_text_font_size = "10pt"
+    fig.axis.major_label_standoff = 1
+
+    return fig, source
@@ -0,0 +1,153 @@
+import numpy as np
+from sklearn.datasets import make_classification, make_circles, make_moons, make_blobs, make_gaussian_quantiles
+
+
+def generate_classification_dataset(dataset_type, n_samples):
+    """Generate a classification dataset. Returns X (datapoints), y (labels)"""
+    n_classes = 4
+
+    if dataset_type == "classification":
+        return make_classification(n_samples=n_samples, n_features=2, n_classes=n_classes,
+                                   n_clusters_per_class=1, n_redundant=0)
+    elif dataset_type == "aniso":
+        return make_classification(n_samples=n_samples, n_features=2, n_classes=n_classes, n_informative=2, n_redundant=0,
+                                   n_clusters_per_class=1, class_sep=2)
+    elif dataset_type == "blobs":
+        return make_blobs(n_samples=n_samples, centers=n_classes)
+    elif dataset_type == "varied_density":
+        return make_blobs(n_samples=n_samples, centers=n_classes, cluster_std=np.random.choice([0.5, 1.0, 2.0, 0.1], n_classes))
+    elif dataset_type == "outliers_with_clusters":
+        X, y = make_blobs(n_samples=n_samples, centers=n_classes)
+        X[:20] += 10  # Add outliers
+        return X, y
+    elif dataset_type == "star_cluster":
+        X, y = [], []
+        angles = np.linspace(0, 2 * np.pi, n_classes, endpoint=False)
+
+        for i, angle in enumerate(angles):
+            r = np.random.uniform(0.5, 1.5, n_samples // n_classes)
+            x1 = r * np.cos(angle) + np.random.normal(0, 0.2, size=r.shape)
+            x2 = r * np.sin(angle) + np.random.normal(0, 0.2, size=r.shape)
+
+            X.append(np.column_stack((x1, x2)))
+            y.append(np.full_like(x1, i))
+
+        X = np.vstack(X)
+        y = np.concatenate(y).astype(int)
+        return X, y
+    elif dataset_type == "checkerboard":
+        X = np.random.rand(n_samples, 2)
+        if n_classes == 4:
+            y = []
+            for datapoint in X:
+                if datapoint[0] < 0.5 and datapoint[1] < 0.5:
+                    y.append(0)
+                elif datapoint[0] < 0.5 and datapoint[1] >= 0.5:
+                    y.append(1)
+                elif datapoint[0] >= 0.5 and datapoint[1] < 0.5:
+                    y.append(2)
+                elif datapoint[0] >= 0.5 and datapoint[1] >= 0.5:
+                    y.append(3)
+            y = np.asarray(y)
+        else:
+            y = ((np.floor(X[:, 0] * 2) + np.floor(X[:, 1] * 2)) % n_classes).astype(int)
+        return X, y
+    elif dataset_type == "concentric_rings":
+        radii = np.linspace(0.5, 2.0, n_classes)
+        X, y = [], []
+
+        for i, r in enumerate(radii[:n_classes]):  # one ring per class (radii already holds n_classes entries)
+            theta = np.linspace(0, 2 * np.pi, n_samples // n_classes)
+            x1 = r * np.cos(theta) + np.random.normal(0, 0.1, size=theta.shape)
+            x2 = r * np.sin(theta) + np.random.normal(0, 0.1, size=theta.shape)
+            X.append(np.column_stack((x1, x2)))
+            y.append(np.full_like(x1, i))
+
+        X = np.vstack(X)
+        y = np.concatenate(y).astype(int)
+        return X, y
+    elif dataset_type == "ball":
+        return make_gaussian_quantiles(n_samples=n_samples, n_features=2, n_classes=n_classes)
+    elif dataset_type == "moons":
+        X_1, y_1 = make_moons(n_samples=n_samples, noise=0.1, random_state=42)
+        X_2, y_2 = make_moons(n_samples=n_samples, noise=0.1, random_state=42)
+        for idx, datapoint in enumerate(X_2):
+            if y_2[idx] == 0:
+                datapoint[1] += 1
+            else:
+                datapoint[1] -= 1
+        y_2 += 2
+
+        X = np.concatenate((X_1, X_2), axis=0)
+        y = np.concatenate((y_1, y_2), axis=0)
+        return X, y
+    elif dataset_type == "wavy_clusters":
+        X, y = [], []
+        x1 = np.linspace(-1, 1, n_samples // n_classes)
+
+        for i in range(n_classes):
+            x2 = np.sin(5 * np.pi * x1) + np.random.normal(0, 0.1, size=x1.shape) + 2 * i
+            X.append(np.column_stack((x1, x2)))
+            y.append(np.full_like(x1, i))
+
+        X = np.vstack(X)
+        y = np.concatenate(y).astype(int)
+        return X, y
+    elif dataset_type == "s_curves":
+        x1 = np.linspace(-1, 1, n_samples // n_classes)
+        X, y = [], []
+
+        for i in range(n_classes):
+            x2 = np.sin(2 * np.pi * x1) + np.random.normal(0, 0.1, size=x1.shape) + i
+            X.append(np.column_stack((x1, x2)))
+            y.append(np.full_like(x1, i))
+
+        X = np.vstack(X)
+        y = np.concatenate(y).astype(int)
+        return X, y
+    elif dataset_type == "spiral":
+        theta = np.linspace(0, 4 * np.pi, n_samples)
+        r = np.linspace(0, 1, n_samples)
+        X = np.column_stack([r * np.sin(theta), r * np.cos(theta)])
+        y = np.zeros(n_samples, dtype=int)
+        if n_classes == 2:
+            y[r > 0.5] = 1
+        elif n_classes == 3:
+            y[r > 0.33] = 1
+            y[r > 0.66] = 2
+        elif n_classes == 4:
+            y[r > 0.25] = 1
+            y[r > 0.5] = 2
+            y[r > 0.75] = 3
+        return X, y
+    elif dataset_type == "multiple_spirals":
+        n_samples_per_class = n_samples // n_classes
+        X, y = [], []
+
+        centers = [(i * 1, i * 1) for i in range(n_classes)]  # Different starting centers
+
+        for i, (cx, cy) in enumerate(centers):
+            t = np.linspace(0, 2 * np.pi, n_samples_per_class)  # Spiral shape
+            x = cx + t * np.cos(t) + 0.1 * np.random.randn(n_samples_per_class)
+            y_coord = cy + t * np.sin(t) + 0.1 * np.random.randn(n_samples_per_class)
+            X.append(np.column_stack((x, y_coord)))
+            y.append(np.full(n_samples_per_class, i))
+
+        X = np.vstack(X)
+        y = np.hstack(y).astype(int)
+        return X, y
+    elif dataset_type == "multiarm_spiral":
+        n_samples_per_class = n_samples // n_classes
+        X, y = [], []
+
+        for i in range(n_classes):
+            t = np.linspace(0, 3 * 2 * np.pi, n_samples_per_class)  # Spiral shape
+            angle_offset = (i / n_classes) * (2 * np.pi)  # Offset each spiral arm
+            x = (t + 1) * np.cos(t + angle_offset) + 0.1 * np.random.randn(n_samples_per_class)
+            y_coord = (t + 1) * np.sin(t + angle_offset) + 0.1 * np.random.randn(n_samples_per_class)
+            X.append(np.column_stack((x, y_coord)))
+            y.append(np.full(n_samples_per_class, i))
+
+        X = np.vstack(X)
+        y = np.hstack(y).astype(int)
+        return X, y
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+# init package functions`
	`2`	`+from .svc import create_svc_layout`