diff --git a/autoBOTLib/learning/hyperparameter_configurations.py b/autoBOTLib/learning/hyperparameter_configurations.py index aa1d79f..d99f505 100644 --- a/autoBOTLib/learning/hyperparameter_configurations.py +++ b/autoBOTLib/learning/hyperparameter_configurations.py @@ -8,7 +8,8 @@ "hidden_layer_size": [64, 128, 256], "num_hidden": [1, 2, 4], "dropout": [0.01, 0.1, 0.3, 0.4], - "device": ["cpu"] + "device": ["cpu"], + "compile_model": [False, True] } scikit_default = { diff --git a/autoBOTLib/learning/torch_sparse_nn.py b/autoBOTLib/learning/torch_sparse_nn.py index 5cb2700..6b918a6 100644 --- a/autoBOTLib/learning/torch_sparse_nn.py +++ b/autoBOTLib/learning/torch_sparse_nn.py @@ -1,4 +1,5 @@ # Some Torch-based FFNNs - Skrlj 2021 +# torch.compile support added in 2025 for PyTorch 2.8+ import torch import torch.nn as nn @@ -162,6 +163,33 @@ def get_importances(self): class SFNN: + """ + Sparse Feedforward Neural Network for binary classification. + + Parameters: + ----------- + batch_size : int, default=32 + Batch size for training + num_epochs : int, default=32 + Number of training epochs + learning_rate : float, default=0.001 + Learning rate for optimizer + stopping_crit : int, default=10 + Early stopping criterion (number of epochs without improvement) + hidden_layer_size : int, default=64 + Size of hidden layers + dropout : float, default=0.2 + Dropout probability + num_hidden : int, default=2 + Number of hidden layers + device : str, default="cpu" + Device to run on ("cpu" or "cuda") + verbose : int, default=0 + Verbosity level + compile_model : bool, default=False + Whether to compile the model with torch.compile for optimization. + Requires PyTorch 2.0+ and can provide significant speedup. 
+ """ def __init__(self, batch_size=32, num_epochs=32, @@ -172,6 +200,7 @@ def __init__(self, num_hidden=2, device="cpu", verbose=0, + compile_model=False, *args, **kwargs): self.device = device @@ -184,6 +213,7 @@ def __init__(self, self.hidden_layer_size = hidden_layer_size self.num_hidden = num_hidden self.learning_rate = learning_rate + self.compile_model = compile_model self.model = None self.optimizer = None self.num_params = None @@ -222,6 +252,12 @@ def fit(self, features, labels): num_hidden=self.num_hidden, dropout=self.dropout, device=self.device).to(self.device) + + # Apply torch.compile if enabled + if self.compile_model: + if self.verbose: + logging.info("Compiling model with torch.compile") + self.model = torch.compile(self.model) self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate) self.num_params = sum(p.numel() for p in self.model.parameters()) @@ -286,7 +322,7 @@ def predict_proba(self, features): predictions = [] self.model.eval() with torch.no_grad(): - for features, _ in test_dataset: + for features in test_dataset: features = features.float().to(self.device) representation = self.model.forward(features) pred = representation.detach().cpu().numpy()[0] diff --git a/docs/TORCH_COMPILE.md b/docs/TORCH_COMPILE.md new file mode 100644 index 0000000..cc17d41 --- /dev/null +++ b/docs/TORCH_COMPILE.md @@ -0,0 +1,62 @@ +# torch.compile Support in autoBOT + +autoBOT now supports PyTorch's `torch.compile` feature for optimized neural network training and inference. + +## What is torch.compile? + +`torch.compile` is PyTorch's JIT compilation feature introduced in PyTorch 2.0 that can provide significant speedups by compiling PyTorch models into optimized code. 
+ +## Usage + +### Through Hyperparameter Configuration + +The `compile_model` parameter is now available in the torch hyperparameter configuration: + +```python +import autoBOTLib + +custom_hyperparams = { + "batch_size": [16], + "num_epochs": [100], + "learning_rate": [0.001], + "compile_model": [True] # Enable torch.compile +} + +autobot = autoBOTLib.GAlearner( + texts, + labels, + framework="torch", + custom_hyperparameters=custom_hyperparams +) +``` + +### Direct Usage with SFNN + +```python +from autoBOTLib.learning.torch_sparse_nn import SFNN + +# Create model with torch.compile enabled +model = SFNN( + batch_size=16, + num_epochs=50, + compile_model=True # Enable compilation +) + +model.fit(X_train, y_train) +predictions = model.predict(X_test) +``` + +## Benefits + +- **Performance**: Can provide significant speedup during training and inference +- **Automatic**: Works transparently with existing code +- **Backward Compatible**: `SFNN` still defaults to `compile_model=False`; note that the default torch hyperparameter configuration now lists both `False` and `True`, so hyperparameter searches may compile models unless `custom_hyperparameters` restricts `compile_model` to `[False]` + +## Requirements + +- PyTorch 2.0 or later +- Compatible hardware (some hardware may not support all compilation features) + +## Example + +See `examples/minimal_torch_compile.py` for a complete example comparing compiled vs non-compiled models. 
\ No newline at end of file diff --git a/examples/minimal_torch_compile.py b/examples/minimal_torch_compile.py new file mode 100644 index 0000000..eb6ea46 --- /dev/null +++ b/examples/minimal_torch_compile.py @@ -0,0 +1,93 @@ +## A simple example showcasing autoBOTLib with torch.compile support + +import autoBOTLib +import pandas as pd +from cluster_utils import output_classification_results +import os + +#TOKENIZERS_PARALLELISM=(true | false) +os.environ['TOKENIZERS_PARALLELISM'] = "false" + + +def run(): + ## Load example data frame + dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t").iloc[:] + train_sequences = dataframe['text_a'] + train_targets = dataframe['label'] + reptype = "neurosymbolic" + + # Example 1: Run without torch.compile (default behavior) + print("=== Running without torch.compile ===") + autoBOTLibObj = autoBOTLib.GAlearner( + train_sequences, + train_targets, + representation_type=reptype, + n_fold_cv=3, + framework="torch", + memory_storage="memory", + learner_preset="default", + verbose=1, + sparsity=0.1, + upsample=False, + time_constraint=1).evolve( + strategy="evolution", + nind=3) + + # Make predictions + dataframe2 = pd.read_csv("../data/insults/test.tsv", sep="\t") + test_sequences = dataframe2['text_a'] + predictions = autoBOTLibObj.predict(test_sequences) + test_classes = dataframe2['label'].values.tolist() + + output_classification_results(predictions, + test_classes, + f"./predictions/TORCH_regular.json", + model_spec={"compile": False}) + + # Example 2: Test with torch.compile enabled (custom hyperparameters) + print("\n=== Running with torch.compile enabled ===") + # Custom hyperparameters that include torch.compile + custom_torch_hyperparams = { + "batch_size": [16], + "num_epochs": [50], + "learning_rate": [0.001], + "stopping_crit": [5], + "hidden_layer_size": [128], + "num_hidden": [2], + "dropout": [0.1], + "device": ["cpu"], + "compile_model": [True] # Enable torch.compile + } + + autoBOTLibObj_compiled = 
autoBOTLib.GAlearner( + train_sequences, + train_targets, + representation_type=reptype, + n_fold_cv=3, + framework="torch", + custom_hyperparameters=custom_torch_hyperparams, + memory_storage="memory", + learner_preset="default", + verbose=1, + sparsity=0.1, + upsample=False, + time_constraint=1).evolve( + strategy="evolution", + nind=3) + + # Make predictions with compiled model + predictions_compiled = autoBOTLibObj_compiled.predict(test_sequences) + + output_classification_results(predictions_compiled, + test_classes, + f"./predictions/TORCH_compiled.json", + model_spec={"compile": True}) + + print("\n=== Comparison ===") + print(f"Regular model predictions: {len(predictions)} samples") + print(f"Compiled model predictions: {len(predictions_compiled)} samples") + print("Both models completed successfully!") + + +if __name__ == "__main__": + run() \ No newline at end of file diff --git a/tests/test_torch_compile.py b/tests/test_torch_compile.py new file mode 100644 index 0000000..b446753 --- /dev/null +++ b/tests/test_torch_compile.py @@ -0,0 +1,115 @@ +"""Test torch.compile functionality with autoBOT neural networks.""" + +import pytest +import numpy as np +import torch +from scipy.sparse import random +from scipy import stats +from numpy.random import default_rng + +from autoBOTLib.learning.torch_sparse_nn import SFNN, hyper_opt_neural + + +class TestTorchCompile: + """Tests for torch.compile integration.""" + + def setup_method(self): + """Setup test data.""" + rng = default_rng(42) + rvs = stats.uniform().rvs + # Create sparse test data + self.X = random(1000, 100, density=0.1, random_state=rng, data_rvs=rvs).tocsr() + # Create binary target + self.y = np.random.RandomState(42).randint(0, 2, 1000) + + def test_sfnn_with_compile_disabled(self): + """Test SFNN with compile_model=False (default behavior).""" + model = SFNN( + batch_size=16, + num_epochs=5, + learning_rate=0.01, + stopping_crit=2, + hidden_layer_size=32, + compile_model=False, + verbose=1 + ) + 
+ # Should not raise any errors + model.fit(self.X, self.y) + predictions = model.predict(self.X[:10]) + proba_predictions = model.predict_proba(self.X[:10]) + + assert len(predictions) == 10 + assert len(proba_predictions) == 10 + assert hasattr(model, 'coef_') + + def test_sfnn_with_compile_enabled(self): + """Test SFNN with compile_model=True.""" + model = SFNN( + batch_size=16, + num_epochs=5, + learning_rate=0.01, + stopping_crit=2, + hidden_layer_size=32, + compile_model=True, + verbose=1 + ) + + # Should not raise any errors + model.fit(self.X, self.y) + predictions = model.predict(self.X[:10]) + proba_predictions = model.predict_proba(self.X[:10]) + + assert len(predictions) == 10 + assert len(proba_predictions) == 10 + assert hasattr(model, 'coef_') + + def test_hyper_opt_neural_with_compile(self): + """Test hyperoptimization with compile_model parameter.""" + # Create a custom hyperparameter space with compile_model + custom_hyperparam_space = { + "batch_size": [16], + "num_epochs": [5], + "learning_rate": [0.01], + "stopping_crit": [2], + "hidden_layer_size": [32], + "num_hidden": [1], + "dropout": [0.1], + "device": ["cpu"], + "compile_model": [True, False] + } + + from autoBOTLib.learning.torch_sparse_nn import HyperParamNeuralObject + + hyper_opt_obj = HyperParamNeuralObject( + custom_hyperparam_space, + verbose=1, + device="cpu", + metric="f1_macro" + ) + + # Test with n_configs=2 to test both compiled and non-compiled versions + hyper_opt_obj.fit(self.X, self.y, refit=False, n_configs=2) + + assert len(hyper_opt_obj.history) == 2 + assert hyper_opt_obj.best_score >= 0 + + # Verify that compile_model was included in the configuration history + compile_values = [config.get('compile_model', False) for config in hyper_opt_obj.history] + assert len(set(compile_values)) >= 1 # At least one unique value + + @pytest.mark.skipif(not hasattr(torch, 'compile'), reason="torch.compile not available") + def test_torch_compile_availability(self): + """Verify that 
torch.compile is available in the environment.""" + assert hasattr(torch, 'compile'), "torch.compile should be available" + + # Test basic torch.compile functionality + def simple_function(x): + return x * 2 + + compiled_fn = torch.compile(simple_function) + x = torch.tensor([1.0, 2.0, 3.0]) + result = compiled_fn(x) + expected = torch.tensor([2.0, 4.0, 6.0]) + + assert torch.allclose(result, expected) \ No newline at end of file