1 change: 1 addition & 0 deletions nanotabpfn/__init__.py
@@ -1 +1,2 @@
from nanotabpfn.interface import NanoTabPFNClassifier, NanoTabPFNRegressor
from nanotabpfn.ensembles import EnsembleClassifier
40 changes: 40 additions & 0 deletions nanotabpfn/ensembles.py
@@ -0,0 +1,40 @@
from typing import Any
import numpy as np
import torch.nn.functional as F

from nanotabpfn.interface import get_feature_preprocessor
from nanotabpfn.preprocessors import IdentityPreprocessor, Preprocessor, sample_preprocessors

class EnsembleClassifier:
def __init__(self, classifier: Any, num_preprocessors: int = 4, preprocess_features: bool = True):
self.classifier = classifier
self.model = self.classifier.model
self.num_preprocessors = num_preprocessors
self.preprocess_features = preprocess_features

def fit(self, X_train: np.ndarray, y_train: np.ndarray):
""" stores X_train and y_train for later use, also computes the highest class number occuring in num_classes """
self.X_train = X_train
if self.preprocess_features:
self.feature_preprocessor = get_feature_preprocessor(X_train)
self.X_train: np.ndarray = self.feature_preprocessor.fit_transform(self.X_train) # type:ignore
self.y_train = y_train
self.num_classes = max(set(y_train))+1
self.preprocessors: list[Preprocessor] = [IdentityPreprocessor()] + sample_preprocessors(self.num_preprocessors, self.X_train)

def predict(self, X_test: np.ndarray) -> np.ndarray:
""" calls predit_proba and picks the class with the highest probability for each datapoint """
predicted_probabilities = self.predict_proba(X_test)
return predicted_probabilities.argmax(axis=1)

def predict_proba(self, X_test: np.ndarray) -> np.ndarray:
if self.preprocess_features:
X_test = self.feature_preprocessor.transform(X_test) # type:ignore
all_probabilities = []
for preprocessor in self.preprocessors:
preprocessor.fit(self.X_train)
X_train_preprocessed = preprocessor.transform(self.X_train)
X_test_preprocessed = preprocessor.transform(X_test)
self.classifier.fit(X_train_preprocessed, self.y_train)
all_probabilities.append(self.classifier.predict_proba(X_test_preprocessed))
return np.average(np.stack(all_probabilities, axis=0), axis=0)
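Usage sketch for the new wrapper (not part of the diff; the dataset and train/test split are illustrative). The inner classifier is built with preprocess_features=False, mirroring evaluation.py, since the ensemble applies its own feature preprocessor and then averages probabilities over the identity plus sampled preprocessors:

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from nanotabpfn import NanoTabPFNClassifier, EnsembleClassifier

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# the wrapper handles feature preprocessing itself, so the inner
# classifier skips its own ColumnTransformer
ensemble = EnsembleClassifier(NanoTabPFNClassifier(preprocess_features=False), num_preprocessors=4)
ensemble.fit(X_train, y_train)
proba = ensemble.predict_proba(X_test)  # probabilities averaged over identity + sampled preprocessors
pred = ensemble.predict(X_test)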
10 changes: 8 additions & 2 deletions nanotabpfn/evaluation.py
@@ -8,6 +8,7 @@
from sklearn.metrics import balanced_accuracy_score, roc_auc_score, r2_score
from sklearn.preprocessing import LabelEncoder

from nanotabpfn.ensembles import EnsembleClassifier
from nanotabpfn.interface import NanoTabPFNRegressor, NanoTabPFNClassifier

TOY_TASKS_REGRESSION = [
@@ -35,7 +36,7 @@
@torch.no_grad()
def get_openml_predictions(
*,
model: NanoTabPFNRegressor | NanoTabPFNClassifier,
model: NanoTabPFNRegressor | NanoTabPFNClassifier | EnsembleClassifier,
tasks: list[int] | str = "tabarena-v0.1",
max_n_features: int = 500,
max_n_samples: int = 10_000,
@@ -133,6 +134,8 @@ def get_openml_predictions(
parser = argparse.ArgumentParser()
parser.add_argument("-model_type", type=str, choices=["regression", "classification"], required=True,
help="Whether to use the regressor or classifier model")
parser.add_argument("-ensemble_size", type=int, default=None,
help="Set the number of preprocessors to ensemble with. If None, then no ensembling is applied.")
parser.add_argument("-checkpoint", type=str, default=None,
help="Path to load the model weights from. If None, default weights are used.")
parser.add_argument("-dist_path", type=str, default=None,
@@ -150,7 +153,10 @@ def get_openml_predictions(
args = parser.parse_args()

if args.model_type == "classification":
model = NanoTabPFNClassifier(model=args.checkpoint, num_mem_chunks=args.num_mem_chunks)
if args.ensemble_size is None:
model = NanoTabPFNClassifier(model=args.checkpoint, num_mem_chunks=args.num_mem_chunks)
else:
model = EnsembleClassifier(NanoTabPFNClassifier(model=args.checkpoint, num_mem_chunks=args.num_mem_chunks, preprocess_features=False), num_preprocessors=args.ensemble_size)
else:
model = NanoTabPFNRegressor(model=args.checkpoint, dist=args.dist_path, num_mem_chunks=args.num_mem_chunks)
model.model.eval()
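Usage sketch for the new flag (the module-style invocation is an assumption, not taken from this diff; only the flags shown here appear in the parser):

# evaluate the plain classifier
python -m nanotabpfn.evaluation -model_type classification
# wrap it in an ensemble over up to 4 sampled preprocessors (plus the identity preprocessor)
python -m nanotabpfn.evaluation -model_type classification -ensemble_size 4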
13 changes: 9 additions & 4 deletions nanotabpfn/interface.py
@@ -80,7 +80,7 @@ def get_feature_preprocessor(X: np.ndarray | pd.DataFrame) -> ColumnTransformer:

class NanoTabPFNClassifier():
""" scikit-learn like interface """
def __init__(self, model: NanoTabPFNModel|str|None = None, device: None|str|torch.device = None, num_mem_chunks: int = 8):
def __init__(self, model: NanoTabPFNModel|str|None = None, device: None|str|torch.device = None, num_mem_chunks: int = 8, preprocess_features: bool = True):
if device is None:
device = get_default_device()
if model is None:
@@ -95,11 +95,14 @@ def __init__(self, model: NanoTabPFNModel|str|None = None, device: None|str|torc
self.model = model.to(device)
self.device = device
self.num_mem_chunks = num_mem_chunks
self.preprocess_features = preprocess_features

def fit(self, X_train: np.ndarray, y_train: np.ndarray):
""" stores X_train and y_train for later use, also computes the highest class number occuring in num_classes """
self.feature_preprocessor = get_feature_preprocessor(X_train)
self.X_train = self.feature_preprocessor.fit_transform(X_train)
self.X_train = X_train
if self.preprocess_features:
self.feature_preprocessor = get_feature_preprocessor(self.X_train)
self.X_train: np.ndarray = self.feature_preprocessor.fit_transform(self.X_train) # type:ignore
self.y_train = y_train
self.num_classes = max(set(y_train))+1

@@ -113,7 +116,9 @@ def predict_proba(self, X_test: np.ndarray) -> np.ndarray:
creates (x,y), runs it through our PyTorch Model, cuts off the classes that didn't appear in the training data
and applies softmax to get the probabilities
"""
x = np.concatenate((self.X_train, self.feature_preprocessor.transform(X_test)))
if self.preprocess_features:
X_test = self.feature_preprocessor.transform(X_test) # type:ignore
x = np.concatenate((self.X_train, X_test))
y = self.y_train
with torch.no_grad():
x = torch.from_numpy(x).unsqueeze(0).to(torch.float).to(self.device) # introduce batch size 1
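A small sketch of the new preprocess_features flag (the random arrays are illustrative, not part of the diff): with the default, fit() runs the ColumnTransformer from get_feature_preprocessor over X_train; with preprocess_features=False the raw arrays are used as-is, which is what the ensemble wrapper relies on to apply its own preprocessors:

import numpy as np
from nanotabpfn import NanoTabPFNClassifier

X_train = np.random.rand(64, 5).astype(np.float32)  # illustrative numeric features
y_train = np.random.randint(0, 2, size=64)
X_test = np.random.rand(16, 5).astype(np.float32)

clf = NanoTabPFNClassifier()                          # default: fits its own feature preprocessor in fit()
clf.fit(X_train, y_train)
proba = clf.predict_proba(X_test)

raw = NanoTabPFNClassifier(preprocess_features=False) # raw arrays are used unchanged, so any preprocessing
raw.fit(X_train, y_train)                             # (e.g. by EnsembleClassifier) must happen beforehand
raw_proba = raw.predict_proba(X_test)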
66 changes: 66 additions & 0 deletions nanotabpfn/preprocessors.py
@@ -0,0 +1,66 @@
from abc import ABC, abstractmethod
from typing import Any, override
import numpy as np
from sklearn.preprocessing import FunctionTransformer


class Preprocessor(ABC):
@abstractmethod
def fit(self, X: np.ndarray) -> None:
pass

@abstractmethod
def transform(self, X: np.ndarray) -> np.ndarray:
pass


class IdentityPreprocessor(Preprocessor):
@override
def fit(self, X: np.ndarray) -> None:
pass

@override
def transform(self, X: np.ndarray) -> np.ndarray:
return X


class SklearnPreprocessor(Preprocessor):
def __init__(self, transformer: Any):
self.transformer = transformer

@override
def fit(self, X: np.ndarray) -> None:
self.transformer.fit(X)

@override
def transform(self, X: np.ndarray) -> np.ndarray:
return np.nan_to_num(self.transformer.transform(X))


class LogPreprocessor(SklearnPreprocessor):
"""log1p for right-skewed, non-negative features."""
@override
def __init__(self):
super().__init__(FunctionTransformer(func=np.log1p, feature_names_out="one-to-one"))


class AsinhPreprocessor(SklearnPreprocessor):
"""Signed log-like transform: linear near 0, log for large |x|; works with negatives."""
@override
def __init__(self):
super().__init__(FunctionTransformer(func=np.arcsinh, feature_names_out="one-to-one"))


def sample_preprocessors(num_preprocessors: int, X_train: np.ndarray) -> list[Preprocessor]:
"""
Return up to num_preprocessors preprocessors chosen based on X_train (asinh always, plus log1p when all features are non-negative).
"""
picks: list[Preprocessor] = []

picks.append(AsinhPreprocessor())

# log1p is only applicable when every feature is non-negative
if np.nanmin(X_train) >= 0:
picks.append(LogPreprocessor())

return picks[:num_preprocessors]
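A short sketch of how the Preprocessor API is consumed (the random matrices are illustrative): this mirrors the loop in EnsembleClassifier.predict_proba, fitting each preprocessor on the training split and reusing the same transform on the test split.

import numpy as np
from nanotabpfn.preprocessors import IdentityPreprocessor, sample_preprocessors

X_train = np.abs(np.random.randn(100, 3))  # non-negative, so both asinh and log1p are selected
X_test = np.abs(np.random.randn(20, 3))

preprocessors = [IdentityPreprocessor()] + sample_preprocessors(4, X_train)
for p in preprocessors:
    p.fit(X_train)                    # fit on the training split only
    X_train_t = p.transform(X_train)
    X_test_t = p.transform(X_test)    # same transform applied to the test split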