fix: black formatting, builtin generics, and lint cleanups

Accrame · Accrame · commit 7bd7ec0aa6ab · 2025-02-11T11:35:42.000+01:00
diff --git a/src/data/dataset.py b/src/data/dataset.py
@@ -13,8 +13,14 @@
 class FraudDataset(InMemoryDataset):
     """Wraps transaction data into a PyG InMemoryDataset."""
 
-    def __init__(self, root, transactions=None, transform=None,
-                 pre_transform=None, pre_filter=None):
+    def __init__(
+        self,
+        root,
+        transactions=None,
+        transform=None,
+        pre_transform=None,
+        pre_filter=None,
+    ):
         self.transactions = transactions
         super().__init__(root, transform, pre_transform, pre_filter)
         self.load(self.processed_paths[0])
@@ -62,8 +68,9 @@ def __repr__(self):
         return f"{self.__class__.__name__}()"
 
 
-def create_synthetic_fraud_data(num_users=1000, num_merchants=200,
-                                num_transactions=10000, fraud_rate=0.05, seed=42):
+def create_synthetic_fraud_data(
+    num_users=1000, num_merchants=200, num_transactions=10000, fraud_rate=0.05, seed=42
+):
     """Generate fake transaction data for testing."""
     np.random.seed(seed)
 
@@ -135,7 +142,9 @@ def load_kaggle_fraud_data(path):
     return df[["user_id", "merchant_id", "amount", "timestamp", "is_fraud"]]
 
 
-def split_temporal(transactions, timestamp_col="timestamp", train_ratio=0.7, val_ratio=0.15):
+def split_temporal(
+    transactions, timestamp_col="timestamp", train_ratio=0.7, val_ratio=0.15
+):
     """Split by time so we don't leak future data into training."""
     df = transactions.sort_values(timestamp_col).reset_index(drop=True)
 
diff --git a/src/data/features.py b/src/data/features.py
@@ -7,8 +7,13 @@
 class FeatureExtractor:
     """Extracts behavioral, temporal and network features from transaction data."""
 
-    def __init__(self, user_col="user_id", merchant_col="merchant_id",
-                 amount_col="amount", timestamp_col="timestamp"):
+    def __init__(
+        self,
+        user_col="user_id",
+        merchant_col="merchant_id",
+        amount_col="amount",
+        timestamp_col="timestamp",
+    ):
         self.user_col = user_col
         self.merchant_col = merchant_col
         self.amount_col = amount_col
@@ -215,8 +220,9 @@ def get_feature_names(self):
         ]
 
 
-def compute_velocity_features(transactions, user_col="user_id",
-                              timestamp_col="timestamp", windows=[1, 6, 24]):
+def compute_velocity_features(
+    transactions, user_col="user_id", timestamp_col="timestamp", windows=[1, 6, 24]
+):
     """Transaction frequency in rolling time windows."""
     df = transactions.copy()
     df["timestamp"] = pd.to_datetime(df[timestamp_col])
@@ -236,7 +242,9 @@ def compute_velocity_features(transactions, user_col="user_id",
     return df
 
 
-def compute_graph_features(transactions, user_col="user_id", merchant_col="merchant_id"):
+def compute_graph_features(
+    transactions, user_col="user_id", merchant_col="merchant_id"
+):
     """Graph-based features using NetworkX (degree, clustering coeff)."""
     import networkx as nx
 
diff --git a/src/data/graph_builder.py b/src/data/graph_builder.py
@@ -10,8 +10,14 @@ class TransactionGraphBuilder:
     """Builds transaction graphs from tabular data. Users and merchants become
     nodes, transactions become edges."""
 
-    def __init__(self, user_col="user_id", merchant_col="merchant_id",
-                 amount_col="amount", timestamp_col="timestamp", label_col="is_fraud"):
+    def __init__(
+        self,
+        user_col="user_id",
+        merchant_col="merchant_id",
+        amount_col="amount",
+        timestamp_col="timestamp",
+        label_col="is_fraud",
+    ):
         self.user_col = user_col
         self.merchant_col = merchant_col
         self.amount_col = amount_col
@@ -181,7 +187,9 @@ def _compute_node_features(self, transactions, num_nodes):
 
         return features
 
-    def get_train_test_masks(self, num_samples, train_ratio=0.7, val_ratio=0.15, seed=42):
+    def get_train_test_masks(
+        self, num_samples, train_ratio=0.7, val_ratio=0.15, seed=42
+    ):
         """Create train/val/test masks."""
         np.random.seed(seed)
         indices = np.random.permutation(num_samples)
@@ -203,7 +211,9 @@ def get_train_test_masks(self, num_samples, train_ratio=0.7, val_ratio=0.15, see
 
         return train_mask, val_mask, test_mask
 
-    def build_hetero_graph(self, transactions, device_col="device_id", include_features=True):
+    def build_hetero_graph(
+        self, transactions, device_col="device_id", include_features=True
+    ):
         """Build heterogeneous graph with user/merchant/device node types."""
         data = HeteroData()
 
@@ -374,4 +384,3 @@ def _compute_merchant_features(self, transactions, num_merchants):
                 features[:, i] = (col - col.mean()) / col.std()
 
         return features
-
diff --git a/src/inference/explainer.py b/src/inference/explainer.py
@@ -148,7 +148,7 @@ def _compute_feature_importance(self, edge_idx):
 
         return importance
 
-    def _get_prediction(self, node_idx: int) -> Dict:
+    def _get_prediction(self, node_idx: int) -> dict:
         """Get prediction for a node."""
         with torch.no_grad():
             out = self.model(
@@ -164,7 +164,7 @@ def _get_prediction(self, node_idx: int) -> Dict:
             "confidence": probs.max().item(),
         }
 
-    def _get_edge_prediction(self, edge_idx: int) -> Dict:
+    def _get_edge_prediction(self, edge_idx: int) -> dict:
         """Get prediction for an edge."""
         with torch.no_grad():
             out = self.model(
diff --git a/src/models/__init__.py b/src/models/__init__.py
@@ -1,4 +1,4 @@
-from .gat import FraudGAT
-from .gin import FraudGIN
-from .graphsage import FraudGraphSAGE
-from .hetero_gnn import HeteroFraudGNN
+from .gat import FraudGAT  # noqa: F401
+from .gin import FraudGIN  # noqa: F401
+from .graphsage import FraudGraphSAGE  # noqa: F401
+from .hetero_gnn import HeteroFraudGNN  # noqa: F401
diff --git a/src/models/gat.py b/src/models/gat.py
@@ -10,8 +10,16 @@ class FraudGAT(nn.Module):
     """Multi-head attention GNN. The attention weights are useful
     for interpretability — you can see which neighbors matter most."""
 
-    def __init__(self, in_channels, hidden_channels, out_channels,
-                 num_layers=3, heads=4, dropout=0.3, attention_dropout=0.3):
+    def __init__(
+        self,
+        in_channels,
+        hidden_channels,
+        out_channels,
+        num_layers=3,
+        heads=4,
+        dropout=0.3,
+        attention_dropout=0.3,
+    ):
         super().__init__()
 
         self.num_layers = num_layers
@@ -119,8 +127,15 @@ def get_attention_weights(self, x, edge_index):
 class EdgeFraudGAT(nn.Module):
     """Edge-level GAT — classifies transactions as fraud/legit."""
 
-    def __init__(self, in_channels, hidden_channels, edge_channels=0,
-                 num_layers=3, heads=4, dropout=0.3):
+    def __init__(
+        self,
+        in_channels,
+        hidden_channels,
+        edge_channels=0,
+        num_layers=3,
+        heads=4,
+        dropout=0.3,
+    ):
         super().__init__()
 
         self.node_encoder = FraudGAT(
diff --git a/src/models/gin.py b/src/models/gin.py
@@ -13,8 +13,15 @@
 
 class FraudGIN(nn.Module):
 
-    def __init__(self, in_channels, hidden_channels, out_channels,
-                 num_layers=3, dropout=0.3, train_eps=True):
+    def __init__(
+        self,
+        in_channels,
+        hidden_channels,
+        out_channels,
+        num_layers=3,
+        dropout=0.3,
+        train_eps=True,
+    ):
         super().__init__()
 
         self.num_layers = num_layers
@@ -73,8 +80,15 @@ class GINWithJK(nn.Module):
     """GIN + Jumping Knowledge — concatenates representations from all layers.
     Helps when graph has varying depths/diameters."""
 
-    def __init__(self, in_channels, hidden_channels, out_channels,
-                 num_layers=3, dropout=0.3, jk_mode="cat"):
+    def __init__(
+        self,
+        in_channels,
+        hidden_channels,
+        out_channels,
+        num_layers=3,
+        dropout=0.3,
+        jk_mode="cat",
+    ):
         super().__init__()
 
         self.num_layers = num_layers
@@ -141,8 +155,9 @@ def forward(self, x, edge_index, edge_attr=None):
 class EdgeFraudGIN(nn.Module):
     """Edge-level GIN."""
 
-    def __init__(self, in_channels, hidden_channels, edge_channels=0,
-                 num_layers=3, dropout=0.3):
+    def __init__(
+        self, in_channels, hidden_channels, edge_channels=0, num_layers=3, dropout=0.3
+    ):
         super().__init__()
 
         self.node_encoder = FraudGIN(
diff --git a/src/models/graphsage.py b/src/models/graphsage.py
@@ -9,8 +9,15 @@
 class FraudGraphSAGE(nn.Module):
     """GraphSAGE for node-level fraud detection."""
 
-    def __init__(self, in_channels, hidden_channels, out_channels,
-                 num_layers=3, dropout=0.3, aggregator="mean"):
+    def __init__(
+        self,
+        in_channels,
+        hidden_channels,
+        out_channels,
+        num_layers=3,
+        dropout=0.3,
+        aggregator="mean",
+    ):
         super().__init__()
 
         self.num_layers = num_layers
@@ -65,8 +72,9 @@ def get_embeddings(self, x, edge_index):
 class EdgeFraudGraphSAGE(nn.Module):
     """Edge-level fraud detection — predicts per-transaction."""
 
-    def __init__(self, in_channels, hidden_channels, edge_channels=0,
-                 num_layers=3, dropout=0.3):
+    def __init__(
+        self, in_channels, hidden_channels, edge_channels=0, num_layers=3, dropout=0.3
+    ):
         super().__init__()
 
         self.node_encoder = FraudGraphSAGE(
diff --git a/src/models/hetero_gnn.py b/src/models/hetero_gnn.py
@@ -14,8 +14,15 @@
 class HeteroFraudGNN(nn.Module):
     """Heterogeneous GNN with per-type message passing."""
 
-    def __init__(self, node_types, edge_types, hidden_channels, out_channels,
-                 num_layers=3, dropout=0.3):
+    def __init__(
+        self,
+        node_types,
+        edge_types,
+        hidden_channels,
+        out_channels,
+        num_layers=3,
+        dropout=0.3,
+    ):
         super().__init__()
 
         self.node_types = node_types
@@ -118,9 +125,16 @@ def get_embeddings(self, x_dict, edge_index_dict):
 class HeteroEdgeFraudGNN(nn.Module):
     """Edge-level hetero GNN — classifies transaction edges."""
 
-    def __init__(self, node_types, edge_types, hidden_channels, edge_channels=0,
-                 num_layers=3, dropout=0.3,
-                 target_edge_type=("user", "transacts", "merchant")):
+    def __init__(
+        self,
+        node_types,
+        edge_types,
+        hidden_channels,
+        edge_channels=0,
+        num_layers=3,
+        dropout=0.3,
+        target_edge_type=("user", "transacts", "merchant"),
+    ):
         super().__init__()
 
         self.target_edge_type = target_edge_type
diff --git a/src/training/__init__.py b/src/training/__init__.py
@@ -1,2 +1,2 @@
-from .losses import FocalLoss
-from .trainer import GNNTrainer
+from .losses import FocalLoss  # noqa: F401
+from .trainer import GNNTrainer  # noqa: F401
diff --git a/src/training/trainer.py b/src/training/trainer.py
@@ -1,6 +1,5 @@
 """Training loop for GNN models."""
 
-import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -20,9 +19,17 @@
 class GNNTrainer:
     """Handles training, validation, early stopping and checkpointing."""
 
-    def __init__(self, model, data, learning_rate=0.001, weight_decay=1e-5,
-                 class_weights=None, use_focal_loss=True, focal_gamma=2.0,
-                 device="auto"):
+    def __init__(
+        self,
+        model,
+        data,
+        learning_rate=0.001,
+        weight_decay=1e-5,
+        class_weights=None,
+        use_focal_loss=True,
+        focal_gamma=2.0,
+        device="auto",
+    ):
         # TODO: add gradient clipping as a parameter
         if device == "auto":
             self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -49,15 +56,21 @@ def __init__(self, model, data, learning_rate=0.001, weight_decay=1e-5,
         # Training state
         self.best_val_auc = 0
         self.patience_counter = 0
-        self.history: Dict[str, List[float]] = {
+        self.history: dict[str, list[float]] = {
             "train_loss": [],
             "val_loss": [],
             "val_auc": [],
             "val_f1": [],
         }
 
-    def train(self, epochs=100, patience=20, min_delta=0.001,
-              checkpoint_path=None, verbose=True):
+    def train(
+        self,
+        epochs=100,
+        patience=20,
+        min_delta=0.001,
+        checkpoint_path=None,
+        verbose=True,
+    ):
         scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
             self.optimizer, mode="max", factor=0.5, patience=5, verbose=verbose
         )
@@ -133,7 +146,7 @@ def _train_epoch(self) -> float:
 
         return loss.item()
 
-    def _validate(self) -> Tuple[float, Dict[str, float]]:
+    def _validate(self) -> tuple[float, dict[str, float]]:
         """Validate the model."""
         self.model.eval()