Merged
45 changes: 40 additions & 5 deletions README.md
@@ -158,25 +158,60 @@ The spatial stage creates the transaction network topology. This determines whic

#### 1. Scale-Free Network Blueprint

AMLGentex generates scale-free networks where node degree follows a truncated discrete power-law distribution:
AMLGentex generates scale-free networks where node degree follows a truncated discrete power-law distribution with exponential cutoff:

$$P(K=k) \propto k^{-\gamma}, \quad k \in \{k_{\min}, \ldots, k_{\max}\}$$
$$P(K=k) \propto k^{-\gamma} \exp\left(-\frac{2k}{k_{\max}}\right), \quad k \in \{k_{\min}, \ldots, k_{\max}\}$$

The exponential cutoff provides a softer tail truncation than a hard cap: it is more realistic for finite-size networks and prevents extreme degree concentration near k_max.
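As a minimal sketch (not AMLGentex's actual implementation), the PMF above can be computed directly with numpy; `kmin`, `kmax`, and `gamma` here are illustrative values:

```python
import numpy as np

def cutoff_powerlaw_pmf(kmin: int, kmax: int, gamma: float) -> np.ndarray:
    """PMF of P(K=k) ∝ k^(-gamma) * exp(-2k/kmax) on k = kmin..kmax."""
    k = np.arange(kmin, kmax + 1, dtype=float)
    weights = k ** (-gamma) * np.exp(-2.0 * k / kmax)
    return weights / weights.sum()

pmf = cutoff_powerlaw_pmf(kmin=1, kmax=100, gamma=2.0)
```

Both factors decrease in k, so the PMF is strictly decreasing over the support and integrates (sums) to 1 after normalization.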

**Parameters:**
- **`kmin`**: Minimum degree (default: 1)
- **`kmax`**: Maximum degree (default: n-1, capped for simple graphs)
- **`kmax`**: Maximum degree (default: floor(√n), capped at n-1 for simple graphs)
- **`gamma`**: Power-law exponent (optional - solved from average_degree if not provided)
- **`average_degree`**: Target mean degree (specify this OR gamma)

**Computing gamma from average_degree:**

The expected degree for a given γ is:

$$\mu(\gamma) = \frac{\sum_{k=k_{\min}}^{k_{\max}} k^{1-\gamma}}{\sum_{k=k_{\min}}^{k_{\max}} k^{-\gamma}}$$
$$\mu(\gamma) = \frac{\sum_{k=k_{\min}}^{k_{\max}} k \cdot k^{-\gamma} \exp(-2k/k_{\max})}{\sum_{k=k_{\min}}^{k_{\max}} k^{-\gamma} \exp(-2k/k_{\max})}$$

This function is strictly decreasing in γ: smaller γ → heavier tail → larger mean; larger γ → mass concentrates at k_min → smaller mean.

Given a target average degree, we solve μ(γ) = target using Brent's method. Truncation (finite k_max) guarantees a finite mean for any γ > 0, and monotonicity ensures a unique solution.
Given a target average degree, we solve μ(γ) = target using Brent's method. The exponential cutoff ensures smooth decay near k_max, and monotonicity guarantees a unique solution.
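A minimal sketch of that root-solve, assuming scipy's `brentq` is available; it mirrors the formulas above but is not the library's exact code:

```python
import numpy as np
from scipy.optimize import brentq

def mean_degree(kmin: int, kmax: int, gamma: float) -> float:
    # mu(gamma) under P(K=k) ∝ k^(-gamma) * exp(-2k/kmax)
    k = np.arange(kmin, kmax + 1, dtype=float)
    w = k ** (-gamma) * np.exp(-2.0 * k / kmax)
    return float((k * w).sum() / w.sum())

def solve_gamma(kmin: int, kmax: int, target: float,
                bounds: tuple = (0.01, 20.0)) -> float:
    # mu is strictly decreasing in gamma, so the bracketed root is unique
    return brentq(lambda g: mean_degree(kmin, kmax, g) - target, *bounds)

gamma = solve_gamma(kmin=1, kmax=100, target=2.0)
```

For these parameters the table below reports γ ≈ 2.23.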

**Example: n=10,000 nodes (kmax=100)**

| Target Mean | γ |
|-------------|-------|
| 1.5 | 2.67 |
| 2.0 | 2.23 |
| 3.0 | 1.85 |
| 5.0 | 1.49 |
| 10.0 | 1.06 |
| 20.0 | 0.58 |

**Survival function P(K > k):**

| k | μ=1.5 | μ=2.0 | μ=3.0 | μ=5.0 | μ=10.0 | μ=20.0 |
|-----|-------|-------|-------|-------|--------|--------|
| 1 | 21.1% | 30.3% | 41.4% | 54.5% | 72.2% | 88.7% |
| 5 | 2.2% | 5.2% | 11.0% | 20.9% | 40.2% | 66.7% |
| 10 | 0.6% | 1.9% | 5.0% | 11.3% | 26.3% | 51.7% |
| 20 | 0.1% | 0.6% | 1.9% | 5.1% | 14.4% | 34.1% |
| 50 | <0.1% | 0.1% | 0.3% | 1.0% | 3.7% | 11.3% |
| 90 | <0.1% | <0.1% | <0.1% | 0.1% | 0.3% | 1.2% |

**Expected nodes with degree > k (n=10,000):**

| k | μ=1.5 | μ=2.0 | μ=3.0 | μ=5.0 | μ=10.0 | μ=20.0 |
|-----|-------|-------|-------|-------|--------|--------|
| 10 | 62 | 192 | 498 | 1,133 | 2,633 | 5,175 |
| 20 | 14 | 58 | 186 | 509 | 1,441 | 3,415 |
| 50 | 1 | 7 | 29 | 99 | 365 | 1,128 |
| 90 | <1 | <1 | 2 | 8 | 33 | 121 |

The exponential cutoff smoothly suppresses extreme degrees rather than imposing a hard wall at kmax.
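The survival tables can be reproduced from the PMF; a sketch assuming numpy, with γ taken from the table above:

```python
import numpy as np

def survival(kmin: int, kmax: int, gamma: float, k: int) -> float:
    """P(K > k) under P(K=k) ∝ k^(-gamma) * exp(-2k/kmax)."""
    ks = np.arange(kmin, kmax + 1, dtype=float)
    w = ks ** (-gamma) * np.exp(-2.0 * ks / kmax)
    pmf = w / w.sum()
    return float(pmf[ks > k].sum())

# gamma = 2.23 corresponds to target mean degree 2.0 in the table above
tail_10 = survival(kmin=1, kmax=100, gamma=2.23, k=10)

# Expected node counts are just n * P(K > k)
n_above_10 = 10_000 * tail_10
```

The survival function is monotonically decreasing in k, so each column of the tables shrinks from top to bottom.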

#### 2. Pattern Injection

18 changes: 12 additions & 6 deletions experiments/template_experiment/config/data.yaml
@@ -75,13 +75,15 @@ optimisation_bounds:
# ML Selector parameters (controls biased account selection for AML patterns)
ml_selector:
structure_weights:
degree: [0.1, 0.5]
betweenness: [0.1, 0.5]
pagerank: [0.1, 0.5]
degree: [-0.3, 0.3]
betweenness: [-0.3, 0.3]
pagerank: [-0.3, 0.3]
kyc_weights:
init_balance: [0.0, 0.2]
salary: [0.0, 0.1]
age: [0.0, 0.1]
init_balance: [-0.2, 0.2] # Negative = prefer low-balance accounts
salary: [-0.1, 0.1]
age: [-0.1, 0.1]
propagation_weights:
city: [0.0, 0.5] # Controls geographic/network clustering of SAR accounts
participation_decay: [0.1, 0.3]
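These bounds now span negative values because selection scores are signed sums of z-scored features; a hypothetical sketch of that scoring (feature names and values are illustrative, not the selector's actual code):

```python
import numpy as np

def selection_scores(features: dict, weights: dict) -> np.ndarray:
    # features: feature name -> raw value per node
    # weights: feature name -> signed weight; negative prefers LOW values
    n = len(next(iter(features.values())))
    score = np.zeros(n)
    for name, w in weights.items():
        if w != 0:  # a zero weight disables the feature entirely
            x = np.asarray(features[name], dtype=float)
            z = (x - x.mean()) / x.std()
            score += w * z
    return score

# weight -0.2 biases selection toward low-balance accounts
scores = selection_scores(
    {"init_balance": [100.0, 1_000.0, 10_000.0]},
    {"init_balance": -0.2},
)
```

This is also why the selector's guards change from `weight > 0` to `weight != 0` later in this diff: a negative weight is meaningful and must not be skipped.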
scale-free:
# Truncated discrete power law parameters: P(K=k) ∝ k^(-gamma), k ∈ {kmin, ..., kmax}
@@ -90,6 +92,10 @@ scale-free:
# kmax: null # Maximum degree (defaults to floor(sqrt(n)) if not specified)
# gamma: 2.0 # Power law exponent (if not provided, solved from average_degree)
average_degree: 2.0 # Target mean degree (used to solve for gamma if gamma not provided)
normal_patterns:
# Controls how main accounts are selected for normal transaction patterns.
# Participation decay prevents high-degree nodes from dominating pattern membership.
participation_decay: 0.5 # Factor applied after each selection (0.5 = halve weight each time)
ml_selector:
# Money Laundering Account Selector Configuration
# Controls biased selection of accounts for AML typology injection
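A sketch of the participation-decay mechanic configured above (hypothetical weights; the actual `Nominator` implementation may differ):

```python
import numpy as np

def sample_with_decay(weights, n_picks: int, decay: float, rng) -> list:
    # After each selection the chosen node's weight is multiplied by
    # `decay`, so high-degree nodes cannot dominate pattern membership.
    w = np.asarray(weights, dtype=float).copy()
    picks = []
    for _ in range(n_picks):
        p = w / w.sum()
        i = int(rng.choice(len(w), p=p))
        picks.append(i)
        w[i] *= decay
    return picks

rng = np.random.default_rng(seed=0)
picks = sample_with_decay([8.0, 1.0, 1.0], n_picks=6, decay=0.5, rng=rng)
```

With `decay: 0.5`, even a node that starts with most of the weight loses half of its remaining weight on each selection, flattening participation over repeated draws.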
4 changes: 2 additions & 2 deletions src/data_creation/generator.py
@@ -100,7 +100,7 @@ def run_spatial(self, force=False):

# Step 1: Generate degree distribution if needed (goes to spatial output)
degree_path = spatial_output / degree_file
if not degree_path.exists():
if force or not degree_path.exists():
logger.info(f" [1/2] Generating degree distribution...")
start = time.time()
# Call directly with config dict (has absolute paths)
@@ -223,7 +223,7 @@ def run_spatial_baseline(self, force=False):

# Step 1: Generate degree distribution if needed (goes to spatial output)
degree_path = Path(output_dir) / degree_file
if not degree_path.exists():
if force or not degree_path.exists():
logger.info(f" [1/2] Generating degree distribution...")
start = time.time()
generate_scalefree.generate_degree_file_from_config(self.config)
36 changes: 26 additions & 10 deletions src/data_creation/spatial_simulation/generate_scalefree.py
@@ -4,7 +4,12 @@
This module implements proper discrete power-law sampling for directed graphs,
following the specification for truncated discrete power law on k = kmin..kmax.

P(K=k) ∝ k^(-gamma), k ∈ {kmin, ..., kmax}
Distribution forms:
- Pure power law: P(K=k) ∝ k^(-gamma)
- With exponential cutoff: P(K=k) ∝ k^(-gamma) * exp(-2k/kmax)

The exponential cutoff provides a softer tail truncation, which is more realistic
for finite-size networks and prevents extreme degree concentration at kmax.
"""

import numpy as np
@@ -22,7 +27,12 @@

def truncated_discrete_powerlaw_pmf(k_values: np.ndarray, gamma: float) -> np.ndarray:
"""
Compute PMF for truncated discrete power law: P(K=k) ∝ k^(-gamma).
Compute PMF for truncated discrete power law with exponential cutoff.

P(K=k) ∝ k^(-gamma) * exp(-2k/kmax)

where kmax is inferred from max(k_values). The exponential cutoff provides
a softer tail truncation, which is more realistic for finite networks.

Args:
k_values: Array of integer degree values (support of distribution)
@@ -34,13 +44,17 @@ def truncated_discrete_powerlaw_pmf(k_values: np.ndarray, gamma: float) -> np.nd
if gamma <= 0.0:
raise ValueError(f"gamma must be > 0.0, got {gamma}")

unnormalized = np.power(k_values.astype(float), -gamma)
kmax = k_values.max()

# Power law with exponential cutoff
unnormalized = np.power(k_values.astype(float), -gamma) * np.exp(-2.0 * k_values / kmax)

return unnormalized / unnormalized.sum()


def truncated_discrete_powerlaw_mean(kmin: int, kmax: int, gamma: float) -> float:
"""
Compute mean of truncated discrete power law.
Compute mean of truncated discrete power law with exponential cutoff.

Args:
kmin: Minimum degree (inclusive)
@@ -59,7 +73,7 @@ def solve_gamma_for_mean(
kmin: int,
kmax: int,
target_mean: float,
gamma_bounds: Tuple[float, float] = (1.01, 20.0)
gamma_bounds: Tuple[float, float] = (0.01, 20.0)
) -> float:
"""
Solve for gamma such that truncated discrete power law has target mean.
@@ -70,7 +84,7 @@
kmin: Minimum degree
kmax: Maximum degree
target_mean: Desired mean degree
gamma_bounds: Search bounds for gamma (default: 1.01 to 20.0)
gamma_bounds: Search bounds for gamma (default: 0.01 to 20.0)

Returns:
gamma value that achieves target_mean
@@ -110,7 +124,7 @@ def sample_truncated_discrete_powerlaw(
rng: np.random.Generator
) -> np.ndarray:
"""
Sample n values from truncated discrete power law.
Sample n values from truncated discrete power law with exponential cutoff.

Args:
n: Number of samples
@@ -233,10 +247,12 @@ def discrete_powerlaw_degree_distribution(
Samples in-degrees and out-degrees separately from the same distribution,
then balances sums with integer-only adjustments.

Distribution: P(K=k) ∝ k^(-gamma) * exp(-2k/kmax)

Args:
n: Number of nodes
kmin: Minimum degree (default: 1)
kmax: Maximum degree (default: n-1)
kmax: Maximum degree (default: floor(sqrt(n)))
gamma: Power law exponent. If None, solved from average_degree.
average_degree: Target mean degree. Required if gamma is None.
seed: Random seed
@@ -256,7 +272,7 @@
if kmin < 0:
raise ValueError(f"kmin must be >= 0, got {kmin}")
if kmax is None:
kmax = n - 1
kmax = min(int(np.floor(np.sqrt(n))), n - 1)
if kmax < kmin:
raise ValueError(f"kmax ({kmax}) must be >= kmin ({kmin})")
if kmax > n - 1:
@@ -379,7 +395,7 @@ def generate_degree_file_from_config(config: dict) -> dict:
scale_free_params = config["scale-free"]
gamma = scale_free_params.get("gamma", None)
kmin = int(scale_free_params.get("kmin", scale_free_params.get("loc", 1)))
kmax = scale_free_params.get("kmax", n - 1)
kmax = scale_free_params.get("kmax", None)
if kmax is not None:
kmax = int(kmax)
average_degree = scale_free_params.get("average_degree", None)
8 changes: 4 additions & 4 deletions src/data_creation/spatial_simulation/ml_account_selector.py
@@ -419,7 +419,7 @@ def _compute_final_weights(self):
# Note: age uses no log (not heavy-tailed), salary/balance use log
z_kyc = {}
for kyc_feature, weight in self.kyc_weights.items():
if weight > 0:
if weight != 0:
kyc_dict = {}
for node in nodes:
kyc_value = self.g.nodes[node].get(kyc_feature, 0.0)
@@ -437,20 +437,20 @@

# Add structural component (z-scored)
for feature, weight in self.structure_weights.items():
if weight > 0 and feature in z_structural.get(node, {}):
if weight != 0 and feature in z_structural.get(node, {}):
score += weight * z_structural[node][feature]

# Add propagation component (z-scored global locality fields)
for label_type in self.propagate_labels:
global_key = f"{label_type}_global"
prop_weight = self.propagation_weights.get(label_type, 0.0)

if prop_weight > 0 and global_key in z_propagation:
if prop_weight != 0 and global_key in z_propagation:
score += prop_weight * z_propagation[global_key].get(node, 0.0)

# Add direct KYC component (z-scored)
for kyc_feature, weight in self.kyc_weights.items():
if weight > 0 and kyc_feature in z_kyc:
if weight != 0 and kyc_feature in z_kyc:
score += weight * z_kyc[kyc_feature].get(node, 0.0)

scores[node] = score
@@ -638,7 +638,11 @@ def read_normal_models(self, reader):
"""
header = next(reader)

self.nominator = Nominator(self.g)
# Get participation decay from config (default 1.0 = no decay for backward compatibility)
normal_patterns_conf = self.conf.get('normal_patterns', {})
participation_decay = normal_patterns_conf.get('participation_decay', 1.0)

self.nominator = Nominator(self.g, participation_decay=participation_decay)

for row in reader:
count = int(row[header.index('count')])