Skip to content

Commit e0e902f

Browse files
Refactor encryption, DP, and data handling; add tests
Replaced pickle-based serialization with JSON-safe methods for encrypted payloads and public keys. Updated the secure channel to use AES-GCM with HKDF key derivation and associated data (AAD), and added replay protection via per-client message counters. The differential privacy engine now supports explicit sensitivity setting and separate step accounting. Data loading now supports non-IID (label-skew) splits in addition to IID. Added unit tests for the cryptographic protocols, homomorphic encryption, and differential privacy. Model update payloads were renamed (e.g. `encrypted_gradients` → `model_params`) for consistency across client, server, and benchmark code.
1 parent 2896e32 commit e0e902f

File tree

12 files changed

+314
-93
lines changed

12 files changed

+314
-93
lines changed

.github/workflows/ci.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
name: CI
2+
3+
on:
4+
push:
5+
pull_request:
6+
7+
jobs:
8+
test:
9+
runs-on: ubuntu-latest
10+
strategy:
11+
fail-fast: false
12+
matrix:
13+
python-version: ["3.9", "3.10", "3.11"]
14+
steps:
15+
- uses: actions/checkout@v4
16+
- name: Set up Python
17+
uses: actions/setup-python@v5
18+
with:
19+
python-version: ${{ matrix.python-version }}
20+
- name: Install dependencies
21+
run: |
22+
python -m pip install --upgrade pip
23+
pip install -r requirements.txt
24+
- name: Run tests
25+
run: |
26+
pytest -q

requirements.txt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Core Dependencies
2+
numpy>=1.24.0
3+
scikit-learn>=1.3.0
4+
pandas>=2.0.0
5+
6+
# Post-Quantum Cryptography
7+
dilithium-py>=1.1.0
8+
kyber-py>=0.1.0
9+
cryptography>=41.0.0
10+
11+
# Homomorphic Encryption
12+
phe>=1.5.0
13+
14+
# Visualization
15+
matplotlib>=3.7.0
16+
17+
# Optional
18+
scipy>=1.11.0
19+
20+
# Dev / Tests
21+
pytest>=7.4.0

src/benchmark.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ def measure_payload_size(payload):
5151
"""
5252
Measure the size of a payload in bytes.
5353
"""
54-
import pickle
55-
return len(pickle.dumps(payload))
54+
import json
55+
return len(json.dumps(payload, sort_keys=True, default=str).encode("utf-8"))
5656

5757

5858
def run_experiment(use_he, num_clients=3, num_rounds=5):
@@ -143,19 +143,22 @@ def run_experiment(use_he, num_clients=3, num_rounds=5):
143143
# Prepare Update
144144
model_update = {
145145
"client_id": cid,
146-
"encrypted_gradients": {"W": new_W, "b": new_b},
146+
"model_params": {"W": new_W, "b": new_b},
147147
"num_samples": len(X_local)
148148
}
149149

150150
# Secure Send (measure time and size)
151151
info = registry_info[cid]
152+
info.setdefault("counter", 0)
153+
info["counter"] += 1
152154

153155
encrypt_start = time.time()
154156
payload = client.secure_send_update(
155157
model_update,
156158
info["server_kyber_pk"],
157159
info["session_key"],
158-
use_he=use_he
160+
use_he=use_he,
161+
msg_counter=info["counter"],
159162
)
160163
encrypt_time = time.time() - encrypt_start
161164
round_encryption_time += encrypt_time

src/data_utils.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from sklearn.preprocessing import StandardScaler, LabelEncoder
88
from sklearn.datasets import fetch_openml
99

10-
def load_and_preprocess_data(n_clients: int):
10+
def load_and_preprocess_data(n_clients: int, split_strategy: str = "iid", non_iid_label_skew: bool = False):
1111
"""
1212
Loads the Adult Income dataset, preprocesses it, and splits it for federated clients.
1313
@@ -38,24 +38,32 @@ def load_and_preprocess_data(n_clients: int):
3838
numeric_features = ['age', 'capital-gain', 'capital-loss', 'hours-per-week']
3939
X = X[numeric_features]
4040

41-
# 4. Scale features
42-
scaler = StandardScaler()
43-
X = scaler.fit_transform(X)
41+
# Split into train and test FIRST (avoid preprocessing leakage)
42+
X_train_df, X_test_df, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
4443

45-
# Split into train and test
46-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
44+
# 4. Scale features (fit on train only)
45+
scaler = StandardScaler()
46+
X_train = scaler.fit_transform(X_train_df)
47+
X_test = scaler.transform(X_test_df)
4748

4849
# Split training data among clients
4950
# We'll do a simple IID split (random shuffle is implicit in train_test_split)
5051
client_datasets = []
5152
chunk_size = len(X_train) // n_clients
53+
indices = np.arange(len(X_train))
54+
55+
# Optional simple non-IID label skew for demo purposes:
56+
# sort by label and split contiguous blocks, producing label-imbalanced clients.
57+
if non_iid_label_skew or split_strategy.lower() in ("non-iid", "noniid", "label_skew"):
58+
indices = indices[np.argsort(np.array(y_train))]
5259

5360
for i in range(n_clients):
5461
start = i * chunk_size
5562
end = (i + 1) * chunk_size
56-
X_c = X_train[start:end]
57-
y_c = y_train[start:end]
58-
client_datasets.append((X_c, y_c.values)) # Convert y to numpy array
63+
idx = indices[start:end]
64+
X_c = X_train[idx]
65+
y_c = np.array(y_train)[idx]
66+
client_datasets.append((X_c, y_c)) # y already numpy
5967

6068
print(f"Data loaded. {n_clients} clients, {chunk_size} samples per client.")
6169
return client_datasets, (X_test, y_test)

src/differential_privacy.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ class DifferentialPrivacy:
1616
Based on: Dwork et al., "Differential Privacy: A Survey of Results"
1717
"""
1818

19-
def __init__(self, epsilon: float = 1.0, delta: float = 1e-5,
20-
sensitivity: float = 2.0, noise_type: str = 'laplace'):
19+
def __init__(self, epsilon: float = 1.0, delta: float = 1e-5,
20+
sensitivity: float = 1.0, noise_type: str = 'gaussian'):
2121
"""
2222
Initialize DP mechanism.
2323
@@ -70,7 +70,21 @@ def _calculate_scale(self) -> float:
7070
else:
7171
raise ValueError(f"Unknown noise type: {self.noise_type}")
7272

73-
def add_noise(self, gradient: np.ndarray) -> np.ndarray:
73+
def set_sensitivity(self, sensitivity: float) -> None:
74+
"""
75+
Update sensitivity (typically tied to clipping norm) and recompute scale.
76+
"""
77+
self.sensitivity = float(sensitivity)
78+
self.scale = self._calculate_scale()
79+
80+
def account_step(self, epsilon_spent: float | None = None) -> None:
81+
"""
82+
Account for one privacy mechanism application.
83+
"""
84+
self.privacy_spent += float(self.epsilon if epsilon_spent is None else epsilon_spent)
85+
self.rounds_executed += 1
86+
87+
def add_noise(self, gradient: np.ndarray, *, account: bool = False) -> np.ndarray:
7488
"""
7589
Add differential privacy noise to gradient.
7690
@@ -88,13 +102,9 @@ def add_noise(self, gradient: np.ndarray) -> np.ndarray:
88102
# Gaussian distribution: N(0, scale^2)
89103
noise = np.random.normal(0, self.scale, size=gradient.shape)
90104

91-
# Add noise to gradient
92105
noisy_gradient = gradient + noise
93-
94-
# Update privacy budget (add epsilon spent in this round)
95-
self.privacy_spent += self.epsilon
96-
self.rounds_executed += 1
97-
106+
if account:
107+
self.account_step()
98108
return noisy_gradient
99109

100110
def add_noise_to_dict(self, gradient_dict: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:

src/federated_client.py

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import pickle
21
import numpy as np
32
from typing import Dict, Any, Union
43
from pqc_auth import PQCAuthenticator
@@ -81,15 +80,26 @@ def apply_differential_privacy(self,
8180

8281
# 3. Add Noise using the DP Engine
8382
# The engine handles the noise generation based on epsilon
84-
noisy_delta = self.dp_engine.add_noise(delta)
83+
# Tie DP sensitivity to clipping bound and avoid double-accounting:
84+
# - We add noise to multiple tensors (W and b), but account once per "client update".
85+
self.dp_engine.set_sensitivity(clipping_norm)
86+
noisy_delta = self.dp_engine.add_noise(delta, account=False)
8587

8688
# 4. Return differentially private weights
8789
return global_weights + noisy_delta
8890

89-
def secure_send_update(self, model_update: Dict[str, Any],
90-
server_kyber_pk: str,
91+
def account_privacy_step(self) -> None:
92+
"""
93+
Account for one DP mechanism application per client update.
94+
"""
95+
if self.use_dp:
96+
self.dp_engine.account_step()
97+
98+
def secure_send_update(self, model_update: Dict[str, Any],
99+
server_kyber_pk: str,
91100
session_key: bytes = None,
92-
use_he: bool = True) -> Dict[str, Any]:
101+
use_he: bool = True,
102+
msg_counter: int = 0) -> Dict[str, Any]:
93103
"""
94104
Encrypt and sign the model update.
95105
@@ -116,8 +126,8 @@ def secure_send_update(self, model_update: Dict[str, Any],
116126
temp_he.public_key = self.he_public_key
117127

118128
# Encrypt gradients with Paillier
119-
encrypted_gradients = {}
120-
for param_name, param_value in model_update['encrypted_gradients'].items():
129+
encrypted_params = {}
130+
for param_name, param_value in model_update['model_params'].items():
121131

122132
# --- FIX: Ensure input is always a NumPy array ---
123133
if isinstance(param_value, np.ndarray):
@@ -127,13 +137,13 @@ def secure_send_update(self, model_update: Dict[str, Any],
127137
# -----------------------------------------------
128138

129139
encrypted_list = temp_he.encrypt_vector(vec, self.he_public_key)
130-
# Serialize the encrypted list for transmission
131-
encrypted_gradients[param_name] = pickle.dumps(encrypted_list)
140+
# Serialize encrypted vector to JSON-safe payload
141+
encrypted_params[param_name] = temp_he.serialize_encrypted_vector(encrypted_list)
132142

133143
# Replace plaintext gradients with encrypted ones
134144
model_update_to_send = {
135145
"client_id": model_update["client_id"],
136-
"encrypted_gradients": encrypted_gradients,
146+
"model_params": encrypted_params,
137147
"num_samples": model_update["num_samples"]
138148
}
139149
else:
@@ -143,11 +153,15 @@ def secure_send_update(self, model_update: Dict[str, Any],
143153
signed_package = self.authenticator.sign_update(model_update_to_send, self.private_key)
144154

145155
# Step 3: Encrypt the Signed Package (Confidentiality)
146-
data_bytes = pickle.dumps(signed_package)
147-
encrypted_data = self.secure_channel.encrypt_message(data_bytes, current_session_key)
156+
encrypted_data = self.secure_channel.encrypt_json(
157+
signed_package,
158+
session_key=current_session_key,
159+
aad={"client_id": self.client_id, "counter": int(msg_counter), "type": "model_update"},
160+
)
148161

149162
# Final Payload
150163
payload_structure['client_id'] = self.client_id
164+
payload_structure['counter'] = int(msg_counter)
151165
payload_structure['encrypted_payload'] = encrypted_data
152166
payload_structure['session_key'] = current_session_key
153167

src/federated_server.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import pickle
21
from typing import Dict, Any
32
import numpy as np
43

@@ -59,6 +58,7 @@ def register_client(self, client_id: str, client_public_key: str) -> Dict[str, A
5958
self.clients[client_id] = {
6059
"dilithium_pk": client_public_key,
6160
"session_key": None,
61+
"last_counter": -1,
6262
}
6363

6464
print(f"✓ Registered client '{client_id}'")
@@ -82,6 +82,9 @@ def receive_update(self, payload: Dict[str, Any]) -> Dict[str, Any]:
8282
return {"status": "error", "reason": "unregistered_client"}
8383

8484
client_meta = self.clients[client_id]
85+
msg_counter = int(payload.get("counter", -1))
86+
if msg_counter <= client_meta.get("last_counter", -1):
87+
return {"status": "error", "reason": "replay_detected"}
8588

8689
# 1) Establish/retrieve session key
8790
kyber_ciphertext = payload.get("kyber_ciphertext")
@@ -99,14 +102,15 @@ def receive_update(self, payload: Dict[str, Any]) -> Dict[str, Any]:
99102
# 2) Decrypt the signed update with AES-GCM
100103
encrypted_payload = payload.get("encrypted_payload")
101104
try:
102-
signed_bytes = self.secure_channel.decrypt_message(
103-
encrypted_payload, session_key
105+
signed_update = self.secure_channel.decrypt_json(
106+
encrypted_payload,
107+
session_key=session_key,
108+
aad={"client_id": client_id, "counter": msg_counter, "type": "model_update"},
104109
)
105110
except Exception as e:
106111
print(f"✗ Decryption failed for client '{client_id}': {e}")
107112
return {"status": "error", "reason": "decryption_failed"}
108-
109-
signed_update = pickle.loads(signed_bytes)
113+
client_meta["last_counter"] = msg_counter
110114

111115
# 3) Verify Dilithium signature
112116
public_key_hex = client_meta["dilithium_pk"]
@@ -118,17 +122,17 @@ def receive_update(self, payload: Dict[str, Any]) -> Dict[str, Any]:
118122

119123
# 4) Extract model update
120124
model_update = signed_update["model_update"]
121-
client_update = model_update.get("encrypted_gradients")
125+
client_update = model_update.get("model_params")
122126
num_samples = model_update.get("num_samples", 0)
123127

124128
# 5) Process based on HE mode
125129
if self.use_he:
126-
# HE Mode: client_update contains encrypted values (pickled)
127-
# Deserialize the encrypted values
130+
# HE Mode: client_update contains JSON-safe encrypted vectors
128131
deserialized_update = {}
129-
for param_name, encrypted_bytes in client_update.items():
130-
deserialized_update[param_name] = pickle.loads(encrypted_bytes)
131-
132+
for param_name, encrypted_payload_list in client_update.items():
133+
deserialized_update[param_name] = self.he_manager.deserialize_encrypted_vector(
134+
self.he_manager.public_key, encrypted_payload_list
135+
)
132136
self.he_aggregator.add_client_update(deserialized_update, num_samples)
133137
else:
134138
# Plaintext Mode: client_update contains raw arrays

0 commit comments

Comments
 (0)