diff --git a/docs/merge_methods.md b/docs/merge_methods.md index 7ab132ad..31ebef37 100644 --- a/docs/merge_methods.md +++ b/docs/merge_methods.md @@ -347,6 +347,10 @@ Finally, the (variance-selected, calculated-weighted, and sign-agreed) task vect - `scale` (per-model, optional): A scalar to multiply the tensor by. Useful for scaling specific layers, e.g., `{"filter": "down_proj", "value": 0.5}` +- `noise_scale` (per-model, optional): Strength of the Gaussian noise added to the tensor; standard-normal samples are multiplied by this value. No noise is added when the value is unset or 0. +- `noise_seed` (per-model, optional): Integer seed for the random generator that draws the noise, so that the noise can be reproduced. +- `noise_variance` (per-model, optional): Boolean toggle for whether the noise is scaled by the tensor's standard deviation. A numeric value also acts as a multiplier on that standard deviation; unset, `false`, or 0 leaves the noise unscaled. + --- ## Summary diff --git a/mergekit/merge_methods/passthrough.py b/mergekit/merge_methods/passthrough.py index a2a0e45b..57bd8518 100644 --- a/mergekit/merge_methods/passthrough.py +++ b/mergekit/merge_methods/passthrough.py @@ -31,6 +31,27 @@ def execute(self, tensors: Dict[ModelReference, torch.Tensor]) -> torch.Tensor: if scale is not None: tensor = tensor * scale + noise_scale = self.tensor_parameters[model].data.get("noise_scale", None) + if noise_scale is not None and noise_scale != 0.0: + noise_seed = self.tensor_parameters[model].data.get("noise_seed", 42) + noise_generator = torch.Generator() + if noise_seed is not None: + noise_generator = noise_generator.manual_seed(int(noise_seed)) + print("applying noise_seed") + + print(f"Noise Generator Seed: {noise_generator.initial_seed()}") + random_tensor = torch.empty_like(tensor).normal_(generator=noise_generator) + noisy_tensor = random_tensor * noise_scale + + noise_variance = self.tensor_parameters[model].data.get("noise_variance", False) + if noise_variance is not None and noise_variance != 0.0: + noisy_tensor = noisy_tensor * (tensor.std() * noise_variance) + print("applying noise_variance") + + tensor = tensor + noisy_tensor + + print(f"noise_scale={noise_scale}, noise_seed={noise_seed}, noise_variance={noise_variance}") + return tensor def group_label(self) -> Optional[str]: @@ -46,7 +67,12 @@ def 
pretty_name(self) -> Optional[str]: return "Passthrough" def tensor_parameters(self) -> List[ConfigParameterDef]: - return [ConfigParameterDef(name="scale", required=False, default_value=None)] + return [ + ConfigParameterDef(name="scale", required=False, default_value=None), + ConfigParameterDef(name="noise_scale", required=False, default_value=None), + ConfigParameterDef(name="noise_variance", required=False, default_value=None), + ConfigParameterDef(name="noise_seed", required=False, default_value=None) + ] def make_task( self,