-
Notifications
You must be signed in to change notification settings - Fork 56
Expand file tree
/
Copy pathconfig.example.yaml
More file actions
30 lines (27 loc) · 1.11 KB
/
config.example.yaml
File metadata and controls
30 lines (27 loc) · 1.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Example configuration. Copy this file and adjust the values for your setup.

# Model identifier to process and the directory the result is written to.
model: "Qwen/Qwen3-0.6B"
output_dir: "/path/to/my/model"

inference:
  device: "cuda"
  batch_size: 8
  # NOTE(review): the key is spelled `max_lengh`, which looks like a typo of
  # `max_length`. It is kept as-is here because the config loader may read this
  # exact spelling -- confirm against the loader before renaming.
  max_lengh: 512
  flash_attn: false

measurements:
  # Optional: load measurements from a file. When set, computing the refusal
  # direction is skipped.
  # load_path: "./measurements.pt"
  # Optional: save measurements to a file for later use.
  # save_path: "./measurements.pt"
  harmful_prompts: "./data/harmful.parquet"
  harmless_prompts: "./data/harmless.parquet"
  # NOTE(review): `clip` is assumed to belong to `measurements` -- confirm.
  clip: 1.0

ablation:
  # Choices: "simple", "biprojection", "norm-preserving", "full".
  # See README.md for details.
  method: "full"
  # Choices: "percentile", "magnitude".
  sparsify_method: "percentile"
  quantile: 0.995  # Only used when sparsify_method is "percentile".
  magnitude_threshold: 0.05  # Only used when sparsify_method is "magnitude".
  top_k: 3  # Top K refusal directions to use.
  global_scale: 1.0  # Global scale factor for the ablation.
  # Per-layer settings, keyed by layer index.
  layer_overrides:
    24:  # Layer index
      scale: 0.5  # Overrides global_scale.
      # Use the refusal direction calculated from this layer instead of the
      # global direction.
      source_layer: 23
    # NOTE(review): the entry below looks like placeholder values showing a
    # second override; confirm the layer indices exist in your model or remove
    # the entry before use.
    114:
      scale: 191
      source_layer: 514