LinguaLens/config_template.py at main · THU-KEG/LinguaLens · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
"""
Configuration template for LinguaLens.

Copy this file to config.py and update the paths according to your setup.
"""

# =============================================================================
# Model Paths Configuration
# =============================================================================

# Location of the LLaMA-3.1-8B model (placeholder value -- replace it with
# the real path on your machine).
MODEL_PATH: str = "/path/to/your/llama-3.1-8b-model"

# Per-layer SAE checkpoint location. The "{:02d}" placeholder takes the layer
# number and zero-pads it to two digits, e.g.
# SAE_PATH_TEMPLATE.format(8) ends in "layer_08".
SAE_PATH_TEMPLATE: str = "/path/to/your/sae/layer_{:02d}"

# Layer numbers for which you have an SAE checkpoint; edit this list so it
# matches the checkpoints you actually have.
AVAILABLE_SAE_LAYERS: list = [0, 1, 8, 15, 24, 29, 30]

# =============================================================================
# Hardware Configuration
# =============================================================================

# Device string used for computation when none is specified.
# Switch to "cpu" on machines without a GPU.
DEFAULT_DEVICE: str = "cuda:0"

# =============================================================================
# Analysis Configuration
# =============================================================================

# Layers examined by default in cross-layer studies.
DEFAULT_ANALYSIS_LAYERS: list = [0, 1, 8, 15, 24, 30]

# How many top features to extract by default.
DEFAULT_TOP_K: int = 10

# =============================================================================
# Data Paths
# =============================================================================
# All three values below are relative paths.

# Where the linguistic feature data lives.
FEATURE_DATA_DIR: str = "data/features"

# Where the vector data used for bilingual analysis lives.
VECTOR_DATA_DIR: str = "data/vectors"

# Where analysis results are written.
OUTPUT_DIR: str = "outputs"

# =============================================================================
# Usage Examples
# =============================================================================

if __name__ == "__main__":
    # When executed directly, print a short summary of the path/device
    # settings above followed by a reminder to customise them.
    summary_lines = (
        "LinguaLens Configuration Template",
        "=" * 40,
        f"Model Path: {MODEL_PATH}",
        f"SAE Template: {SAE_PATH_TEMPLATE}",
        f"Available Layers: {AVAILABLE_SAE_LAYERS}",
        f"Default Device: {DEFAULT_DEVICE}",
        # The leading newline leaves a blank line before the reminder.
        "\nPlease update these paths according to your setup!",
    )
    for summary_line in summary_lines:
        print(summary_line)