# lm-intervention/utils_num_agreement.py (mattf1n/lm-intervention, master branch)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import pandas as pd
from copy import deepcopy


def batch(iterable, bsize=1):
    """
    Yield consecutive chunks of ``iterable`` as lists.

    Args:
        iterable: object supporting ``len()`` and slice indexing
        bsize: maximum number of items per chunk; the last chunk may be shorter

    Yields:
        list built from each successive slice of ``iterable``
    """
    n_items = len(iterable)
    for start in range(0, n_items, bsize):
        # Clamp explicitly so the slice never asks past the end.
        stop = min(start + bsize, n_items)
        chunk = iterable[start:stop]
        yield list(chunk)


def convert_results_to_pd(interventions, intervention_results, layer_fixed=None, neuron_fixed=None):
    """
    Flatten per-word intervention results into a single data frame.

    Every output row carries the word, its two strings and two candidates,
    the base/alternate probabilities, and the two candidate probabilities
    for one (layer, neuron) entry.

    Args:
        interventions: dictionary from word (e.g., profession) to intervention;
            ``base_strings[0]``, ``base_strings[1]``, ``candidates[0]`` and
            ``candidates[1]`` are read from each intervention
        intervention_results: dictionary from word to a 6-item sequence
            (candidate1_base_prob, candidate2_base_prob, candidate1_alt1_prob,
            candidate2_alt1_prob, candidate1_probs, candidate2_probs)
        layer_fixed: if None, the candidate probs are indexed as
            ``[layer][neuron]`` over ``size(0)`` x ``size(1)`` and one row is
            emitted per pair; otherwise this value is stored as the row's layer
        neuron_fixed: only consulted when ``layer_fixed`` is not None. If None,
            the candidate probs are read as ``[0][neuron]`` over ``size(1)``;
            otherwise they are treated as single values and this value is
            stored as the row's neuron

    Returns:
        pandas DataFrame with one row per emitted (word, layer, neuron) entry
    """
    rows = []

    def add_row(shared_fields, prob1, prob2, layer_id, neuron_id):
        # Each row starts from its own deep copy of the per-word fields.
        row = deepcopy(shared_fields)
        row.update({
            # intervention probs
            'candidate1_prob': float(prob1),
            'candidate2_prob': float(prob2),
            'layer': layer_id,
            'neuron': neuron_id,
        })
        rows.append(row)

    for word in intervention_results:
        intervention = interventions[word]
        (c1_base, c2_base,
         c1_alt, c2_alt,
         c1_probs, c2_probs) = intervention_results[word]

        shared_fields = {
            # strings
            'word': word,
            'base_string': intervention.base_strings[0],
            'alt_string1': intervention.base_strings[1],
            'candidate1': intervention.candidates[0],
            'candidate2': intervention.candidates[1],
            # base probs
            'candidate1_base_prob': float(c1_base),
            'candidate2_base_prob': float(c2_base),
            'candidate1_alt1_prob': float(c1_alt),
            'candidate2_alt1_prob': float(c2_alt),
        }

        if layer_fixed is not None and neuron_fixed is not None:
            # result for one specific neuron in one specific layer
            add_row(shared_fields, c1_probs, c2_probs, layer_fixed, neuron_fixed)
        elif layer_fixed is not None:
            # results for all neurons in one layer
            for neuron in range(c1_probs.size(1)):
                add_row(shared_fields,
                        c1_probs[0][neuron], c2_probs[0][neuron],
                        layer_fixed, neuron)
        else:
            # results for all layers and all neurons
            for layer in range(c1_probs.size(0)):
                for neuron in range(c1_probs.size(1)):
                    add_row(shared_fields,
                            c1_probs[layer][neuron], c2_probs[layer][neuron],
                            layer, neuron)

    return pd.DataFrame(rows)