Skip to content

Commit e6670ae

Browse files
authored
Merge pull request #20 from krstopro/master
Fix bug when two nodes had the same std, extend BAHC to work with multiple children, add margin
2 parents 5c35825 + c052f1a commit e6670ae

File tree

9 files changed

+1063
-70
lines changed

9 files changed

+1063
-70
lines changed

HBAC_scan/helper_functions.py

Lines changed: 492 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
import os
2+
import pandas as pd
3+
from aif360.datasets import StandardDataset
4+
5+
6+
# Display names for the encoded label and protected-attribute values; this
# dict is the default ``metadata`` argument of GermanDataset.
default_mappings = dict(
    label_maps=[{0: "Good Credit", 1: "Bad Credit"}],
    protected_attribute_maps=[
        {1.0: "Male", 0.0: "Female"},
        {1.0: "Old", 0.0: "Young"},
    ],
)
13+
14+
15+
def default_preprocessing(df):
    """Add a ``sex`` column derived from the ``personal_status`` codes.

    Codes A91/A93/A94 become ``"male"`` and A92/A95 become ``"female"``;
    any other value is carried over unchanged. The frame is modified in
    place and also returned.
    """
    # TODO: ignores the value of privileged_classes for 'sex'
    code_to_sex = dict.fromkeys(("A91", "A93", "A94"), "male")
    code_to_sex.update(dict.fromkeys(("A92", "A95"), "female"))

    df["sex"] = df["personal_status"].replace(code_to_sex)
    return df
28+
29+
30+
class GermanDataset(StandardDataset):
    """German credit Dataset.

    See :file:`aif360/data/raw/german/README.md`.
    """

    def __init__(
        self,
        label_name="credit",
        favorable_classes=[0],
        protected_attribute_names=[],
        privileged_classes=[],
        instance_weights_name=None,
        categorical_features=[
            "status",
            "credit_history",
            "purpose",
            "savings",
            "employment",
            "other_debtors",
            "property",
            "installment_plans",
            "housing",
            "skill_level",
            "telephone",
            "foreign_worker",
        ],
        features_to_keep=[],
        features_to_drop=["personal_status"],
        na_values=[],
        custom_preprocessing=default_preprocessing,
        metadata=default_mappings,
    ):
        """Read ``german.data`` and forward it to :obj:`StandardDataset`.

        See :obj:`StandardDataset` for a description of the arguments. They
        are passed through unchanged; ``na_values`` is additionally given to
        :func:`pandas.read_csv`.

        The defaults declared here name no protected attributes
        (``protected_attribute_names=[]``, ``privileged_classes=[]``), so
        the binarisation of 'age' into privileged `age > 25` and
        unprivileged `age <= 25` proposed by Kamiran and Calders [1]_ has
        to be requested explicitly through those two arguments.

        Raises:
            SystemExit: if the data file cannot be read. Download
                instructions are printed before exiting with status 1.

        References:
            .. [1] F. Kamiran and T. Calders, "Classifying without
               discriminating," 2nd International Conference on Computer,
               Control and Communication, 2009.

        Examples:
            In some cases, it may be useful to keep track of a mapping from
            `float -> str` for protected attributes and/or labels. If our use
            case differs from the default, we can modify the mapping stored in
            `metadata`:

            >>> label_maps = [{1.0: 'Good Credit', 0.0: 'Bad Credit'}]
            >>> protected_attribute_maps = [{1.0: 'Male', 0.0: 'Female'}]
            >>> gd = GermanDataset(protected_attribute_names=['sex'],
            ... privileged_classes=[['male']], metadata={'label_maps': label_maps,
            ... 'protected_attribute_maps': protected_attribute_maps})

            Now this information will stay attached to the dataset and can be
            used for more descriptive visualizations.
        """

        # change path
        # NOTE(review): this is resolved against the current working
        # directory, not against the location of this module.
        filepath = "../../data/GermanCredit_dataset/german.data"

        # as given by german.doc
        column_names = [
            "status",
            "month",
            "credit_history",
            "purpose",
            "credit_amount",
            "savings",
            "employment",
            "investment_as_income_percentage",
            "personal_status",
            "other_debtors",
            "residence_since",
            "property",
            "age",
            "installment_plans",
            "housing",
            "number_of_credits",
            "skill_level",
            "people_liable_for",
            "telephone",
            "foreign_worker",
            "credit",
        ]
        try:
            df = pd.read_csv(
                filepath, sep=" ", header=None, names=column_names, na_values=na_values
            )
        except IOError as err:
            print("IOError: {}".format(err))
            print("To use this class, please download the following files:")
            print(
                "\n\thttps://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data"
            )
            print(
                "\thttps://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.doc"
            )
            print("\nand place them, as-is, in the folder:")
            # Report the folder the read above actually looked in, so the
            # instructions match `filepath`.
            print("\n\t{}\n".format(os.path.abspath(os.path.dirname(filepath))))
            import sys

            sys.exit(1)

        super(GermanDataset, self).__init__(
            df=df,
            label_name=label_name,
            favorable_classes=favorable_classes,
            protected_attribute_names=protected_attribute_names,
            privileged_classes=privileged_classes,
            instance_weights_name=instance_weights_name,
            categorical_features=categorical_features,
            features_to_keep=features_to_keep,
            features_to_drop=features_to_drop,
            na_values=na_values,
            custom_preprocessing=custom_preprocessing,
            metadata=metadata,
        )
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
import os
2+
import pandas as pd
3+
from aif360.datasets import StandardDataset
4+
5+
6+
def default_preprocessing(df):
    """Recode the ``credit`` label and derive a numeric ``sex`` column.

    ``credit`` is recoded so that default is 1 and no default is 0
    (raw 1 -> 0, raw 2 -> 1). ``sex`` is derived from ``personal_status``
    with male as 0 (A91/A93/A94) and female as 1 (A92/A95). The frame is
    modified in place and also returned.
    """
    credit_recode = {1.0: 0, 2.0: 1}

    sex_codes = dict.fromkeys(("A91", "A93", "A94"), 0)
    sex_codes.update(dict.fromkeys(("A92", "A95"), 1))

    df["credit"] = df["credit"].replace(credit_recode)
    df["sex"] = df["personal_status"].replace(sex_codes)
    return df
16+
17+
18+
class GermanDataset(StandardDataset):
    """German credit Dataset.

    See :file:`aif360/data/raw/german/README.md`.
    """

    def __init__(
        self,
        label_name="credit",
        favorable_classes=[1],
        protected_attribute_names=["sex", "age"],
        privileged_classes=[],
        instance_weights_name=None,
        categorical_features=[
            "status",
            "credit_history",
            "purpose",
            "savings",
            "employment",
            "other_debtors",
            "property",
            "installment_plans",
            "housing",
            "skill_level",
            "telephone",
            "foreign_worker",
        ],
        features_to_keep=[],
        features_to_drop=["personal_status"],
        na_values=[],
        custom_preprocessing=default_preprocessing,
        metadata=None,
    ):
        """Read ``german.data`` and forward it to :obj:`StandardDataset`.

        See :obj:`StandardDataset` for a description of the arguments. They
        are passed through unchanged; ``na_values`` is additionally given to
        :func:`pandas.read_csv`. The data file is looked up relative to this
        module, in ``../../data/GermanCredit_dataset/``.

        Kamiran and Calders [1]_ propose binarising 'age' into privileged
        `age > 25` and unprivileged `age <= 25`.
        NOTE(review): the defaults name two protected attributes but leave
        ``privileged_classes`` empty; confirm against
        :obj:`StandardDataset` whether any binarisation happens without
        explicit ``privileged_classes``.

        Raises:
            SystemExit: if the data file cannot be read. Download
                instructions are printed before exiting with status 1.

        References:
            .. [1] F. Kamiran and T. Calders, "Classifying without
               discriminating," 2nd International Conference on Computer,
               Control and Communication, 2009.

        Examples:
            In some cases, it may be useful to keep track of a mapping from
            `float -> str` for protected attributes and/or labels. If our use
            case differs from the default, we can modify the mapping stored in
            `metadata`:

            >>> label_maps = [{1.0: 'Good Credit', 0.0: 'Bad Credit'}]
            >>> protected_attribute_maps = [{1.0: 'Male', 0.0: 'Female'}]
            >>> gd = GermanDataset(protected_attribute_names=['sex'],
            ... privileged_classes=[[0]], metadata={'label_maps': label_maps,
            ... 'protected_attribute_maps': protected_attribute_maps})

            ``privileged_classes=[[0]]`` selects male here because the
            default preprocessing encodes sex as male: 0, female: 1.
            Now this information will stay attached to the dataset and can be
            used for more descriptive visualizations.
        """

        filepath = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            "..",
            "..",
            "data",
            "GermanCredit_dataset",
            "german.data",
        )
        # as given by german.doc
        column_names = [
            "status",
            "month",
            "credit_history",
            "purpose",
            "credit_amount",
            "savings",
            "employment",
            "investment_as_income_percentage",
            "personal_status",
            "other_debtors",
            "residence_since",
            "property",
            "age",
            "installment_plans",
            "housing",
            "number_of_credits",
            "skill_level",
            "people_liable_for",
            "telephone",
            "foreign_worker",
            "credit",
        ]
        try:
            df = pd.read_csv(
                filepath, sep=" ", header=None, names=column_names, na_values=na_values
            )
        except IOError as err:
            print("IOError: {}".format(err))
            print("To use this class, please download the following files:")
            print(
                "\n\thttps://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data"
            )
            print(
                "\thttps://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.doc"
            )
            print("\nand place them, as-is, in the folder:")
            # Report the folder the read above actually looked in, so the
            # instructions match `filepath`.
            print("\n\t{}\n".format(os.path.abspath(os.path.dirname(filepath))))
            import sys

            sys.exit(1)

        super(GermanDataset, self).__init__(
            df=df,
            label_name=label_name,
            favorable_classes=favorable_classes,
            protected_attribute_names=protected_attribute_names,
            privileged_classes=privileged_classes,
            instance_weights_name=instance_weights_name,
            categorical_features=categorical_features,
            features_to_keep=features_to_keep,
            features_to_drop=features_to_drop,
            na_values=na_values,
            custom_preprocessing=custom_preprocessing,
            metadata=metadata,
        )

0 commit comments

Comments
 (0)