Skip to content

Commit be9c7dd

Browse files
committed
feat: add function for applying a given transformation
1 parent 312abed commit be9c7dd

File tree

11 files changed

+184
-7
lines changed

11 files changed

+184
-7
lines changed

.idea/.gitignore

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/anjana.iml

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/inspectionProfiles/Project_Default.xml

Lines changed: 20 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/inspectionProfiles/profiles_settings.xml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/misc.xml

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/modules.xml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/vcs.xml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

anjana/anonymity/utils/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
apply_hierarchy,
2121
check_gen_level,
2222
get_transformation,
23+
apply_transformation,
2324
generate_intervals,
2425
)
2526

@@ -28,5 +29,6 @@
2829
"apply_hierarchy",
2930
"check_gen_level",
3031
"get_transformation",
32+
"apply_transformation",
3133
"generate_intervals",
3234
]

anjana/anonymity/utils/utils.py

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,45 @@ def apply_hierarchy(
7979
return data_anon
8080

8181

82+
@beartype()
83+
def apply_hierarchy_current(
    data: typing.Union[typing.List, np.ndarray],
    hierarchies: dict,
    level: int,
    actual: int,
) -> typing.Union[typing.List, np.ndarray]:
    """Apply a level of a hierarchy to a quasi-identifier given its current level.

    Each value in ``data`` is looked up in the ``actual`` level of the
    hierarchy and replaced by the value stored at the same position in the
    ``level`` level. When a value appears several times in the ``actual``
    level, the position of its first occurrence is used.

    The ``hierarchies`` dictionary is only read; it is never modified.

    :param data: data under study.
    :type data: list, numpy array

    :param hierarchies: hierarchies for generalizing a given QI.
    :type hierarchies: dictionary with the hierarchies and the levels

    :param level: level of the hierarchy to be applied.
    :type level: int

    :param actual: current level of the hierarchy applied.
    :type actual: int

    :return: column with the given level of hierarchy applied.
    :rtype: numpy array

    :raises ValueError: if ``level`` or ``actual`` is not a level of
        ``hierarchies``, or if a value of ``data`` does not appear in the
        ``actual`` level.
    """
    # Membership also rejects negative levels, which a plain upper-bound
    # check would let through to a KeyError further down.
    if level not in hierarchies:
        raise ValueError("Error, invalid hierarchy level")
    if actual not in hierarchies:
        raise ValueError("Error, invalid current hierarchy level")

    # Convert into local Series instead of writing the converted objects
    # back, so the caller's dictionary keeps its original contents.
    current = hierarchies[actual]
    if not isinstance(current, pd.Series):
        current = pd.Series(current)
    target = hierarchies[level]
    if not isinstance(target, pd.Series):
        target = pd.Series(target)

    # Index the current level once (value -> first row holding it) rather
    # than scanning the whole column again for every element of ``data``.
    first_row = {}
    for row, value in enumerate(current.tolist()):
        first_row.setdefault(value, row)

    try:
        rows = [first_row[elem] for elem in data]
    except KeyError as err:
        raise ValueError(
            f"Error, value {err.args[0]!r} not found in level {actual} "
            "of the hierarchy"
        ) from err

    # An explicit integer dtype keeps the fancy index valid for empty data.
    return target.to_numpy()[np.asarray(rows, dtype=np.intp)]
119+
120+
82121
@beartype()
83122
def check_gen_level(
84123
data: pd.DataFrame,
@@ -120,7 +159,7 @@ def get_transformation(
120159
) -> list:
121160
"""Get the transformation applied for anonymizing the data.
122161
123-
Example: a transformation (0,1,2,0) means:
162+
Example: a transformation [0,1,2,0] means:
124163
- Level 0 of generalization for the 1st QI
125164
- Level 1 of generalization for the 2nd QI
126165
- Level 2 of generalization for the 3rd QI
@@ -151,6 +190,48 @@ def get_transformation(
151190
return transformation
152191

153192

193+
@beartype()
194+
def apply_transformation(
    data_anon: pd.DataFrame,
    quasi_ident: typing.Union[typing.List, np.ndarray],
    hierarchies: dict,
    transformation: list,
) -> pd.DataFrame:
    """Apply a given transformation to the data.

    The current generalization level of every quasi-identifier is obtained
    with ``check_gen_level``; each column whose current level differs from
    the requested one is then re-mapped with ``apply_hierarchy_current``.
    The input dataframe is not modified: the transformation is applied to a
    copy, which is returned.

    :param data_anon: data under study.
    :type data_anon: pandas dataframe

    :param quasi_ident: list with the name of the columns of the dataframe
        that are quasi-identifiers.
    :type quasi_ident: list of strings

    :param hierarchies: hierarchies for generalizing the QI.
    :type hierarchies: dictionary containing one dictionary for QI
        with the hierarchies and the levels

    :param transformation: transformation to be applied, with one level per
        quasi-identifier, in the same order as ``quasi_ident``.
    :type transformation: list

    :return: dataset generalized with the transformation given
    :rtype: pandas dataframe

    :raises ValueError: if ``transformation`` does not hold exactly one level
        per quasi-identifier, or if a requested level is negative or greater
        than the highest level of the corresponding hierarchy.
    """
    if len(transformation) != len(quasi_ident):
        raise ValueError(
            "Error, the transformation must contain one level per quasi-identifier"
        )

    # Validate every requested level before touching any data, so an invalid
    # request can never leave a partially transformed result behind.
    for qi, level in zip(quasi_ident, transformation):
        if level < 0 or level > max(hierarchies[qi].keys()):
            raise ValueError("Error, invalid hierarchy level")

    current_levels = check_gen_level(data_anon, quasi_ident, hierarchies)

    # Work on a copy: assigning columns on the argument itself would silently
    # change the caller's dataframe (and any earlier result aliasing it).
    data_anon = data_anon.copy()
    for qi, level in zip(quasi_ident, transformation):
        actual = current_levels[qi]
        if level != actual:
            data_anon[qi] = apply_hierarchy_current(
                data_anon[qi].values, hierarchies[qi], level, actual
            )

    return data_anon
233+
234+
154235
@beartype()
155236
def generate_intervals(
156237
quasi_ident: typing.Union[typing.List, np.ndarray],

examples/hospital_get_transformation.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,18 @@
7575
print(transformation_raw) # [0, 0, 0]
7676
transformation_anon = utils.get_transformation(data_anon, quasi_ident, hierarchies)
7777
print(transformation_anon) # [2, 0, 1]
78+
79+
# Testing the function apply_transformation
80+
data_transform1 = utils.apply_transformation(data, quasi_ident, hierarchies, [1, 1, 1])
81+
print(data_transform1)
82+
print(utils.get_transformation(data_transform1, quasi_ident, hierarchies)) # [1, 1, 1]
83+
84+
data_transform2 = utils.apply_transformation(data, quasi_ident, hierarchies, [5, 1, 1])
85+
print(data_transform2)
86+
print(utils.get_transformation(data_transform2, quasi_ident, hierarchies)) # [5, 1, 1]
87+
88+
data_transform3 = utils.apply_transformation(
89+
data_anon, quasi_ident, hierarchies, [5, 1, 1]
90+
)
91+
print(data_transform3)
92+
print(utils.get_transformation(data_transform3, quasi_ident, hierarchies)) # [5, 1, 1]

0 commit comments

Comments
 (0)