Skip to content

Commit 5968524

Browse files
authored
Merge pull request #94 from IFCA-Advanced-Computing/develop
feat: add new function for applying a given transformation
2 parents 08d50e5 + 65a3906 commit 5968524

File tree

13 files changed

+223
-9
lines changed

13 files changed

+223
-9
lines changed

.idea/.gitignore

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/anjana.iml

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/inspectionProfiles/Project_Default.xml

Lines changed: 20 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/inspectionProfiles/profiles_settings.xml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/misc.xml

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/modules.xml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/vcs.xml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

anjana/anonymity/utils/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
apply_hierarchy,
2121
check_gen_level,
2222
get_transformation,
23+
apply_transformation,
2324
generate_intervals,
2425
)
2526

@@ -28,5 +29,6 @@
2829
"apply_hierarchy",
2930
"check_gen_level",
3031
"get_transformation",
32+
"apply_transformation",
3133
"generate_intervals",
3234
]

anjana/anonymity/utils/utils.py

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import pandas as pd
2121
from beartype import beartype
2222
from beartype import typing
23+
from copy import copy
2324

2425

2526
@beartype()
@@ -79,6 +80,45 @@ def apply_hierarchy(
7980
return data_anon
8081

8182

83+
@beartype()
def apply_hierarchy_current(
    data: typing.Union[typing.List, np.ndarray],
    hierarchies: dict,
    level: int,
    actual: int,
) -> typing.Union[typing.List, np.ndarray]:
    """Apply a level of a hierarchy to a quasi-identifier given its current level.

    Every value of ``data`` is located in the hierarchy column for the
    ``actual`` level (first occurrence), and replaced by the value stored at
    the same position in the column for the requested ``level``.

    The ``hierarchies`` dictionary is not modified.

    :param data: data under study, expressed at the level ``actual``.
    :type data: list, numpy array

    :param hierarchies: hierarchies for generalizing a given QI.
    :type hierarchies: dictionary with the hierarchies and the levels

    :param level: level of the hierarchy to be applied.
    :type level: int

    :param actual: current level of the hierarchy applied.
    :type actual: int

    :return: column with the given level of hierarchy applied.
    :rtype: numpy array

    :raises ValueError: if ``level`` or ``actual`` is not a valid level of
        the hierarchy, or if a value of ``data`` is not present in the
        hierarchy at the level ``actual``.
    """
    num_level = len(hierarchies) - 1
    if level < 0 or level > num_level or level not in hierarchies:
        raise ValueError("Error, invalid hierarchy level")
    if actual not in hierarchies:
        raise ValueError("Error, invalid current hierarchy level")

    # Work on local arrays so the caller's dictionary is left untouched.
    current_values = pd.Series(hierarchies[actual]).values
    target_values = pd.Series(hierarchies[level]).values

    # Map each value of the current level to the first row where it appears,
    # so that each element of ``data`` is resolved with a single lookup
    # instead of a full scan of the hierarchy column.
    first_position = {}
    for idx, value in enumerate(current_values):
        first_position.setdefault(value, idx)

    try:
        pos = [first_position[elem] for elem in data]
    except KeyError as err:
        raise ValueError(
            f"Error, value {err.args[0]!r} not found in level {actual} "
            "of the hierarchy"
        ) from err

    data_anon = target_values[pos]
    return data_anon
120+
121+
82122
@beartype()
83123
def check_gen_level(
84124
data: pd.DataFrame,
@@ -120,7 +160,7 @@ def get_transformation(
120160
) -> list:
121161
"""Get the transformation applied for anonymizing the data.
122162
123-
Example: a transformation (0,1,2,0) means:
163+
Example: a transformation [0,1,2,0] means:
124164
- Level 0 of generalization for the 1st QI
125165
- Level 1 of generalization for the 2nd QI
126166
- Level 2 of generalization for the 3rd QI
@@ -151,6 +191,51 @@ def get_transformation(
151191
return transformation
152192

153193

194+
@beartype()
195+
def apply_transformation(
196+
data: pd.DataFrame,
197+
quasi_ident: typing.Union[typing.List, np.ndarray],
198+
hierarchies: dict,
199+
transformation: list,
200+
) -> pd.DataFrame:
201+
"""Apply a given transformation to the data.
202+
203+
:param data: data under study.
204+
:type data: pandas dataframe
205+
206+
:param quasi_ident: list with the name of the columns of the dataframe
207+
that are quasi-identifiers.
208+
:type quasi_ident: list of strings
209+
210+
:param hierarchies: hierarchies for generalizing the QI.
211+
:type hierarchies: dictionary containing one dictionary for QI
212+
with the hierarchies and the levels
213+
214+
:param transformation: transformation to be applied
215+
:type transformation: list
216+
217+
:return: dataset generalized with the transformation given
218+
:rtype: pandas dataframe
219+
"""
220+
data_anon = copy(data)
221+
actual_transform = check_gen_level(data_anon, quasi_ident, hierarchies)
222+
for i, qi in enumerate(quasi_ident):
223+
hierarchy_qi = hierarchies[qi]
224+
level = transformation[i]
225+
if level < 0:
226+
raise ValueError("Error, invalid hierarchy level")
227+
if level > max(hierarchies[qi].keys()):
228+
raise ValueError("Error, invalid hierarchy level")
229+
actual = actual_transform[qi]
230+
if level != actual:
231+
column = apply_hierarchy_current(
232+
data_anon[qi].values, hierarchy_qi, level, actual
233+
)
234+
data_anon[qi] = column
235+
236+
return data_anon
237+
238+
154239
@beartype()
155240
def generate_intervals(
156241
quasi_ident: typing.Union[typing.List, np.ndarray],

examples/hospital_get_transformation.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,18 @@
7575
print(transformation_raw) # [0, 0, 0]
7676
transformation_anon = utils.get_transformation(data_anon, quasi_ident, hierarchies)
7777
print(transformation_anon) # [2, 0, 1]
78+
79+
# Testing the function apply_transformation
80+
data_transform1 = utils.apply_transformation(data, quasi_ident, hierarchies, [1, 1, 1])
81+
print(data_transform1)
82+
print(utils.get_transformation(data_transform1, quasi_ident, hierarchies)) # [1, 1, 1]
83+
84+
data_transform2 = utils.apply_transformation(data, quasi_ident, hierarchies, [5, 1, 1])
85+
print(data_transform2)
86+
print(utils.get_transformation(data_transform2, quasi_ident, hierarchies)) # [5, 1, 1]
87+
88+
data_transform3 = utils.apply_transformation(
89+
data_anon, quasi_ident, hierarchies, [5, 1, 1]
90+
)
91+
print(data_transform3)
92+
print(utils.get_transformation(data_transform3, quasi_ident, hierarchies)) # [5, 1, 1]

0 commit comments

Comments
 (0)