Skip to content

Commit 64a33f0

Browse files
Zoe Papakipos and facebook-github-bot
authored and committed
Add swap_gendered_words augmentation (#110)
Summary: Pull Request resolved: #110 Fairness augmentation which uses predefined aligned lists of male- and female-gendered words to swap the gender of words in the input text. Reviewed By: jbitton Differential Revision: D30341486 fbshipit-source-id: 9ba832d078a730728a2e6643b87e60cfa0f64389
1 parent bbc7233 commit 64a33f0

File tree

12 files changed

+496
-3
lines changed

12 files changed

+496
-3
lines changed

.github/workflows/lint_python.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
- run: pip install bandit black codespell flake8 isort pyre-check pytest pyupgrade safety
1414
- run: bandit --recursive --skip B101,B301,B303,B311,B403 .
1515
- run: black --check . || true
16-
- run: codespell --ignore-words-list="tha" --skip="*/text_tests,*/misspelling.json"
16+
- run: codespell --ignore-words-list="tha" --skip="*/text_tests,*assets/text/*"
1717
- run: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
1818
- run: flake8 . --count --exit-zero --max-complexity=15 --max-line-length=90 --show-source --statistics
1919
- run: isort --check-only --profile black . || true
Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
{
2+
"abbot":"abbess",
3+
"abbess": "abbot",
4+
"abbots": "abbesses",
5+
"abbesses": "abbots",
6+
"uncle": "auntie",
7+
"aunt": "uncle",
8+
"uncles": "aunts",
9+
"aunts": "uncles",
10+
"bachelor": "spinster",
11+
"bachelorette": "bachelor",
12+
"bachelors": "spinsters",
13+
"bachelorettes": "bachelors",
14+
"baron": "baroness",
15+
"baroness": "baron",
16+
"barons": "baronesses",
17+
"baronesses": "barons",
18+
"groom": "bride",
19+
"bride": "groom",
20+
"grooms": "brides",
21+
"brides": "grooms",
22+
"chairman": "chairwoman",
23+
"chairwoman": "chairman",
24+
"chairmen": "chairwomen",
25+
"chairwomen": "chairmen",
26+
"congressman": "congresswoman",
27+
"congresswoman": "congressman",
28+
"congressmen": "congresswomen",
29+
"congresswomen": "congressmen",
30+
"monastery": "nunnery",
31+
"convent": "monastery",
32+
"monasteries": "nunneries",
33+
"convents": "monasteries",
34+
"cowboy": "cowgirl",
35+
"cowgirl": "cowboy",
36+
"cowboys": "cowgirls",
37+
"cowgirls": "cowboys",
38+
"geezer": "hag",
39+
"crone": "geezer",
40+
"geezers": "hags",
41+
"crones": "geezers",
42+
"czar": "czarina",
43+
"czarina": "czar",
44+
"sir": "mademoiselle",
45+
"dame": "sir",
46+
"son": "daughter",
47+
"daughter": "son",
48+
"sons": "daughters",
49+
"daughters": "sons",
50+
"duke": "duchess",
51+
"duchess": "duke",
52+
"dukes": "duchesses",
53+
"duchesses": "dukes",
54+
"emperor": "empress",
55+
"empress": "emperor",
56+
"emperors": "empresses",
57+
"empresses": "emperors",
58+
"enchanter": "enchantress",
59+
"enchantress": "enchanter",
60+
"enchanters": "enchantresses",
61+
"enchantresses": "enchanters",
62+
"male": "female",
63+
"female": "male",
64+
"male's": "female's",
65+
"female's": "male's",
66+
"males": "females",
67+
"females": "males",
68+
"masculine": "feminine",
69+
"feminine": "masculine",
70+
"masculinity": "femininity",
71+
"femininity": "masculinity",
72+
"bro": "sis",
73+
"gal": "guy",
74+
"bros": "gals",
75+
"gals": "guys",
76+
"boy": "girl",
77+
"girl": "boy",
78+
"boy's": "girl's",
79+
"girl's": "boy's",
80+
"boyfriend": "girlfriend",
81+
"girlfriend": "boyfriend",
82+
"boyfriends": "girlfriends",
83+
"girlfriends": "boyfriends",
84+
"boyhood": "girlhood",
85+
"girlhood": "boyhood",
86+
"boyish": "girly",
87+
"girlish": "boyish",
88+
"boys": "girls",
89+
"girls": "boys",
90+
"girly": "boyish",
91+
"godfather": "godmother",
92+
"godmother": "godfather",
93+
"godfathers": "godmothers",
94+
"godmothers": "godfathers",
95+
"gramps": "grams",
96+
"grams": "gramps",
97+
"grandpa": "grandma",
98+
"grandma": "grandpa",
99+
"grandpas": "grandmas",
100+
"grandmas": "grandpas",
101+
"grandfather": "grandmother",
102+
"grandmother": "grandfather",
103+
"grandfathers": "grandmothers",
104+
"grandmothers": "grandfathers",
105+
"grandnephew": "grandniece",
106+
"grandniece": "grandnephew",
107+
"grandnephews": "grandnieces",
108+
"grandnieces": "grandnephews",
109+
"hag": "geezer",
110+
"hags": "geezers",
111+
"his": "her",
112+
"her": "him",
113+
"himself": "herself",
114+
"herself": "himself",
115+
"househusband": "housewife",
116+
"housewife": "househusband",
117+
"househusbands": "housewives",
118+
"housewives": "househusbands",
119+
"gentlemen": "ladies",
120+
"ladies": "gentlemen",
121+
"gentleman": "lady",
122+
"lady": "gentleman",
123+
"gentleman's": "lady's",
124+
"lady's": "gentleman's",
125+
"landlords": "landladies",
126+
"landladies": "landlords",
127+
"landlord": "landlady",
128+
"landlady": "landlord",
129+
"lad": "lass",
130+
"lass": "lad",
131+
"laddie": "lassie",
132+
"lassie": "laddie",
133+
"pa": "ma",
134+
"ma": "pa",
135+
"ma'am": "sir",
136+
"maam": "sir",
137+
"madam": "sir",
138+
"maiden": "bachelor",
139+
"maidens": "bachelors",
140+
"manservant": "maidservant",
141+
"maidservant": "manservant",
142+
"manservants": "maidservants",
143+
"maidservants": "manservants",
144+
"paternal": "maternal",
145+
"maternal": "paternal",
146+
"paternity": "maternity",
147+
"maternity": "paternity",
148+
"patriarch": "matriarch",
149+
"matriarch": "patriarch",
150+
"patriarchs": "matriarchs",
151+
"matriarchs": "patriarchs",
152+
"mister": "missus",
153+
"missus": "mister",
154+
"dad": "mom",
155+
"mom": "dad",
156+
"daddies": "mommies",
157+
"mommies": "daddies",
158+
"daddy": "mommy",
159+
"mommy": "daddy",
160+
"dads": "moms",
161+
"moms": "dads",
162+
"father": "mother",
163+
"mother": "father",
164+
"fathered": "mothered",
165+
"mothered": "fathered",
166+
"fatherhood": "motherhood",
167+
"motherhood": "fatherhood",
168+
"fatherly": "motherly",
169+
"motherly": "fatherly",
170+
"fathers": "mothers",
171+
"mothers": "fathers",
172+
"mr": "ms",
173+
"mrs": "mr",
174+
"ms": "mr",
175+
"nephew": "niece",
176+
"niece": "nephew",
177+
"nephews": "nieces",
178+
"nieces": "nephews",
179+
"monk": "nun",
180+
"nun": "friar",
181+
"friar": "nun",
182+
"nunneries": "monasteries",
183+
"nunnery": "monastery",
184+
"monks": "nuns",
185+
"nuns": "friars",
186+
"friars": "nuns",
187+
"priest": "priestess",
188+
"priestess": "priest",
189+
"priests": "priestesses",
190+
"priestesses": "priests",
191+
"prince": "princess",
192+
"princess": "prince",
193+
"princes": "princesses",
194+
"princesses": "princes",
195+
"king": "queen",
196+
"queen": "king",
197+
"kings": "queens",
198+
"queens": "kings",
199+
"schoolboy": "schoolgirl",
200+
"schoolgirl": "schoolboy",
201+
"schoolboys": "schoolgirls",
202+
"schoolgirls": "schoolboys",
203+
"he": "she",
204+
"she": "he",
205+
"he'd": "she'd",
206+
"she'd": "he'd",
207+
"he'll": "she'll",
208+
"she'll": "he'll",
209+
"he's": "she's",
210+
"she's": "he's",
211+
"hes": "shes",
212+
"shes": "hes",
213+
"sis": "bro",
214+
"brother": "sister",
215+
"sister": "brother",
216+
"brotherhood": "sisterhood",
217+
"sisterhood": "brotherhood",
218+
"fraternal": "sisterly",
219+
"sisterly": "brotherly",
220+
"brotherly": "sisterly",
221+
"brothers": "sisters",
222+
"sisters": "brothers",
223+
"sorcerer": "sorceress",
224+
"sorceress": "sorcerer",
225+
"sorcerers": "sorceresses",
226+
"sorceresses": "sorcerers",
227+
"fraternity": "sorority",
228+
"sorority": "fraternity",
229+
"spinster": "bachelor",
230+
"spinsters": "bachelors",
231+
"stepson": "stepdaughter",
232+
"stepdaughter": "stepson",
233+
"stepsons": "stepdaughters",
234+
"stepdaughters": "stepsons",
235+
"stepfather": "stepmother",
236+
"stepmother": "stepfather",
237+
"stepfathers": "stepmothers",
238+
"stepmothers": "stepfathers",
239+
"widower": "widow",
240+
"widow": "widower",
241+
"widowers": "widows",
242+
"widows": "widowers",
243+
"husband": "wife",
244+
"wife": "husband",
245+
"wizard": "witch",
246+
"witch": "warlock",
247+
"warlock": "witch",
248+
"warlocks": "witches",
249+
"witches": "wizards",
250+
"wizards": "witches",
251+
"husbands": "wives",
252+
"wives": "husbands",
253+
"man": "woman",
254+
"woman": "man",
255+
"man's": "woman's",
256+
"woman's": "man's",
257+
"manhood": "womanhood",
258+
"womanhood": "manhood",
259+
"manly": "womanly",
260+
"womanly": "manly",
261+
"men": "women",
262+
"women": "men",
263+
"mens": "womens",
264+
"womens": "mens",
265+
"him": "her",
266+
"auntie": "uncle",
267+
"mademoiselle": "sir",
268+
"macho": "femme",
269+
"femme": "macho",
270+
"guy": "gal",
271+
"guys": "gals"
272+
}

augly/tests/text_tests/functional_unit_test.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,14 @@ def test_import(self) -> None:
1919
def setUp(self):
2020
random.seed(123)
2121
self.texts = [
22-
"The quick brown 'fox' couldn't jump over the green, grassy hill."
22+
"The quick brown 'fox' couldn't jump over the green, grassy hill.",
2323
]
2424
self.priority_words = ["green", "grassy", "hill"]
2525

26+
self.fairness_texts = [
27+
"The king and queen have a son named Raj and a daughter named Amanda.",
28+
]
29+
2630
def test_apply_lambda(self) -> None:
2731
augmented_apply_lambda = txtaugs.apply_lambda(self.texts)
2832
self.assertTrue(augmented_apply_lambda[0] == self.texts[0])
@@ -407,6 +411,15 @@ def test_split_words(self) -> None:
407411
== "The quick brown 'fox' couldn't jump ov er the g reen, g rassy hi ll."
408412
)
409413

414+
def test_swap_gendered_words(self) -> None:
415+
augmented_gender_swap_words = txtaugs.swap_gendered_words(
416+
self.fairness_texts[0], aug_word_p=0.3
417+
)
418+
self.assertTrue(
419+
augmented_gender_swap_words
420+
== "The queen and king have a daughter named Raj and a son named Amanda.",
421+
)
422+
410423

411424
if __name__ == "__main__":
412425
unittest.main()

augly/tests/text_tests/transforms_unit_test.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ def setUpClass(cls):
6666
]
6767
cls.priority_words = ["green", "grassy", "hill"]
6868

69+
cls.fairness_texts = [
70+
"The king and queen have a son named Raj and a daughter named Amanda.",
71+
]
72+
6973
def test_ApplyLambda(self) -> None:
7074
augmented_apply_lambda = txtaugs.ApplyLambda()(
7175
self.texts, metadata=self.metadata
@@ -286,6 +290,19 @@ def test_SplitWords(self) -> None:
286290
are_equal_metadata(self.metadata, self.expected_metadata["split_words"]),
287291
)
288292

293+
def test_SwapGenderedWords(self) -> None:
294+
augmented_words = txtaugs.SwapGenderedWords()(
295+
self.fairness_texts, metadata=self.metadata
296+
)
297+
298+
self.assertTrue(
299+
augmented_words[0]
300+
== "The queen and king have a daughter named Raj and a son named Amanda.",
301+
)
302+
self.assertTrue(
303+
are_equal_metadata(self.metadata, self.expected_metadata["swap_gendered_words"]),
304+
)
305+
289306

290307
if __name__ == "__main__":
291308
unittest.main()

augly/text/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
replace_words,
1818
simulate_typos,
1919
split_words,
20+
swap_gendered_words,
2021
)
2122

2223
from augly.text.intensity import (
@@ -33,6 +34,7 @@
3334
replace_words_intensity,
3435
simulate_typos_intensity,
3536
split_words_intensity,
37+
swap_gendered_words_intensity,
3638
)
3739

3840
from augly.text.transforms import (
@@ -49,6 +51,7 @@
4951
ReplaceWords,
5052
SimulateTypos,
5153
SplitWords,
54+
SwapGenderedWords,
5255
)
5356

5457
__all__ = [
@@ -67,6 +70,7 @@
6770
"ReplaceWords",
6871
"SimulateTypos",
6972
"SplitWords",
73+
"SwapGenderedWords",
7074
"apply_lambda",
7175
"get_baseline",
7276
"insert_punctuation_chars",
@@ -80,6 +84,7 @@
8084
"replace_words",
8185
"simulate_typos",
8286
"split_words",
87+
"swap_gendered_words",
8388
"apply_lambda_intensity",
8489
"get_baseline_intensity",
8590
"insert_punctuation_chars_intensity",
@@ -93,4 +98,5 @@
9398
"replace_words_intensity",
9499
"simulate_typos_intensity",
95100
"split_words_intensity",
101+
"swap_gendered_words_intensity",
96102
]

0 commit comments

Comments
 (0)