Skip to content

Commit 93cbdf8

Browse files
authored
Merge pull request #50 from brootware/dev
Dev
2 parents c0d47e8 + b7f374a commit 93cbdf8

File tree

16 files changed

+425
-483
lines changed

16 files changed

+425
-483
lines changed

README.md

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<br />
66
<i>CLI tool to redact and unredact sensitive information such as IP addresses, emails and domains.</i>
77
<br/>
8-
<code>pip install --upgrade pyredactkit && pyredactor</code>
8+
<code>pip install --upgrade pyredactkit && prk</code>
99
</p>
1010

1111
<p align="center">
@@ -71,28 +71,6 @@ Redact using custom regex pattern
7171
pyredactkit -f file -c custom.json
7272
```
7373

74-
<!-- Install nltk data for redacting names
75-
76-
```bash
77-
python -c "import nltk
78-
import ssl
79-
80-
try:
81-
_create_unverified_https_context = ssl._create_unverified_context
82-
except AttributeError:
83-
pass
84-
else:
85-
ssl._create_default_https_context =_create_unverified_https_context
86-
87-
nltk.download('popular')"
88-
```
89-
90-
Redact names from a text file
91-
92-
```bash
93-
pyredactkit test.txt -t name
94-
``` -->
95-
9674
### Use from github source
9775

9876
Clone the repo

images/afterRefactor/classes.png

42.8 KB
Loading

images/afterRefactor/packages.png

26.6 KB
Loading

poetry.lock

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
66
name = "pyredactkit"
7-
version = "0.3.2"
7+
version = "0.3.3"
88
description = "Python cli tool to redact sensitive data"
99
authors = ["brootware <[email protected]>"]
1010
license = "GPL-3.0-or-later"
@@ -51,7 +51,7 @@ pytest-cov = "^3.0.0"
5151

5252
[tool.poetry.scripts]
5353
pyredactkit = "pyredactkit.pyredactkit:main"
54-
pyredactor = "pyredactkit.pyredactkit:main"
54+
prk = "pyredactkit.pyredactkit:main"
5555

5656
[tool.pytest.ini_options]
5757
minversion = "2.0"

pyredactkit/common_jobs.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
""" Common jobs class implementation """
2+
import os
3+
import sys
4+
import math
5+
import json
6+
7+
8+
from pyredactkit.identifiers import Identifier
9+
# Module-level Identifier instance; CommonJobs.valid_options reads its `regexes` list.
id_object = Identifier()
10+
11+
12+
class CommonJobs:
    """Common Jobs class
    Helper methods shared by the redaction code: writing the hashshadow
    mapping, listing the supported redaction types and printing the
    "man hours saved" report.

    Static variables:
        dir_create (str): Message suffix announcing that a directory is
            being created.
    """
    dir_create = " directory does not exist, creating it."

    def __init__(self) -> None:
        """
        Class Initialization
        Args:
            None

        Returns:
            None
        """
        return None

    def write_hashmap(self, hash_map: dict, filename: str, savedir="./") -> None:
        """Function that writes a .hashshadow_<file>.json to the os directory.
        Args:
            hash_map (dictionary): dictionary object to be written to file.
            filename (str): name of supplied file; only its basename is used.
            savedir (str): [Optional] directory that receives the json file.

        Returns:
            None. Writes .hashshadow_<basename>.json inside savedir.
        """
        shadow_name = f".hashshadow_{os.path.basename(filename)}.json"
        # os.path.join keeps the result identical for "./" or "dir/" and also
        # works when savedir is given without a trailing separator.
        shadow_path = os.path.join(savedir, shadow_name)
        with open(shadow_path, "w", encoding="utf-8") as file:
            json.dump(hash_map, file)

    def valid_options(self) -> tuple:
        """Function to read in valid options from Identifier.regexes
        Args:
            None

        Returns:
            option_tuple (tuple): the 'type' value of every entry in
                id_object.regexes, in order.
        """
        option_tuple = ()
        for identifier in id_object.regexes:
            option = identifier['type']
            # A tuple cannot be extended with a bare value (TypeError), so
            # wrap anything that is not already a tuple.
            option_tuple += option if isinstance(option, tuple) else (option,)
        return option_tuple

    def process_report(self, filename: str):
        """Function to calculate and print a report of man hours saved.
        Args:
            filename (str): File to count the words

        Returns:
            None. Prints the estimated word count and the estimated
            minutes and hours saved.

        Raises:
            SystemExit: if the file cannot be decoded as UTF-8.
        """
        try:
            # Only the read can raise UnicodeDecodeError, so keep the try small.
            with open(filename, encoding="utf-8") as target_file:
                text_chunk = target_file.read()
        except UnicodeDecodeError:
            report_name = f"manhour_saved_report_{os.path.basename(filename)}"
            # This method never creates the report file, so it may not exist;
            # an unconditional os.remove would hide the real error message.
            if os.path.exists(report_name):
                os.remove(report_name)
                print("[-] Removed incomplete report")
            sys.exit("[-] Unable to read target file")

        # Words per minute
        WPM = 75
        # Average number of characters counted as one word
        word_length = 5

        # One division on the character count avoids the rounding drift of
        # adding 1/word_length once per character.
        total_words = math.ceil(len(text_chunk) / word_length)

        # Divide total words by words per minute read to get minutes and hour estimate.
        reading_minutes = math.ceil(total_words / WPM)
        reading_hours = math.floor(reading_minutes / 60)

        print(f"[+] Estimated total words : {total_words}")
        print(f"[+] Estimated total minutes saved : {reading_minutes}")
        print(f"[+] Estimated total man hours saved : {reading_hours}")

pyredactkit/core_redactor.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
""" Core redactor engine class implementation """
2+
from pyredactkit.common_jobs import CommonJobs
3+
from pyredactkit.identifiers import Identifier
4+
import os
5+
import sys
6+
import re
7+
import uuid
8+
9+
# Instantiate the module-level Identifier and CommonJobs objects shared by all CoreRedactorEngine methods
10+
id_object = Identifier()
11+
cj_object = CommonJobs()
12+
""" Coreredactor library """
13+
14+
15+
class CoreRedactorEngine:
    """CoreRedactorEngine class
    Class containing all methods to support redaction
    of core sensitive data type

    Static variables:
        block (unicode string): Run of 15 full-block characters.
        dir_create (str): Message suffix printed when the output
            directory has to be created.
    """

    block = "\u2588" * 15
    dir_create = " directory does not exist, creating it."

    def __init__(self) -> None:
        """
        Class Initialization
        Args:
            None

        Returns:
            None
        """
        return None

    def redact_all(self, line: str) -> tuple:
        """Function to redact every supported data type in one line.
        Args:
            line (str) : line to be supplied to redact

        Returns:
            line (str): redacted line
            kv_pair (dict) : key value pair of uuid to sensitive data,
                one entry per replaced match.
        """
        hash_map = {}

        def _mask(match):
            # Give every match its own uuid so each original value can be
            # restored; a single uuid per pattern would lose all but the
            # first value on the line.
            masked_data = str(uuid.uuid4())
            hash_map[masked_data] = match.group(0)
            return masked_data

        for identifier in id_object.regexes:
            line = re.sub(identifier['pattern'], _mask, line)
        return line, hash_map

    def process_text(self, text: str, savedir="./"):
        """Function to process supplied text from cli.
        Args:
            text (str): string, or iterable of strings, to redact
            savedir (str): [Optional] directory that receives the
                hashshadow file

        Returns:
            None. Creates the redacted file and its hashshadow file.
        """
        # One output line is written per item; a bare str would otherwise be
        # walked character by character, so split it into lines first.
        lines = text.splitlines() if isinstance(text, str) else text

        hash_map = {}
        generated_file = f"redacted_file_{str(uuid.uuid1())}.txt"
        # NOTE(review): the redacted file is written relative to the current
        # working directory; only the hashshadow file honours savedir.
        with open(generated_file, "w", encoding="utf-8") as result:
            for line in lines:
                redacted_line, kv_pairs = self.redact_all(line)
                hash_map.update(kv_pairs)
                result.write(f"{redacted_line}\n")
        cj_object.write_hashmap(hash_map, generated_file, savedir)
        print(
            f"[+] .hashshadow_{os.path.basename(generated_file)}.json file generated. Keep this safe if you need to undo the redaction.")
        print(
            f"[+] Redacted and results saved to {os.path.basename(generated_file)}")

    def process_core_file(self, filename: str, savedir="./"):
        """Function to process supplied file from cli.
        Args:
            filename (str): File to redact
            savedir (str): [Optional] directory to place results

        Returns:
            None. Creates the redacted file and its hashshadow file, then
            prints the man-hour report.

        Raises:
            SystemExit: if the file cannot be decoded as UTF-8.
        """
        count = 0
        hash_map = {}

        # Normalise savedir so it can be used as a plain prefix; an empty
        # string means the current directory.
        if not savedir:
            savedir = "./"
        elif not savedir.endswith("/"):
            savedir = savedir + "/"
        redacted_path = f"{savedir}redacted_{os.path.basename(filename)}"

        try:
            # Open a file read pointer as target_file
            with open(filename, encoding="utf-8") as target_file:
                # create the directory if not present
                target_dir = os.path.dirname(savedir)
                if not os.path.exists(target_dir):
                    print("[+] " + target_dir + f"{self.dir_create}")
                    os.makedirs(target_dir)

                print(
                    "[+] Processing starts now. This may take some time "
                    "depending on the file size. Monitor the redacted file "
                    "size to monitor progress"
                )

                # Open a file write pointer as result
                with open(redacted_path, "w", encoding="utf-8") as result:
                    print("[+] No custom regex pattern supplied, will be redacting all the core sensitive data supported")
                    for line in target_file:
                        redacted_line, kv_pairs = self.redact_all(line)
                        # Each hash map entry is one replaced match, so this
                        # counts redacted targets without a second regex pass.
                        count += len(kv_pairs)
                        hash_map.update(kv_pairs)
                        result.write(redacted_line)

            cj_object.write_hashmap(hash_map, filename, savedir)
            print(
                f"[+] .hashshadow_{os.path.basename(filename)}.json file generated. Keep this safe if you need to undo the redaction.")
            print(f"[+] Redacted {count} targets...")
            print(
                f"[+] Redacted results saved to {redacted_path}")
            cj_object.process_report(filename)

        except UnicodeDecodeError:
            # Drop the partially written output before exiting.
            if os.path.exists(redacted_path):
                os.remove(redacted_path)
                print("[-] Removed incomplete redact file")
            sys.exit("[-] Unable to read file")

0 commit comments

Comments
 (0)