brootware
diff --git a/‎README.md‎
Lines changed: 1 addition & 23 deletions b/‎README.md‎
Lines changed: 1 addition & 23 deletions
diff --git a/‎images/afterRefactor/classes.png‎
42.8 KB b/‎images/afterRefactor/classes.png‎
42.8 KB
diff --git a/‎images/afterRefactor/packages.png‎
26.6 KB b/‎images/afterRefactor/packages.png‎
26.6 KB
diff --git a/images/classes.png b/images/beforeRefactor/classes.png
rename from images/classes.png
rename to images/beforeRefactor/classes.png
diff --git a/images/packages.png b/images/beforeRefactor/packages.png
rename from images/packages.png
rename to images/beforeRefactor/packages.png
diff --git a/‎poetry.lock‎
Lines changed: 6 additions & 6 deletions b/‎poetry.lock‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 2 additions & 2 deletions b/‎pyproject.toml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎pyredactkit/common_jobs.py‎
Lines changed: 96 additions & 0 deletions b/‎pyredactkit/common_jobs.py‎
Lines changed: 96 additions & 0 deletions
diff --git a/‎pyredactkit/core_redactor.py‎
Lines changed: 149 additions & 0 deletions b/‎pyredactkit/core_redactor.py‎
Lines changed: 149 additions & 0 deletions
@@ -5,7 +5,7 @@
 <br />
 <i>CLI tool to redact and unredact sensitive information like ip addresses, emails and domains.</i>
 <br/>
-<code>pip install --upgrade pyredactkit && pyredactor</code>
+<code>pip install --upgrade pyredactkit && prk</code>
 </p>
 
 <p align="center">
@@ -71,28 +71,6 @@ Redact using custom regex pattern
 pyredactkit -f file -c custom.json
 ```
 
-<!-- Install nltk data for redacting names
-
-```bash
-python -c "import nltk
-import ssl
-
-try:
-    _create_unverified_https_context = ssl._create_unverified_context
-except AttributeError:
-    pass
-else:
-    ssl._create_default_https_context =_create_unverified_https_context
-
-nltk.download('popular')"
-```
-
-Redact names from a text file
-
-```bash
-pyredactkit test.txt -t name
-``` -->
-
 ### Use from github source
 
 Clone the repo
 
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "pyredactkit"
-version = "0.3.2"
+version = "0.3.3"
 description = "Python cli tool to redact sensitive data"
 authors = ["brootware <[email protected]>"]
 license = "GPL-3.0-or-later"
@@ -51,7 +51,7 @@ pytest-cov = "^3.0.0"
 
 [tool.poetry.scripts]
 pyredactkit = "pyredactkit.pyredactkit:main"
-pyredactor = "pyredactkit.pyredactkit:main"
+prk = "pyredactkit.pyredactkit:main"
 
 [tool.pytest.ini_options]
 minversion = "2.0"
 
@@ -0,0 +1,96 @@
+""" Common jobs class implementation """
+import os
+import sys
+import math
+import json
+
+
+from pyredactkit.identifiers import Identifier
+# Shared Identifier instance; CommonJobs.valid_options reads its `regexes` list.
+id_object = Identifier()
+
+
+class CommonJobs:
+    """Common Jobs class
+    Class containing the housekeeping methods shared by the redaction
+    engines: persisting the redaction map, listing the supported identifier
+    types and printing an estimate of the reading time saved.
+
+    Static variables:
+        dir_create (str): Message suffix used when announcing that a
+            directory is being created.
+        WORDS_PER_MINUTE (int): Reading speed assumed by process_report.
+        CHARS_PER_WORD (int): Characters counted as one word by
+            process_report.
+    """
+    dir_create = " directory does not exist, creating it."
+    WORDS_PER_MINUTE = 75
+    CHARS_PER_WORD = 5
+
+    def __init__(self) -> None:
+        """
+        Class Initialization
+        Args:
+            None
+
+        Returns:
+            None
+        """
+        return None
+
+    def write_hashmap(self, hash_map: dict, filename: str, savedir="./") -> None:
+        """Function that writes a .hashshadow_<filename>.json to os directory.
+        Args:
+            hash_map (dictionary): dictionary object to be written to file.
+            filename (str): name of supplied file; only its base name is
+                used to build the output name.
+            savedir (str): [Optional] directory to place the file in. A
+                trailing separator is not required.
+
+        Returns:
+            None. Writes .hashshadow_<filename>.json into savedir.
+        """
+        # os.path.join inserts the separator when savedir lacks one, so
+        # "out" and "out/" both place the file inside the directory.
+        shadow_name = f".hashshadow_{os.path.basename(filename)}.json"
+        with open(os.path.join(savedir, shadow_name), "w", encoding="utf-8") as file:
+            json.dump(hash_map, file)
+
+    def valid_options(self) -> tuple:
+        """Function to read in valid options from Identifier.regexes
+        Args:
+            None
+
+        Returns:
+            option_tuple (tuple): the 'type' value of every entry in
+                id_object.regexes, in order.
+        """
+        # Build the tuple element by element; `tuple += str` raises TypeError.
+        return tuple(entry['type'] for entry in id_object.regexes)
+
+    def process_report(self, filename: str):
+        """Function to calculate and print a report of man hours saved.
+        Args:
+            filename (str): File to count the words
+
+        Returns:
+            None. Prints the estimated word count and the estimated
+            minutes/hours saved. Exits the program if the file cannot be
+            decoded as UTF-8.
+        """
+        try:
+            # Open a file read pointer as target_file
+            with open(filename, encoding="utf-8") as target_file:
+                text_chunk = target_file.read()
+        except UnicodeDecodeError:
+            # Only claim a report was removed when one actually exists;
+            # an unconditional os.remove would raise FileNotFoundError and
+            # hide the real failure.
+            report_file = f"manhour_saved_report_{os.path.basename(filename)}"
+            if os.path.exists(report_file):
+                os.remove(report_file)
+                print("[-] Removed incomplete report")
+            sys.exit("[-] Unable to read target file")
+
+        # A "word" is CHARS_PER_WORD characters. Integer ceiling division
+        # avoids the rounding drift of summing 1/5 once per character.
+        word_length = self.CHARS_PER_WORD
+        total_words = (len(text_chunk) + word_length - 1) // word_length
+
+        # Divide total words by words per minute read to get minutes and hour estimate.
+        wpm = self.WORDS_PER_MINUTE
+        reading_minutes = (total_words + wpm - 1) // wpm
+        reading_hours = reading_minutes // 60
+
+        print(f"[+] Estimated total words : {total_words}")
+        print(f"[+] Estimated total minutes saved : {reading_minutes}")
+        print(f"[+] Estimated total man hours saved : {reading_hours}")
@@ -0,0 +1,149 @@
+""" Core redactor engine class implementation """
+from pyredactkit.common_jobs import CommonJobs
+from pyredactkit.identifiers import Identifier
+import os
+import sys
+import re
+import uuid
+
+# Module-level instances shared by every CoreRedactorEngine method:
+# id_object supplies the regex entries, cj_object the hashmap/report helpers.
+id_object = Identifier()
+cj_object = CommonJobs()
+# NOTE(review): the bare string below is a no-op expression statement, not a docstring.
+""" Coreredactor library """
+
+
+class CoreRedactorEngine:
+    """CoreRedactorEngine class
+    Class containing all methods to support redaction
+    of core sensitive data type
+
+    Static variables:
+        block (unicode string): To redact sensitive data
+        dir_create (str): Message suffix used when announcing that a
+            directory is being created.
+    """
+
+    block = "\u2588" * 15
+    dir_create = " directory does not exist, creating it."
+
+    def __init__(self) -> None:
+        """
+        Class Initialization
+        Args:
+            None
+
+        Returns:
+            None
+        """
+        return None
+
+    def _prepare_savedir(self, savedir: str) -> str:
+        """Normalise savedir to end with a separator and create it if missing.
+        Args:
+            savedir (str): directory to place results; may be empty.
+
+        Returns:
+            savedir (str): the directory with a trailing separator.
+        """
+        if not savedir:
+            # An empty value means the current directory (avoids savedir[-1]).
+            savedir = "./"
+        elif not savedir.endswith(("/", os.sep)):
+            savedir = savedir + "/"
+
+        # create the directory if not present
+        target_dir = os.path.dirname(savedir)
+        if not os.path.exists(target_dir):
+            print("[+] " + target_dir + f"{self.dir_create}")
+            os.makedirs(target_dir, exist_ok=True)
+        return savedir
+
+    def redact_all(self, line: str) -> tuple:
+        """Function to redact every supported sensitive data type in a line
+        Args:
+            line (str) : line to be supplied to redact
+
+        Returns:
+            line (str): redacted line
+            kv_pair (dict) : key value pair of uuid to sensitive data,
+                one entry per replaced match.
+        """
+        hash_map = {}
+
+        def mask(match):
+            # Give each match its own uuid and record it. Sharing one uuid
+            # across all matches of a pattern would keep only the first
+            # original value and make the others unrecoverable.
+            masked_data = str(uuid.uuid4())
+            hash_map[masked_data] = match.group(0)
+            return masked_data
+
+        for identifier in id_object.regexes:
+            line = re.sub(identifier['pattern'], mask, line)
+        return line, hash_map
+
+    def process_text(self, text: str, savedir="./"):
+        """Function to process supplied text from cli.
+        Args:
+            text (str): string to redact. A plain string is split into
+                lines; any other iterable is treated as a sequence of
+                lines (presumably the word list from the cli parser).
+            savedir (str): [Optional] directory to place results
+
+        Returns:
+            Creates redacted file and its .hashshadow json in savedir.
+        """
+        savedir = self._prepare_savedir(savedir)
+        # Iterating a str directly would yield single characters.
+        lines = text.splitlines() if isinstance(text, str) else text
+
+        hash_map = {}
+        generated_file = f"redacted_file_{str(uuid.uuid1())}.txt"
+        # Write the redacted text next to its hashshadow file in savedir.
+        with open(
+            f"{savedir}{generated_file}",
+            "w",
+            encoding="utf-8",
+        ) as result:
+            for line in lines:
+                redacted_line, kv_pairs = self.redact_all(line)
+                hash_map.update(kv_pairs)
+                result.write(f"{redacted_line}\n")
+        cj_object.write_hashmap(hash_map, generated_file, savedir)
+        print(
+            f"[+] .hashshadow_{os.path.basename(generated_file)}.json file generated. Keep this safe if you need to undo the redaction.")
+        print(
+            f"[+] Redacted and results saved to {os.path.basename(generated_file)}")
+
+    def process_core_file(self, filename: str, savedir="./"):
+        """Function to process supplied file from cli.
+        Args:
+            filename (str): File to redact
+            savedir (str): [Optional] directory to place results
+
+        Returns:
+            Creates redacted file and its .hashshadow json in savedir,
+            then prints the man hour report. Exits the program if the
+            file cannot be decoded as UTF-8.
+        """
+        count = 0
+        hash_map = {}
+        try:
+            # Open a file read pointer as target_file
+            with open(filename, encoding="utf-8") as target_file:
+                savedir = self._prepare_savedir(savedir)
+
+                print(
+                    "[+] Processing starts now. This may take some time "
+                    "depending on the file size. Monitor the redacted file "
+                    "size to monitor progress"
+                )
+
+                # Open a file write pointer as result
+                with open(
+                    f"{savedir}redacted_{os.path.basename(filename)}",
+                    "w",
+                    encoding="utf-8",
+                ) as result:
+                    # This engine always redacts every core data type.
+                    print("[+] No custom regex pattern supplied, will be redacting all the core sensitive data supported")
+                    for line in target_file:
+                        # redact all and collect the uuid -> value pairs
+                        redacted_line, kv_pairs = self.redact_all(line)
+                        # one pair per replaced match, so this counts
+                        # the targets actually redacted
+                        count += len(kv_pairs)
+                        hash_map.update(kv_pairs)
+                        result.write(redacted_line)
+                    cj_object.write_hashmap(hash_map, filename, savedir)
+                    print(
+                        f"[+] .hashshadow_{os.path.basename(filename)}.json file generated. Keep this safe if you need to undo the redaction.")
+                    print(f"[+] Redacted {count} targets...")
+                    print(
+                        f"[+] Redacted results saved to {savedir}redacted_{os.path.basename(filename)}")
+                    cj_object.process_report(filename)
+
+        except UnicodeDecodeError:
+            # Drop the partially written output, if one was created.
+            partial = f"{savedir}redacted_{os.path.basename(filename)}"
+            if os.path.exists(partial):
+                os.remove(partial)
+                print("[-] Removed incomplete redact file")
+            sys.exit("[-] Unable to read file")