Add scrubber POC (#3)

xinhaoz · web-flow · commit c705f9f79a11 · 2021-03-25T21:18:49.000-04:00
* Remove redundant prettier write

* Create scrubber tool

* Add tag opening and closing braces and allow scrub on directories

* Add test dir

* Add another test file

* Fix carriage return line ending issue and restore prod script

* Require tags in comments
diff --git a/.eslintrc.js b/.eslintrc.js
@@ -14,5 +14,7 @@ module.exports = {
   ],
   rules: {
     "no-console": "off",
+    "no-plusplus": "off",
+    "no-continue": "off",
   },
 };
diff --git a/index.ts b/index.ts
@@ -1,3 +1,19 @@
 import cli from "./cli";
 
+import Scrubber from "./scrubber/scrubber";
+import { ScrubberAction } from "./scrubber/scrubberTypes";
+
+/**
+ * Runs the scrubber once: loads `scrubber/scrubberConfig.json`, then scrubs
+ * the configured directories with one extra action that removes regions
+ * tagged `@remove`.
+ *
+ * Any error raised while loading the config or scrubbing is caught and
+ * printed with `console.log`; it is not rethrown to the caller.
+ */
+async function scrub() {
+  const removeTagged: ScrubberAction = { type: "remove", tags: ["@remove"] };
+
+  try {
+    const scrubber = new Scrubber();
+
+    await scrubber.parseConfig("scrubber/scrubberConfig.json");
+    await scrubber.start([removeTagged]);
+  } catch (err) {
+    console.log(err);
+  }
+}
+
 cli(process.argv);
+scrub();
diff --git a/package.json b/package.json
@@ -9,7 +9,7 @@
   "scripts": {
     "dev": "ts-node index.ts",
     "lint": "eslint . --ext .ts,.js",
-    "lint-fix": "eslint . --ext .ts,.js --fix && prettier --write **/*.ts **/*.js",
+    "lint-fix": "eslint . --ext .ts,.js --fix",
     "prod": "tsc -p . && node bin/index.js"
   },
   "devDependencies": {
diff --git a/scrubber/scrubber.ts b/scrubber/scrubber.ts
@@ -0,0 +1,179 @@
+import fs from "fs";
+import path from "path";
+import {
+  ScrubberAction,
+  TagNameToAction,
+  ScrubberConfig,
+} from "./scrubberTypes";
+
+// Token expected after a tag name on the line that opens a tagged region.
+const TAG_START_CHAR = "{";
+// Token expected before a tag name on the line that closes a tagged region.
+const TAG_END_CHAR = "}";
+
+// Maps a file extension (without the dot) to the line-comment marker that
+// must precede a tag in files of that type. Files whose extension is not
+// listed here are scanned for bare (uncommented) tags instead.
+const FILE_TYPE_COMMENT: { [key: string]: string } = {
+  js: "//",
+  json: "//",
+  ts: "//",
+  py: "#",
+};
+
+/**
+ * Flattens a list of actions into a lookup from tag name to action type.
+ *
+ * When the same tag is listed by more than one action, the action that
+ * appears later in `actions` wins.
+ *
+ * @param actions Actions whose tags should be indexed.
+ * @returns A dictionary keyed by tag name.
+ */
+function scrubberActionsToDict(actions: ScrubberAction[]): TagNameToAction {
+  return actions.reduce<TagNameToAction>((acc, { type, tags }) => {
+    tags.forEach((tagName: string) => {
+      acc[tagName] = type;
+    });
+    return acc;
+  }, {});
+}
+
+/**
+ * Reads and parses the scrubber configuration file.
+ *
+ * The parsed JSON is returned as-is; its shape is not validated here.
+ *
+ * @param filename Path of the JSON config file.
+ * @returns The parsed configuration.
+ * @throws The underlying file-system error if the file cannot be read, or
+ *   the `SyntaxError` from `JSON.parse` if its contents are not valid JSON.
+ *   Either way a message naming the failing step is logged first.
+ */
+async function getConfigFile(filename: string): Promise<ScrubberConfig> {
+  let configString: string;
+  try {
+    // Promise-based read: awaiting `readFileSync` would still block the
+    // event loop for the whole read.
+    configString = await fs.promises.readFile(filename, "utf8");
+  } catch (err) {
+    console.error("Failed to read file ", filename);
+    throw err;
+  }
+
+  try {
+    return JSON.parse(configString);
+  } catch (err) {
+    // Reported separately so invalid JSON is not mistaken for an I/O error.
+    console.error("Failed to parse file ", filename);
+    throw err;
+  }
+}
+
+/**
+ * Reports whether `key` is a property defined directly on `dict`.
+ *
+ * The `in` operator also matches keys inherited from `Object.prototype`
+ * (e.g. "constructor", "toString"), which would make ordinary words look
+ * like configured tags or known file extensions.
+ */
+function hasOwnKey(dict: object, key: string | undefined): key is string {
+  return (
+    key !== undefined && Object.prototype.hasOwnProperty.call(dict, key)
+  );
+}
+
+/**
+ * Applies the tag actions to the text of one file and returns the result.
+ *
+ * A tag line is `<tag> {` (opens a region) or `} <tag>` (closes it). When
+ * the file extension has an entry in FILE_TYPE_COMMENT, the tag line must
+ * start with that comment marker as its own space-separated token. Tag
+ * lines are always dropped from the output; lines between the opening and
+ * closing line of a "remove" tag are dropped as well.
+ *
+ * @param text Full file contents.
+ * @param filePath Path of the file; used for the extension lookup and in
+ *   error messages.
+ * @param tags Tag name to action lookup.
+ * @param isDryRun When true, each dropped content line is logged.
+ * @returns The scrubbed text, joined with "\n".
+ * @throws Error when a tag line carries the wrong brace.
+ */
+function scrubText(
+  text: string,
+  filePath: string,
+  tags: TagNameToAction,
+  isDryRun: boolean,
+): string {
+  const ext = filePath.split(".").pop();
+  const commentType = hasOwnKey(FILE_TYPE_COMMENT, ext)
+    ? FILE_TYPE_COMMENT[ext]
+    : undefined;
+  const scrubbedLines: string[] = [];
+  let skip = false;
+
+  const lines: string[] = text.split("\n");
+
+  for (let i = 0; i < lines.length; ++i) {
+    const line = lines[i];
+    const lineNumber = i + 1;
+
+    // Empty lines are always kept, even inside a removed region.
+    if (line.length === 0) {
+      scrubbedLines.push(line);
+      continue;
+    }
+
+    // Split on runs of spaces; trim() also strips a trailing "\r".
+    const tokens = line.trim().split(/[ ]+/);
+    let tryProcessTag = true;
+
+    if (commentType) {
+      // In commentable file types a tag only counts inside a line comment.
+      if (tokens[0] !== commentType) {
+        tryProcessTag = false;
+      }
+      tokens.shift();
+    }
+
+    if (tryProcessTag) {
+      const firstIsTag = hasOwnKey(tags, tokens[0]);
+      const secondIsTag = hasOwnKey(tags, tokens[1]);
+
+      if (firstIsTag && tokens.length !== 2) {
+        console.warn(
+          `WARNING line ${lineNumber}: possible malformed tag; tags must be on their own line preceded by '}' or followed by '{'`,
+        );
+        scrubbedLines.push(line);
+        continue;
+      }
+
+      if (firstIsTag || secondIsTag) {
+        // Opening form is "<tag> {"; closing form is "} <tag>".
+        const tag = firstIsTag ? tokens[0] : tokens[1];
+        const brace = firstIsTag ? tokens[1] : tokens[0];
+
+        if (firstIsTag && brace !== TAG_START_CHAR) {
+          throw new Error(
+            `Malformed tag ${filePath}:line ${lineNumber}: expected '{' after tag name`,
+          );
+        }
+
+        if (!firstIsTag && brace !== TAG_END_CHAR) {
+          throw new Error(
+            `Malformed tag ${filePath}:line ${lineNumber}: expected '}' before tag name`,
+          );
+        }
+
+        // NOTE: nested tagging is not currently expected and will lead to unexpected behaviour.
+
+        if (tags[tag] === "remove") {
+          skip = brace === TAG_START_CHAR;
+        }
+
+        // We always scrub tags from the final file.
+        continue;
+      }
+    }
+
+    if (skip) {
+      if (isDryRun) {
+        console.log(`Skipping line ${lineNumber}`);
+      }
+      continue;
+    }
+
+    scrubbedLines.push(line);
+  }
+
+  return scrubbedLines.join("\n");
+}
+
+/**
+ * Scrubs tagged regions from a single file in place.
+ *
+ * @param filePath Path of the file to scrub.
+ * @param tags Tag name to action lookup.
+ * @param isDryRun When true, the file is analysed and the lines that would
+ *   be dropped are logged, but nothing is written.
+ * @returns A promise that resolves once the file has been processed. It
+ *   rejects if the file cannot be read or written, or if a malformed tag
+ *   is found.
+ */
+async function scrubFile(
+  filePath: string,
+  tags: TagNameToAction,
+  isDryRun: boolean,
+): Promise<void> {
+  // Using the promise API (rather than a callback wrapped in `new Promise`)
+  // guarantees that every failure rejects the returned promise and that
+  // every code path, dry runs included, settles it.
+  const text = await fs.promises.readFile(filePath, { encoding: "utf8" });
+  const scrubbed = scrubText(text, filePath, tags, isDryRun);
+
+  // Skip the write when nothing changed: it avoids touching untouched files
+  // and keeps non-UTF-8 (e.g. binary) files from being re-encoded.
+  if (isDryRun || scrubbed === text) return;
+
+  await fs.promises.writeFile(filePath, scrubbed);
+}
+
+/**
+ * Recursively scrubs every regular file under a directory.
+ *
+ * Entries that are neither regular files nor directories are ignored.
+ * All entries of a directory are processed concurrently.
+ *
+ * @param dir Directory to walk.
+ * @param tags Tag name to action lookup, passed through to `scrubFile`.
+ * @param isDryRun Passed through to `scrubFile`.
+ * @returns A promise that resolves when every entry has been processed and
+ *   rejects with the first failure.
+ */
+async function scrubDir(dir: string, tags: TagNameToAction, isDryRun: boolean) {
+  // Promise-based fs calls: the sync variants block the event loop, and
+  // awaiting them does not make them asynchronous.
+  const names = await fs.promises.readdir(dir);
+  await Promise.all(
+    names.map(
+      async (name: string): Promise<void> => {
+        const filePath = path.join(dir, name);
+        // stat() follows symbolic links, like the statSync it replaces.
+        const stat = await fs.promises.stat(filePath);
+        if (stat.isFile()) {
+          await scrubFile(filePath, tags, isDryRun);
+        } else if (stat.isDirectory()) {
+          await scrubDir(filePath, tags, isDryRun);
+        }
+      },
+    ),
+  );
+}
+
+/**
+ * Holds the tag actions and directory list loaded from a config file and
+ * runs the scrub over those directories.
+ */
+class Scrubber {
+  /** Tag name to action lookup built from the config file. */
+  tags: TagNameToAction = {};
+
+  /** Directories to scrub, as listed in the config file. */
+  dirs: string[] = [];
+
+  /**
+   * Loads the config file and replaces `tags` and `dirs` with its contents.
+   *
+   * @param filename Path of the JSON config file.
+   */
+  async parseConfig(filename: string): Promise<void> {
+    // TODO validate config (e.g. properly formed tag names)
+    const { actions, dirs } = await getConfigFile(filename);
+    this.tags = scrubberActionsToDict(actions);
+    this.dirs = dirs;
+  }
+
+  /**
+   * Scrubs every configured directory.
+   *
+   * @param actions Extra actions for this run; for a tag that also appears
+   *   in the config, the action given here takes precedence.
+   * @param isDryRun When true, it is forwarded so that files are analysed
+   *   without being rewritten.
+   */
+  async start(
+    actions: ScrubberAction[],
+    isDryRun: boolean = false,
+  ): Promise<void> {
+    const overrides = scrubberActionsToDict(actions);
+    const effectiveTags: TagNameToAction = { ...this.tags, ...overrides };
+
+    // TODO: specify file extensions?
+    const pending = this.dirs.map((dir) =>
+      scrubDir(dir, effectiveTags, isDryRun),
+    );
+    await Promise.all(pending);
+  }
+}
+
+export default Scrubber;
diff --git a/scrubber/scrubberConfig.json b/scrubber/scrubberConfig.json
@@ -0,0 +1,9 @@
+{
+  "actions": [
+    {
+      "type": "keep",
+      "tags": ["@remove", "@remove2"]
+    }
+  ],
+  "dirs": ["test_dir"]
+}
diff --git a/scrubber/scrubberTypes.ts b/scrubber/scrubberTypes.ts
@@ -0,0 +1,17 @@
+/*
+Types required by the scrubber tool.
+*/
+
+/**
+ * What to do with a tagged region. With "remove", the lines between the
+ * opening and closing tag lines are dropped; with "keep", only the tag
+ * lines themselves are dropped.
+ */
+export type ScrubberActionType = "remove" | "keep";
+
+/** One action together with the tag names it applies to. */
+export type ScrubberAction = {
+  type: ScrubberActionType;
+  tags: string[];
+};
+
+/** Shape of the scrubber JSON config file. */
+export type ScrubberConfig = {
+  // Actions to apply, each listing the tags it governs.
+  actions: ScrubberAction[];
+  // Directories whose files are scrubbed.
+  dirs: string[];
+};
+
+/** Lookup from tag name to the action configured for that tag. */
+export type TagNameToAction = { [key: string]: ScrubberActionType };
diff --git a/test_dir/test b/test_dir/test
@@ -0,0 +1,5 @@
+@remove {
+    this should be gone
+} @remove
+
+hello world
diff --git a/test_dir/test.js b/test_dir/test.js
@@ -0,0 +1,5 @@
+// @remove {
+console.log("this should be gone");
+// } @remove
+
+console.log("hello world");
diff --git a/test_dir/test.py b/test_dir/test.py
@@ -0,0 +1,5 @@
+# @remove {
+    print('this should be gone')
+# } @remove
+
+print('hello world')
diff --git a/tsconfig.json b/tsconfig.json
@@ -4,7 +4,7 @@
 
     /* Basic Options */
     // "incremental": true,                         /* Enable incremental compilation */
-    "target": "es5",                                /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */
+    "target": "es2017",                                /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */
     "module": "commonjs",                           /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */
     // "lib": [],                                   /* Specify library files to be included in the compilation. */
     // "allowJs": true,                             /* Allow javascript files to be compiled. */