Merge pull request #11152 from ethereum/crowdin-import-update

corwintines · web-flow · commit 4c3e5b525c35 · 2023-10-14T11:26:34.000-06:00
Update script with CSV auto importing
diff --git a/src/scripts/crowdin-import.ts b/src/scripts/crowdin-import.ts
@@ -1,95 +1,69 @@
 // Library requires
 const i18Config = require("../../i18n/config.json")
-const { copyFileSync, existsSync, mkdirSync, readdirSync } = require("fs")
+const {
+  copyFileSync,
+  existsSync,
+  mkdirSync,
+  readdirSync,
+  readFileSync,
+} = require("fs")
 const { resolve, join } = require("path")
 const argv = require("minimist")(process.argv.slice(2))
+
+/******************************
+ * Console flags              *
+ ******************************/
+
 /**
- * Console flags
+ * -b,--buckets    Prints buckets overview and exits
  * -v,--verbose    Prints verbose console logs
  * -f,--full       Prints full name of buckets in summary
+ */
+
+/******************************
+ * Instructions for use       *
+ ******************************/
+
+/**
+ * 1. Run `yarn crowdin-clean` to initialize a fresh ./.crowdin folder. This can also be used to erase its contents when finished.
  *
+ * 2a. Export CSV of reviewed languages/buckets from Notion and import it:
+ *   1. Open "Website translation board" document in ethereum.org Notion (internal only)
+ *   2. Switch view of "Translation status by language" table to "All reviewed"
+ *   3. Click triple-dot (...) menu in TOP right corner of the entire app
+ *   4. Select "Export" > "Export as CSV"
+ *      Export format: Markdown & CSV
+ *      Include databases: Current view
+ *      Include content: No files or images
+ *      Include subpages: Off
+ *      Click "Export" > Save zip file
+ *   5. Unzip contents into (or copy into) ./.crowdin folder in the root of this repo
  *
- * Follow these steps to import translations from Crowdin export:
- *
- * 1. Copy languages folder from Crowdin export to ./.crowdin
- *    ie. ./.crowdin/{lang-codes}
- *    Tip: Run `yarn crowdin-clean` to initialize the `.crowdin` folder. Can
- *    also be used to erase contents when finished.
- *
- * 2. Select buckets to import by adding the number of the corresponding
- *    content bucket to the chosen language array below
- *    ie. `es: [1, 10],` would import the "Homepage" and "Learn" buckets for Spanish
+ * 2b. Alternatively, you can manually add buckets to import to the USER_OVERRIDE object below.
+ *   1. Add the number of the corresponding content bucket to the chosen language array below
+ *      ie. `es: [1, 10],` would import the "Homepage" and "Learn" buckets for Spanish
+ *   2. Save file without committing*
  *
- * 3. Save file without committing
+ * Optionally: To view summary of buckets from CSV, run `yarn crowdin-import --buckets` or `yarn crowdin-import -b`
+ *   Note: if USER_OVERRIDE contains any buckets, the CSV is ignored entirely (the two are not merged)
  *
- * 4. Execute script by running `yarn crowdin-import`
+ * 3. Export translated content from Crowdin and import into ./.crowdin folder:
+ *   1. Export latest translated content from Crowdin and unzip
+ *   2. Copy languages folder from Crowdin export to ./.crowdin
+ *      ie. ./.crowdin/{lang-codes}
  *
- * 5. If successful, copy `GATSBY_BUILD_LANGS={langs}` output and paste in
- *    your `.env`, then build site to test results.
+ * 4. Execute script:
+ *   1. Execute script by running `yarn crowdin-import`
+ *   2. If successful, copy `GATSBY_BUILD_LANGS={langs}` output and paste in
+ *      your `.env`, then build site to test results.
  *
- * Remember: Revert working changes to this file before committing Crowdin import
+ * *Remember: Revert any working changes to this file before committing Crowdin import
  */
 
-type UserSelectionObject = { [key: string]: Array<number> }
-const USER_SELECTION: UserSelectionObject = {
-  am: [],
-  ar: [],
-  az: [],
-  be: [],
-  bg: [],
-  bs: [],
-  bn: [],
-  ca: [],
-  cs: [],
-  da: [],
-  de: [],
-  el: [],
-  es: [],
-  fa: [],
-  fi: [],
-  fil: [],
-  fr: [],
-  gl: [],
-  gu: [],
-  hi: [],
-  hr: [],
-  hu: [],
-  hy: [],
-  id: [],
-  ig: [],
-  it: [],
-  ja: [],
-  ka: [],
-  kk: [],
-  km: [],
-  kn: [],
-  ko: [],
-  lt: [],
-  ml: [],
-  ms: [],
-  mr: [],
-  nb: [],
-  nl: [],
-  pcm: [],
-  pl: [],
-  pt: [],
-  "pt-br": [],
-  ro: [],
-  ru: [],
-  se: [],
-  sk: [],
-  sl: [],
-  sr: [],
-  sw: [],
-  ta: [],
-  th: [],
-  tk: [],
-  tr: [],
-  uk: [],
-  uz: [],
-  vi: [],
-  zh: [],
-  "zh-tw": [],
+type BucketsList = { [key: string]: Array<number> }
+const USER_OVERRIDE: BucketsList = {
+  // FORMAT: lang_code: [bucket_number, bucket_number, ...],
+  // EXAMPLE: es: [1, 10, 12, 14],
 }
 
 /******************************
@@ -102,10 +76,15 @@ const USER_SELECTION: UserSelectionObject = {
  * slight from those used in the repo). These folders must be copied into the
  * root `.crowdin` folder of this repo.
  *
- * Using the USER_SELECTION object above, the script iterates through each
- * language chosen, using the dictionary object below to convert the repo lang
- * code to the code used by Crowdin (only if needed, defaults to same). `fs`
- * is used to find matching language folder.
+ * A CSV listing the language buckets marked "Reviewed" can be exported from
+ * the Notion "Website translation board" to automate selecting which buckets
+ * to import. See "Instructions for use" above.
+ *
+ * You can alternatively use the USER_OVERRIDE object above to manually select buckets.
+ *
+ * The script iterates through each language chosen, using the dictionary object
+ * below to convert the repo lang code to the code used by Crowdin (only if
+ * needed, defaults to same). `fs` is used to find matching language folder.
  *
  * The "buckets" chosen (type number[]) are then iterated over, opening the
  * corresponding folder that begins with the same number string (formatted 00).
@@ -124,6 +103,7 @@ const USER_SELECTION: UserSelectionObject = {
 
 // Initialize console arguments
 const VERBOSE = Boolean(argv.v || argv.verbose)
+const BUCKET_GENERATION_ONLY = Boolean(argv.b || argv.buckets)
 const FULL_BUCKET_NAME_SUMMARY = Boolean(argv.f || argv.full)
 
 // Initialize root paths
@@ -137,7 +117,7 @@ if (!existsSync(crowdinRoot)) mkdirSync(crowdinRoot)
  * This is used to convert any codes that may differ when performing folder lookup.
  */
 const getCrowdinCode = (code: string): string =>
-  i18Config.filter((lang) => lang.code === code)?.[0].crowdinCode || code
+  i18Config.filter((lang) => lang.code === code)?.[0]?.crowdinCode || code
 
 /**
  * Names for each bucket in order, zero indexed.
@@ -189,6 +169,90 @@ const trackers: TrackerObject = {
 const log = (message: any, ...optionalParams: any): void => {
   VERBOSE && console.log(message, ...optionalParams)
 }
+
+/**
+ * Reads the CSV exported from the Notion "Website translation board" table, looked up under `crowdinRoot`.
+ * See "Instructions for use" above for how to export the CSV and copy it into the repo.
+ * @returns Object keyed by language code, each value an array of bucket numbers to import; empty if no export folder is found or the CSV is empty
+ */
+const fetchReviewedCsv = (): BucketsList => {
+  const csvDir: string = readdirSync(crowdinRoot).filter((dir: string) =>
+    dir.startsWith("Website translation board")
+  )[0]
+  if (!csvDir) return {}
+  const path = join(crowdinRoot, csvDir)
+  const reviewedCsvPath: Array<string> = readdirSync(path).filter(
+    (file: string) => {
+      const fileParts: Array<string> = file.split(".")
+      return (
+        fileParts[0].startsWith("https") &&
+        !fileParts[0].endsWith("all") &&
+        fileParts[1] === "csv"
+      )
+    }
+  )[0]
+  const bucketsList: BucketsList = {}
+  const csvFile = readFileSync(join(path, reviewedCsvPath), "utf8")
+  if (!csvFile) return {}
+  const data = csvFile.split("\n").map((row: string) => {
+    const quotePair = /"([^"]+)"/g
+    const sanitized = row.replaceAll(quotePair, (match) =>
+      match.replace(",", " ").replace(/"/g, "")
+    )
+    return sanitized.split(",")
+  })
+  const headings = data.shift()
+  const langCodeIndex = headings.indexOf("code")
+  const firstBucketIndex = headings.findIndex((item: string) =>
+    item.startsWith("1)")
+  )
+  data.forEach((rowItems: Array<string>) => {
+    const langCode = rowItems[langCodeIndex].split(" ").at(-1) // "es-EM → es" parses to "es"
+    if (!langCode) return
+    const bucketsForLang: Array<number> = []
+    rowItems.forEach((item: string, idx: number) => {
+      if (item.includes("Reviewed"))
+        bucketsForLang.push(idx - firstBucketIndex + 1)
+    })
+    bucketsList[langCode] = bucketsForLang
+  })
+  return bucketsList
+}
+
+/**
+ * If any buckets are selected in USER_OVERRIDE, use those instead of importing from CSV.
+ */
+const useUserOverRide =
+  Object.values(USER_OVERRIDE).filter((buckets) => buckets.length > 0).length >
+  0
+
+const bucketsToImport: BucketsList = useUserOverRide
+  ? USER_OVERRIDE
+  : fetchReviewedCsv()
+
+const highestBucketNumber: number = Object.values(bucketsToImport).reduce(
+  (prev: number, buckets: Array<number>): number =>
+    buckets[buckets.length - 1] > prev ? buckets[buckets.length - 1] : prev,
+  0
+)
+
+/**
+ * If BUCKET_GENERATION_ONLY (-b, --buckets) flag is enabled, show overview
+ * of all langs and buckets to be imported. Also print a copy/paste ready
+ * object for USER_OVERRIDE, then exit the script early.
+ */
+if (BUCKET_GENERATION_ONLY) {
+  const bucketsOverview = {}
+  Object.entries(bucketsToImport).forEach(([langCode, buckets]) => {
+    bucketsOverview[langCode] = Array(highestBucketNumber - 1)
+      .fill(0)
+      .map((_, i) => (buckets.includes(i + 1) ? i + 1 : ""))
+  })
+  console.table(bucketsOverview)
+  console.log("const USER_OVERRIDE: BucketsList =", bucketsToImport)
+  process.exit(0)
+}
+
 /**
  * Reads `ls` file contents of `_path`, moving .md and .json files
  * to their corresponding destinations in the repo. Function is called
@@ -225,7 +289,11 @@ const scrapeDirectory = (
       copyFileSync(source, jsonDestinationPath)
       // Update .json tracker
       trackers.langs[repoLangCode].jsonCopyCount++
-    } else if (item.endsWith(".md") || item.endsWith(".svg")) {
+    } else if (
+      item.endsWith(".md") ||
+      item.endsWith(".svg") ||
+      item.endsWith(".xlsx")
+    ) {
       const mdDestDirPath: string = join(
         repoRoot,
         "src",
@@ -259,16 +327,16 @@ type SelectionItem = {
   crowdinLangCode: string
   buckets: Array<number>
 }
-const importSelection: Array<SelectionItem> = Object.keys(USER_SELECTION)
+const importSelection: Array<SelectionItem> = Object.keys(bucketsToImport)
   .filter((repoLangCode: string): boolean => {
-    if (!USER_SELECTION[repoLangCode].length) trackers.emptyBuckets++
-    return !!USER_SELECTION[repoLangCode].length
+    if (!bucketsToImport[repoLangCode].length) trackers.emptyBuckets++
+    return !!bucketsToImport[repoLangCode].length
   })
   .map(
     (repoLangCode: string): SelectionItem => ({
       repoLangCode,
       crowdinLangCode: getCrowdinCode(repoLangCode),
-      buckets: USER_SELECTION[repoLangCode],
+      buckets: bucketsToImport[repoLangCode],
     })
   )