Skip to content

Commit 4c3e5b5

Browse files
authored
Merge pull request #11152 from ethereum/crowdin-import-update
Update script with CSV auto importing
2 parents 34294af + 4175480 commit 4c3e5b5

File tree

1 file changed

+155
-87
lines changed

1 file changed

+155
-87
lines changed

src/scripts/crowdin-import.ts

Lines changed: 155 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,95 +1,69 @@
11
// Library requires
22
const i18Config = require("../../i18n/config.json")
3-
const { copyFileSync, existsSync, mkdirSync, readdirSync } = require("fs")
3+
const {
4+
copyFileSync,
5+
existsSync,
6+
mkdirSync,
7+
readdirSync,
8+
readFileSync,
9+
} = require("fs")
410
const { resolve, join } = require("path")
511
const argv = require("minimist")(process.argv.slice(2))
12+
13+
/******************************
14+
* Console flags *
15+
******************************/
16+
617
/**
7-
* Console flags
18+
* -b,--buckets Prints buckets overview and exits
819
* -v,--verbose Prints verbose console logs
920
* -f,--full Prints full name of buckets in summary
21+
*/
22+
23+
/******************************
24+
* Instructions for use *
25+
******************************/
26+
27+
/**
28+
* 1. Run `yarn crowdin-clean` to initialize a fresh ./.crowdin folder. This can also be used to erase its contents when finished.
1029
*
30+
* 2a. Export/import CSV of languages ready for review:
31+
* 1. Open "Website translation board" document in ethereum.org Notion (internal only)
32+
* 2. Switch view of "Translation status by language" table to "All reviewed"
33+
* 3. Click triple-dot (...) menu in TOP right corner of the entire app
34+
* 4. Select "Export" > "Export as CSV"
35+
* Export format: Markdown & CSV
36+
* Include databases: Current view
37+
* Include content: No files or images
38+
* Include subpages: Off
39+
* Click "Export" > Save zip file
40+
* 5. Unzip contents into (or copy into) ./.crowdin folder in the root of this repo
1141
*
12-
* Follow these steps to import translations from Crowdin export:
13-
*
14-
* 1. Copy languages folder from Crowdin export to ./.crowdin
15-
* ie. ./.crowdin/{lang-codes}
16-
* Tip: Run `yarn crowdin-clean` to initialize the `.crowdin` folder. Can
17-
* also be used to erase contents when finished.
18-
*
19-
* 2. Select buckets to import by adding the number of the corresponding
20-
* content bucket to the chosen language array below
21-
* ie. `es: [1, 10],` would import the "Homepage" and "Learn" buckets for Spanish
42+
* 2b. Alternatively, you can manually add buckets to import to the USER_OVERRIDE object below.
43+
* 1. Add the number of the corresponding content bucket to the chosen language array below
44+
* ie. `es: [1, 10],` would import the "Homepage" and "Learn" buckets for Spanish
45+
* 2. Save file without committing*
2246
*
23-
* 3. Save file without committing
47+
* Optionally: To view summary of buckets from CSV, run `yarn crowdin-import --buckets` or `yarn crowdin-import -b`
48+
* Any items in USER_OVERRIDE will override the CSV import
2449
*
25-
* 4. Execute script by running `yarn crowdin-import`
50+
* 3. Export translated content from Crowdin and import into ./.crowdin folder:
51+
* 1. Export latest translated content from Crowdin and unzip
52+
* 2. Copy languages folder from Crowdin export to ./.crowdin
53+
* ie. ./.crowdin/{lang-codes}
2654
*
27-
* 5. If successful, copy `GATSBY_BUILD_LANGS={langs}` output and paste in
28-
* your `.env`, then build site to test results.
55+
* 4. Execute script:
56+
* 1. Execute script by running `yarn crowdin-import`
57+
* 2. If successful, copy `GATSBY_BUILD_LANGS={langs}` output and paste in
58+
* your `.env`, then build site to test results.
2959
*
30-
* Remember: Revert working changes to this file before committing Crowdin import
60+
* *Remember: Revert any working changes to this file before committing Crowdin import
3161
*/
3262

33-
type UserSelectionObject = { [key: string]: Array<number> }
34-
const USER_SELECTION: UserSelectionObject = {
35-
am: [],
36-
ar: [],
37-
az: [],
38-
be: [],
39-
bg: [],
40-
bs: [],
41-
bn: [],
42-
ca: [],
43-
cs: [],
44-
da: [],
45-
de: [],
46-
el: [],
47-
es: [],
48-
fa: [],
49-
fi: [],
50-
fil: [],
51-
fr: [],
52-
gl: [],
53-
gu: [],
54-
hi: [],
55-
hr: [],
56-
hu: [],
57-
hy: [],
58-
id: [],
59-
ig: [],
60-
it: [],
61-
ja: [],
62-
ka: [],
63-
kk: [],
64-
km: [],
65-
kn: [],
66-
ko: [],
67-
lt: [],
68-
ml: [],
69-
ms: [],
70-
mr: [],
71-
nb: [],
72-
nl: [],
73-
pcm: [],
74-
pl: [],
75-
pt: [],
76-
"pt-br": [],
77-
ro: [],
78-
ru: [],
79-
se: [],
80-
sk: [],
81-
sl: [],
82-
sr: [],
83-
sw: [],
84-
ta: [],
85-
th: [],
86-
tk: [],
87-
tr: [],
88-
uk: [],
89-
uz: [],
90-
vi: [],
91-
zh: [],
92-
"zh-tw": [],
63+
type BucketsList = { [key: string]: Array<number> }
64+
const USER_OVERRIDE: BucketsList = {
65+
// FORMAT: lang_code: [bucket_number, bucket_number, ...],
66+
// EXAMPLE: es: [1, 10, 12, 14],
9367
}
9468

9569
/******************************
@@ -102,10 +76,15 @@ const USER_SELECTION: UserSelectionObject = {
10276
* slightly from those used in the repo). These folders must be copied into the
10377
* root `.crowdin` folder of this repo.
10478
*
105-
* Using the USER_SELECTION object above, the script iterates through each
106-
* language chosen, using the dictionary object below to convert the repo lang
107-
* code to the code used by Crowdin (only if needed, defaults to same). `fs`
108-
* is used to find matching language folder.
79+
* A CSV containing the language buckets that have been "Reviewed" can be exported
80+
* from Notion to automate the process of importing the needed buckets. See
81+
* "Instructions for use" above.
82+
*
83+
* You can alternatively use the USER_OVERRIDE object above to manually select buckets.
84+
*
85+
* The script iterates through each language chosen, using the dictionary object
86+
* below to convert the repo lang code to the code used by Crowdin (only if
87+
* needed, defaults to same). `fs` is used to find matching language folder.
10988
*
11089
* The "buckets" chosen (type number[]) are then iterated over, opening the
11190
* corresponding folder that begins with the same number string (formatted 00).
@@ -124,6 +103,7 @@ const USER_SELECTION: UserSelectionObject = {
124103

125104
// Initialize console arguments
126105
const VERBOSE = Boolean(argv.v || argv.verbose)
106+
const BUCKET_GENERATION_ONLY = Boolean(argv.b || argv.buckets)
127107
const FULL_BUCKET_NAME_SUMMARY = Boolean(argv.f || argv.full)
128108

129109
// Initialize root paths
@@ -137,7 +117,7 @@ if (!existsSync(crowdinRoot)) mkdirSync(crowdinRoot)
137117
* This is used to convert any codes that may differ when performing folder lookup.
138118
*/
139119
const getCrowdinCode = (code: string): string =>
  // `find` returns the first matching config entry (or undefined), so no
  // intermediate array is built and a single optional chain is sufficient.
  // `||` (not `??`) is kept so an empty `crowdinCode` also falls back to `code`.
  i18Config.find(
    (lang: { code: string; crowdinCode?: string }): boolean =>
      lang.code === code
  )?.crowdinCode || code
141121

142122
/**
143123
* Names for each bucket in order, zero indexed.
@@ -189,6 +169,90 @@ const trackers: TrackerObject = {
189169
const log = (message: unknown, ...optionalParams: Array<unknown>): void => {
  // Forward to console.log only when the VERBOSE flag is set; otherwise stay silent
  if (VERBOSE) console.log(message, ...optionalParams)
}
172+
173+
/**
 * Parses the text of the CSV exported from the Notion "Website translation
 * board" table into a list of reviewed buckets per language.
 *
 * Expected layout (first non-empty row is the heading row):
 * - a column whose heading is exactly `code`, with cells such as
 *   "es-EM → es"; the last space-separated token ("es") is used as the key
 * - bucket columns, the first of which has a heading starting with "1)"
 * Every bucket cell containing "Reviewed" adds that bucket's number
 * (1-based, counted from the "1)" column) to the language's list.
 *
 * @param csvContents Raw CSV text
 * @returns Object containing language codes as keys, and an array of bucket
 * numbers to be imported. Empty object if the text is empty or the required
 * headings cannot be found.
 */
const parseReviewedCsv = (csvContents: string): BucketsList => {
  const quotedField = /"([^"]+)"/g
  const rows: Array<Array<string>> = csvContents
    .replace(/^\uFEFF/, "") // drop a leading byte-order mark, if any
    .split(/\r?\n/) // accept both LF and CRLF line endings
    .filter((row: string): boolean => row.trim().length > 0) // skip blank rows (e.g. trailing newline)
    .map(
      (row: string): Array<string> =>
        row
          // Neutralise EVERY comma inside a quoted field (and drop the quotes)
          // so the plain split below cannot cut the field apart
          .replace(quotedField, (match: string): string =>
            match.replace(/,/g, " ").replace(/"/g, "")
          )
          .split(",")
    )

  const [headings, ...dataRows] = rows
  if (!headings) return {}

  const langCodeIndex: number = headings.indexOf("code")
  const firstBucketIndex: number = headings.findIndex(
    (heading: string): boolean => heading.startsWith("1)")
  )
  if (langCodeIndex < 0 || firstBucketIndex < 0) {
    // Without both columns the bucket numbers cannot be computed reliably
    console.warn(
      'Reviewed CSV is missing the "code" column or the first bucket ("1) ...") column; nothing imported from CSV'
    )
    return {}
  }

  const bucketsList: BucketsList = {}
  dataRows.forEach((rowItems: Array<string>): void => {
    // "es-EM → es" parses to "es"; short rows have no code cell at all
    const codeTokens: Array<string> = (rowItems[langCodeIndex] ?? "")
      .trim()
      .split(" ")
    const langCode: string = codeTokens[codeTokens.length - 1]
    if (!langCode) return
    const bucketsForLang: Array<number> = []
    rowItems.forEach((item: string, idx: number): void => {
      // Only bucket columns count; "Reviewed" in an earlier column is ignored
      if (idx >= firstBucketIndex && item.includes("Reviewed"))
        bucketsForLang.push(idx - firstBucketIndex + 1)
    })
    bucketsList[langCode] = bucketsForLang
  })
  return bucketsList
}

/**
 * Fetches CSV exported from Notion "Website translation board" table
 * See above for details on how to export CSV and import into repo
 *
 * Looks inside `crowdinRoot` for the first entry whose name starts with
 * "Website translation board", then inside that folder for the first file
 * named `https….csv` whose base name does not end with "all".
 *
 * @returns Object containing language codes as keys, and an array of bucket
 * numbers to be imported. Empty object if the folder, the CSV file, or its
 * contents are missing.
 */
const fetchReviewedCsv = (): BucketsList => {
  const csvDir: string | undefined = readdirSync(crowdinRoot).find(
    (dir: string): boolean => dir.startsWith("Website translation board")
  )
  if (!csvDir) return {}
  const path: string = join(crowdinRoot, csvDir)
  const reviewedCsvName: string | undefined = readdirSync(path).find(
    (file: string): boolean => {
      const fileParts: Array<string> = file.split(".")
      return (
        fileParts[0].startsWith("https") &&
        !fileParts[0].endsWith("all") &&
        fileParts[1] === "csv"
      )
    }
  )
  // Export folder present but no matching CSV inside: nothing to import
  if (!reviewedCsvName) return {}
  const csvFile: string = readFileSync(join(path, reviewedCsvName), "utf8")
  if (!csvFile) return {}
  return parseReviewedCsv(csvFile)
}
221+
222+
/**
223+
* If any buckets are selected in USER_OVERRIDE, use those instead of importing from CSV.
224+
*/
225+
const useUserOverRide =
226+
Object.values(USER_OVERRIDE).filter((buckets) => buckets.length > 0).length >
227+
0
228+
229+
const bucketsToImport: BucketsList = useUserOverRide
230+
? USER_OVERRIDE
231+
: fetchReviewedCsv()
232+
233+
const highestBucketNumber: number = Object.values(bucketsToImport).reduce(
234+
(prev: number, buckets: Array<number>): number =>
235+
buckets[buckets.length - 1] > prev ? buckets[buckets.length - 1] : prev,
236+
0
237+
)
238+
239+
/**
240+
* If BUCKET_GENERATION_ONLY (-b, --buckets) flag is enabled, show overview
241+
* of all langs and buckets to be imported. Also print a copy/paste ready
242+
* object for USER_OVERRIDE, then exit the script early.
243+
*/
244+
if (BUCKET_GENERATION_ONLY) {
245+
const bucketsOverview = {}
246+
Object.entries(bucketsToImport).forEach(([langCode, buckets]) => {
247+
bucketsOverview[langCode] = Array(highestBucketNumber - 1)
248+
.fill(0)
249+
.map((_, i) => (buckets.includes(i + 1) ? i + 1 : ""))
250+
})
251+
console.table(bucketsOverview)
252+
console.log("const USER_OVERRIDE: BucketsList =", bucketsToImport)
253+
process.exit(0)
254+
}
255+
192256
/**
193257
* Reads `ls` file contents of `_path`, moving .md and .json files
194258
* to their corresponding destinations in the repo. Function is called
@@ -225,7 +289,11 @@ const scrapeDirectory = (
225289
copyFileSync(source, jsonDestinationPath)
226290
// Update .json tracker
227291
trackers.langs[repoLangCode].jsonCopyCount++
228-
} else if (item.endsWith(".md") || item.endsWith(".svg")) {
292+
} else if (
293+
item.endsWith(".md") ||
294+
item.endsWith(".svg") ||
295+
item.endsWith(".xlsx")
296+
) {
229297
const mdDestDirPath: string = join(
230298
repoRoot,
231299
"src",
@@ -259,16 +327,16 @@ type SelectionItem = {
259327
crowdinLangCode: string
260328
buckets: Array<number>
261329
}
262-
const importSelection: Array<SelectionItem> = Object.keys(USER_SELECTION)
330+
const importSelection: Array<SelectionItem> = Object.keys(bucketsToImport)
263331
.filter((repoLangCode: string): boolean => {
264-
if (!USER_SELECTION[repoLangCode].length) trackers.emptyBuckets++
265-
return !!USER_SELECTION[repoLangCode].length
332+
if (!bucketsToImport[repoLangCode].length) trackers.emptyBuckets++
333+
return !!bucketsToImport[repoLangCode].length
266334
})
267335
.map(
268336
(repoLangCode: string): SelectionItem => ({
269337
repoLangCode,
270338
crowdinLangCode: getCrowdinCode(repoLangCode),
271-
buckets: USER_SELECTION[repoLangCode],
339+
buckets: bucketsToImport[repoLangCode],
272340
})
273341
)
274342

0 commit comments

Comments
 (0)