11#!/usr/bin/env node
22
3- import { fileURLToPath } from 'url' ;
4-
53import { selectAll } from 'hast-util-select' ;
6- import { existsSync } from 'node:fs' ;
7- import { mkdir , opendir , readFile , rm , writeFile } from 'node:fs/promises' ;
4+ import { createHash } from 'node:crypto' ;
5+ import { constants as fsConstants , existsSync } from 'node:fs' ;
6+ import { copyFile , mkdir , opendir , readFile , rm , writeFile } from 'node:fs/promises' ;
87import { cpus } from 'node:os' ;
98import * as path from 'node:path' ;
9+ import { fileURLToPath } from 'node:url' ;
1010import { isMainThread , parentPort , Worker , workerData } from 'node:worker_threads' ;
1111import rehypeParse from 'rehype-parse' ;
1212import rehypeRemark from 'rehype-remark' ;
@@ -15,6 +15,16 @@ import remarkStringify from 'remark-stringify';
1515import { unified } from 'unified' ;
1616import { remove } from 'unist-util-remove' ;
1717
/**
 * Logs the summary a worker (or the main thread's own task run) reports when
 * it finishes its task list.
 *
 * @param {{id: number, success: number, failedTasks: Array}} data - per-worker
 *   result summary as produced by `processTaskList`.
 * @returns {boolean} true when any task failed, false when all succeeded.
 */
function taskFinishHandler(data) {
  if (data.failedTasks.length === 0) {
    console.log(`✅ Worker[${data.id}]: ${data.success} files successfully.`);
    return false;
  }
  // BUG FIX: the original did `hasErrors = true` here, but `hasErrors` is not
  // declared in this scope — in an ES module (strict mode) that assignment
  // throws a ReferenceError inside the worker 'message' handler, and the
  // per-worker `hasErrors` closure variable in createWork was never updated.
  // Report the failure via the return value instead so callers can react.
  console.error(`❌ Worker[${data.id}]: ${data.failedTasks.length} files failed:`);
  console.error(data.failedTasks);
  return true;
}
27+
1828async function createWork ( ) {
1929 let root = process . cwd ( ) ;
2030 while ( ! existsSync ( path . join ( root , 'package.json' ) ) ) {
@@ -27,6 +37,13 @@ async function createWork() {
2737 const INPUT_DIR = path . join ( root , '.next' , 'server' , 'app' ) ;
2838 const OUTPUT_DIR = path . join ( root , 'public' , 'md-exports' ) ;
2939
40+ const CACHE_VERSION = 1 ;
41+ const CACHE_DIR = path . join ( root , '.next' , 'cache' , 'md-exports' , `v${ CACHE_VERSION } ` ) ;
42+ const noCache = ! existsSync ( CACHE_DIR ) ;
43+ if ( noCache ) {
44+ await mkdir ( CACHE_DIR , { recursive : true } ) ;
45+ }
46+
3047 console . log ( `🚀 Starting markdown generation from: ${ INPUT_DIR } ` ) ;
3148 console . log ( `📁 Output directory: ${ OUTPUT_DIR } ` ) ;
3249
@@ -63,40 +80,54 @@ async function createWork() {
6380 console . log ( `📄 Converting ${ numFiles } files with ${ numWorkers } workers...` ) ;
6481
6582 const selfPath = fileURLToPath ( import . meta. url ) ;
66- const workerPromises = new Array ( numWorkers - 1 ) . fill ( null ) . map ( ( _ , idx ) => {
83+ const workerPromises = new Array ( numWorkers - 1 ) . fill ( null ) . map ( ( _ , id ) => {
6784 return new Promise ( ( resolve , reject ) => {
68- const worker = new Worker ( selfPath , { workerData : workerTasks [ idx ] } ) ;
69- let hasErrors = false ;
70- worker . on ( 'message' , data => {
71- if ( data . failedTasks . length === 0 ) {
72- console . log ( `✅ Worker[${ idx } ]: ${ data . success } files successfully.` ) ;
73- } else {
74- hasErrors = true ;
75- console . error ( `❌ Worker[${ idx } ]: ${ data . failedTasks . length } files failed:` ) ;
76- console . error ( data . failedTasks ) ;
77- }
85+ const worker = new Worker ( selfPath , {
86+ workerData : { id, noCache, cacheDir : CACHE_DIR , tasks : workerTasks [ id ] } ,
7887 } ) ;
88+ let hasErrors = false ;
89+ worker . on ( 'message' , taskFinishHandler ) ;
7990 worker . on ( 'error' , reject ) ;
8091 worker . on ( 'exit' , code => {
8192 if ( code !== 0 ) {
82- reject ( new Error ( `Worker[${ idx } ] stopped with exit code ${ code } ` ) ) ;
93+ reject ( new Error ( `Worker[${ id } ] stopped with exit code ${ code } ` ) ) ;
8394 } else {
84- hasErrors ? reject ( new Error ( `Worker[${ idx } ] had some errors.` ) ) : resolve ( ) ;
95+ hasErrors ? reject ( new Error ( `Worker[${ id } ] had some errors.` ) ) : resolve ( ) ;
8596 }
8697 } ) ;
8798 } ) ;
8899 } ) ;
89100 // The main thread can also process tasks -- That's 65% more bullet per bullet! -Cave Johnson
90- workerPromises . push ( processTaskList ( workerTasks [ workerTasks . length - 1 ] ) ) ;
101+ workerPromises . push (
102+ processTaskList ( {
103+ noCache,
104+ cacheDir : CACHE_DIR ,
105+ tasks : workerTasks [ workerTasks . length - 1 ] ,
106+ id : workerTasks . length - 1 ,
107+ } ) . then ( taskFinishHandler )
108+ ) ;
91109
92110 await Promise . all ( workerPromises ) ;
93111
94112 console . log ( `📄 Generated ${ numFiles } markdown files from HTML.` ) ;
95113 console . log ( '✅ Markdown export generation complete!' ) ;
96114}
97115
98- async function genMDFromHTML ( source , target ) {
116+ const md5 = data => createHash ( 'md5' ) . update ( data ) . digest ( 'hex' ) ;
117+
118+ async function genMDFromHTML ( source , target , { cacheDir, noCache} ) {
99119 const text = await readFile ( source , { encoding : 'utf8' } ) ;
120+ const hash = md5 ( text ) ;
121+ const cacheFile = path . join ( cacheDir , hash ) ;
122+ if ( ! noCache ) {
123+ try {
124+ await copyFile ( cacheFile , target , fsConstants . COPYFILE_FICLONE ) ;
125+ return ;
126+ } catch {
127+ // pass
128+ }
129+ }
130+
100131 await writeFile (
101132 target ,
102133 String (
@@ -125,22 +156,26 @@ async function genMDFromHTML(source, target) {
125156 . process ( text )
126157 )
127158 ) ;
159+ await copyFile ( target , cacheFile , fsConstants . COPYFILE_FICLONE ) ;
128160}
129161
/**
 * Sequentially converts each task's HTML source into a markdown file,
 * collecting (rather than aborting on) per-file failures.
 *
 * @param {object} work
 * @param {number} work.id - worker identity, echoed back in the summary.
 * @param {Array<{sourcePath: string, targetPath: string}>} work.tasks - files to convert.
 * @param {string} work.cacheDir - directory holding the md5-keyed conversion cache.
 * @param {boolean} work.noCache - when true, skip cache lookups (cache was just created).
 * @returns {Promise<{id: number, success: number, failedTasks: Array}>} run summary.
 */
async function processTaskList({id, tasks, cacheDir, noCache}) {
  const conversionOptions = {cacheDir, noCache};
  const failedTasks = [];
  for (const {sourcePath, targetPath} of tasks) {
    try {
      await genMDFromHTML(sourcePath, targetPath, conversionOptions);
    } catch (error) {
      // Keep going: one bad page should not sink the whole worker's batch.
      failedTasks.push({sourcePath, targetPath, error});
    }
  }
  const success = tasks.length - failedTasks.length;
  return {id, success, failedTasks};
}
141176
/**
 * Worker-thread entry point: runs the assigned task list and posts the
 * resulting summary back to the main thread.
 *
 * @param {{id: number, tasks: Array, cacheDir: string, noCache: boolean}} work
 *   payload received via `workerData`.
 */
async function doWork(work) {
  const summary = await processTaskList(work);
  parentPort.postMessage(summary);
}
145180
146181if ( isMainThread ) {
0 commit comments