1
1
import type { GitBookSiteContext } from '@/lib/context' ;
2
- import { throwIfDataError } from '@/lib/data' ;
2
+ import { throwIfDataError } from '@/lib/data/errors ' ;
3
3
import { resolvePagePath } from '@/lib/pages' ;
4
- import { RevisionPageType } from '@gitbook/api' ;
4
+ import { type RevisionPage , type RevisionPageDocument , RevisionPageType } from '@gitbook/api' ;
5
+ import { pMapIterable } from 'p-map' ;
6
+
7
+ // We limit the concurrency to 100 to avoid hitting limits on concurrent requests
8
+ // or file descriptor limits.
9
+ const MAX_CONCURRENCY = 100 ;
5
10
6
11
/**
7
- * Generate a markdown version of a page.
12
+ * Generate a markdown version of a page with streaming for better performance.
13
+ * For pages with many children, this streams the output to avoid memory issues.
8
14
*/
9
15
export async function servePageMarkdown ( context : GitBookSiteContext , pagePath : string ) {
10
16
const pageLookup = resolvePagePath ( context . revision . pages , pagePath ) ;
@@ -18,17 +24,100 @@ export async function servePageMarkdown(context: GitBookSiteContext, pagePath: s
18
24
return new Response ( `Page "${ pagePath } " is not a document` , { status : 404 } ) ;
19
25
}
20
26
27
+ if ( page . hidden ) {
28
+ return new Response ( `Page "${ pagePath } " not found` , { status : 404 } ) ;
29
+ }
30
+
31
+ // Return early if the page has no children.
32
+ if ( ! page . pages . length ) {
33
+ const markdown = await fetchMarkdown ( context , page ) ;
34
+ return new Response ( markdown , {
35
+ headers : {
36
+ 'Content-Type' : 'text/markdown; charset=utf-8' ,
37
+ } ,
38
+ } ) ;
39
+ }
40
+
41
+ // Otherwise, stream the markdown from the page and its children.
42
+ return new Response (
43
+ new ReadableStream < Uint8Array > ( {
44
+ async pull ( controller ) {
45
+ await streamMarkdownFromPage ( context , page , controller ) ;
46
+ controller . close ( ) ;
47
+ } ,
48
+ } ) ,
49
+ {
50
+ headers : {
51
+ 'Content-Type' : 'text/markdown; charset=utf-8' ,
52
+ } ,
53
+ }
54
+ ) ;
55
+ }
56
+
57
/**
 * Write the markdown of `page`, followed by the markdown of its child pages,
 * into the given stream controller.
 *
 * The page's own markdown is fetched and enqueued first as a single encoded
 * chunk; child pages are only processed when `page.pages` is non-empty.
 * The controller is not closed here.
 *
 * @param context - Site context used to fetch the markdown.
 * @param page - Document page whose content (and children) should be emitted.
 * @param stream - Controller of the readable stream receiving the encoded chunks.
 */
async function streamMarkdownFromPage(
    context: GitBookSiteContext,
    page: RevisionPageDocument,
    stream: ReadableStreamDefaultController<Uint8Array>
): Promise<void> {
    const ownMarkdown = await fetchMarkdown(context, page);
    const encoder = new TextEncoder();
    const ownChunk = encoder.encode(ownMarkdown);
    stream.enqueue(ownChunk);

    // Nothing more to emit for a page without children.
    if (page.pages.length === 0) {
        return;
    }

    await streamChildPages(context, page.pages, stream);
}
72
+
73
/**
 * Stream markdown from child pages with controlled concurrency.
 *
 * Only the entries of `pages` themselves are processed: pages that are not
 * documents or that are hidden are filtered out (see `getEligiblePages`), and
 * the descendants of the remaining pages are NOT visited. This function does
 * not recurse.
 *
 * Markdown is fetched for up to `MAX_CONCURRENCY` pages at a time and each
 * result is enqueued as soon as the iterable yields it, prefixed with a blank
 * line (`\n\n`) to separate it from the previous content. Per p-map's
 * documentation, `pMapIterable` yields results in input order.
 *
 * A failure while fetching any page surfaces as a rejection of the returned
 * promise; the controller is not closed or errored here.
 *
 * @param context - Site context used to fetch the markdown.
 * @param pages - Candidate child pages.
 * @param stream - Controller of the readable stream receiving the encoded chunks.
 */
async function streamChildPages(
    context: GitBookSiteContext,
    pages: RevisionPage[],
    stream: ReadableStreamDefaultController<Uint8Array>
): Promise<void> {
    const eligiblePages = getEligiblePages(pages);

    const childPagesMarkdown = pMapIterable(
        eligiblePages,
        async (childPage) => fetchMarkdown(context, childPage),
        {
            concurrency: MAX_CONCURRENCY,
        }
    );

    // TextEncoder is stateless (always UTF-8), so one instance serves every chunk.
    const encoder = new TextEncoder();

    for await (const childMarkdown of childPagesMarkdown) {
        stream.enqueue(encoder.encode(`\n\n${childMarkdown}`));
    }
}
96
+
97
/**
 * Retrieve the markdown of a single document page.
 *
 * The page is looked up through `context.dataFetcher.getRevisionPageMarkdown`
 * using the space and revision of the current context. The fetcher result is
 * passed through `throwIfDataError` (presumably converting a data error into a
 * thrown exception; confirm against `@/lib/data/errors`).
 *
 * @param context - Site context providing the data fetcher, space and revision.
 * @param page - Document page to fetch.
 * @returns The markdown content of the page.
 */
async function fetchMarkdown(
    context: GitBookSiteContext,
    page: RevisionPageDocument
): Promise<string> {
    const request = {
        spaceId: context.space.id,
        revisionId: context.revision.id,
        pageId: page.id,
    };

    return await throwIfDataError(context.dataFetcher.getRevisionPageMarkdown(request));
}
28
113
29
- return new Response ( markdown , {
30
- headers : {
31
- 'Content-Type' : 'text/markdown; charset=utf-8' ,
32
- } ,
33
- } ) ;
114
/**
 * Select the pages whose markdown may be served.
 *
 * A page is kept only when its type is `RevisionPageType.Document` and it is
 * not flagged as hidden; the relative order of the input is preserved.
 *
 * @param pages - Pages to filter.
 * @returns The visible document pages, narrowed to `RevisionPageDocument`.
 */
function getEligiblePages(pages: RevisionPage[]): RevisionPageDocument[] {
    const isVisibleDocument = (candidate: RevisionPage): candidate is RevisionPageDocument =>
        candidate.type === RevisionPageType.Document && !candidate.hidden;

    return pages.filter(isVisibleDocument);
}
0 commit comments