@@ -3,29 +3,90 @@ const path = require('path');
 const glob = require('glob');
 const https = require('https');
 const http = require('http');
+const { URL } = require('url');
+
+// Whitelist of allowed GitHub repositories
+const ALLOWED_GITHUB_REPOS = [
+  'ClickHouse/clickhouse-rs',
+  'ClickHouse/DataflowTemplates',
+  'ClickHouse/ch-go',
+  'ClickHouse/clickhouse-beam-connector',
+  'ClickHouse/clickhouse-connect',
+  'ClickHouse/clickhouse-cpp',
+  'ClickHouse/clickhouse-cs',
+  'ClickHouse/clickhouse-go',
+  'ClickHouse/clickhouse-java',
+  'ClickHouse/clickhouse-js',
+  'ClickHouse/clickhouse-kafka-connect',
+  'ClickHouse/clickhouse-odbc',
+  'ClickHouse/clickhouse-tableau-connector-jdbc',
+  'ClickHouse/dbt-clickhouse',
+  'ClickHouse/flink-connector-clickhouse',
+  'ClickHouse/metabase-clickhouse-driver',
+  'ClickHouse/power-bi-clickhouse',
+  'ClickHouse/spark-clickhouse-connector',
+];
+
+// Helper function to validate GitHub raw URL
+function validateGitHubUrl(url) {
+  try {
+    const parsedUrl = new URL(url);
+
+    // Check if it's a raw.githubusercontent.com URL
+    if (parsedUrl.hostname !== 'raw.githubusercontent.com') {
+      throw new Error('URL must be from raw.githubusercontent.com');
+    }
+
+    // Extract repository from pathname (format: /org/repo/...)
+    const pathParts = parsedUrl.pathname.split('/').filter(p => p);
+    if (pathParts.length < 2) {
+      throw new Error('Invalid GitHub URL format');
+    }
+
+    const repo = `${pathParts[0]}/${pathParts[1]}`;
+
+    // Check if repository is in whitelist
+    if (!ALLOWED_GITHUB_REPOS.includes(repo)) {
+      throw new Error(`Repository "${repo}" is not in the allowed list. Allowed repositories: ${ALLOWED_GITHUB_REPOS.join(', ')}`);
+    }
+
+    return true;
+  } catch (error) {
+    throw new Error(`GitHub URL validation failed: ${error.message}`);
+  }
+}
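As a quick illustration of what validateGitHubUrl accepts and rejects (the branch and file path segments in these URLs are made up for the example; only the hostname and the org/repo prefix matter to the check):

// OK: host is raw.githubusercontent.com and the repo is whitelisted
validateGitHubUrl('https://raw.githubusercontent.com/ClickHouse/clickhouse-js/main/README.md');

// Throws: the repository is not in ALLOWED_GITHUB_REPOS
validateGitHubUrl('https://raw.githubusercontent.com/some-org/some-repo/main/file.js');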

 // Helper function to fetch content from URL
 function fetchUrl(url) {
   return new Promise((resolve, reject) => {
-    const client = url.startsWith('https:') ? https : http;
-
+    let parsedUrl;
+    try {
+      parsedUrl = new URL(url);
+    } catch (error) {
+      reject(new Error(`Invalid URL: ${error.message}`));
+      return;
+    }
+
+    const client = parsedUrl.protocol === 'https:' ? https : http;
+
     client.get(url, (res) => {
       if (res.statusCode !== 200) {
         reject(new Error(`HTTP ${res.statusCode}: ${res.statusMessage}`));
         return;
       }
-
+
       let data = '';
       res.on('data', chunk => data += chunk);
       res.on('end', () => resolve(data));
     }).on('error', reject);
   });
 }

+
 // Helper function to extract snippet from content using comment markers
 function extractSnippet(content, snippetId = null) {
   const lines = content.split('\n');
-
+
   // Define comment patterns for different languages
   const commentPatterns = [
     // Hash-style comments (Python, Ruby, Shell, YAML, etc.)
@@ -37,11 +98,11 @@ function extractSnippet(content, snippetId = null) {
     // XML/HTML comments
     { start: `<!--docs-start${snippetId ? `-${snippetId}` : ''}-->`, end: `<!--docs-end${snippetId ? `-${snippetId}` : ''}-->` }
   ];
-
+
   for (const pattern of commentPatterns) {
     let startIndex = -1;
     let endIndex = -1;
-
+
     for (let i = 0; i < lines.length; i++) {
       const line = lines[i].trim();
       if (line.includes(pattern.start)) {
@@ -51,12 +112,12 @@ function extractSnippet(content, snippetId = null) {
         break;
       }
     }
-
+
     if (startIndex !== -1 && endIndex !== -1 && startIndex < endIndex) {
       return lines.slice(startIndex, endIndex).join('\n');
     }
   }
-
+
   // If no snippet markers found, return original content
   return content;
 }
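To make the marker convention concrete, here is a rough sketch of how extractSnippet would be driven, using the XML/HTML pattern visible above (the hash-style marker text and the exact start/end index bookkeeping live in lines elided from this diff, so treat the details as illustrative):

const sample = [
  '<p>intro text</p>',
  '<!--docs-start-install-->',
  '<code>example command</code>',
  '<!--docs-end-install-->',
  '<p>outro text</p>',
].join('\n');

// With snippetId = 'install' the markers become '<!--docs-start-install-->'
// and '<!--docs-end-install-->', and only the region between them is returned;
// with no snippetId the bare '<!--docs-start-->' / '<!--docs-end-->' pair is used.
const snippet = extractSnippet(sample, 'install');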
@@ -67,34 +128,34 @@ function codeImportPlugin(context, options) {
     async loadContent() {
       // Find all markdown files in docs directory that might contain code imports
       const docsPath = path.join(context.siteDir, 'docs');
-
+
       const markdownFiles = [
         ...glob.sync('**/*.md', { cwd: docsPath, absolute: true }),
         ...glob.sync('**/*.mdx', { cwd: docsPath, absolute: true }),
       ];

       // Process each markdown file for code imports
       const processedFiles = [];
-
+
       for (const filePath of markdownFiles) {
         try {
           let content = fs.readFileSync(filePath, 'utf8');
           let modified = false;
-
+
           // Process code blocks with file= or url= syntax
           const fileUrlRegex = /```(\w+)?\s*((?:file|url)=[^\s\n]+)([^\n]*)\n([^`]*?)```/g;
           const matches = [...content.matchAll(fileUrlRegex)];
-
+
           for (const match of matches) {
             const [fullMatch, lang, param, additionalMeta, existingContent] = match;
-
+
             // Parse snippet parameter from additional metadata
             const snippetMatch = additionalMeta.match(/snippet=(\w+)/);
             const snippetId = snippetMatch ? snippetMatch[1] : null;
-
+
             try {
               let importedContent;
-
+
               if (param.startsWith('file=')) {
                 // Handle file import
                 const importPath = param.replace('file=', '');
@@ -105,6 +166,8 @@ function codeImportPlugin(context, options) {
                 // Handle URL import
                 const url = param.replace('url=', '');
                 try {
+                  // Validate GitHub URL before fetching
+                  validateGitHubUrl(url);
                   const rawContent = await fetchUrl(url);
                   importedContent = extractSnippet(rawContent, snippetId);
                 } catch (urlError) {
@@ -113,21 +176,21 @@ function codeImportPlugin(context, options) {
                   continue; // Skip this replacement if URL fetch fails
                 }
               }
-
+
               // Preserve the complete metadata
               const fullMeta = `${param}${additionalMeta}`;
               const metaStr = fullMeta ? ` ${fullMeta}` : '';
               const replacement = `\`\`\`${lang || ''}${metaStr}\n${importedContent}\n\`\`\``;
-
+
               content = content.replace(fullMatch, replacement);
               modified = true;
-
+
             } catch (error) {
               console.warn(`Could not process ${param} in ${filePath}: ${error.message}`);
               process.exit(1);
             }
           }
-
+
           if (modified) {
             processedFiles.push({
               path: filePath,
@@ -140,13 +203,13 @@ function codeImportPlugin(context, options) {
           process.exit(1);
         }
       }
-
+
       return { processedFiles };
     },
-
+
     async contentLoaded({ content, actions }) {
       const { processedFiles } = content;
-
+
       // Write processed files back to disk during build
       for (const file of processedFiles) {
         try {
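For orientation, the fence syntax that fileUrlRegex targets in a docs page looks roughly like this (the language, path, and snippet id are invented for illustration; the repository itself has to be on ALLOWED_GITHUB_REPOS):

```go url=https://raw.githubusercontent.com/ClickHouse/clickhouse-go/main/examples/connect.go snippet=connect
```

During loadContent the plugin validates the URL, fetches the raw file, optionally trims it to the docs-start/docs-end region named by snippet=, and rewrites the fence body with that content while keeping the original fence header line.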