Skip to content

Commit 442b22b

Browse files
authored
Merge pull request #4729 from Blargian/remote_code_improvements
Integrations: code blocks from external repos improvements
2 parents e60214b + 3fd9875 commit 442b22b

File tree

1 file changed

+85
-22
lines changed

1 file changed

+85
-22
lines changed

plugins/code-import-plugin.js

Lines changed: 85 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -3,29 +3,90 @@ const path = require('path');
33
const glob = require('glob');
44
const https = require('https');
55
const http = require('http');
6+
const { URL } = require('url');
7+
8+
// Allowlist of GitHub repositories ("org/repo") that code blocks may import
// from. Matching in validateGitHubUrl is exact and case-sensitive.
const ALLOWED_GITHUB_REPOS = [
  'ClickHouse/clickhouse-rs',
  'ClickHouse/DataflowTemplates',
  'ClickHouse/ch-go',
  'ClickHouse/clickhouse-beam-connector',
  'ClickHouse/clickhouse-connect',
  'ClickHouse/clickhouse-cpp',
  'ClickHouse/clickhouse-cs',
  'ClickHouse/clickhouse-go',
  'ClickHouse/clickhouse-java',
  'ClickHouse/clickhouse-js',
  'ClickHouse/clickhouse-kafka-connect',
  'ClickHouse/clickhouse-odbc',
  'ClickHouse/clickhouse-tableau-connector-jdbc',
  'ClickHouse/dbt-clickhouse',
  'ClickHouse/flink-connector-clickhouse',
  'ClickHouse/metabase-clickhouse-driver',
  'ClickHouse/power-bi-clickhouse',
  'ClickHouse/spark-clickhouse-connector',
];

/**
 * Validate that a URL points at a raw file in an allowlisted GitHub repository.
 *
 * Checks, in order: the URL parses, the host is raw.githubusercontent.com,
 * the scheme is HTTPS, the path has at least an org and a repo segment, and
 * "org/repo" is listed in ALLOWED_GITHUB_REPOS.
 *
 * @param {string} url - Candidate URL to validate.
 * @returns {boolean} Always `true` when validation succeeds.
 * @throws {Error} "GitHub URL validation failed: <reason>" when any check
 *   fails; the underlying error is attached as `cause`.
 */
function validateGitHubUrl(url) {
  try {
    // URL parsing also resolves "." and ".." path segments, so the repository
    // check below sees the path that will actually be requested.
    const parsedUrl = new URL(url);

    // Check if it's a raw.githubusercontent.com URL
    if (parsedUrl.hostname !== 'raw.githubusercontent.com') {
      throw new Error('URL must be from raw.githubusercontent.com');
    }

    // Refuse plaintext HTTP: content fetched over it could be tampered with
    // in transit before being written into the docs.
    if (parsedUrl.protocol !== 'https:') {
      throw new Error('URL must use HTTPS');
    }

    // Extract repository from pathname (format: /org/repo/...)
    const pathParts = parsedUrl.pathname.split('/').filter((part) => part);
    if (pathParts.length < 2) {
      throw new Error('Invalid GitHub URL format');
    }

    const repo = `${pathParts[0]}/${pathParts[1]}`;

    // Check if repository is in the allowlist
    if (!ALLOWED_GITHUB_REPOS.includes(repo)) {
      throw new Error(`Repository "${repo}" is not in the allowed list. Allowed repositories: ${ALLOWED_GITHUB_REPOS.join(', ')}`);
    }

    return true;
  } catch (error) {
    // Single, uniform error prefix for callers; keep the original for debugging.
    throw new Error(`GitHub URL validation failed: ${error.message}`, { cause: error });
  }
}
658

759
/**
 * Fetch the body of an HTTP(S) URL as a UTF-8 string.
 *
 * @param {string} url - Absolute http: or https: URL to request.
 * @returns {Promise<string>} Resolves with the full response body when the
 *   server answers 200.
 *   Rejects with an Error when the URL is invalid, uses another protocol,
 *   the status is not 200 (redirects are not followed), or the request or
 *   response stream fails.
 */
function fetchUrl(url) {
  return new Promise((resolve, reject) => {
    let parsedUrl;
    try {
      parsedUrl = new URL(url);
    } catch (error) {
      reject(new Error(`Invalid URL: ${error.message}`));
      return;
    }

    // Fail with a clear message instead of handing e.g. ftp: to the http client.
    if (parsedUrl.protocol !== 'https:' && parsedUrl.protocol !== 'http:') {
      reject(new Error(`Unsupported protocol: ${parsedUrl.protocol}`));
      return;
    }

    const client = parsedUrl.protocol === 'https:' ? https : http;

    client.get(url, (res) => {
      if (res.statusCode !== 200) {
        // Drain the unread body so the socket and its buffers are released.
        res.resume();
        reject(new Error(`HTTP ${res.statusCode}: ${res.statusMessage}`));
        return;
      }

      // Decode as a stream: without this, each Buffer chunk is stringified on
      // its own and a multi-byte character split across chunks is corrupted.
      res.setEncoding('utf8');

      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      res.on('end', () => resolve(data));
      // A response that fails mid-stream would otherwise leave the promise pending.
      res.on('error', reject);
    }).on('error', reject);
  });
}
2484

85+
2586
// Helper function to extract snippet from content using comment markers
2687
function extractSnippet(content, snippetId = null) {
2788
const lines = content.split('\n');
28-
89+
2990
// Define comment patterns for different languages
3091
const commentPatterns = [
3192
// Hash-style comments (Python, Ruby, Shell, YAML, etc.)
@@ -37,11 +98,11 @@ function extractSnippet(content, snippetId = null) {
3798
// XML/HTML comments
3899
{ start: `<!--docs-start${snippetId ? `-${snippetId}` : ''}-->`, end: `<!--docs-end${snippetId ? `-${snippetId}` : ''}-->` }
39100
];
40-
101+
41102
for (const pattern of commentPatterns) {
42103
let startIndex = -1;
43104
let endIndex = -1;
44-
105+
45106
for (let i = 0; i < lines.length; i++) {
46107
const line = lines[i].trim();
47108
if (line.includes(pattern.start)) {
@@ -51,12 +112,12 @@ function extractSnippet(content, snippetId = null) {
51112
break;
52113
}
53114
}
54-
115+
55116
if (startIndex !== -1 && endIndex !== -1 && startIndex < endIndex) {
56117
return lines.slice(startIndex, endIndex).join('\n');
57118
}
58119
}
59-
120+
60121
// If no snippet markers found, return original content
61122
return content;
62123
}
@@ -67,34 +128,34 @@ function codeImportPlugin(context, options) {
67128
async loadContent() {
68129
// Find all markdown files in docs directory that might contain code imports
69130
const docsPath = path.join(context.siteDir, 'docs');
70-
131+
71132
const markdownFiles = [
72133
...glob.sync('**/*.md', { cwd: docsPath, absolute: true }),
73134
...glob.sync('**/*.mdx', { cwd: docsPath, absolute: true }),
74135
];
75136

76137
// Process each markdown file for code imports
77138
const processedFiles = [];
78-
139+
79140
for (const filePath of markdownFiles) {
80141
try {
81142
let content = fs.readFileSync(filePath, 'utf8');
82143
let modified = false;
83-
144+
84145
// Process code blocks with file= or url= syntax
85146
const fileUrlRegex = /```(\w+)?\s*((?:file|url)=[^\s\n]+)([^\n]*)\n([^`]*?)```/g;
86147
const matches = [...content.matchAll(fileUrlRegex)];
87-
148+
88149
for (const match of matches) {
89150
const [fullMatch, lang, param, additionalMeta, existingContent] = match;
90-
151+
91152
// Parse snippet parameter from additional metadata
92153
const snippetMatch = additionalMeta.match(/snippet=(\w+)/);
93154
const snippetId = snippetMatch ? snippetMatch[1] : null;
94-
155+
95156
try {
96157
let importedContent;
97-
158+
98159
if (param.startsWith('file=')) {
99160
// Handle file import
100161
const importPath = param.replace('file=', '');
@@ -105,6 +166,8 @@ function codeImportPlugin(context, options) {
105166
// Handle URL import
106167
const url = param.replace('url=', '');
107168
try {
169+
// Validate GitHub URL before fetching
170+
validateGitHubUrl(url);
108171
const rawContent = await fetchUrl(url);
109172
importedContent = extractSnippet(rawContent, snippetId);
110173
} catch (urlError) {
@@ -113,21 +176,21 @@ function codeImportPlugin(context, options) {
113176
continue; // Skip this replacement if URL fetch fails
114177
}
115178
}
116-
179+
117180
// Preserve the complete metadata
118181
const fullMeta = `${param}${additionalMeta}`;
119182
const metaStr = fullMeta ? ` ${fullMeta}` : '';
120183
const replacement = `\`\`\`${lang || ''}${metaStr}\n${importedContent}\n\`\`\``;
121-
184+
122185
content = content.replace(fullMatch, replacement);
123186
modified = true;
124-
187+
125188
} catch (error) {
126189
console.warn(`Could not process ${param} in ${filePath}: ${error.message}`);
127190
process.exit(1);
128191
}
129192
}
130-
193+
131194
if (modified) {
132195
processedFiles.push({
133196
path: filePath,
@@ -140,13 +203,13 @@ function codeImportPlugin(context, options) {
140203
process.exit(1);
141204
}
142205
}
143-
206+
144207
return { processedFiles };
145208
},
146-
209+
147210
async contentLoaded({ content, actions }) {
148211
const { processedFiles } = content;
149-
212+
150213
// Write processed files back to disk during build
151214
for (const file of processedFiles) {
152215
try {

0 commit comments

Comments
 (0)