diff --git a/code_snippets/ClickStack/config-unstructured-logs-with-processor.yaml b/code_snippets/ClickStack/config-unstructured-logs-with-processor.yaml new file mode 100644 index 00000000000..25059c60598 --- /dev/null +++ b/code_snippets/ClickStack/config-unstructured-logs-with-processor.yaml @@ -0,0 +1,43 @@ +receivers: + filelog: + include: + - /opt/data/logs/access-unstructured.log + start_at: beginning + operators: + - type: regex_parser + regex: '^(?P[\d.]+)\s+-\s+-\s+\[(?P[^\]]+)\]\s+"(?P[A-Z]+)\s+(?P[^\s]+)\s+HTTP/[^\s]+"\s+(?P\d+)\s+(?P\d+)\s+"(?P[^"]*)"\s+"(?P[^"]*)"' + timestamp: + parse_from: attributes.timestamp + layout: '%d/%b/%Y:%H:%M:%S %z' + #22/Jan/2019:03:56:14 +0330 +processors: + batch: + timeout: 1s + send_batch_size: 100 + memory_limiter: + check_interval: 1s + limit_mib: 2048 + spike_limit_mib: 256 +exporters: + # HTTP setup + otlphttp/hdx: + endpoint: 'http://localhost:4318' + headers: + authorization: + compression: gzip + + # gRPC setup (alternative) + otlp/hdx: + endpoint: 'localhost:4317' + headers: + authorization: + compression: gzip +service: + telemetry: + metrics: + address: 0.0.0.0:9888 # Modified as 2 collectors running on same host + pipelines: + logs: + receivers: [filelog] + processors: [batch] + exporters: [otlphttp/hdx] diff --git a/contribute/style-guide.md b/contribute/style-guide.md index 02519179d42..69bec9e15fe 100644 --- a/contribute/style-guide.md +++ b/contribute/style-guide.md @@ -112,12 +112,44 @@ SELECT * FROM system.contributors; \``` ``` +Note: in the snippet above `\` is used only for formatting purposes in this guide. +You should not include it when you write markdown. + Code blocks: - Should always have a language defined immediately next to the opening 3 backticks, without any space. - Have a title (optional) such as 'Query' or 'Response' - Use language `response` if it is for the result of a query. 
+#### Importing code from files or URLs + +There are a few additional parameters you can include on a code block if you want +to import code. + +To import from a file, use `file=`: + +```text +\```python file=code_snippets/integrations/example.py +Code will be inserted here +\``` +``` + +When `yarn build` is run, the code from the file will be inserted as text into +the code block. + +To import from a URL, use `url=`: + +```text +\```python url=https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/refs/heads/main/examples/pandas_examples.py +Code will be inserted here +\``` +``` + +You should commit the inserted code along with the snippet, because we want people (or LLMs) +reading the markdown to be able to see the code. The advantage of importing code +into snippets this way is that you can test your snippets externally or store them +wherever you want. + ### Highlighting You can highlight lines in a code block using the following keywords: diff --git a/docs/use-cases/observability/clickstack/ingesting-data/collector.md b/docs/use-cases/observability/clickstack/ingesting-data/collector.md index 8e61466701c..e55460191a0 100644 --- a/docs/use-cases/observability/clickstack/ingesting-data/collector.md +++ b/docs/use-cases/observability/clickstack/ingesting-data/collector.md @@ -161,8 +161,7 @@ The following configuration shows collection of this [unstructured log file](htt Note the use of operators to extract structure from the log lines (`regex_parser`) and filter events, along with a processor to batch events and limit memory usage. 
// Default idle timeout (in milliseconds) applied to a single URL import.
const FETCH_TIMEOUT_MS = 30000;

/**
 * Fetch the body of a URL as UTF-8 text.
 *
 * Only a `200` response resolves; every other status code rejects with an
 * `HTTP <code>: <message>` error (redirects are therefore not followed).
 *
 * @param {string} url - Absolute `http:` or `https:` URL.
 * @param {number} [timeoutMs] - Socket idle timeout before the request is aborted.
 * @returns {Promise<string>} The response body decoded as UTF-8.
 */
function fetchUrl(url, timeoutMs = FETCH_TIMEOUT_MS) {
  return new Promise((resolve, reject) => {
    const client = url.startsWith('https:') ? https : http;

    const request = client.get(url, { timeout: timeoutMs }, (res) => {
      if (res.statusCode !== 200) {
        // Discard the unread body so the socket is released.
        res.resume();
        reject(new Error(`HTTP ${res.statusCode}: ${res.statusMessage}`));
        return;
      }

      // Decode through the stream so a multi-byte character that straddles
      // two chunks is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');

      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      res.on('end', () => resolve(data));
      res.on('error', reject);
    });

    // The 'timeout' event only signals idleness; the request has to be
    // destroyed explicitly, which then surfaces through the 'error' handler.
    request.on('timeout', () => {
      request.destroy(new Error(`Timed out after ${timeoutMs}ms`));
    });
    request.on('error', reject);
  });
}

/**
 * Load the code referenced by a `file=` or `url=` fence parameter.
 *
 * @param {string} param - The raw parameter, e.g. `file=code_snippets/x.py`.
 * @param {string} filePath - Markdown file being processed (used in warnings).
 * @param {string} siteDir - Directory that `file=` paths are resolved against.
 * @returns {Promise<string|null>} The imported text, or `null` when a URL
 *   could not be fetched (a warning has already been logged in that case).
 * @throws {Error} When a `file=` target cannot be read.
 */
async function loadImportedCode(param, filePath, siteDir) {
  if (param.startsWith('file=')) {
    const importPath = param.slice('file='.length);
    return fs.readFileSync(path.resolve(siteDir, importPath), 'utf8');
  }

  const url = param.slice('url='.length);
  try {
    return await fetchUrl(url);
  } catch (urlError) {
    console.warn(`Could not fetch URL ${url} in ${filePath}: ${urlError.message}`);
    return null;
  }
}

/**
 * Replace the body of every fenced code block that carries a `file=` or
 * `url=` parameter with the referenced code, keeping the fence metadata.
 *
 * Blocks whose import fails are left exactly as they were. The fence pattern
 * cannot match a block whose current body contains a backtick.
 *
 * @param {string} content - Markdown source.
 * @param {string} filePath - Path of the markdown file (used in warnings).
 * @param {string} siteDir - Directory that `file=` paths are resolved against.
 * @returns {Promise<string>} The markdown with imports applied; identical to
 *   `content` when nothing changed.
 */
async function applyCodeImports(content, filePath, siteDir) {
  const fencePattern = /```(\w+)?\s*((?:file|url)=[^\s\n]+)([^\n]*)\n([^`]*?)```/g;
  const pieces = [];
  let cursor = 0;

  for (const match of content.matchAll(fencePattern)) {
    const [fullMatch, lang, param, additionalMeta] = match;
    let block = fullMatch;

    try {
      const imported = await loadImportedCode(param, filePath, siteDir);
      if (imported !== null) {
        // The closing fence must start on its own line, so terminate the
        // imported code with a newline when the source lacks one.
        const code = imported === '' || imported.endsWith('\n') ? imported : `${imported}\n`;
        block = `\`\`\`${lang || ''} ${param}${additionalMeta}\n${code}\`\`\``;
      }
    } catch (error) {
      console.warn(`Could not process ${param} in ${filePath}: ${error.message}`);
    }

    // Splice by match position instead of String.prototype.replace: a
    // replacement string would expand `$&`, `$'`, `$$`, ... found in the
    // imported code, and a string search could hit the wrong duplicate block.
    pieces.push(content.slice(cursor, match.index), block);
    cursor = match.index + fullMatch.length;
  }

  pieces.push(content.slice(cursor));
  return pieces.join('');
}

/**
 * Docusaurus plugin that inlines external code into markdown code fences.
 *
 * `loadContent` scans `<siteDir>/docs` for `.md`/`.mdx` files and computes the
 * updated text of every file containing `file=`/`url=` code fences;
 * `contentLoaded` writes those files back to disk.
 *
 * @param {{ siteDir: string }} context - Plugin context; only `siteDir` is read.
 * @param {object} options - Plugin options (currently unused).
 * @returns {{ name: string, loadContent: Function, contentLoaded: Function }}
 */
function codeImportPlugin(context, options) {
  return {
    name: 'code-import-plugin',

    async loadContent() {
      const docsPath = path.join(context.siteDir, 'docs');

      const markdownFiles = [
        ...glob.sync('**/*.md', { cwd: docsPath, absolute: true }),
        ...glob.sync('**/*.mdx', { cwd: docsPath, absolute: true }),
      ];

      const processedFiles = [];

      for (const filePath of markdownFiles) {
        try {
          const original = fs.readFileSync(filePath, 'utf8');
          const content = await applyCodeImports(original, filePath, context.siteDir);

          // Only queue files whose text actually changed, so files that are
          // already up to date are not rewritten.
          if (content !== original) {
            processedFiles.push({
              path: filePath,
              content,
              originalPath: filePath,
            });
          }
        } catch (error) {
          console.warn(`Error processing file ${filePath}: ${error.message}`);
        }
      }

      return { processedFiles };
    },

    async contentLoaded({ content, actions }) {
      const { processedFiles } = content;

      // Persist the processed markdown so the imported code is visible in the
      // source files themselves.
      for (const file of processedFiles) {
        try {
          fs.writeFileSync(file.path, file.content, 'utf8');
          console.log(`Processed code imports in: ${path.relative(context.siteDir, file.path)}`);
        } catch (error) {
          console.error(`Error writing processed file ${file.path}: ${error.message}`);
        }
      }
    },
  };
}