diff --git a/lib/vector-core/src/config/global_options.rs b/lib/vector-core/src/config/global_options.rs index 329cf1a58e006..d86df49803552 100644 --- a/lib/vector-core/src/config/global_options.rs +++ b/lib/vector-core/src/config/global_options.rs @@ -58,7 +58,7 @@ pub enum WildcardMatching { // // If this is modified, make sure those changes are reflected in the `ConfigBuilder::append` // function! -#[configurable_component(global_option("global_option"))] +#[configurable_component] #[derive(Clone, Debug, Default, PartialEq)] pub struct GlobalOptions { /// The directory used for persisting Vector state data. diff --git a/scripts/generate-component-docs.rb b/scripts/generate-component-docs.rb index 8a3554b39c23c..9cd0204f2f906 100755 --- a/scripts/generate-component-docs.rb +++ b/scripts/generate-component-docs.rb @@ -1722,28 +1722,134 @@ def render_and_import_component_schema(root_schema, schema_name, component_type, ) end -def render_and_import_generated_global_option_schema(root_schema, global_options) - global_option_schema = {} +def render_and_import_generated_top_level_config_schema(root_schema) + top_level_config_schema = {} + + # Define logical groupings for top-level configuration fields + # These groups will be used to organize separate documentation pages + field_groups = { + # Pipeline component containers + 'sources' => 'pipeline_components', + 'transforms' => 'pipeline_components', + 'sinks' => 'pipeline_components', + 'enrichment_tables' => 'pipeline_components', + + # Individual feature pages + 'api' => 'api', + 'schema' => 'schema', + 'log_schema' => 'schema', + 'secret' => 'secrets', + + # Global options (everything else defaults to this) + } + + group_metadata = { + 'global_options' => { + 'title' => 'Global Options', + 'description' => 'Global configuration options that apply to Vector as a whole.', + 'order' => 1 + }, + 'pipeline_components' => { + 'title' => 'Pipeline Components', + 'description' => 'Configure sources, transforms, sinks, and 
enrichment tables for your observability pipeline.', + 'order' => 2 + }, + 'api' => { + 'title' => 'API', + 'description' => 'Configure Vector\'s observability API.', + 'order' => 3 + }, + 'schema' => { + 'title' => 'Schema', + 'description' => 'Configure Vector\'s internal schema system for type tracking and validation.', + 'order' => 4 + }, + 'secrets' => { + 'title' => 'Secrets', + 'description' => 'Configure secrets management for secure configuration.', + 'order' => 5 + } + } + + # Usage of #[serde(flatten)] creates multiple schemas in the `allOf` array: + # - One or more schemas contain ConfigBuilder's direct fields + # - One or more schemas contain flattened GlobalOptions fields + all_of_schemas = root_schema['allOf'] || [] + + if all_of_schemas.empty? + @logger.error "Could not find ConfigBuilder allOf schemas in root schema" + return + end + + # Collect all properties from all allOf schemas into a single hash. + # Since ConfigBuilder uses #[serde(flatten)], field names are unique across all schemas. + all_properties = all_of_schemas.reduce({}) do |acc, schema| + acc.merge(schema['properties'] || {}) + end - global_options.each do |component_name, schema_name| - friendly_name = "'#{component_name}' #{schema_name} configuration" + @logger.info "[*] Found #{all_properties.keys.length} total properties across #{all_of_schemas.length} allOf schemas" - if component_name == "global_option" - # Flattening global options - unwrap_resolved_schema(root_schema, schema_name, friendly_name) - .each { |name, schema| global_option_schema[name] = schema } + # Process each property once + all_properties.each do |field_name, field_schema| + # Skip fields marked with docs::hidden + metadata = field_schema['_metadata'] || {} + if metadata['docs::hidden'] + @logger.info "[*] Skipping '#{field_name}' (marked as docs::hidden)" + next + end + + # Extract and resolve the field + @logger.info "[*] Extracting '#{field_name}' field from ConfigBuilder..." 
+ resolved_field = resolve_schema(root_schema, field_schema) + + # Assign group metadata to organize the documentation + if field_groups.key?(field_name) + group_name = field_groups[field_name] + resolved_field['group'] = group_name + @logger.debug "Assigned '#{field_name}' to group '#{group_name}'" else - # Resolving and assigning other global options - global_option_schema[component_name] = resolve_schema_by_name(root_schema, schema_name) + # Default to global_options for any fields not explicitly grouped + resolved_field['group'] = 'global_options' + @logger.debug "Assigned '#{field_name}' to default group 'global_options'" end + + top_level_config_schema[field_name] = resolved_field + @logger.info "[✓] Resolved '#{field_name}'" end - render_and_import_schema( - global_option_schema, - "configuration", - ["generated", "configuration"], - "generated/configuration.cue" - ) + # Build the final data structure with both configuration and group metadata + friendly_name = "configuration" + config_map_path = ["generated", "configuration"] + cue_relative_path = "generated/configuration.cue" + + # Set up the structure for the value based on the configuration map path + data = {} + last = data + config_map_path.each do |segment| + last[segment] = {} if last[segment].nil? + last = last[segment] + end + + # Add both the configuration schema and the group metadata + last['configuration'] = top_level_config_schema + last['groups'] = group_metadata + + config_map_path.prepend('config-schema-base') + tmp_file_prefix = config_map_path.join('-') + final_json = to_pretty_json(data) + + # Write the resolved schema as JSON + json_output_file = write_to_temp_file(["config-schema-#{tmp_file_prefix}-", '.json'], final_json) + @logger.info "[✓] Wrote #{friendly_name} schema to '#{json_output_file}'. (#{final_json.length} bytes)" + + # Import it as Cue + @logger.info "[*] Importing #{friendly_name} schema as Cue file..." 
+ cue_output_file = "website/cue/reference/#{cue_relative_path}" + unless system(@cue_binary_path, 'import', '-f', '-o', cue_output_file, '-p', 'metadata', json_output_file) + @logger.error "[!] Failed to import #{friendly_name} schema as valid Cue." + exit 1 + end + @logger.info "[✓] Imported #{friendly_name} schema to '#{cue_output_file}'." end if ARGV.empty? @@ -1792,12 +1898,7 @@ def render_and_import_generated_global_option_schema(root_schema, global_options end end -# At last, we generate the global options configuration. -global_options = root_schema['definitions'].filter_map do |key, definition| - component_type = get_schema_metadata(definition, 'docs::component_type') - component_name = get_schema_metadata(definition, 'docs::component_name') - { component_name => key } if component_type == "global_option" -end -.reduce { |acc, item| nested_merge(acc, item) } - -render_and_import_generated_global_option_schema(root_schema, global_options) +# Finally, generate the top-level Vector configuration schema. We extract ALL top-level config fields directly from the +# ConfigBuilder struct (defined in src/config/builder.rs) by processing its allOf schemas. ConfigBuilder is the single +# source of truth for what's actually allowed at the top level of Vector's configuration file. +render_and_import_generated_top_level_config_schema(root_schema) diff --git a/src/config/api.rs b/src/config/api.rs index ed14ce06976d6..a5dc097b60ef0 100644 --- a/src/config/api.rs +++ b/src/config/api.rs @@ -4,7 +4,7 @@ use url::Url; use vector_lib::configurable::configurable_component; /// API options. 
-#[configurable_component(global_option("api"))] +#[configurable_component] #[derive(Clone, Copy, Debug, Eq, PartialEq)] #[serde(default, deny_unknown_fields)] pub struct Options { diff --git a/src/config/builder.rs b/src/config/builder.rs index f732d37c89665..cce736f03b707 100644 --- a/src/config/builder.rs +++ b/src/config/builder.rs @@ -25,7 +25,6 @@ pub struct ConfigBuilder { pub api: api::Options, #[configurable(derived)] - #[configurable(metadata(docs::hidden))] #[serde(default)] pub schema: schema::Options, @@ -34,22 +33,27 @@ pub struct ConfigBuilder { pub healthchecks: HealthcheckOptions, /// All configured enrichment tables. + #[configurable(metadata(docs::additional_props_description = "An enrichment table."))] #[serde(default)] pub enrichment_tables: IndexMap>, /// All configured sources. + #[configurable(metadata(docs::additional_props_description = "A source."))] #[serde(default)] pub sources: IndexMap, /// All configured sinks. + #[configurable(metadata(docs::additional_props_description = "A sink."))] #[serde(default)] pub sinks: IndexMap>, /// All configured transforms. + #[configurable(metadata(docs::additional_props_description = "A transform."))] #[serde(default)] pub transforms: IndexMap>, /// All configured unit tests. + #[configurable(metadata(docs::hidden))] #[serde(default)] pub tests: Vec>, @@ -57,9 +61,11 @@ pub struct ConfigBuilder { /// /// Configuration providers allow sourcing configuration information from a source other than /// the typical configuration files that must be passed to Vector. + #[configurable(metadata(docs::hidden))] pub provider: Option, /// All configured secrets backends. + #[configurable(metadata(docs::additional_props_description = "A secret backend."))] #[serde(default)] pub secret: IndexMap, diff --git a/src/config/mod.rs b/src/config/mod.rs index fc36a9aa2364b..dceade72d6eeb 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -270,7 +270,7 @@ impl Config { } /// Healthcheck options. 
-#[configurable_component(global_option("healthchecks"))] +#[configurable_component] #[derive(Clone, Copy, Debug)] #[serde(default)] pub struct HealthcheckOptions { diff --git a/src/config/schema.rs b/src/config/schema.rs index 3a4cd987e7951..388765b29e1b9 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -3,19 +3,41 @@ use vector_lib::{config::LogNamespace, configurable::configurable_component}; pub(crate) use crate::schema::Definition; /// Schema options. +/// +/// **Note:** The `enabled` and `validation` options are experimental and should only be enabled if you +/// understand the limitations. While the infrastructure exists for schema tracking and validation, the +/// full vision of automatic semantic field mapping and comprehensive schema enforcement was never fully +/// realized. +/// +/// If you encounter issues with these features, please [report them here](https://github.com/vectordotdev/vector/issues/new?template=bug.yml). #[configurable_component] #[derive(Clone, Copy, Debug, Eq, PartialEq)] #[serde(default, deny_unknown_fields)] pub struct Options { - /// Whether or not schema is enabled. + /// When enabled, Vector tracks the schema (field types and structure) of events as they flow + /// from sources through transforms to sinks. This allows Vector to understand what data each + /// component receives and produces. #[serde(default = "default_enabled")] pub enabled: bool, - /// Whether or not schema validation is enabled. + /// When enabled, Vector validates that events flowing into each sink match the schema + /// requirements of that sink. If a sink requires certain fields or types that are missing + /// from the incoming events, Vector will report an error during configuration validation. + /// + /// This helps catch pipeline configuration errors early, before runtime. #[serde(default = "default_validation")] pub validation: bool, - /// Whether or not to enable log namespacing. + /// Controls how metadata is stored in log events. 
+ /// + /// When set to `false` (legacy mode), metadata fields like `host`, `timestamp`, and `source_type` + /// are stored as top-level fields alongside your log data. + /// + /// When set to `true` (Vector namespace mode), metadata is stored in a separate metadata namespace, + /// keeping it distinct from your actual log data. + /// + /// See the [Log Namespacing guide](/guides/level-up/log_namespace/) for detailed information + /// about when to use Vector namespace mode and how to migrate from legacy mode. pub log_namespace: Option, } diff --git a/src/enrichment_tables/mod.rs b/src/enrichment_tables/mod.rs index f814e175f576e..fb693b180308b 100644 --- a/src/enrichment_tables/mod.rs +++ b/src/enrichment_tables/mod.rs @@ -35,7 +35,7 @@ pub mod mmdb; /// condition. We don't recommend using a condition that uses only date range searches. /// /// -#[configurable_component(global_option("enrichment_tables"))] +#[configurable_component] #[derive(Clone, Debug)] #[serde(tag = "type", rename_all = "snake_case")] #[enum_dispatch(EnrichmentTableConfig)] @@ -67,6 +67,21 @@ pub enum EnrichmentTables { Mmdb(mmdb::MmdbConfig), } +// Manual NamedComponent impl required because enum_dispatch doesn't support it yet. 
+impl vector_lib::configurable::NamedComponent for EnrichmentTables { + fn get_component_name(&self) -> &'static str { + match self { + Self::File(config) => config.get_component_name(), + #[cfg(feature = "enrichment-tables-memory")] + Self::Memory(config) => config.get_component_name(), + #[cfg(feature = "enrichment-tables-geoip")] + Self::Geoip(config) => config.get_component_name(), + #[cfg(feature = "enrichment-tables-mmdb")] + Self::Mmdb(config) => config.get_component_name(), + } + } +} + impl GenerateConfig for EnrichmentTables { fn generate_config() -> toml::Value { toml::Value::try_from(Self::File(file::FileConfig { diff --git a/src/providers/http.rs b/src/providers/http.rs index 975db2e17f319..5555a99e0e98d 100644 --- a/src/providers/http.rs +++ b/src/providers/http.rs @@ -21,6 +21,7 @@ use crate::{ #[derive(Clone, Debug)] pub struct RequestConfig { /// HTTP headers to add to the request. + #[configurable(metadata(docs::additional_props_description = "An HTTP header."))] #[serde(default)] pub headers: IndexMap, } diff --git a/src/secrets/mod.rs b/src/secrets/mod.rs index 162e589f23f13..19bc6471ee96e 100644 --- a/src/secrets/mod.rs +++ b/src/secrets/mod.rs @@ -51,7 +51,7 @@ mod test; /// Secrets are loaded when Vector starts or if Vector receives a `SIGHUP` signal triggering its /// configuration reload process. #[allow(clippy::large_enum_variant)] -#[configurable_component(global_option("secret"))] +#[configurable_component] #[derive(Clone, Debug)] #[enum_dispatch(SecretBackend)] #[serde(tag = "type", rename_all = "snake_case")] @@ -79,6 +79,20 @@ pub enum SecretBackends { Test(test::TestBackend), } +// Manual NamedComponent impl required because enum_dispatch doesn't support it yet. 
+impl vector_lib::configurable::NamedComponent for SecretBackends { + fn get_component_name(&self) -> &'static str { + match self { + Self::File(config) => config.get_component_name(), + Self::Directory(config) => config.get_component_name(), + Self::Exec(config) => config.get_component_name(), + #[cfg(feature = "secrets-aws-secrets-manager")] + Self::AwsSecretsManager(config) => config.get_component_name(), + Self::Test(config) => config.get_component_name(), + } + } +} + impl GenerateConfig for SecretBackends { fn generate_config() -> toml::Value { toml::Value::try_from(Self::File(file::FileBackend { diff --git a/website/content/en/docs/reference/configuration/api.md b/website/content/en/docs/reference/configuration/api.md new file mode 100644 index 0000000000000..e3ba6a0372ff7 --- /dev/null +++ b/website/content/en/docs/reference/configuration/api.md @@ -0,0 +1,14 @@ +--- +title: API configuration reference +short: API +weight: 6 +show_toc: true +--- + +This page documents the configuration for Vector's observability API. + +The API enables you to query Vector's topology, metrics, and health information through a GraphQL endpoint, as well as access an interactive GraphQL playground for development. + +{{< config-cross-links group="api" >}} + +{{< config/group group="api" >}} diff --git a/website/content/en/docs/reference/configuration/global-options.md b/website/content/en/docs/reference/configuration/global-options.md index d3c2059f3c88b..6e2c296a476ce 100644 --- a/website/content/en/docs/reference/configuration/global-options.md +++ b/website/content/en/docs/reference/configuration/global-options.md @@ -3,8 +3,11 @@ title: Global options reference short: Global options weight: 4 aliases: ["/docs/reference/global-options"] +show_toc: true --- -## Global configuration parameters +This page documents global configuration options that apply to Vector as a whole, such as data directories, timezone settings, logging configuration, and more. 
-{{< config/global >}} +{{< config-cross-links group="global_options" >}} + +{{< config/group group="global_options" >}} diff --git a/website/content/en/docs/reference/configuration/pipeline-components.md b/website/content/en/docs/reference/configuration/pipeline-components.md new file mode 100644 index 0000000000000..c58bd681416e9 --- /dev/null +++ b/website/content/en/docs/reference/configuration/pipeline-components.md @@ -0,0 +1,14 @@ +--- +title: Pipeline components reference +short: Pipeline components +weight: 5 +show_toc: true +--- + +This page documents the top-level configuration for pipeline components: sources, transforms, sinks, and enrichment tables. + +These fields define the structure of your observability data pipeline. Each component is defined as a table within these sections, with component-specific configuration options. + +{{< config-cross-links group="pipeline_components" >}} + +{{< config/group group="pipeline_components" >}} diff --git a/website/content/en/docs/reference/configuration/schema.md b/website/content/en/docs/reference/configuration/schema.md new file mode 100644 index 0000000000000..94f380ed1a19b --- /dev/null +++ b/website/content/en/docs/reference/configuration/schema.md @@ -0,0 +1,12 @@ +--- +title: Schema configuration reference +short: Schema +weight: 7 +show_toc: true +--- + +This page documents the configuration for Vector's internal schema system. + +{{< config-cross-links group="schema" >}} + +{{< config/group group="schema" >}} diff --git a/website/content/en/docs/reference/configuration/secrets.md b/website/content/en/docs/reference/configuration/secrets.md new file mode 100644 index 0000000000000..5137831498929 --- /dev/null +++ b/website/content/en/docs/reference/configuration/secrets.md @@ -0,0 +1,14 @@ +--- +title: Secrets configuration reference +short: Secrets +weight: 8 +show_toc: true +--- + +This page documents the configuration for Vector's secrets management. 
+ +Secrets allow you to securely store and reference sensitive configuration values like API keys, passwords, and tokens without exposing them in plaintext configuration files. + +{{< config-cross-links group="secrets" >}} + +{{< config/group group="secrets" >}} diff --git a/website/content/en/guides/level-up/log_namespace.md b/website/content/en/guides/level-up/log_namespace.md index 14ae5fc02230e..5826b27ac6ba8 100644 --- a/website/content/en/guides/level-up/log_namespace.md +++ b/website/content/en/guides/level-up/log_namespace.md @@ -18,8 +18,26 @@ Before you begin, this guide assumes the following: [global schema settings]: /docs/reference/configuration/global-options/#log_schema [docs.setup.quickstart]: /docs/setup/quickstart/ + +If you encounter any issues, please [report them here](https://github.com/vectordotdev/vector/issues/new?template=bug.yml). + {{< /requirement >}} +## Background + +Vector traditionally stored metadata (like `host`, `timestamp`, and `source_type`) as top-level +fields alongside your log data. This "legacy" approach has a few drawbacks: + +* **Field name collisions**: If your logs contain a field named `host`, it could conflict with + Vector's metadata field +* **Unclear ownership**: It's not immediately obvious which fields are from your data and which + are Vector metadata +* **Difficult transformations**: When you want to transform only your data (not metadata), you + need to be careful to exclude metadata fields + +The Vector namespace mode solves these issues by storing metadata in a separate namespace, +completely isolated from your log data. 
+ ## Default Behavior ### Vector Config @@ -190,3 +208,58 @@ Sample output from `json_console`: ```json "bar" ``` + +## Migration Considerations + +If you're considering migrating from legacy mode (`log_namespace = false`) to Vector namespace mode +(`log_namespace = true`), here are key things to be aware of: + +### VRL Updates + +VRL scripts that reference metadata fields will need to be updated to use the metadata accessor syntax: + +**Legacy mode:** + +```coffee +.host = "new-host" +.timestamp = now() +``` + +**Vector namespace mode:** + +```coffee +%vector.host = "new-host" +%vector.ingest_timestamp = now() +``` + +### Sink Behavior Differences + +Many sinks will behave differently depending on the namespace setting. Always test your sinks after switching modes to verify expected +behavior before deploying. + +### Gradual Migration Strategy + +You can configure `log_namespace` per-source if you need a gradual migration: + +```yaml +# Global default (legacy) +schema: + log_namespace: false + +sources: + # New source using Vector namespace + new_source: + type: http_server + log_namespace: true + + # Existing source still using legacy + existing_source: + type: file + # Uses global default (false) +``` + +This allows you to: + +1. Keep existing pipelines working with legacy mode +2. Adopt Vector namespace mode for selected sources only +3. 
Migrate sources incrementally over time diff --git a/website/cue/reference/configuration.cue b/website/cue/reference/configuration.cue index 2321cea179a15..38fb761216d4a 100644 --- a/website/cue/reference/configuration.cue +++ b/website/cue/reference/configuration.cue @@ -6,11 +6,13 @@ configuration: { outputs: [components.#Output, ...components.#Output] } } + groups?: _ how_it_works: #HowItWorks } configuration: { configuration: generated.configuration.configuration + groups: generated.configuration.groups configuration: { // expire_metrics's type is a little bit tricky, we could not generate `uint` from `docs::type_override` metadata macro easily. @@ -25,6 +27,7 @@ configuration: { """ required: false warnings: ["Deprecated, please use `expire_metrics_secs` instead."] + group: "global_options" type: object: options: { secs: { common: true diff --git a/website/cue/reference/generated/configuration.cue b/website/cue/reference/generated/configuration.cue index bcfe0a2b4ac70..97d71dfeac5a6 100644 --- a/website/cue/reference/generated/configuration.cue +++ b/website/cue/reference/generated/configuration.cue @@ -1,1090 +1,1547 @@ package metadata -generated: configuration: configuration: { - healthchecks: { - type: object: options: { - enabled: { - type: bool: default: true - description: """ - Whether or not healthchecks are enabled for all sinks. - - Can be overridden on a per-sink basis. - """ - required: false - } - require_healthy: { - type: bool: default: false - description: """ - Whether or not to require a sink to report as being healthy during startup. - - When enabled and a sink reports not being healthy, Vector will exit during start-up. - - Can be alternatively set, and overridden by, the `--require-healthy` command-line flag. - """ - required: false - } - } - description: "Healthcheck options." 
- } - api: { - type: object: options: { - address: { - type: string: { - default: "127.0.0.1:8686" - examples: ["0.0.0.0:8686", "127.0.0.1:1234"] +generated: configuration: { + configuration: { + api: { + type: object: options: { + address: { + type: string: { + default: "127.0.0.1:8686" + examples: ["0.0.0.0:8686", "127.0.0.1:1234"] + } + description: """ + The network address to which the API should bind. If you're running + Vector in a Docker container, bind to `0.0.0.0`. Otherwise + the API will not be exposed outside the container. + """ + common: true + required: false + } + enabled: { + type: bool: default: false + description: "Whether the GraphQL API is enabled for this Vector instance." + common: true + required: false + } + graphql: { + type: bool: default: true + description: """ + Whether the endpoint for receiving and processing GraphQL queries is + enabled for the API. The endpoint is accessible via the `/graphql` + endpoint of the address set using the `bind` parameter. + """ + common: true + required: false + } + playground: { + type: bool: default: true + description: """ + Whether the [GraphQL Playground](https://github.com/graphql/graphql-playground) is enabled + for the API. The Playground is accessible via the `/playground` endpoint + of the address set using the `bind` parameter. Note that the `playground` + endpoint will only be enabled if the `graphql` endpoint is also enabled. + """ + common: false + required: false } - description: """ - The network address to which the API should bind. If you're running - Vector in a Docker container, bind to `0.0.0.0`. Otherwise - the API will not be exposed outside the container. - """ - common: true - required: false - } - enabled: { - type: bool: default: false - description: "Whether the GraphQL API is enabled for this Vector instance." 
- common: true - required: false - } - graphql: { - type: bool: default: true - description: """ - Whether the endpoint for receiving and processing GraphQL queries is - enabled for the API. The endpoint is accessible via the `/graphql` - endpoint of the address set using the `bind` parameter. - """ - common: true - required: false - } - playground: { - type: bool: default: true - description: """ - Whether the [GraphQL Playground](https://github.com/graphql/graphql-playground) is enabled - for the API. The Playground is accessible via the `/playground` endpoint - of the address set using the `bind` parameter. Note that the `playground` - endpoint will only be enabled if the `graphql` endpoint is also enabled. - """ - common: false - required: false } + description: "API options." + group: "api" } - description: "API options." - } - enrichment_tables: { - type: object: options: { - file: { + enrichment_tables: { + type: object: options: "*": { type: object: options: { - encoding: { + graph: { + type: object: options: node_attributes: { + type: object: { + options: "*": { + type: string: {} + required: true + description: "A single graph node attribute in graphviz DOT language." + } + examples: [{ + color: "red" + name: "Example Node" + width: "5.0" + }] + } + description: """ + Node attributes to add to this component's node in resulting graph + + They are added to the node as provided + """ + required: false + } + description: """ + Extra graph configuration + + Configure output for component when generated with graph command + """ + required: false + } + inputs: { + type: array: { + items: type: string: examples: ["my-source-or-transform-id", "prefix-*"] + default: [] + } + description: """ + A list of upstream [source][sources] or [transform][transforms] IDs. + + Wildcards (`*`) are supported. + + See [configuration][configuration] for more info. 
+ + [sources]: https://vector.dev/docs/reference/configuration/sources/ + [transforms]: https://vector.dev/docs/reference/configuration/transforms/ + [configuration]: https://vector.dev/docs/reference/configuration/ + """ + required: false + } + file: { type: object: options: { - delimiter: { - type: string: default: "," - description: "The delimiter used to separate fields in each row of the CSV file." - required: false + encoding: { + type: object: options: { + delimiter: { + type: string: default: "," + description: "The delimiter used to separate fields in each row of the CSV file." + required: false + } + include_headers: { + type: bool: default: true + description: """ + Whether or not the file contains column headers. + + When set to `true`, the first row of the CSV file will be read as the header row, and + the values will be used for the names of each column. This is the default behavior. + + When set to `false`, columns are referred to by their numerical index. + """ + required: false + } + type: { + required: true + type: string: enum: csv: """ + Decodes the file as a [CSV][csv] (comma-separated values) file. + + [csv]: https://wikipedia.org/wiki/Comma-separated_values + """ + description: "File encoding type." + } + } + description: "File encoding configuration." + required: true } - include_headers: { - type: bool: default: true + path: { + type: string: {} description: """ - Whether or not the file contains column headers. + The path of the enrichment table file. - When set to `true`, the first row of the CSV file will be read as the header row, and - the values will be used for the names of each column. This is the default behavior. + Currently, only [CSV][csv] files are supported. - When set to `false`, columns are referred to by their numerical index. 
+ [csv]: https://en.wikipedia.org/wiki/Comma-separated_values """ - required: false - } - type: { required: true - type: string: enum: csv: """ - Decodes the file as a [CSV][csv] (comma-separated values) file. - - [csv]: https://wikipedia.org/wiki/Comma-separated_values - """ - description: "File encoding type." } } - description: "File encoding configuration." - required: true + description: "File-specific settings." + required: true + relevant_when: "type = \"file\"" } - path: { - type: string: {} + schema: { + type: object: options: "*": { + type: string: {} + required: true + description: "Represents mapped log field names and types." + } description: """ - The path of the enrichment table file. - - Currently, only [CSV][csv] files are supported. - - [csv]: https://en.wikipedia.org/wiki/Comma-separated_values + Key/value pairs representing mapped log field names and types. + + This is used to coerce log fields from strings into their proper types. The available types are listed in the `Types` list below. + + Timestamp coercions need to be prefaced with `timestamp|`, for example `"timestamp|%F"`. Timestamp specifiers can use either of the following: + + 1. One of the built-in-formats listed in the `Timestamp Formats` table below. + 2. The [time format specifiers][chrono_fmt] from Rust’s `chrono` library. 
+ + Types + + - **`bool`** + - **`string`** + - **`float`** + - **`integer`** + - **`date`** + - **`timestamp`** (see the table below for formats) + + Timestamp Formats + + | Format | Description | Example | + |----------------------|----------------------------------------------------------------------------------|----------------------------------| + | `%F %T` | `YYYY-MM-DD HH:MM:SS` | `2020-12-01 02:37:54` | + | `%v %T` | `DD-Mmm-YYYY HH:MM:SS` | `01-Dec-2020 02:37:54` | + | `%FT%T` | [ISO 8601][iso8601]/[RFC 3339][rfc3339], without time zone | `2020-12-01T02:37:54` | + | `%FT%TZ` | [ISO 8601][iso8601]/[RFC 3339][rfc3339], UTC | `2020-12-01T09:37:54Z` | + | `%+` | [ISO 8601][iso8601]/[RFC 3339][rfc3339], UTC, with time zone | `2020-12-01T02:37:54-07:00` | + | `%a, %d %b %Y %T` | [RFC 822][rfc822]/[RFC 2822][rfc2822], without time zone | `Tue, 01 Dec 2020 02:37:54` | + | `%a %b %e %T %Y` | [ctime][ctime] format | `Tue Dec 1 02:37:54 2020` | + | `%s` | [UNIX timestamp][unix_ts] | `1606790274` | + | `%a %d %b %T %Y` | [date][date] command, without time zone | `Tue 01 Dec 02:37:54 2020` | + | `%a %d %b %T %Z %Y` | [date][date] command, with time zone | `Tue 01 Dec 02:37:54 PST 2020` | + | `%a %d %b %T %z %Y` | [date][date] command, with numeric time zone | `Tue 01 Dec 02:37:54 -0700 2020` | + | `%a %d %b %T %#z %Y` | [date][date] command, with numeric time zone (minutes can be missing or present) | `Tue 01 Dec 02:37:54 -07 2020` | + + [date]: https://man7.org/linux/man-pages/man1/date.1.html + [ctime]: https://www.cplusplus.com/reference/ctime + [unix_ts]: https://en.wikipedia.org/wiki/Unix_time + [rfc822]: https://tools.ietf.org/html/rfc822#section-5 + [rfc2822]: https://tools.ietf.org/html/rfc2822#section-3.3 + [iso8601]: https://en.wikipedia.org/wiki/ISO_8601 + [rfc3339]: https://tools.ietf.org/html/rfc3339 + [chrono_fmt]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html#specifiers """ - required: true + required: false + relevant_when: "type = 
\"file\"" } - } - description: "File-specific settings." - required: true - relevant_when: "type = \"file\"" - } - schema: { - type: object: options: "*": { - type: string: {} - required: true - description: "Represents mapped log field names and types." - } - description: """ - Key/value pairs representing mapped log field names and types. - - This is used to coerce log fields from strings into their proper types. The available types are listed in the `Types` list below. - - Timestamp coercions need to be prefaced with `timestamp|`, for example `"timestamp|%F"`. Timestamp specifiers can use either of the following: - - 1. One of the built-in-formats listed in the `Timestamp Formats` table below. - 2. The [time format specifiers][chrono_fmt] from Rust’s `chrono` library. - - Types - - - **`bool`** - - **`string`** - - **`float`** - - **`integer`** - - **`date`** - - **`timestamp`** (see the table below for formats) - - Timestamp Formats - - | Format | Description | Example | - |----------------------|----------------------------------------------------------------------------------|----------------------------------| - | `%F %T` | `YYYY-MM-DD HH:MM:SS` | `2020-12-01 02:37:54` | - | `%v %T` | `DD-Mmm-YYYY HH:MM:SS` | `01-Dec-2020 02:37:54` | - | `%FT%T` | [ISO 8601][iso8601]/[RFC 3339][rfc3339], without time zone | `2020-12-01T02:37:54` | - | `%FT%TZ` | [ISO 8601][iso8601]/[RFC 3339][rfc3339], UTC | `2020-12-01T09:37:54Z` | - | `%+` | [ISO 8601][iso8601]/[RFC 3339][rfc3339], UTC, with time zone | `2020-12-01T02:37:54-07:00` | - | `%a, %d %b %Y %T` | [RFC 822][rfc822]/[RFC 2822][rfc2822], without time zone | `Tue, 01 Dec 2020 02:37:54` | - | `%a %b %e %T %Y` | [ctime][ctime] format | `Tue Dec 1 02:37:54 2020` | - | `%s` | [UNIX timestamp][unix_ts] | `1606790274` | - | `%a %d %b %T %Y` | [date][date] command, without time zone | `Tue 01 Dec 02:37:54 2020` | - | `%a %d %b %T %Z %Y` | [date][date] command, with time zone | `Tue 01 Dec 02:37:54 PST 2020` | - | `%a %d %b 
%T %z %Y` | [date][date] command, with numeric time zone | `Tue 01 Dec 02:37:54 -0700 2020` | - | `%a %d %b %T %#z %Y` | [date][date] command, with numeric time zone (minutes can be missing or present) | `Tue 01 Dec 02:37:54 -07 2020` | - - [date]: https://man7.org/linux/man-pages/man1/date.1.html - [ctime]: https://www.cplusplus.com/reference/ctime - [unix_ts]: https://en.wikipedia.org/wiki/Unix_time - [rfc822]: https://tools.ietf.org/html/rfc822#section-5 - [rfc2822]: https://tools.ietf.org/html/rfc2822#section-3.3 - [iso8601]: https://en.wikipedia.org/wiki/ISO_8601 - [rfc3339]: https://tools.ietf.org/html/rfc3339 - [chrono_fmt]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html#specifiers - """ - required: false - relevant_when: "type = \"file\"" - } - flush_interval: { - type: uint: {} - description: """ - The interval used for making writes visible in the table. - Longer intervals might get better performance, - but there is a longer delay before the data is visible in the table. - Since every TTL scan makes its changes visible, only use this value - if it is shorter than the `scan_interval`. - - By default, all writes are made visible immediately. - """ - required: false - relevant_when: "type = \"memory\"" - } - internal_metrics: { - type: object: options: include_key_tag: { - type: bool: default: false - description: """ - Determines whether to include the key tag on internal metrics. - - This is useful for distinguishing between different keys while monitoring. However, the tag's - cardinality is unbounded. - """ - required: false - } - description: "Configuration of internal metrics" - required: false - relevant_when: "type = \"memory\"" - } - max_byte_size: { - type: uint: {} - description: """ - Maximum size of the table in bytes. All insertions that make - this table bigger than the maximum size are rejected. + flush_interval: { + type: uint: {} + description: """ + The interval used for making writes visible in the table. 
+ Longer intervals might get better performance, + but there is a longer delay before the data is visible in the table. + Since every TTL scan makes its changes visible, only use this value + if it is shorter than the `scan_interval`. - By default, there is no size limit. - """ - required: false - relevant_when: "type = \"memory\"" - } - scan_interval: { - type: uint: default: 30 - description: """ - The scan interval used to look for expired records. This is provided - as an optimization to ensure that TTL is updated, but without doing - too many cache scans. - """ - required: false - relevant_when: "type = \"memory\"" - } - source_config: { - type: object: options: { - export_batch_size: { + By default, all writes are made visible immediately. + """ + required: false + relevant_when: "type = \"memory\"" + } + internal_metrics: { + type: object: options: include_key_tag: { + type: bool: default: false + description: """ + Determines whether to include the key tag on internal metrics. + + This is useful for distinguishing between different keys while monitoring. However, the tag's + cardinality is unbounded. + """ + required: false + } + description: "Configuration of internal metrics" + required: false + relevant_when: "type = \"memory\"" + } + max_byte_size: { type: uint: {} description: """ - Batch size for data exporting. Used to prevent exporting entire table at - once and blocking the system. + Maximum size of the table in bytes. All insertions that make + this table bigger than the maximum size are rejected. - By default, batches are not used and entire table is exported. + By default, there is no size limit. """ - required: false + required: false + relevant_when: "type = \"memory\"" } - export_expired_items: { - type: bool: default: false + scan_interval: { + type: uint: default: 30 description: """ - Set to true to export expired items via the `expired` output port. - Expired items ignore other settings and are exported as they are flushed from the table. 
+ The scan interval used to look for expired records. This is provided + as an optimization to ensure that TTL is updated, but without doing + too many cache scans. """ - required: false + required: false + relevant_when: "type = \"memory\"" } - export_interval: { - type: uint: {} - description: "Interval for exporting all data from the table when used as a source." - required: false + source_config: { + type: object: options: { + export_batch_size: { + type: uint: {} + description: """ + Batch size for data exporting. Used to prevent exporting entire table at + once and blocking the system. + + By default, batches are not used and entire table is exported. + """ + required: false + } + export_expired_items: { + type: bool: default: false + description: """ + Set to true to export expired items via the `expired` output port. + Expired items ignore other settings and are exported as they are flushed from the table. + """ + required: false + } + export_interval: { + type: uint: {} + description: "Interval for exporting all data from the table when used as a source." + required: false + } + remove_after_export: { + type: bool: default: false + description: """ + If set to true, all data will be removed from cache after exporting. + Only valid if used as a source and export_interval > 0 + + By default, export will not remove data from cache + """ + required: false + } + source_key: { + type: string: {} + description: """ + Key to use for this component when used as a source. This must be different from the + component key. + """ + required: true + } + } + description: "Configuration for source functionality." + required: false + relevant_when: "type = \"memory\"" } - remove_after_export: { - type: bool: default: false + ttl: { + type: uint: default: 600 + description: """ + TTL (time-to-live in seconds) is used to limit the lifetime of data stored in the cache. + When TTL expires, data behind a specific key in the cache is removed. 
+ TTL is reset when the key is replaced. + """ + required: false + relevant_when: "type = \"memory\"" + } + ttl_field: { + type: string: default: "" + description: "Field in the incoming value used as the TTL override." + required: false + relevant_when: "type = \"memory\"" + } + locale: { + type: string: default: "en" description: """ - If set to true, all data will be removed from cache after exporting. - Only valid if used as a source and export_interval > 0 + The locale to use when querying the database. - By default, export will not remove data from cache + MaxMind includes localized versions of some of the fields within their database, such as + country name. This setting can control which of those localized versions are returned by the + transform. + + More information on which portions of the geolocation data are localized, and what languages + are available, can be found [here][locale_docs]. + + [locale_docs]: https://support.maxmind.com/hc/en-us/articles/4414877149467-IP-Geolocation-Data#h_01FRRGRYTGZB29ERDBZCX3MR8Q """ - required: false + required: false + relevant_when: "type = \"geoip\"" } - source_key: { + path: { type: string: {} description: """ - Key to use for this component when used as a source. This must be different from the - component key. + Path to the [MaxMind GeoIP2][geoip2] or [GeoLite2 binary city database file][geolite2] + (**GeoLite2-City.mmdb**). + + Other databases, such as the country database, are not supported. + `mmdb` enrichment table can be used for other databases. + + [geoip2]: https://dev.maxmind.com/geoip/geoip2/downloadable + [geolite2]: https://dev.maxmind.com/geoip/geoip2/geolite2/#Download_Access """ + required: true + relevant_when: "type = \"geoip\" or type = \"mmdb\"" + } + type: { required: true + type: string: enum: { + file: "Exposes data from a static file as an enrichment table." + memory: """ + Exposes data from a memory cache as an enrichment table. The cache can be written to using + a sink. 
+ """ + geoip: """ + Exposes data from a [MaxMind][maxmind] [GeoIP2][geoip2] database as an enrichment table. + + [maxmind]: https://www.maxmind.com/ + [geoip2]: https://www.maxmind.com/en/geoip2-databases + """ + mmdb: """ + Exposes data from a [MaxMind][maxmind] database as an enrichment table. + + [maxmind]: https://www.maxmind.com/ + """ + } + description: "enrichment table type" } } - description: "Configuration for source functionality." - required: false - relevant_when: "type = \"memory\"" + description: "An enrichment table." + required: true } - ttl: { - type: uint: default: 600 - description: """ - TTL (time-to-live in seconds) is used to limit the lifetime of data stored in the cache. - When TTL expires, data behind a specific key in the cache is removed. - TTL is reset when the key is replaced. - """ - required: false - relevant_when: "type = \"memory\"" - } - ttl_field: { - type: string: default: "" - description: "Field in the incoming value used as the TTL override." - required: false - relevant_when: "type = \"memory\"" - } - locale: { - type: string: default: "en" - description: """ - The locale to use when querying the database. + description: "All configured enrichment tables." + group: "pipeline_components" + } + healthchecks: { + type: object: options: { + enabled: { + type: bool: default: true + description: """ + Whether or not healthchecks are enabled for all sinks. - MaxMind includes localized versions of some of the fields within their database, such as - country name. This setting can control which of those localized versions are returned by the - transform. + Can be overridden on a per-sink basis. + """ + required: false + } + require_healthy: { + type: bool: default: false + description: """ + Whether or not to require a sink to report as being healthy during startup. - More information on which portions of the geolocation data are localized, and what languages - are available, can be found [here][locale_docs]. 
+ When enabled and a sink reports not being healthy, Vector will exit during start-up. - [locale_docs]: https://support.maxmind.com/hc/en-us/articles/4414877149467-IP-Geolocation-Data#h_01FRRGRYTGZB29ERDBZCX3MR8Q - """ - required: false - relevant_when: "type = \"geoip\"" + Can be alternatively set, and overridden by, the `--require-healthy` command-line flag. + """ + required: false + } } - path: { - type: string: {} - description: """ - Path to the [MaxMind GeoIP2][geoip2] or [GeoLite2 binary city database file][geolite2] - (**GeoLite2-City.mmdb**). + description: "Healthcheck options." + group: "global_options" + } + schema: { + type: object: options: { + enabled: { + type: bool: default: false + description: """ + When enabled, Vector tracks the schema (field types and structure) of events as they flow + from sources through transforms to sinks. This allows Vector to understand what data each + component receives and produces. + """ + required: false + } + log_namespace: { + type: bool: {} + description: """ + Controls how metadata is stored in log events. - Other databases, such as the country database, are not supported. - `mmdb` enrichment table can be used for other databases. + When set to `false` (legacy mode), metadata fields like `host`, `timestamp`, and `source_type` + are stored as top-level fields alongside your log data. - [geoip2]: https://dev.maxmind.com/geoip/geoip2/downloadable - [geolite2]: https://dev.maxmind.com/geoip/geoip2/geolite2/#Download_Access - """ - required: true - relevant_when: "type = \"geoip\" or type = \"mmdb\"" - } - type: { - required: true - type: string: enum: { - file: "Exposes data from a static file as an enrichment table." - memory: """ - Exposes data from a memory cache as an enrichment table. The cache can be written to using - a sink. - """ - geoip: """ - Exposes data from a [MaxMind][maxmind] [GeoIP2][geoip2] database as an enrichment table. 
+ When set to `true` (Vector namespace mode), metadata is stored in a separate metadata namespace, + keeping it distinct from your actual log data. - [maxmind]: https://www.maxmind.com/ - [geoip2]: https://www.maxmind.com/en/geoip2-databases + See the [Log Namespacing guide](/guides/level-up/log_namespace/) for detailed information + about when to use Vector namespace mode and how to migrate from legacy mode. """ - mmdb: """ - Exposes data from a [MaxMind][maxmind] database as an enrichment table. + required: false + } + validation: { + type: bool: default: false + description: """ + When enabled, Vector validates that events flowing into each sink match the schema + requirements of that sink. If a sink requires certain fields or types that are missing + from the incoming events, Vector will report an error during configuration validation. - [maxmind]: https://www.maxmind.com/ + This helps catch pipeline configuration errors early, before runtime. """ + required: false } - description: "enrichment table type" - } - } - description: """ - Configuration options for an [enrichment table](https://vector.dev/docs/reference/glossary/#enrichment-tables) to be used in a - [`remap`](https://vector.dev/docs/reference/configuration/transforms/remap/) transform. Currently supported are: - - * [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) files - * [MaxMind](https://www.maxmind.com/en/home) databases - * In-memory storage - - For the lookup in the enrichment tables to be as performant as possible, the data is indexed according - to the fields that are used in the search. Note that indices can only be created for fields for which an - exact match is used in the condition. For range searches, an index isn't used and the enrichment table - drops back to a sequential scan of the data. A sequential scan shouldn't impact performance - significantly provided that there are only a few possible rows returned by the exact matches in the - condition. 
We don't recommend using a condition that uses only date range searches. - """ - common: false - required: false - } - secret: { - type: object: options: { - path: { - type: string: {} - description: "File path to read secrets from." - required: true - relevant_when: "type = \"file\" or type = \"directory\"" - } - remove_trailing_whitespace: { - type: bool: default: false - description: "Remove trailing whitespace from file contents." - required: false - relevant_when: "type = \"directory\"" } - command: { - type: array: items: type: string: {} - description: """ - Command arguments to execute. + description: """ + Schema options. - The path to the script or binary must be the first argument. - """ - required: true - relevant_when: "type = \"exec\"" - } - protocol: { + **Note:** The `enabled` and `validation` options are experimental and should only be enabled if you + understand the limitations. While the infrastructure exists for schema tracking and validation, the + full vision of automatic semantic field mapping and comprehensive schema enforcement was never fully + realized. + + If you encounter issues with these features, please [report them here](https://github.com/vectordotdev/vector/issues/new?template=bug.yml). + """ + group: "schema" + } + secret: { + type: object: options: "*": { type: object: options: { - backend_config: { - type: "*": {} - description: """ - The configuration to pass to the secrets executable. This is the `config` field in the - backend request. Refer to the documentation of your `backend_type `to see which options - are required to be set. - """ + path: { + type: string: {} + description: "File path to read secrets from." + required: true + relevant_when: "type = \"file\" or type = \"directory\"" + } + remove_trailing_whitespace: { + type: bool: default: false + description: "Remove trailing whitespace from file contents." 
required: false - relevant_when: "version = \"v1_1\"" + relevant_when: "type = \"directory\"" } - backend_type: { - type: string: {} - description: "The name of the backend. This is `type` field in the backend request." + command: { + type: array: items: type: string: {} + description: """ + Command arguments to execute. + + The path to the script or binary must be the first argument. + """ required: true - relevant_when: "version = \"v1_1\"" + relevant_when: "type = \"exec\"" } - version: { - required: false - type: string: { - enum: { - v1: "Expect the command to fetch the configuration options itself." - v1_1: "Configuration options to the command are to be curried upon each request." + protocol: { + type: object: options: { + backend_config: { + type: "*": {} + description: """ + The configuration to pass to the secrets executable. This is the `config` field in the + backend request. Refer to the documentation of your `backend_type `to see which options + are required to be set. + """ + required: false + relevant_when: "version = \"v1_1\"" + } + backend_type: { + type: string: {} + description: "The name of the backend. This is `type` field in the backend request." + required: true + relevant_when: "version = \"v1_1\"" + } + version: { + required: false + type: string: { + enum: { + v1: "Expect the command to fetch the configuration options itself." + v1_1: "Configuration options to the command are to be curried upon each request." + } + default: "v1" + } + description: "The protocol version." } - default: "v1" } - description: "The protocol version." + description: "Settings for the protocol between Vector and the secrets executable." + required: false + relevant_when: "type = \"exec\"" } - } - description: "Settings for the protocol between Vector and the secrets executable." - required: false - relevant_when: "type = \"exec\"" - } - timeout: { - type: uint: default: 5 - description: "The timeout, in seconds, to wait for the command to complete." 
- required: false - relevant_when: "type = \"exec\"" - } - auth: { - type: object: options: { - access_key_id: { - type: string: examples: ["AKIAIOSFODNN7EXAMPLE"] - description: "The AWS access key ID." - required: true + timeout: { + type: uint: default: 5 + description: "The timeout, in seconds, to wait for the command to complete." + required: false + relevant_when: "type = \"exec\"" } - assume_role: { - type: string: examples: ["arn:aws:iam::123456789098:role/my_role"] - description: """ - The ARN of an [IAM role][iam_role] to assume. + auth: { + type: object: options: { + access_key_id: { + type: string: examples: ["AKIAIOSFODNN7EXAMPLE"] + description: "The AWS access key ID." + required: true + } + assume_role: { + type: string: examples: ["arn:aws:iam::123456789098:role/my_role"] + description: """ + The ARN of an [IAM role][iam_role] to assume. - [iam_role]: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles.html - """ - required: true + [iam_role]: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles.html + """ + required: true + } + external_id: { + type: string: examples: ["randomEXAMPLEidString"] + description: """ + The optional unique external ID in conjunction with role to assume. + + [external_id]: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user_externalid.html + """ + required: false + } + region: { + type: string: examples: ["us-west-2"] + description: """ + The [AWS region][aws_region] to send STS requests to. + + If not set, this defaults to the configured region + for the service itself. + + [aws_region]: https://docs.aws.amazon.com/general/latest/gr/rande.html#regional-endpoints + """ + required: false + } + secret_access_key: { + type: string: examples: ["wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"] + description: "The AWS secret access key." 
+ required: true + } + session_name: { + type: string: examples: ["vector-indexer-role"] + description: """ + The optional [RoleSessionName][role_session_name] is a unique session identifier for your assumed role. + + Should be unique per principal or reason. + If not set, the session name is autogenerated like assume-role-provider-1736428351340 + + [role_session_name]: https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html + """ + required: false + } + session_token: { + type: string: examples: ["AQoDYXdz...AQoDYXdz..."] + description: """ + The AWS session token. + See [AWS temporary credentials](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_use-resources.html) + """ + required: false + } + credentials_file: { + type: string: examples: ["/my/aws/credentials"] + description: "Path to the credentials file." + required: true + } + profile: { + type: string: { + default: "default" + examples: ["develop"] + } + description: """ + The credentials profile to use. + + Used to select AWS credentials from a provided credentials file. + """ + required: false + } + imds: { + type: object: options: { + connect_timeout_seconds: { + type: uint: { + default: 1 + unit: "seconds" + } + description: "Connect timeout for IMDS." + required: false + } + max_attempts: { + type: uint: default: 4 + description: "Number of IMDS retries for fetching tokens and metadata." + required: false + } + read_timeout_seconds: { + type: uint: { + default: 1 + unit: "seconds" + } + description: "Read timeout for IMDS." + required: false + } + } + description: "Configuration for authenticating with AWS through IMDS." + required: false + } + load_timeout_secs: { + type: uint: { + examples: [30] + unit: "seconds" + } + description: """ + Timeout for successfully loading any credentials, in seconds. + + Relevant when the default credentials chain or `assume_role` is used. 
+ """ + required: false + } + } + description: "Configuration of the authentication strategy for interacting with AWS services." + required: false + relevant_when: "type = \"aws_secrets_manager\"" } - external_id: { - type: string: examples: ["randomEXAMPLEidString"] - description: """ - The optional unique external ID in conjunction with role to assume. + secret_id: { + type: string: {} + description: "ID of the secret to resolve." + required: true + relevant_when: "type = \"aws_secrets_manager\"" + } + tls: { + type: object: options: { + alpn_protocols: { + type: array: items: type: string: examples: ["h2"] + description: """ + Sets the list of supported ALPN protocols. - [external_id]: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user_externalid.html - """ - required: false + Declare the supported ALPN protocols, which are used during negotiation with a peer. They are prioritized in the order + that they are defined. + """ + required: false + } + ca_file: { + type: string: examples: ["/path/to/certificate_authority.crt"] + description: """ + Absolute path to an additional CA certificate file. + + The certificate must be in the DER or PEM (X.509) format. Additionally, the certificate can be provided as an inline string in PEM format. + """ + required: false + } + crt_file: { + type: string: examples: ["/path/to/host_certificate.crt"] + description: """ + Absolute path to a certificate file used to identify this server. + + The certificate must be in DER, PEM (X.509), or PKCS#12 format. Additionally, the certificate can be provided as + an inline string in PEM format. + + If this is set _and_ is not a PKCS#12 archive, `key_file` must also be set. + """ + required: false + } + key_file: { + type: string: examples: ["/path/to/host_certificate.key"] + description: """ + Absolute path to a private key file used to identify this server. + + The key must be in DER or PEM (PKCS#8) format. 
Additionally, the key can be provided as an inline string in PEM format. + """ + required: false + } + key_pass: { + type: string: examples: ["${KEY_PASS_ENV_VAR}", "PassWord1"] + description: """ + Passphrase used to unlock the encrypted key file. + + This has no effect unless `key_file` is set. + """ + required: false + } + server_name: { + type: string: examples: ["www.example.com"] + description: """ + Server name to use when using Server Name Indication (SNI). + + Only relevant for outgoing connections. + """ + required: false + } + verify_certificate: { + type: bool: {} + description: """ + Enables certificate verification. For components that create a server, this requires that the + client connections have a valid client certificate. For components that initiate requests, + this validates that the upstream has a valid certificate. + + If enabled, certificates must not be expired and must be issued by a trusted + issuer. This verification operates in a hierarchical manner, checking that the leaf certificate (the + certificate presented by the client/server) is not only valid, but that the issuer of that certificate is also valid, and + so on, until the verification process reaches a root certificate. + + Do NOT set this to `false` unless you understand the risks of not verifying the validity of certificates. + """ + required: false + } + verify_hostname: { + type: bool: {} + description: """ + Enables hostname verification. + + If enabled, the hostname used to connect to the remote host must be present in the TLS certificate presented by + the remote host, either as the Common Name or as an entry in the Subject Alternative Name extension. + + Only relevant for outgoing connections. + + Do NOT set this to `false` unless you understand the risks of not verifying the remote hostname. + """ + required: false + } + } + description: "TLS configuration." 
+ required: false + relevant_when: "type = \"aws_secrets_manager\"" + } + endpoint: { + type: string: examples: ["http://127.0.0.0:5000/path/to/service"] + description: "Custom endpoint for use with AWS-compatible services." + required: false + relevant_when: "type = \"aws_secrets_manager\"" } region: { - type: string: examples: ["us-west-2"] + type: string: examples: ["us-east-1"] description: """ - The [AWS region][aws_region] to send STS requests to. - - If not set, this defaults to the configured region - for the service itself. + The [AWS region][aws_region] of the target service. [aws_region]: https://docs.aws.amazon.com/general/latest/gr/rande.html#regional-endpoints """ - required: false + required: false + relevant_when: "type = \"aws_secrets_manager\"" } - secret_access_key: { - type: string: examples: ["wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"] - description: "The AWS secret access key." - required: true + type: { + required: true + type: string: enum: { + file: "File." + directory: "Directory." + exec: "Exec." + aws_secrets_manager: "AWS Secrets Manager." + } + description: "secret type" } - session_name: { - type: string: examples: ["vector-indexer-role"] - description: """ - The optional [RoleSessionName][role_session_name] is a unique session identifier for your assumed role. + } + description: "A secret backend." + common: false + required: true + } + description: "All configured secrets backends." + group: "secrets" + } + sinks: { + type: object: options: "*": { + type: object: options: { + buffer: { + type: object: options: { + when_full: { + type: string: { + enum: { + block: """ + Wait for free space in the buffer. + + This applies backpressure up the topology, signalling that sources should slow down + the acceptance/consumption of events. This means that while no data is lost, data will pile + up at the edge. + """ + drop_newest: """ + Drops the event instead of waiting for free space in buffer. - Should be unique per principal or reason. 
- If not set, the session name is autogenerated like assume-role-provider-1736428351340 + The event will be intentionally dropped. This mode is typically used when performance is the + highest priority, and it is preferable to temporarily lose events rather than cause a + slowdown in the acceptance/consumption of events. + """ + } + default: "block" + } + description: "Event handling behavior when a buffer is full." + required: false + } + max_events: { + type: uint: default: 500 + required: false + description: "The maximum number of events allowed in the buffer." + relevant_when: "type = \"memory\"" + } + max_size: { + type: uint: unit: "bytes" + required: true + description: """ + The maximum allowed amount of allocated memory the buffer can hold. - [role_session_name]: https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html - """ - required: false - } - session_token: { - type: string: examples: ["AQoDYXdz...AQoDYXdz..."] + If `type = "disk"` then must be at least ~256 megabytes (268435488 bytes). + """ + } + type: { + required: false + type: string: { + enum: { + memory: """ + Events are buffered in memory. + + This is more performant, but less durable. Data will be lost if Vector is restarted + forcefully or crashes. + """ + disk: """ + Events are buffered on disk. + + This is less performant, but more durable. Data that has been synchronized to disk will not + be lost if Vector is restarted forcefully or crashes. + + Data is synchronized to disk every 500ms. + """ + } + default: "memory" + } + description: "The type of buffer to use." + } + } description: """ - The AWS session token. - See [AWS temporary credentials](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_use-resources.html) + Configures the buffering behavior for this sink. + + More information about the individual buffer types, and buffer behavior, can be found in the + [Buffering Model][buffering_model] section. 
+ + [buffering_model]: /docs/architecture/buffering-model/ """ required: false } - credentials_file: { - type: string: examples: ["/my/aws/credentials"] - description: "Path to the credentials file." - required: true - } - profile: { - type: string: { - default: "default" - examples: ["develop"] + graph: { + type: object: options: node_attributes: { + type: object: { + options: "*": { + type: string: {} + required: true + description: "A single graph node attribute in graphviz DOT language." + } + examples: [{ + color: "red" + name: "Example Node" + width: "5.0" + }] + } + description: """ + Node attributes to add to this component's node in resulting graph + + They are added to the node as provided + """ + required: false } description: """ - The credentials profile to use. + Extra graph configuration - Used to select AWS credentials from a provided credentials file. + Configure output for component when generated with graph command """ required: false } - imds: { + healthcheck: { type: object: options: { - connect_timeout_seconds: { - type: uint: { - default: 1 + enabled: { + type: bool: default: true + description: "Whether or not to check the health of the sink when Vector starts up." + required: false + } + timeout: { + type: float: { + default: 10.0 unit: "seconds" } - description: "Connect timeout for IMDS." + description: "Timeout duration for healthcheck in seconds." required: false } - max_attempts: { - type: uint: default: 4 - description: "Number of IMDS retries for fetching tokens and metadata." + uri: { + type: string: {} + description: """ + The full URI to make HTTP healthcheck requests to. + + This must be a valid URI, which requires at least the scheme and host. All other + components -- port, path, etc -- are allowed as well. + """ + required: false + } + } + description: "Healthcheck configuration." 
+ required: false + } + inputs: { + type: array: items: type: string: examples: ["my-source-or-transform-id", "prefix-*"] + description: """ + A list of upstream [source][sources] or [transform][transforms] IDs. + + Wildcards (`*`) are supported. + + See [configuration][configuration] for more info. + + [sources]: https://vector.dev/docs/reference/configuration/sources/ + [transforms]: https://vector.dev/docs/reference/configuration/transforms/ + [configuration]: https://vector.dev/docs/reference/configuration/ + """ + required: true + } + proxy: { + type: object: options: { + enabled: { + type: bool: default: true + description: "Enables proxying support." required: false } - read_timeout_seconds: { - type: uint: { - default: 1 - unit: "seconds" + http: { + type: string: examples: ["http://foo.bar:3128"] + description: """ + Proxy endpoint to use when proxying HTTP traffic. + + Must be a valid URI string. + """ + required: false + } + https: { + type: string: examples: ["http://foo.bar:3128"] + description: """ + Proxy endpoint to use when proxying HTTPS traffic. + + Must be a valid URI string. + """ + required: false + } + no_proxy: { + type: array: { + items: type: string: examples: ["localhost", ".foo.bar", "*"] + default: [] } - description: "Read timeout for IMDS." - required: false + description: """ + A list of hosts to avoid proxying. 
+ + Multiple patterns are allowed: + + | Pattern | Example match | + | ------------------- | --------------------------------------------------------------------------- | + | Domain names | `example.com` matches requests to `example.com` | + | Wildcard domains | `.example.com` matches requests to `example.com` and its subdomains | + | IP addresses | `127.0.0.1` matches requests to `127.0.0.1` | + | [CIDR][cidr] blocks | `192.168.0.0/16` matches requests to any IP addresses in this range | + | Splat | `*` matches all hosts | + + [cidr]: https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing + """ + required: false } } - description: "Configuration for authenticating with AWS through IMDS." - required: false - } - load_timeout_secs: { - type: uint: { - examples: [30] - unit: "seconds" - } description: """ - Timeout for successfully loading any credentials, in seconds. + Proxy configuration. + + Configure to proxy traffic through an HTTP(S) proxy when making external requests. - Relevant when the default credentials chain or `assume_role` is used. + Similar to common proxy configuration convention, you can set different proxies + to use based on the type of traffic being proxied. You can also set specific hosts that + should not be proxied. """ required: false } } - description: "Configuration of the authentication strategy for interacting with AWS services." - required: false - relevant_when: "type = \"aws_secrets_manager\"" + description: "A sink." + required: true } - secret_id: { - type: string: {} - description: "ID of the secret to resolve." - required: true - relevant_when: "type = \"aws_secrets_manager\"" - } - tls: { + description: "All configured sinks." + group: "pipeline_components" + } + sources: { + type: object: options: "*": { type: object: options: { - alpn_protocols: { - type: array: items: type: string: examples: ["h2"] - description: """ - Sets the list of supported ALPN protocols. 
+ graph: { + type: object: options: node_attributes: { + type: object: { + options: "*": { + type: string: {} + required: true + description: "A single graph node attribute in graphviz DOT language." + } + examples: [{ + color: "red" + name: "Example Node" + width: "5.0" + }] + } + description: """ + Node attributes to add to this component's node in resulting graph - Declare the supported ALPN protocols, which are used during negotiation with a peer. They are prioritized in the order - that they are defined. - """ - required: false - } - ca_file: { - type: string: examples: ["/path/to/certificate_authority.crt"] + They are added to the node as provided + """ + required: false + } description: """ - Absolute path to an additional CA certificate file. + Extra graph configuration - The certificate must be in the DER or PEM (X.509) format. Additionally, the certificate can be provided as an inline string in PEM format. + Configure output for component when generated with graph command """ required: false } - crt_file: { - type: string: examples: ["/path/to/host_certificate.crt"] - description: """ - Absolute path to a certificate file used to identify this server. + proxy: { + type: object: options: { + enabled: { + type: bool: default: true + description: "Enables proxying support." + required: false + } + http: { + type: string: examples: ["http://foo.bar:3128"] + description: """ + Proxy endpoint to use when proxying HTTP traffic. - The certificate must be in DER, PEM (X.509), or PKCS#12 format. Additionally, the certificate can be provided as - an inline string in PEM format. + Must be a valid URI string. + """ + required: false + } + https: { + type: string: examples: ["http://foo.bar:3128"] + description: """ + Proxy endpoint to use when proxying HTTPS traffic. - If this is set _and_ is not a PKCS#12 archive, `key_file` must also be set. 
- """ - required: false - } - key_file: { - type: string: examples: ["/path/to/host_certificate.key"] - description: """ - Absolute path to a private key file used to identify this server. + Must be a valid URI string. + """ + required: false + } + no_proxy: { + type: array: { + items: type: string: examples: ["localhost", ".foo.bar", "*"] + default: [] + } + description: """ + A list of hosts to avoid proxying. - The key must be in DER or PEM (PKCS#8) format. Additionally, the key can be provided as an inline string in PEM format. - """ - required: false - } - key_pass: { - type: string: examples: ["${KEY_PASS_ENV_VAR}", "PassWord1"] - description: """ - Passphrase used to unlock the encrypted key file. + Multiple patterns are allowed: - This has no effect unless `key_file` is set. - """ - required: false - } - server_name: { - type: string: examples: ["www.example.com"] + | Pattern | Example match | + | ------------------- | --------------------------------------------------------------------------- | + | Domain names | `example.com` matches requests to `example.com` | + | Wildcard domains | `.example.com` matches requests to `example.com` and its subdomains | + | IP addresses | `127.0.0.1` matches requests to `127.0.0.1` | + | [CIDR][cidr] blocks | `192.168.0.0/16` matches requests to any IP addresses in this range | + | Splat | `*` matches all hosts | + + [cidr]: https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing + """ + required: false + } + } description: """ - Server name to use when using Server Name Indication (SNI). + Proxy configuration. + + Configure to proxy traffic through an HTTP(S) proxy when making external requests. - Only relevant for outgoing connections. + Similar to common proxy configuration convention, you can set different proxies + to use based on the type of traffic being proxied. You can also set specific hosts that + should not be proxied. 
""" required: false } - verify_certificate: { - type: bool: {} - description: """ - Enables certificate verification. For components that create a server, this requires that the - client connections have a valid client certificate. For components that initiate requests, - this validates that the upstream has a valid certificate. + } + description: "A source." + required: true + } + description: "All configured sources." + group: "pipeline_components" + } + transforms: { + type: object: options: "*": { + type: object: options: { + graph: { + type: object: options: node_attributes: { + type: object: { + options: "*": { + type: string: {} + required: true + description: "A single graph node attribute in graphviz DOT language." + } + examples: [{ + color: "red" + name: "Example Node" + width: "5.0" + }] + } + description: """ + Node attributes to add to this component's node in resulting graph - If enabled, certificates must not be expired and must be issued by a trusted - issuer. This verification operates in a hierarchical manner, checking that the leaf certificate (the - certificate presented by the client/server) is not only valid, but that the issuer of that certificate is also valid, and - so on, until the verification process reaches a root certificate. + They are added to the node as provided + """ + required: false + } + description: """ + Extra graph configuration - Do NOT set this to `false` unless you understand the risks of not verifying the validity of certificates. + Configure output for component when generated with graph command """ required: false } - verify_hostname: { - type: bool: {} + inputs: { + type: array: items: type: string: examples: ["my-source-or-transform-id", "prefix-*"] description: """ - Enables hostname verification. + A list of upstream [source][sources] or [transform][transforms] IDs. 
- If enabled, the hostname used to connect to the remote host must be present in the TLS certificate presented by - the remote host, either as the Common Name or as an entry in the Subject Alternative Name extension. + Wildcards (`*`) are supported. - Only relevant for outgoing connections. + See [configuration][configuration] for more info. - Do NOT set this to `false` unless you understand the risks of not verifying the remote hostname. + [sources]: https://vector.dev/docs/reference/configuration/sources/ + [transforms]: https://vector.dev/docs/reference/configuration/transforms/ + [configuration]: https://vector.dev/docs/reference/configuration/ """ - required: false + required: true } } - description: "TLS configuration." - required: false - relevant_when: "type = \"aws_secrets_manager\"" + description: "A transform." + required: true } - endpoint: { - type: string: examples: ["http://127.0.0.0:5000/path/to/service"] - description: "Custom endpoint for use with AWS-compatible services." - required: false - relevant_when: "type = \"aws_secrets_manager\"" - } - region: { - type: string: examples: ["us-east-1"] + description: "All configured transforms." + group: "pipeline_components" + } + acknowledgements: { + type: object: options: enabled: { + type: bool: {} description: """ - The [AWS region][aws_region] of the target service. + Controls whether or not end-to-end acknowledgements are enabled. - [aws_region]: https://docs.aws.amazon.com/general/latest/gr/rande.html#regional-endpoints + When enabled for a sink, any source that supports end-to-end + acknowledgements that is connected to that sink waits for events + to be acknowledged by **all connected sinks** before acknowledging them at the source. + + Enabling or disabling acknowledgements at the sink level takes precedence over any global + [`acknowledgements`][global_acks] configuration. 
+ + [global_acks]: https://vector.dev/docs/reference/configuration/global-options/#acknowledgements """ - required: false - relevant_when: "type = \"aws_secrets_manager\"" - } - type: { - required: true - type: string: enum: { - file: "File." - directory: "Directory." - exec: "Exec." - aws_secrets_manager: "AWS Secrets Manager." - } - description: "secret type" + required: false } - } - description: """ - Configuration options to retrieve secrets from external backend in order to avoid storing secrets in plaintext - in Vector config. Multiple backends can be configured. Use `SECRET[.]` to tell Vector to retrieve the secret. This placeholder is replaced by the secret - retrieved from the relevant backend. + description: """ + Controls how acknowledgements are handled for all sinks by default. - When `type` is `exec`, the provided command will be run and provided a list of - secrets to fetch, determined from the configuration file, on stdin as JSON in the format: + See [End-to-end Acknowledgements][e2e_acks] for more information on how Vector handles event + acknowledgement. - ```json - {"version": "1.0", "secrets": ["secret1", "secret2"]} - ``` + [e2e_acks]: https://vector.dev/docs/architecture/end-to-end-acknowledgements/ + """ + common: true + required: false + group: "global_options" + } + buffer_utilization_ewma_half_life_seconds: { + type: float: {} + description: """ + The half-life, in seconds, for the exponential weighted moving average (EWMA) of source + and transform buffer utilization metrics. - The executable is expected to respond with the values of these secrets on stdout, also as JSON, in the format: + This controls how quickly the `*_buffer_utilization_mean` gauges respond to new + observations. Longer half-lives retain more of the previous value, leading to slower + adjustments. 
- ```json - { - "secret1": {"value": "secret_value", "error": null}, - "secret2": {"value": null, "error": "could not fetch the secret"} - } - ``` - If an `error` is returned for any secrets, or if the command exits with a non-zero status code, - Vector will log the errors and exit. - - Otherwise, the secret must be a JSON text string with key/value pairs. For example: - ```json - { - "username": "test", - "password": "example-password" - } - ``` + - Lower values (< 1): Metrics update quickly but may be volatile + - Default (5): Balanced between responsiveness and stability + - Higher values (> 5): Smooth, stable metrics that update slowly - If an error occurred while reading the file or retrieving the secrets, Vector logs the error and exits. + Adjust based on whether you need fast detection of buffer issues (lower) + or want to see sustained trends without noise (higher). - Secrets are loaded when Vector starts or if Vector receives a `SIGHUP` signal triggering its - configuration reload process. - """ - common: false - required: false - } - acknowledgements: { - common: true - description: """ - Controls how acknowledgements are handled for all sinks by default. - - See [End-to-end Acknowledgements][e2e_acks] for more information on how Vector handles event - acknowledgement. - - [e2e_acks]: https://vector.dev/docs/architecture/end-to-end-acknowledgements/ - """ - required: false - type: object: options: enabled: { + Must be greater than 0. + """ + group: "global_options" + } + data_dir: { + type: string: default: "/var/lib/vector/" description: """ - Controls whether or not end-to-end acknowledgements are enabled. + The directory used for persisting Vector state data. - When enabled for a sink, any source that supports end-to-end - acknowledgements that is connected to that sink waits for events - to be acknowledged by **all connected sinks** before acknowledging them at the source. 
+ This is the directory where Vector will store any state data, such as disk buffers, file + checkpoints, and more. - Enabling or disabling acknowledgements at the sink level takes precedence over any global - [`acknowledgements`][global_acks] configuration. - - [global_acks]: https://vector.dev/docs/reference/configuration/global-options/#acknowledgements + Vector must have write permissions to this directory. """ - required: false - type: bool: {} + common: false + group: "global_options" } - } - buffer_utilization_ewma_half_life_seconds: { - description: """ - The half-life, in seconds, for the exponential weighted moving average (EWMA) of source - and transform buffer utilization metrics. - - This controls how quickly the `*_buffer_utilization_mean` gauges respond to new - observations. Longer half-lives retain more of the previous value, leading to slower - adjustments. - - - Lower values (< 1): Metrics update quickly but may be volatile - - Default (5): Balanced between responsiveness and stability - - Higher values (> 5): Smooth, stable metrics that update slowly - - Adjust based on whether you need fast detection of buffer issues (lower) - or want to see sustained trends without noise (higher). - - Must be greater than 0. - """ - required: false - type: float: {} - } - data_dir: { - common: false - description: """ - The directory used for persisting Vector state data. - - This is the directory where Vector will store any state data, such as disk buffers, file - checkpoints, and more. - - Vector must have write permissions to this directory. - """ - required: false - type: string: default: "/var/lib/vector/" - } - expire_metrics_per_metric_set: { - description: """ - This allows configuring different expiration intervals for different metric sets. - By default this is empty and any metric not matched by one of these sets will use - the global default value, defined using `expire_metrics_secs`. 
- """ - required: false - type: array: items: type: object: options: { - expire_secs: { - description: """ - The amount of time, in seconds, that internal metrics will persist after having not been - updated before they expire and are removed. + expire_metrics_per_metric_set: { + type: array: items: type: object: options: { + expire_secs: { + type: float: examples: [60.0] + description: """ + The amount of time, in seconds, that internal metrics will persist after having not been + updated before they expire and are removed. - Set this to a value larger than your `internal_metrics` scrape interval (default 5 minutes) - so that metrics live long enough to be emitted and captured. - """ - required: true - type: float: examples: [60.0] - } - labels: { - description: "Labels to apply this expiration to. Ignores labels if not defined." - required: false - type: object: options: { - matchers: { - description: "List of matchers to check." - required: true - type: array: items: type: object: options: { - key: { - description: "Metric key to look for." - required: true - type: string: {} - } - type: { - description: "Metric label matcher type." - required: true - type: string: enum: { - exact: "Looks for an exact match of one label key value pair." - regex: "Compares label value with given key to the provided pattern." + Set this to a value larger than your `internal_metrics` scrape interval (default 5 minutes) + so that metrics live long enough to be emitted and captured. + """ + required: true + } + labels: { + type: object: options: { + matchers: { + type: array: items: type: object: options: { + key: { + type: string: {} + description: "Metric key to look for." + required: true + } + value: { + type: string: {} + description: "The exact metric label value." + required: true + relevant_when: "type = \"exact\"" + } + value_pattern: { + type: string: {} + description: "Pattern to compare metric label value to." 
+ required: true + relevant_when: "type = \"regex\"" + } + type: { + required: true + type: string: enum: { + exact: "Looks for an exact match of one label key value pair." + regex: "Compares label value with given key to the provided pattern." + } + description: "Metric label matcher type." } } - value: { - description: "The exact metric label value." - relevant_when: "type = \"exact\"" - required: true - type: string: {} - } - value_pattern: { - description: "Pattern to compare metric label value to." - relevant_when: "type = \"regex\"" - required: true - type: string: {} - } + description: "List of matchers to check." + required: true } - } - type: { - description: "Metric label group matcher type." - required: true - type: string: enum: { - all: "Checks that all of the provided matchers can be applied to given metric." - any: "Checks that any of the provided matchers can be applied to given metric." + type: { + required: true + type: string: enum: { + any: "Checks that any of the provided matchers can be applied to given metric." + all: "Checks that all of the provided matchers can be applied to given metric." + } + description: "Metric label group matcher type." } } + description: "Labels to apply this expiration to. Ignores labels if not defined." + required: false } - } - name: { - description: "Metric name to apply this expiration to. Ignores metric name if not defined." - required: false - type: object: options: { - pattern: { - description: "Pattern to compare to." - relevant_when: "type = \"regex\"" - required: true - type: string: {} - } - type: { - description: "Metric name matcher type." - required: true - type: string: enum: { - exact: "Only considers exact name matches." - regex: "Compares metric name to the provided pattern." + name: { + type: object: options: { + value: { + type: string: {} + description: "The exact metric name." 
+ required: true + relevant_when: "type = \"exact\"" + } + pattern: { + type: string: {} + description: "Pattern to compare to." + required: true + relevant_when: "type = \"regex\"" + } + type: { + required: true + type: string: enum: { + exact: "Only considers exact name matches." + regex: "Compares metric name to the provided pattern." + } + description: "Metric name matcher type." } } - value: { - description: "The exact metric name." - relevant_when: "type = \"exact\"" - required: true - type: string: {} - } + description: "Metric name to apply this expiration to. Ignores metric name if not defined." + required: false } } + description: """ + This allows configuring different expiration intervals for different metric sets. + By default this is empty and any metric not matched by one of these sets will use + the global default value, defined using `expire_metrics_secs`. + """ + group: "global_options" } - } - expire_metrics_secs: { - common: false - description: """ - The amount of time, in seconds, that internal metrics will persist after having not been - updated before they expire and are removed. - - Set this to a value larger than your `internal_metrics` scrape interval (default 5 minutes) - so metrics live long enough to be emitted and captured. - """ - required: false - type: float: {} - } - latency_ewma_alpha: { - description: """ - The alpha value for the exponential weighted moving average (EWMA) of transform latency - metrics. - - This controls how quickly the `component_latency_mean_seconds` gauge responds to new - observations. Values closer to 1.0 retain more of the previous value, leading to slower - adjustments. The default value of 0.9 is equivalent to a "half life" of 6-7 measurements. - - Must be between 0 and 1 exclusively (0 < alpha < 1). - """ - required: false - type: float: {} - } - log_schema: { - common: false - description: """ - Default log schema for all events. 
- - This is used if a component does not have its own specific log schema. All events use a log - schema, whether or not the default is used, to assign event fields on incoming events. - """ - required: false - type: object: options: { - host_key: { - description: """ - The name of the event field to treat as the host which sent the message. - - This field will generally represent a real host, or container, that generated the message, - but is somewhat source-dependent. - """ - required: false - type: string: default: ".host" - } - message_key: { - description: """ - The name of the event field to treat as the event message. + expire_metrics_secs: { + type: float: {} + description: """ + The amount of time, in seconds, that internal metrics will persist after having not been + updated before they expire and are removed. - This would be the field that holds the raw message, such as a raw log line. - """ - required: false - type: string: default: ".message" - } - metadata_key: { - description: """ - The name of the event field to set the event metadata in. + Set this to a value larger than your `internal_metrics` scrape interval (default 5 minutes) + so metrics live long enough to be emitted and captured. + """ + common: false + required: false + group: "global_options" + } + latency_ewma_alpha: { + type: float: {} + description: """ + The alpha value for the exponential weighted moving average (EWMA) of transform latency + metrics. - Generally, this field will be set by Vector to hold event-specific metadata, such as - annotations by the `remap` transform when an error or abort is encountered. - """ - required: false - type: string: default: ".metadata" - } - source_type_key: { - description: """ - The name of the event field to set the source identifier in. + This controls how quickly the `component_latency_mean_seconds` gauge responds to new + observations. Values closer to 1.0 retain more of the previous value, leading to slower + adjustments. 
The default value of 0.9 is equivalent to a "half life" of 6-7 measurements. - This field will be set by the Vector source that the event was created in. - """ - required: false - type: string: default: ".source_type" - } - timestamp_key: { - description: "The name of the event field to treat as the event timestamp." - required: false - type: string: default: ".timestamp" - } + Must be between 0 and 1 exclusively (0 < alpha < 1). + """ + group: "global_options" } - } - metrics_storage_refresh_period: { - description: """ - The interval, in seconds, at which the internal metrics cache for VRL is refreshed. - This must be set to be able to access metrics in VRL functions. - - Higher values lead to stale metric values from `get_vector_metric`, - `find_vector_metrics`, and `aggregate_vector_metrics` functions. - """ - required: false - type: float: {} - } - proxy: { - common: false - description: """ - Proxy configuration. - - Configure to proxy traffic through an HTTP(S) proxy when making external requests. - - Similar to common proxy configuration convention, you can set different proxies - to use based on the type of traffic being proxied. You can also set specific hosts that - should not be proxied. - """ - required: false - type: object: options: { - enabled: { - description: "Enables proxying support." - required: false - type: bool: default: true - } - http: { - description: """ - Proxy endpoint to use when proxying HTTP traffic. - - Must be a valid URI string. - """ - required: false - type: string: examples: ["http://foo.bar:3128"] - } - https: { - description: """ - Proxy endpoint to use when proxying HTTPS traffic. + log_schema: { + type: object: options: { + host_key: { + type: string: default: ".host" + description: """ + The name of the event field to treat as the host which sent the message. - Must be a valid URI string. 
- """ - required: false - type: string: examples: ["http://foo.bar:3128"] - } - no_proxy: { - description: """ - A list of hosts to avoid proxying. + This field will generally represent a real host, or container, that generated the message, + but is somewhat source-dependent. + """ + required: false + } + message_key: { + type: string: default: ".message" + description: """ + The name of the event field to treat as the event message. - Multiple patterns are allowed: + This would be the field that holds the raw message, such as a raw log line. + """ + required: false + } + metadata_key: { + type: string: default: ".metadata" + description: """ + The name of the event field to set the event metadata in. - | Pattern | Example match | - | ------------------- | --------------------------------------------------------------------------- | - | Domain names | `example.com` matches requests to `example.com` | - | Wildcard domains | `.example.com` matches requests to `example.com` and its subdomains | - | IP addresses | `127.0.0.1` matches requests to `127.0.0.1` | - | [CIDR][cidr] blocks | `192.168.0.0/16` matches requests to any IP addresses in this range | - | Splat | `*` matches all hosts | + Generally, this field will be set by Vector to hold event-specific metadata, such as + annotations by the `remap` transform when an error or abort is encountered. + """ + required: false + } + source_type_key: { + type: string: default: ".source_type" + description: """ + The name of the event field to set the source identifier in. - [cidr]: https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing - """ - required: false - type: array: { - default: [] - items: type: string: examples: ["localhost", ".foo.bar", "*"] + This field will be set by the Vector source that the event was created in. + """ + required: false + } + timestamp_key: { + type: string: default: ".timestamp" + description: "The name of the event field to treat as the event timestamp." 
+ required: false } } + description: """ + Default log schema for all events. + + This is used if a component does not have its own specific log schema. All events use a log + schema, whether or not the default is used, to assign event fields on incoming events. + """ + common: false + required: false + group: "schema" } - } - telemetry: { - common: false - description: """ - Telemetry options. - - Determines whether `source` and `service` tags should be emitted with the - `component_sent_*` and `component_received_*` events. - """ - required: false - type: object: options: tags: { - description: "Configures whether to emit certain tags" - required: false + metrics_storage_refresh_period: { + type: float: {} + description: """ + The interval, in seconds, at which the internal metrics cache for VRL is refreshed. + This must be set to be able to access metrics in VRL functions. + + Higher values lead to stale metric values from `get_vector_metric`, + `find_vector_metrics`, and `aggregate_vector_metrics` functions. + """ + group: "global_options" + } + proxy: { type: object: options: { - emit_service: { + enabled: { + type: bool: default: true + description: "Enables proxying support." + required: false + } + http: { + type: string: examples: ["http://foo.bar:3128"] description: """ - True if the `service` tag should be emitted - in the `component_received_*` and `component_sent_*` - telemetry. + Proxy endpoint to use when proxying HTTP traffic. + + Must be a valid URI string. """ required: false - type: bool: default: false } - emit_source: { + https: { + type: string: examples: ["http://foo.bar:3128"] description: """ - True if the `source` tag should be emitted - in the `component_received_*` and `component_sent_*` - telemetry. + Proxy endpoint to use when proxying HTTPS traffic. + + Must be a valid URI string. 
+ """ + required: false + } + no_proxy: { + type: array: { + items: type: string: examples: ["localhost", ".foo.bar", "*"] + default: [] + } + description: """ + A list of hosts to avoid proxying. + + Multiple patterns are allowed: + + | Pattern | Example match | + | ------------------- | --------------------------------------------------------------------------- | + | Domain names | `example.com` matches requests to `example.com` | + | Wildcard domains | `.example.com` matches requests to `example.com` and its subdomains | + | IP addresses | `127.0.0.1` matches requests to `127.0.0.1` | + | [CIDR][cidr] blocks | `192.168.0.0/16` matches requests to any IP addresses in this range | + | Splat | `*` matches all hosts | + + [cidr]: https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing """ required: false - type: bool: default: false } } + description: """ + Proxy configuration. + + Configure to proxy traffic through an HTTP(S) proxy when making external requests. + + Similar to common proxy configuration convention, you can set different proxies + to use based on the type of traffic being proxied. You can also set specific hosts that + should not be proxied. + """ + common: false + required: false + group: "global_options" } - } - timezone: { - common: false - description: """ - The name of the time zone to apply to timestamp conversions that do not contain an explicit time zone. + telemetry: { + type: object: options: tags: { + type: object: options: { + emit_service: { + type: bool: default: false + description: """ + True if the `service` tag should be emitted + in the `component_received_*` and `component_sent_*` + telemetry. + """ + required: false + } + emit_source: { + type: bool: default: false + description: """ + True if the `source` tag should be emitted + in the `component_received_*` and `component_sent_*` + telemetry. 
+ """ + required: false + } + } + description: "Configures whether to emit certain tags" + required: false + } + description: """ + Telemetry options. + + Determines whether `source` and `service` tags should be emitted with the + `component_sent_*` and `component_received_*` events. + """ + common: false + required: false + group: "global_options" + } + timezone: { + type: string: examples: ["local", "America/New_York", "EST5EDT"] + description: """ + The name of the time zone to apply to timestamp conversions that do not contain an explicit time zone. - The time zone name may be any name in the [TZ database][tzdb] or `local` to indicate system - local time. + The time zone name may be any name in the [TZ database][tzdb] or `local` to indicate system + local time. - Note that in Vector/VRL all timestamps are represented in UTC. + Note that in Vector/VRL all timestamps are represented in UTC. + + [tzdb]: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones + """ + common: false + group: "global_options" + } + wildcard_matching: { + type: string: enum: { + strict: "Strict matching (must match at least one existing input)" + relaxed: "Relaxed matching (must match 0 or more inputs)" + } + description: """ + Set wildcard matching mode for inputs - [tzdb]: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones - """ - required: false - type: string: examples: ["local", "America/New_York", "EST5EDT"] + Setting this to "relaxed" allows configurations with wildcards that do not match any inputs + to be accepted without causing an error. + """ + common: false + required: false + group: "global_options" + } } - wildcard_matching: { - common: false - description: """ - Set wildcard matching mode for inputs - - Setting this to "relaxed" allows configurations with wildcards that do not match any inputs - to be accepted without causing an error. 
- """ - required: false - type: string: enum: { - relaxed: "Relaxed matching (must match 0 or more inputs)" - strict: "Strict matching (must match at least one existing input)" + groups: { + global_options: { + title: "Global Options" + description: "Global configuration options that apply to Vector as a whole." + order: 1 + } + pipeline_components: { + title: "Pipeline Components" + description: "Configure sources, transforms, sinks, and enrichment tables for your observability pipeline." + order: 2 + } + api: { + title: "API" + description: "Configure Vector's observability API." + order: 3 + } + schema: { + title: "Schema" + description: "Configure Vector's internal schema system for type tracking and validation." + order: 4 + } + secrets: { + title: "Secrets" + description: "Configure secrets management for secure configuration." + order: 5 } } } diff --git a/website/layouts/partials/docs/sidebar.html b/website/layouts/partials/docs/sidebar.html index bf44a644da000..f9c81ed763cd8 100644 --- a/website/layouts/partials/docs/sidebar.html +++ b/website/layouts/partials/docs/sidebar.html @@ -47,7 +47,7 @@ {{ define "subsection-group" }} {{ $here := .here }} {{ $section := .section }} -{{ $open := .ctx.IsAncestor $section }} +{{ $open := or (.ctx.IsAncestor $section) (eq .ctx.RelPermalink $here) }}
{{ template "link" (dict "here" $here "url" .ctx.RelPermalink "title" (.ctx.Params.short | default .ctx.Title)) }} diff --git a/website/layouts/partials/heading.html b/website/layouts/partials/heading.html index 84f361f360851..853e0ffd9a5bb 100644 --- a/website/layouts/partials/heading.html +++ b/website/layouts/partials/heading.html @@ -1,5 +1,5 @@ {{ $id := (.id | default .text) | urlize }} -{{ $text := .text | markdownify }} +{{ $text := cond .mono .text (.text | markdownify) }} {{ $level := .level | default 2 }} {{ $iconSizes := dict "1" "7" "2" "6" "3" "5" "4" "4" "5" "3" "6" "3" }} {{ $n := index $iconSizes ($level | string) }} diff --git a/website/layouts/shortcodes/config-cross-links.html b/website/layouts/shortcodes/config-cross-links.html new file mode 100644 index 0000000000000..c7f111632fac8 --- /dev/null +++ b/website/layouts/shortcodes/config-cross-links.html @@ -0,0 +1,18 @@ +{{ $currentGroup := .Get "group" -}} + +{{- /* Define all configuration pages in one place */ -}} +{{ $configPages := slice + (dict "group" "global_options" "url" "/docs/reference/configuration/global-options/" "title" "Global Options" "desc" "Global settings like data directories and timezone") + (dict "group" "pipeline_components" "url" "/docs/reference/configuration/pipeline-components/" "title" "Pipeline Components" "desc" "Configure sources, transforms, sinks, and enrichment tables") + (dict "group" "api" "url" "/docs/reference/configuration/api/" "title" "API" "desc" "Configure Vector's observability API") + (dict "group" "schema" "url" "/docs/reference/configuration/schema/" "title" "Schema" "desc" "Configure Vector's internal schema system") + (dict "group" "secrets" "url" "/docs/reference/configuration/secrets/" "title" "Secrets" "desc" "Configure secrets management") +-}} +

For other top-level configuration options, see:

+
    +{{ range $configPages -}} +{{ if ne .group $currentGroup -}} +
  • {{ .title }} - {{ .desc }}
  • +{{ end -}} +{{ end -}} +
diff --git a/website/layouts/shortcodes/config/global.html b/website/layouts/shortcodes/config/global.html index 200c929a7c92f..5c70909b69fdc 100644 --- a/website/layouts/shortcodes/config/global.html +++ b/website/layouts/shortcodes/config/global.html @@ -1,2 +1,33 @@ -{{ $global := site.Data.docs.configuration.configuration }} -{{ partial "data.html" (dict "component_config" $global) }} \ No newline at end of file +{{- $config := site.Data.docs.configuration -}} +{{- $fields := $config.configuration -}} +{{- $groups := $config.groups -}} + +{{- /* Sort groups by their order field */ -}} +{{- $sortedGroups := slice -}} +{{- range $groupKey, $groupMeta := $groups -}} + {{- $sortedGroups = $sortedGroups | append (dict "key" $groupKey "meta" $groupMeta) -}} +{{- end -}} +{{- $sortedGroups = sort $sortedGroups "meta.order" -}} + +{{- /* Render each group in order */ -}} +{{- range $groupInfo := $sortedGroups -}} + {{- $groupKey := $groupInfo.key -}} + {{- $groupMeta := $groupInfo.meta -}} + + {{- /* Group header */ -}} +
+ {{ partial "heading.html" (dict "text" $groupMeta.title "level" 2 "anchor" true "icon" false) }} +

{{ $groupMeta.description }}

+
+ + {{- /* Collect fields for this group */ -}} + {{- $groupFields := dict -}} + {{- range $fieldKey, $fieldValue := $fields -}} + {{- if eq $fieldValue.group $groupKey -}} + {{- $groupFields = merge $groupFields (dict $fieldKey $fieldValue) -}} + {{- end -}} + {{- end -}} + + {{- /* Render fields in this group using the existing data.html partial */ -}} + {{ partial "data.html" (dict "component_config" $groupFields) }} +{{- end -}} \ No newline at end of file diff --git a/website/layouts/shortcodes/config/group.html b/website/layouts/shortcodes/config/group.html new file mode 100644 index 0000000000000..ec6138e389252 --- /dev/null +++ b/website/layouts/shortcodes/config/group.html @@ -0,0 +1,14 @@ +{{- $config := site.Data.docs.configuration -}} +{{- $fields := $config.configuration -}} +{{- $groupName := .Get "group" -}} + +{{- /* Filter fields to only those in the specified group */ -}} +{{- $groupFields := dict -}} +{{- range $fieldKey, $fieldValue := $fields -}} + {{- if eq $fieldValue.group $groupName -}} + {{- $groupFields = merge $groupFields (dict $fieldKey $fieldValue) -}} + {{- end -}} +{{- end -}} + +{{- /* Render fields using the existing data.html partial */ -}} +{{ partial "data.html" (dict "component_config" $groupFields) }}