From bec1ef4791bc268f40f60ea7cce814eef3f2f347 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 31 Mar 2025 16:39:31 -0600 Subject: [PATCH 01/43] update local playbook + ss schema id validation --- local-antora-playbook.yml | 2 +- .../pages/schema-reg/schema-id-validation.adoc | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/local-antora-playbook.yml b/local-antora-playbook.yml index 3c2954f6d5..c77c6ac19e 100644 --- a/local-antora-playbook.yml +++ b/local-antora-playbook.yml @@ -17,7 +17,7 @@ content: - url: https://github.com/redpanda-data/docs branches: [v/*, api, shared, site-search,'!v-end-of-life/*'] - url: https://github.com/redpanda-data/cloud-docs - branches: 'main' + branches: 'DOC-666-cluster-config-in-Cloud' - url: https://github.com/redpanda-data/redpanda-labs branches: main start_paths: [docs,'*/docs'] diff --git a/modules/manage/pages/schema-reg/schema-id-validation.adoc b/modules/manage/pages/schema-reg/schema-id-validation.adoc index 5ecc6c52bd..d1de362525 100644 --- a/modules/manage/pages/schema-reg/schema-id-validation.adoc +++ b/modules/manage/pages/schema-reg/schema-id-validation.adoc @@ -1,6 +1,7 @@ = Server-Side Schema ID Validation :page-categories: Management, Schema Registry, rpk :page-aliases: manage:schema-id-validation.adoc +// tag::single-source[] :description: Learn about server-side schema ID validation for clients using SerDes that produce to Redpanda brokers, and learn how to configure Redpanda to inspect and reject records with invalid schema IDs. You can use server-side schema ID validation for clients using Confluent's SerDes format that produce to Redpanda brokers. You can also configure Redpanda to inspect and reject records with schema IDs that aren't valid according to the configured Subject Name strategy and registered with the Schema Registry. 
@@ -34,6 +35,7 @@ To use schema ID validation: === Enable schema ID validation +ifndef::env-cloud[] By default, server-side schema ID validation is disabled in Redpanda. To enable schema ID validation, change the xref:reference:cluster-properties.adoc#enable_schema_id_validation[`enable_schema_id_validation`] cluster property from its default value of `none` to either `redpanda` or `compat`: * `none`: Schema validation is disabled (no schema ID checks are done). Associated topic properties cannot be modified. @@ -46,6 +48,17 @@ For example, use `rpk` to set the value of `enable_schema_id_validation` to `red ---- rpk cluster config set enable_schema_id_validation redpanda --api-urls=:9644 ---- +endif::[] + +ifdef::env-cloud[] +To enable schema ID validation, set the `enable_schema_id_validation` cluster property to either `redpanda` or `compat`: + +* `none`: Schema validation is disabled (no schema ID checks are done). Associated topic properties cannot be modified. +* `redpanda`: Schema validation is enabled. Only Redpanda topic properties are accepted. +* `compat`: Schema validation is enabled. Both Redpanda and compatible topic properties are accepted. 
+ +See xref:ROOT:manage:config-cluster.adoc[] +endif::[] === Set subject name strategy per topic @@ -124,4 +137,6 @@ rpk topic alter-config topic_foo \ --set redpanda.value.schema.id.validation=true \ --set redpanda.value.subject.name.strategy=RecordNameStrategy \ -X brokers=:9092 ----- \ No newline at end of file +---- + +// end::single-source[] \ No newline at end of file From 896d86afc51b8c13916a0f3dce675757a6f9b445 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 31 Mar 2025 17:06:26 -0600 Subject: [PATCH 02/43] update playbook --- local-antora-playbook.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/local-antora-playbook.yml b/local-antora-playbook.yml index c77c6ac19e..559890672a 100644 --- a/local-antora-playbook.yml +++ b/local-antora-playbook.yml @@ -17,7 +17,7 @@ content: - url: https://github.com/redpanda-data/docs branches: [v/*, api, shared, site-search,'!v-end-of-life/*'] - url: https://github.com/redpanda-data/cloud-docs - branches: 'DOC-666-cluster-config-in-Cloud' + branches: [main, 'DOC-666-cluster-config-in-Cloud'] - url: https://github.com/redpanda-data/redpanda-labs branches: main start_paths: [docs,'*/docs'] From 399fde989260cdbdf88f9dc4fa8c7ced191d51bb Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 31 Mar 2025 17:17:29 -0600 Subject: [PATCH 03/43] fix link --- modules/manage/pages/schema-reg/schema-id-validation.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/manage/pages/schema-reg/schema-id-validation.adoc b/modules/manage/pages/schema-reg/schema-id-validation.adoc index d1de362525..45da10acf7 100644 --- a/modules/manage/pages/schema-reg/schema-id-validation.adoc +++ b/modules/manage/pages/schema-reg/schema-id-validation.adoc @@ -57,7 +57,7 @@ To enable schema ID validation, set the `enable_schema_id_validation` cluster pr * `redpanda`: Schema validation is enabled. Only Redpanda topic properties are accepted. * `compat`: Schema validation is enabled. 
Both Redpanda and compatible topic properties are accepted. -See xref:ROOT:manage:config-cluster.adoc[] +See xref:manage:config-cluster.adoc[] endif::[] === Set subject name strategy per topic From 95490155b3236f36225f51e244f193012742089f Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 31 Mar 2025 18:26:12 -0600 Subject: [PATCH 04/43] fix playbook --- local-antora-playbook.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/local-antora-playbook.yml b/local-antora-playbook.yml index 559890672a..dfb2aa2f01 100644 --- a/local-antora-playbook.yml +++ b/local-antora-playbook.yml @@ -17,7 +17,7 @@ content: - url: https://github.com/redpanda-data/docs branches: [v/*, api, shared, site-search,'!v-end-of-life/*'] - url: https://github.com/redpanda-data/cloud-docs - branches: [main, 'DOC-666-cluster-config-in-Cloud'] + branches: 'DOC-666-Document-feature-Manage-form-factor-appropriate-cluster-configuration-properties-in-Console' - url: https://github.com/redpanda-data/redpanda-labs branches: main start_paths: [docs,'*/docs'] From 1e1f74ba5603cc7ac14d8e141d1d2481d06cf4b1 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Tue, 1 Apr 2025 16:33:35 -0600 Subject: [PATCH 05/43] single sourcing updates --- modules/console/pages/ui/edit-topic-configuration.adoc | 2 +- modules/console/pages/ui/programmable-push-filters.adoc | 2 +- modules/console/pages/ui/record-deserialization.adoc | 2 +- modules/console/pages/ui/schema-reg.adoc | 2 +- modules/manage/pages/audit-logging.adoc | 5 ++++- modules/manage/pages/audit-logging/audit-log-samples.adoc | 5 ++++- modules/manage/pages/schema-reg/schema-id-validation.adoc | 4 ++-- 7 files changed, 14 insertions(+), 8 deletions(-) diff --git a/modules/console/pages/ui/edit-topic-configuration.adoc b/modules/console/pages/ui/edit-topic-configuration.adoc index 8216c7bc11..6fb1fd1c4b 100644 --- a/modules/console/pages/ui/edit-topic-configuration.adoc +++ b/modules/console/pages/ui/edit-topic-configuration.adoc @@ -1,7 
+1,7 @@ = Edit Topic Configuration in the {ui} :page-aliases: manage:console/edit-topic-configuration.adoc -// tag::single-source[] :description: Use {ui} to edit the configuration of existing topics in a cluster. +// tag::single-source[] {description} diff --git a/modules/console/pages/ui/programmable-push-filters.adoc b/modules/console/pages/ui/programmable-push-filters.adoc index f7a438627f..77ea12008e 100644 --- a/modules/console/pages/ui/programmable-push-filters.adoc +++ b/modules/console/pages/ui/programmable-push-filters.adoc @@ -1,8 +1,8 @@ = Filter Messages with JavaScript in {ui} :page-aliases: console:features/programmable-push-filters.adoc, reference:console/programmable-push-filters.adoc // Do not put page aliases in the single-sourced content -// tag::single-source[] :description: Learn how to filter Kafka records in {ui} based on your provided JavaScript code. +// tag::single-source[] You can use push-down filters in {ui} to search for specific records within a Kafka topic. diff --git a/modules/console/pages/ui/record-deserialization.adoc b/modules/console/pages/ui/record-deserialization.adoc index 57ded2e83f..592cf2f139 100644 --- a/modules/console/pages/ui/record-deserialization.adoc +++ b/modules/console/pages/ui/record-deserialization.adoc @@ -1,7 +1,7 @@ = View Deserialized Messages in {ui} :page-aliases: console:features/record-deserialization.adoc, manage:console/protobuf.adoc, reference:console/record-deserialization.adoc -// tag::single-source[] :description: Learn how {ui} deserializes messages. +// tag::single-source[] In Redpanda, the messages exchanged between producers and consumers contain raw bytes. Schemas work as an agreed-upon format, like a contract, for producers and consumers to serialize and deserialize those messages. If a producer breaks this contract, consumers can fail. 
diff --git a/modules/console/pages/ui/schema-reg.adoc b/modules/console/pages/ui/schema-reg.adoc index f0b5429c61..fd1bb7ebe5 100644 --- a/modules/console/pages/ui/schema-reg.adoc +++ b/modules/console/pages/ui/schema-reg.adoc @@ -1,8 +1,8 @@ = Use Schema Registry in {ui} :page-aliases: manage:schema-reg/schema-reg-ui.adoc :page-categories: Management, Schema Registry -// tag::single-source[] :description: Perform common Schema Registry management operations in the {ui}. +// tag::single-source[] In {ui}, the *Schema Registry* menu lists registered and verified schemas, including their serialization format and versions. Select an individual schema to see which topics it applies to. diff --git a/modules/manage/pages/audit-logging.adoc b/modules/manage/pages/audit-logging.adoc index e157b90bc4..0b98f49fbb 100644 --- a/modules/manage/pages/audit-logging.adoc +++ b/modules/manage/pages/audit-logging.adoc @@ -3,5 +3,8 @@ :page-context-links: [{"name": "Linux", "to": "manage:audit-logging.adoc" },{"name": "Kubernetes", "to": "manage:kubernetes/security/k-audit-logging.adoc" } ] :page-categories: Management, Security :env-linux: true +// tag::single-source[] -include::manage:partial$audit-logging.adoc[] \ No newline at end of file +include::manage:partial$audit-logging.adoc[] + +// end::single-source[] \ No newline at end of file diff --git a/modules/manage/pages/audit-logging/audit-log-samples.adoc b/modules/manage/pages/audit-logging/audit-log-samples.adoc index d1032a1270..d29a8228fd 100644 --- a/modules/manage/pages/audit-logging/audit-log-samples.adoc +++ b/modules/manage/pages/audit-logging/audit-log-samples.adoc @@ -1,6 +1,7 @@ = Sample Audit Log Messages :description: Sample Redpanda audit log messages. 
:page-categories: Management, Security +// tag::single-source[] [NOTE] ==== @@ -534,4 +535,6 @@ Similar to the previous example, this example illustrates a user requesting clus "unmapped": {} } ---- -==== \ No newline at end of file +==== + +// end::single-source[] \ No newline at end of file diff --git a/modules/manage/pages/schema-reg/schema-id-validation.adoc b/modules/manage/pages/schema-reg/schema-id-validation.adoc index 45da10acf7..017988730d 100644 --- a/modules/manage/pages/schema-reg/schema-id-validation.adoc +++ b/modules/manage/pages/schema-reg/schema-id-validation.adoc @@ -1,8 +1,8 @@ = Server-Side Schema ID Validation :page-categories: Management, Schema Registry, rpk :page-aliases: manage:schema-id-validation.adoc -// tag::single-source[] :description: Learn about server-side schema ID validation for clients using SerDes that produce to Redpanda brokers, and learn how to configure Redpanda to inspect and reject records with invalid schema IDs. +// tag::single-source[] You can use server-side schema ID validation for clients using Confluent's SerDes format that produce to Redpanda brokers. You can also configure Redpanda to inspect and reject records with schema IDs that aren't valid according to the configured Subject Name strategy and registered with the Schema Registry. @@ -57,7 +57,7 @@ To enable schema ID validation, set the `enable_schema_id_validation` cluster pr * `redpanda`: Schema validation is enabled. Only Redpanda topic properties are accepted. * `compat`: Schema validation is enabled. Both Redpanda and compatible topic properties are accepted. 
-See xref:manage:config-cluster.adoc[] +See xref:manage:cluster-maintenance/config-cluster.adoc[] endif::[] === Set subject name strategy per topic From d2efddf892bbc7928ba04f27884b4eede6197c5f Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Tue, 1 Apr 2025 17:08:16 -0600 Subject: [PATCH 06/43] conditionalize audit logging --- .../audit-logging/audit-log-samples.adoc | 4 +++ modules/manage/partials/audit-logging.adoc | 30 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/modules/manage/pages/audit-logging/audit-log-samples.adoc b/modules/manage/pages/audit-logging/audit-log-samples.adoc index d29a8228fd..7a79e1a4d3 100644 --- a/modules/manage/pages/audit-logging/audit-log-samples.adoc +++ b/modules/manage/pages/audit-logging/audit-log-samples.adoc @@ -3,10 +3,12 @@ :page-categories: Management, Security // tag::single-source[] +ifndef::env-cloud[] [NOTE] ==== include::shared:partial$enterprise-license.adoc[] ==== +endif::[] Redpanda's audit logs comply with version 1.0.0 of the https://github.com/ocsf[Open Cybersecurity Schema Framework (OCSF)]. This provides a predictable and extensible solution that works seamlessly with industry standard tools. This page aggregates several sample log files covering a range of scenarios. @@ -329,6 +331,7 @@ This shows a message for a scenario where a user requests a set of metadata usin ---- ==== +ifndef::env-cloud[] == Admin API events The following examples show audit messages related to use of the Redpanda Admin API. 
@@ -536,5 +539,6 @@ Similar to the previous example, this example illustrates a user requesting clus } ---- ==== +endif::[] // end::single-source[] \ No newline at end of file diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index e0db54b47e..568f355b9d 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -1,7 +1,9 @@ +ifndef::env-cloud[] [NOTE] ==== include::shared:partial$enterprise-license.adoc[] ==== +endif::[] Many scenarios for streaming data include the need for fine-grained auditing of user activity related to the system. This is especially true for regulated industries such as finance, healthcare, and the public sector. Complying with https://pcidssguide.com/whats-new-in-pci-dss-v4-0/[PCI DSS v4] standards, for example, requires verbose and detailed activity auditing, alerting, and analysis capabilities. @@ -28,6 +30,7 @@ Redpanda's audit logging mechanism supports several options to control the volum ifdef::env-kubernetes[You can configure these options directly in either the Helm values or the Redpanda resource.] ifdef::env-kubernetes[] +ifndef::env-cloud[] * `auditLogging.enabled`: Sets the value of the xref:reference:cluster-properties.adoc#audit_enabled[`audit_enabled`] cluster property to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. * `auditLogging.partitions`: Sets the value of the xref:reference:cluster-properties.adoc#audit_log_num_partitions[`audit_log_num_partitions`] cluster property to define the number of partitions used by a newly created audit topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. This cannot be altered for an existing audit log topic. Default: `12`. 
* `auditLogging.replicationFactor`: Sets the value of the xref:reference:cluster-properties.adoc#audit_log_replication_factor[`audit_log_replication_factor`] cluster property to define the replication factor for a newly created audit log topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. This cannot be altered for existing audit log topics. If a value is not provided, Redpanda will use the `internal_topic_replication_factor` cluster config value. Default: `null`. @@ -42,6 +45,14 @@ Even though audited event messages are stored to a specialized immutable topic, IMPORTANT: You cannot change the values of `auditLogging.partitions` and `auditLogging.replicationFactor` after enabling audit logging because these settings impact the creation of the `_redpanda.audit_log` topic. The Kafka API allows you to add partitions or alter the replication factor after enabling audit logging, but Redpanda prevents you from altering these two configuration values directly. endif::[] +ifndef::env-cloud[] + +ifdef::env-cloud[] +* `auditLogging.enabled`: Sets the value of the `audit_enabled` cluster property to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. +* `auditLogging.enabledEventTypes`: Sets the value of the `audit_enabled_event_types` cluster property. This option is a list of JSON strings identifying the <> to include in the audit log. Valid values include any of the following - `management`, `produce`, `consume`, `describe`, `heartbeat`, `authenticate`, `schema_registry`, `admin`. Default: `'["management","authenticate","admin"]'`. +* `auditLogging.excludedPrincipals`: Sets the value of the `audit_excluded_principals` cluster property. This option is a list of JSON strings identifying the principals the audit logging system should ignore. 
Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. +endif[] + ifndef::env-kubernetes[] * xref:reference:cluster-properties.adoc#audit_enabled[`audit_enabled`]: Boolean value to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. * xref:reference:cluster-properties.adoc#audit_log_num_partitions[`audit_log_num_partitions`]: Integer value defining the number of partitions used by a newly created audit topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. This cannot be altered for an existing audit log topic. Default: `12`. @@ -300,6 +311,16 @@ Some key tuning recommendations for your audit logging settings include: * Set `audit_enabled` to `true`. * <>. +The sequence of commands in `rpk` for this audit log configuration is: + + rpk cluster config set audit_log_num_partitions 6 + rpk cluster config set audit_log_replication_factor 5 + rpk cluster config set audit_enabled_event_types '["management","describe","authenticate"]' + rpk cluster config set audit_excluded_topics '["topic1","topic2"]' + rpk cluster config set audit_excluded_principals '["User:principal1", "principal2"]' + rpk cluster config set audit_enabled true + rpk topic alter-config _redpanda.audit_log --set retention.ms=259200000 + The sequence of commands in `rpk` for this audit log configuration is: rpk cluster config set audit_log_num_partitions 6 @@ -319,6 +340,7 @@ With audit logging, the pattern of message generation may be very different from A typical scenario with audit logging is to route the messages to an analytics platform like Splunk. If your retention period is too long, you will find that you are storing excessive amounts of replicated messages in both Redpanda and in your analytics suite. 
Identifying the right balance of retention and replication settings minimizes this duplication while retaining your data in a system that provides actionable intelligence. +ifndef::env-cloud[] Assess the retention needs for your audit logs. You may not need to keep the logs around for the default seven days. This is controlled by setting xref:reference:topic-properties.adoc#retentionms[`retention.ms`] for the `_redpanda.audit_log` topic or by setting xref:reference:cluster-properties.adoc#delete_retention_ms[`delete_retention_ms`] at the cluster level. == Next steps @@ -329,3 +351,11 @@ include::shared:partial$suggested-reading.adoc[] - xref:reference:topic-properties.adoc[] - xref:develop:config-topics.adoc[] +endif::[] + +ifdef::env-cloud[] +Assess the retention needs for your audit logs. You may not need to keep the logs around for the default seven days. This is controlled by setting `retention.ms` for the `_redpanda.audit_log` topic. + +== Next steps +xref:manage:cluster-maintenance/audit-log-samples.adoc[See samples of audit log messages]. +endif::[] \ No newline at end of file From 866b6bc33bf616a5f90c0dcaefcff93ee9235635 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Tue, 1 Apr 2025 20:43:46 -0600 Subject: [PATCH 07/43] minor edits --- .../audit-logging/audit-log-samples.adoc | 4 +-- modules/manage/partials/audit-logging.adoc | 27 +++++++++++++------ 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/modules/manage/pages/audit-logging/audit-log-samples.adoc b/modules/manage/pages/audit-logging/audit-log-samples.adoc index 7a79e1a4d3..7ba297e6b8 100644 --- a/modules/manage/pages/audit-logging/audit-log-samples.adoc +++ b/modules/manage/pages/audit-logging/audit-log-samples.adoc @@ -133,7 +133,7 @@ This scenario illustrates a common failure where a user entered the wrong creden The Redpanda Kafka API offers a wide array of options for interacting with your Redpanda clusters. 
Following are examples of messages from common interactions with the API. -.Create ACL Entry +.Create ACL entry [%collapsible] ==== This example illustrates an ACL update that also requires a superuser authentication. It lists the edited ACL and the updated permissions. This is a management type event. @@ -237,7 +237,7 @@ This example illustrates an ACL update that also requires a superuser authentica ---- ==== -.Metadata Request (with counts) +.Metadata request (with counts) [%collapsible] ==== This shows a message for a scenario where a user requests a set of metadata using rpk. It provides detailed information on the type of request and the information sent to the user. This is a describe type event. diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index 568f355b9d..f225b64033 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -13,6 +13,8 @@ With audit logging enabled, there should be no noticeable changes in performance NOTE: Audit logging is configured at the cluster level. Redpanda supports excluding specific topics or principals from auditing to help reduce noise in the log. Audit logging is disabled by default. + + == Audit log flow The Redpanda audit log mechanism functions similar to the Kafka flow you may be familiar with. When a user interacts with another user or with a topics, Redpanda writes an event to a specialized audit topic. The audit topic is immutable. Only Redpanda can write to it. Users are prevented from writing to the audit topic directly and the Kafka API cannot create or delete it. @@ -25,12 +27,23 @@ Messages recorded to the audit log topic comply with the https://schema.ocsf.io/ == Audit log configuration options +ifdef::env-cloud[] + +Redpanda's audit logging mechanism supports several options to control the volume and availability of audit records. 
+ +* `auditLogging.enabled`: Sets the value of the `audit_enabled` cluster property to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. +* `auditLogging.enabledEventTypes`: Sets the value of the `audit_enabled_event_types` cluster property. This option is a list of JSON strings identifying the <> to include in the audit log. Valid values include any of the following - `management`, `produce`, `consume`, `describe`, `heartbeat`, `authenticate`, `schema_registry`, `admin`. Default: `'["management","authenticate","admin"]'`. +* `auditLogging.excludedPrincipals`: Sets the value of the `audit_excluded_principals` cluster property. This option is a list of JSON strings identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. + +To configure audit logging, see xref:manage:cluster-maintenance/config-cluster.adoc[]. +endif::[] + +ifndef::env-cloud[] Redpanda's audit logging mechanism supports several options to control the volume and availability of audit records. Configuration is applied at the cluster level using the standard xref:manage:cluster-maintenance/cluster-property-configuration.adoc[cluster configuration mechanism]. ifdef::env-kubernetes[You can configure these options directly in either the Helm values or the Redpanda resource.] ifdef::env-kubernetes[] -ifndef::env-cloud[] * `auditLogging.enabled`: Sets the value of the xref:reference:cluster-properties.adoc#audit_enabled[`audit_enabled`] cluster property to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. 
* `auditLogging.partitions`: Sets the value of the xref:reference:cluster-properties.adoc#audit_log_num_partitions[`audit_log_num_partitions`] cluster property to define the number of partitions used by a newly created audit topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. This cannot be altered for an existing audit log topic. Default: `12`. * `auditLogging.replicationFactor`: Sets the value of the xref:reference:cluster-properties.adoc#audit_log_replication_factor[`audit_log_replication_factor`] cluster property to define the replication factor for a newly created audit log topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. This cannot be altered for existing audit log topics. If a value is not provided, Redpanda will use the `internal_topic_replication_factor` cluster config value. Default: `null`. @@ -45,13 +58,6 @@ Even though audited event messages are stored to a specialized immutable topic, IMPORTANT: You cannot change the values of `auditLogging.partitions` and `auditLogging.replicationFactor` after enabling audit logging because these settings impact the creation of the `_redpanda.audit_log` topic. The Kafka API allows you to add partitions or alter the replication factor after enabling audit logging, but Redpanda prevents you from altering these two configuration values directly. endif::[] -ifndef::env-cloud[] - -ifdef::env-cloud[] -* `auditLogging.enabled`: Sets the value of the `audit_enabled` cluster property to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. -* `auditLogging.enabledEventTypes`: Sets the value of the `audit_enabled_event_types` cluster property. 
This option is a list of JSON strings identifying the <> to include in the audit log. Valid values include any of the following - `management`, `produce`, `consume`, `describe`, `heartbeat`, `authenticate`, `schema_registry`, `admin`. Default: `'["management","authenticate","admin"]'`. -* `auditLogging.excludedPrincipals`: Sets the value of the `audit_excluded_principals` cluster property. This option is a list of JSON strings identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. -endif[] ifndef::env-kubernetes[] * xref:reference:cluster-properties.adoc#audit_enabled[`audit_enabled`]: Boolean value to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. @@ -136,6 +142,11 @@ a|* All Admin API calls == Enable audit logging All audit log settings are applied at the cluster level. + +ifdef::env-cloud[] +See xref:manage:cluster-maintenance/config-cluster.adoc[] +endif::[] + You can configure audit log settings in the Redpanda Helm chart, using Helm values or the Redpanda resource with the Redpanda Operator. 
ifdef::env-kubernetes[] From 093d21eb658b3226a4b1a5172595042dc4fbcd14 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Wed, 2 Apr 2025 12:40:34 -0600 Subject: [PATCH 08/43] single source data transforms --- modules/console/pages/ui/data-transforms.adoc | 13 +++++++------ modules/develop/pages/data-transforms/build.adoc | 8 ++++++++ .../develop/pages/data-transforms/configure.adoc | 12 ++++++++++-- modules/develop/pages/data-transforms/deploy.adoc | 5 ++++- .../pages/data-transforms/how-transforms-work.adoc | 4 +++- modules/develop/pages/data-transforms/monitor.adoc | 13 +++++++++++-- .../pages/data-transforms/run-transforms.adoc | 2 -- modules/develop/pages/data-transforms/test.adoc | 5 ++++- .../develop/partials/data-transforms-ga-notice.adoc | 1 - 9 files changed, 47 insertions(+), 16 deletions(-) delete mode 100644 modules/develop/partials/data-transforms-ga-notice.adoc diff --git a/modules/console/pages/ui/data-transforms.adoc b/modules/console/pages/ui/data-transforms.adoc index 0aac8515ae..a8e578694c 100644 --- a/modules/console/pages/ui/data-transforms.adoc +++ b/modules/console/pages/ui/data-transforms.adoc @@ -1,5 +1,5 @@ -= Manage Data Transforms in Redpanda Console -:description: You can use Redpanda Console to monitor the status and performance metrics of your transform functions. You can also view detailed logs and delete transform functions when they are no longer needed. += Manage Data Transforms in {ui} +:description: You can use {ui} to monitor the status and performance metrics of your transform functions. You can also view detailed logs and delete transform functions when they are no longer needed. {description} @@ -7,6 +7,7 @@ Before you begin, ensure that you have the following: + - Redpanda Console installed. - xref:develop:data-transforms/configure.adoc#enable-transforms[Data transforms enabled] in your Redpanda cluster. - At least one transform function deployed to your Redpanda cluster. 
@@ -14,7 +15,7 @@ Before you begin, ensure that you have the following: [[monitor]] == Monitor transform functions -To monitor transform functions in Redpanda Console: +To monitor transform functions: . Navigate to the *Transforms* menu. . Click on the name of a transform function to view detailed information: @@ -25,18 +26,18 @@ To monitor transform functions in Redpanda Console: [[logs]] == View logs -To view logs for a transform function in Redpanda Console: +To view logs for a transform function: . Navigate to the *Transforms* menu. . Click on the name of a transform function. . Click the *Logs* tab to see the logs. -Redpanda Console displays a limited number of logs for transform functions. To view the full history of logs, use the xref:develop:data-transforms/monitor.adoc#logs[`rpk` command-line tool]. +{ui} displays a limited number of logs for transform functions. To view the full history of logs, use the xref:develop:data-transforms/monitor.adoc#logs[`rpk` command-line tool]. [[delete]] == Delete transform functions -To delete a transform function in Redpanda Console: +To delete a transform function: 1. Navigate to the *Transforms* menu. 2. Find the transform function you want to delete from the list. diff --git a/modules/develop/pages/data-transforms/build.adoc b/modules/develop/pages/data-transforms/build.adoc index c95b468850..58813b5f19 100644 --- a/modules/develop/pages/data-transforms/build.adoc +++ b/modules/develop/pages/data-transforms/build.adoc @@ -1,6 +1,7 @@ = Develop Data Transforms :description: Learn how to initialize a data transforms project and write transform functions in your chosen language. :page-categories: Development, Stream Processing, Data Transforms +// tag::single-source[] {description} @@ -8,7 +9,12 @@ You must have the following development tools installed on your host machine: +ifdef::env-cloud[] +* The xref:manage:rpk/rpk-install.adoc[`rpk` command-line client] installed. 
+endif::[] +ifndef::env-cloud[] * The xref:get-started:rpk-install.adoc[`rpk` command-line client] installed on your host machine and configured to connect to your Redpanda cluster. +endif::[] * For Golang projects, you must have at least version 1.20 of https://go.dev/doc/install[Go^]. * For Rust projects, you must have the latest stable version of https://rustup.rs/[Rust^]. * For JavaScript and TypeScript projects, you must have the https://nodejs.org/en/download/package-manager[latest long-term-support release of Node.js^]. @@ -458,3 +464,5 @@ xref:develop:data-transforms/configure.adoc[] - xref:develop:data-transforms/how-transforms-work.adoc[] - xref:reference:data-transforms/sdks.adoc[] - xref:reference:rpk/rpk-transform/rpk-transform.adoc[`rpk transform` commands] + +// end::single-source[] \ No newline at end of file diff --git a/modules/develop/pages/data-transforms/configure.adoc b/modules/develop/pages/data-transforms/configure.adoc index 063b7f6edc..03d632a1df 100644 --- a/modules/develop/pages/data-transforms/configure.adoc +++ b/modules/develop/pages/data-transforms/configure.adoc @@ -1,6 +1,7 @@ = Configure Data Transforms :description: pass:q[Learn how to configure data transforms in Redpanda, including editing the `transform.yaml` file, environment variables, and memory settings. This topic covers both the configuration of transform functions and the WebAssembly (Wasm) engine's environment.] :page-categories: Development, Stream Processing, Data Transforms +// tag::single-source[] {description} @@ -62,6 +63,7 @@ This section covers how to configure the Wasm engine environment using Redpanda To use data transforms, you must enable it for a Redpanda cluster using the xref:reference:properties/cluster-properties.adoc#data_transforms_enabled[`data_transforms_enabled`] property. 
+ifndef::env-cloud[] [[resources]] === Configure memory resources for data transforms @@ -74,6 +76,7 @@ Set the following properties based on the number of functions you have and the a - xref:reference:properties/cluster-properties.adoc#data_transforms_per_function_memory_limit[`data_transforms_per_function_memory_limit`]: Adjust this setting if individual transform functions require more memory to process records efficiently. Reducing it may cause memory errors in complex transforms. The maximum number of functions that can be deployed to a cluster is equal to `data_transforms_per_core_memory_reservation` / `data_transforms_per_function_memory_limit`. When that limit is hit, Redpanda cannot allocate memory for the VM and the transforms stay in `errored` states. +endif::[] [[binary-size]] === Configure maximum binary size @@ -91,22 +94,27 @@ Adjust this setting to control how frequently the transform function's progress [[log]] === Configure transform logging - +ifndef::env-cloud[] Redpanda provides several properties to configure logging for data transforms: - xref:reference:properties/cluster-properties.adoc#data_transforms_logging_buffer_capacity_bytes[`data_transforms_logging_buffer_capacity_bytes`]: Increase this value if your transform logs are large or if you need to buffer more log data before flushing. Reducing this value may cause more frequent log flushing. - xref:reference:properties/cluster-properties.adoc#data_transforms_logging_flush_interval_ms[`data_transforms_logging_flush_interval_ms`]: Adjust this value to control how frequently logs are flushed to the `transform_logs` topic. Shorter intervals provide more frequent log updates but can increase load. Longer intervals reduce load but may delay log updates. +endif::[] - xref:reference:properties/cluster-properties.adoc#data_transforms_logging_line_max_bytes[`data_transforms_logging_line_max_bytes`]: Increase this value if your log messages are frequently truncated. 
Setting this value too low may truncate important log information. +ifndef::env-cloud[] [[runtime-limit]] === Configure runtime limits You can set the maximum runtime for starting up a data transform and the time it takes for a single record to be transformed using the xref:reference:properties/cluster-properties.adoc#data_transforms_runtime_limit_ms[`data_transforms_runtime_limit_ms`] property. Adjust this value only if your transform functions need more time to process each record or to start up. +endif::[] == Next steps -xref:develop:data-transforms/deploy.adoc[]. +xref:develop:data-transforms/deploy.adoc[] + +// end::single-source[] \ No newline at end of file diff --git a/modules/develop/pages/data-transforms/deploy.adoc b/modules/develop/pages/data-transforms/deploy.adoc index 5fc358cc10..77f4b775bc 100644 --- a/modules/develop/pages/data-transforms/deploy.adoc +++ b/modules/develop/pages/data-transforms/deploy.adoc @@ -1,6 +1,7 @@ = Deploy Data Transforms :description: Learn how to build, deploy, share, and troubleshoot data transforms in Redpanda. :page-categories: Development, Stream Processing, Data Transforms +// tag::single-source[] {description} @@ -145,4 +146,6 @@ All transform functions must register a callback with the `OnRecordWritten()` me == Next steps -xref:develop:data-transforms/monitor.adoc[Set up monitoring] for data transforms. \ No newline at end of file +xref:develop:data-transforms/monitor.adoc[Set up monitoring] for data transforms. 
+ +// end::single-source[] \ No newline at end of file diff --git a/modules/develop/pages/data-transforms/how-transforms-work.adoc b/modules/develop/pages/data-transforms/how-transforms-work.adoc index 7f35a46096..a27798f07c 100644 --- a/modules/develop/pages/data-transforms/how-transforms-work.adoc +++ b/modules/develop/pages/data-transforms/how-transforms-work.adoc @@ -1,7 +1,7 @@ = How Data Transforms Work :page-categories: Development, Stream Processing, Data Transforms -include::develop:partial$data-transforms-ga-notice.adoc[] :description: Learn how Redpanda data transforms work. +// tag::single-source[] Redpanda provides the framework to build and deploy inline transformations (data transforms) on data written to Redpanda topics, delivering processed and validated data to consumers in the format they expect. Redpanda does this directly inside the broker, eliminating the need to manage a separate stream processing environment or use third-party tools. @@ -77,3 +77,5 @@ This section outlines the limitations of data transforms. These constraints are - xref:reference:data-transform-golang-sdk.adoc[] - xref:reference:data-transform-rust-sdk.adoc[] - xref:reference:rpk/rpk-transform/rpk-transform.adoc[`rpk transform` commands] + +// end::single-source[] \ No newline at end of file diff --git a/modules/develop/pages/data-transforms/monitor.adoc b/modules/develop/pages/data-transforms/monitor.adoc index b1bd07f4ed..b0a3363d32 100644 --- a/modules/develop/pages/data-transforms/monitor.adoc +++ b/modules/develop/pages/data-transforms/monitor.adoc @@ -1,12 +1,19 @@ = Monitor Data Transforms :description: This topic provides guidelines on how to monitor the health of your data transforms and view logs. :page-categories: Development, Stream Processing, Data Transforms +// tag::single-source[] {description} == Prerequisites -xref:manage:monitoring.adoc[Set up monitoring] for your Redpanda cluster. 
+ifndef::env-cloud[] +xref:manage:monitoring.adoc[Set up monitoring] for your cluster. +endif::[] + +ifdef::env-cloud[] +xref:manage:monitor-cloud.adoc[Set up monitoring] for your cluster. +endif::[] == Performance @@ -62,7 +69,7 @@ rpk transform logs Replace `` with the xref:develop:data-transforms/configure.adoc[configured name] of the transform function. -TIP: You can also xref:console:ui/data-transforms.adoc#logs[view logs in Redpanda Console]. +TIP: You can also xref:console:ui/data-transforms.adoc#logs[view logs in {ui}]. By default, Redpanda provides several settings to manage logging for data transforms, such as buffer capacity, flush interval, and maximum log line length. These settings ensure that logging operates efficiently without overwhelming the system. However, you may need to adjust these settings based on your specific requirements and workloads. For information on how to configure logging, see the xref:develop:data-transforms/configure.adoc#log[Configure transform logging] section of the configuration guide. 
@@ -70,3 +77,5 @@ By default, Redpanda provides several settings to manage logging for data transf - xref:reference:public-metrics-reference.adoc#data_transform_metrics[Data transforms metrics] - xref:console:ui/data-transforms.adoc[] + +// end::single-source[] diff --git a/modules/develop/pages/data-transforms/run-transforms.adoc b/modules/develop/pages/data-transforms/run-transforms.adoc index 7909456592..fb53b05d5f 100644 --- a/modules/develop/pages/data-transforms/run-transforms.adoc +++ b/modules/develop/pages/data-transforms/run-transforms.adoc @@ -3,6 +3,4 @@ :page-context-links: [{"name": "Linux", "to": "develop:data-transforms/run-transforms.adoc" },{"name": "Kubernetes", "to": "develop:data-transforms/k-run-transforms.adoc" } ] :page-categories: Development, Stream Processing, Data Transforms -include::develop:partial$data-transforms-ga-notice.adoc[] - include::develop:partial$run-transforms.adoc[] \ No newline at end of file diff --git a/modules/develop/pages/data-transforms/test.adoc b/modules/develop/pages/data-transforms/test.adoc index 8be044a3c9..6bad5185b6 100644 --- a/modules/develop/pages/data-transforms/test.adoc +++ b/modules/develop/pages/data-transforms/test.adoc @@ -1,5 +1,6 @@ = Write Integration Tests for Transform Functions :description: pass:q[Learn how to write integration tests for data transform functions in Redpanda, including setting up unit tests and using testcontainers for integration tests.] +// tag::single-source[] {description} @@ -80,4 +81,6 @@ This will execute all tests in the current directory. Integration tests verify that your transform functions work correctly in a real Redpanda environment. You can use https://github.com/testcontainers/testcontainers-go/tree/main[testcontainers] to set up and manage a Redpanda instance for testing. 
-For more detailed examples and helper code for setting up integration tests, refer to the SDK integration tests on https://github.com/redpanda-data/redpanda/tree/dev/src/transform-sdk/tests[GitHub]. \ No newline at end of file +For more detailed examples and helper code for setting up integration tests, refer to the SDK integration tests on https://github.com/redpanda-data/redpanda/tree/dev/src/transform-sdk/tests[GitHub]. + +// end::single-source[] \ No newline at end of file diff --git a/modules/develop/partials/data-transforms-ga-notice.adoc b/modules/develop/partials/data-transforms-ga-notice.adoc deleted file mode 100644 index a5b0ce3c05..0000000000 --- a/modules/develop/partials/data-transforms-ga-notice.adoc +++ /dev/null @@ -1 +0,0 @@ -TIP: Data transforms is generally available for all Redpanda Community and Redpanda Enterprise Edition users. To unlock this feature in Redpanda Cloud, contact https://support.redpanda.com/hc/en-us/requests/new[Redpanda support^]. \ No newline at end of file From fcdadc478dcd055aa7195c7c7261c88f85c0a26c Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Wed, 2 Apr 2025 14:08:46 -0600 Subject: [PATCH 09/43] single source transforms SDK reference --- modules/console/pages/ui/data-transforms.adoc | 5 ++++- modules/develop/pages/data-transforms/run-transforms.adoc | 5 ++++- modules/reference/pages/data-transforms/golang-sdk.adoc | 6 +++++- modules/reference/pages/data-transforms/js/js-sdk-sr.adoc | 5 ++++- modules/reference/pages/data-transforms/js/js-sdk.adoc | 5 ++++- modules/reference/pages/data-transforms/rust-sdk.adoc | 3 +++ 6 files changed, 24 insertions(+), 5 deletions(-) diff --git a/modules/console/pages/ui/data-transforms.adoc b/modules/console/pages/ui/data-transforms.adoc index a8e578694c..bf352d0ac3 100644 --- a/modules/console/pages/ui/data-transforms.adoc +++ b/modules/console/pages/ui/data-transforms.adoc @@ -1,5 +1,6 @@ = Manage Data Transforms in {ui} :description: You can use {ui} to monitor the status and 
performance metrics of your transform functions. You can also view detailed logs and delete transform functions when they are no longer needed. +// tag::single-source[] {description} @@ -50,4 +51,6 @@ Deleting a transform function will remove it from the cluster and stop any furth - xref:develop:data-transforms/how-transforms-work.adoc[] - xref:develop:data-transforms/deploy.adoc[] -- xref:develop:data-transforms/monitor.adoc[] \ No newline at end of file +- xref:develop:data-transforms/monitor.adoc[] + +// end::single-source[] \ No newline at end of file diff --git a/modules/develop/pages/data-transforms/run-transforms.adoc b/modules/develop/pages/data-transforms/run-transforms.adoc index fb53b05d5f..d9f3ca346a 100644 --- a/modules/develop/pages/data-transforms/run-transforms.adoc +++ b/modules/develop/pages/data-transforms/run-transforms.adoc @@ -2,5 +2,8 @@ :description: Learn how to build and deploy your first transform function in Linux deployments. :page-context-links: [{"name": "Linux", "to": "develop:data-transforms/run-transforms.adoc" },{"name": "Kubernetes", "to": "develop:data-transforms/k-run-transforms.adoc" } ] :page-categories: Development, Stream Processing, Data Transforms +// tag::single-source[] -include::develop:partial$run-transforms.adoc[] \ No newline at end of file +include::develop:partial$run-transforms.adoc[] + +// end::single-source[] \ No newline at end of file diff --git a/modules/reference/pages/data-transforms/golang-sdk.adoc b/modules/reference/pages/data-transforms/golang-sdk.adoc index 6b58636335..fb70e438b6 100644 --- a/modules/reference/pages/data-transforms/golang-sdk.adoc +++ b/modules/reference/pages/data-transforms/golang-sdk.adoc @@ -2,6 +2,7 @@ :description: Work with data transform APIs in Redpanda using Go. 
:page-aliases: labs:data-transform/data-transform-api.adoc, reference:data-transform-api.adoc, reference:data-transform-golang-sdk.adoc :page-categories: Development, Stream Processing, Data Transforms +// tag::single-source[] The API reference is in the Go package documentation: @@ -9,7 +10,10 @@ The API reference is in the Go package documentation: - https://pkg.go.dev/github.com/redpanda-data/redpanda/src/transform-sdk/go/transform/sr[Schema Registry client library]: This library provides data transforms with access to the Schema Registry built into Redpanda. +// end::single-source[] + == Suggested reading - xref:develop:data-transforms/versioning-compatibility.adoc[] -- xref:develop:data-transforms/upgrade.adoc[] \ No newline at end of file +- xref:develop:data-transforms/upgrade.adoc[] + diff --git a/modules/reference/pages/data-transforms/js/js-sdk-sr.adoc b/modules/reference/pages/data-transforms/js/js-sdk-sr.adoc index 6010652f37..bfed92c695 100644 --- a/modules/reference/pages/data-transforms/js/js-sdk-sr.adoc +++ b/modules/reference/pages/data-transforms/js/js-sdk-sr.adoc @@ -1,5 +1,6 @@ = JavaScript Schema Registry API for Data Transforms :description: Work with Schema Registry in data transforms using JavaScript. +// tag::single-source[] This page contains the API reference for the Schema Registry client library of the data transforms JavaScript SDK. @@ -129,4 +130,6 @@ Client interface for interacting with Redpanda Schema Registry. 
== Suggested reading -xref:reference:data-transforms/js/js-sdk.adoc[] \ No newline at end of file +xref:reference:data-transforms/js/js-sdk.adoc[] + +// end::single-source[] \ No newline at end of file diff --git a/modules/reference/pages/data-transforms/js/js-sdk.adoc b/modules/reference/pages/data-transforms/js/js-sdk.adoc index 148a48e273..6232d05e5e 100644 --- a/modules/reference/pages/data-transforms/js/js-sdk.adoc +++ b/modules/reference/pages/data-transforms/js/js-sdk.adoc @@ -1,6 +1,7 @@ = JavaScript API for Data Transforms :description: Work with data transforms using JavaScript. :page-aliases: reference:data-transforms/js-sdk.adoc +// tag::single-source[] This page contains the API reference for the data transforms client library of the JavaScript SDK. @@ -134,4 +135,6 @@ Records may have a collection of headers attached to them. Headers are opaque to == Suggested reading -xref:reference:data-transforms/js/js-sdk-sr.adoc[] \ No newline at end of file +xref:reference:data-transforms/js/js-sdk-sr.adoc[] + +// end::single-source[] \ No newline at end of file diff --git a/modules/reference/pages/data-transforms/rust-sdk.adoc b/modules/reference/pages/data-transforms/rust-sdk.adoc index 3dcccfc1de..160e258be6 100644 --- a/modules/reference/pages/data-transforms/rust-sdk.adoc +++ b/modules/reference/pages/data-transforms/rust-sdk.adoc @@ -1,6 +1,7 @@ = Rust SDK for Data Transforms :description: Work with data transforms using Rust. :page-aliases: reference:data-transform-rust-sdk.adoc +// tag::single-source[] The API reference is in the crate documentation: @@ -8,6 +9,8 @@ The API reference is in the crate documentation: - https://docs.rs/redpanda-transform-sdk-sr/latest/redpanda_transform_sdk_sr/[Schema Registry client library]: This crate provides data transforms with access to the Schema Registry built into Redpanda. 
+// end::single-source[] + == Suggested reading - xref:develop:data-transforms/versioning-compatibility.adoc[] From 549aa20d073ee706e63868445ef079b5d350edca Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Wed, 2 Apr 2025 15:20:58 -0600 Subject: [PATCH 10/43] fix links --- modules/develop/pages/data-transforms/build.adoc | 6 ++---- .../develop/pages/data-transforms/how-transforms-work.adoc | 4 ++-- .../develop/pages/data-transforms/run-transforms-index.adoc | 5 ++++- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/modules/develop/pages/data-transforms/build.adoc b/modules/develop/pages/data-transforms/build.adoc index 58813b5f19..53a5311965 100644 --- a/modules/develop/pages/data-transforms/build.adoc +++ b/modules/develop/pages/data-transforms/build.adoc @@ -52,9 +52,7 @@ For example, if you choose `tinygo-no-goroutines`, the following project files a The `transform.go` file contains a boilerplate transform function. The `transform.yaml` file specifies the configuration settings for the transform function. 
-See also: - -- xref:develop:data-transforms/configure.adoc[] +See also: xref:develop:data-transforms/configure.adoc[] == Build transform functions @@ -290,7 +288,7 @@ See also: - xref:develop:data-transforms/monitor#logs[View logs for transform functions] - xref:develop:data-transforms/monitor.adoc[Monitor data transforms] - xref:develop:data-transforms/configure.adoc#log[Configure transform logging] -- xref:reference:rpk/rpk-transform/rpk-transform-logs.adoc[] +- xref:reference:rpk/rpk-transform/rpk-transform-logs.adoc[`rpk transform logs` reference] === Avoid state management diff --git a/modules/develop/pages/data-transforms/how-transforms-work.adoc b/modules/develop/pages/data-transforms/how-transforms-work.adoc index a27798f07c..833f489814 100644 --- a/modules/develop/pages/data-transforms/how-transforms-work.adoc +++ b/modules/develop/pages/data-transforms/how-transforms-work.adoc @@ -74,8 +74,8 @@ This section outlines the limitations of data transforms. These constraints are == Suggested reading -- xref:reference:data-transform-golang-sdk.adoc[] -- xref:reference:data-transform-rust-sdk.adoc[] +- xref:reference:data-transforms/golang-sdk.adoc[] +- xref:reference:data-transforms/rust-sdk.adoc[] - xref:reference:rpk/rpk-transform/rpk-transform.adoc[`rpk transform` commands] // end::single-source[] \ No newline at end of file diff --git a/modules/develop/pages/data-transforms/run-transforms-index.adoc b/modules/develop/pages/data-transforms/run-transforms-index.adoc index e19100c752..19c8f12578 100644 --- a/modules/develop/pages/data-transforms/run-transforms-index.adoc +++ b/modules/develop/pages/data-transforms/run-transforms-index.adoc @@ -2,4 +2,7 @@ :description: Choose your deployment environment to get started with building and deploying your first transform function in Redpanda. 
:page-aliases: reference:rpk/rpk-wasm/rpk-wasm.adoc, reference:rpk/rpk-wasm.adoc, reference:rpk/rpk-wasm/rpk-wasm-deploy.adoc, reference:rpk/rpk-wasm/rpk-wasm-generate.adoc, reference:rpk/rpk-wasm/rpk-wasm-remove.adoc, data-management:data-transform.adoc, labs:data-transform/index.adoc :page-layout: index -:page-categories: Development, Stream Processing, Data Transforms \ No newline at end of file +:page-categories: Development, Stream Processing, Data Transforms +// tag::single-source[] + +// end::single-source[] \ No newline at end of file From 3639348713068032c633c8d507972c7444b815b9 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Wed, 2 Apr 2025 16:38:35 -0600 Subject: [PATCH 11/43] tag cluster properties in cloud --- .../pages/properties/cluster-properties.adoc | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/modules/reference/pages/properties/cluster-properties.adoc b/modules/reference/pages/properties/cluster-properties.adoc index a83ef31dc7..ea6f1bb999 100644 --- a/modules/reference/pages/properties/cluster-properties.adoc +++ b/modules/reference/pages/properties/cluster-properties.adoc @@ -138,6 +138,7 @@ Defines the number of bytes allocated by the internal audit client for audit mes --- +// tag::audit_enabled[] === audit_enabled include::reference:partial$enterprise-licensed-property.adoc[] @@ -155,7 +156,9 @@ Enables or disables audit logging. When you set this to true, Redpanda checks fo *Default:* `false` --- +// end::audit_enabled[] +// tag::audit_enabled_event_types[] === audit_enabled_event_types List of strings in JSON style identifying the event types to include in the audit log. This may include any of the following: `management, produce, consume, describe, heartbeat, authenticate, schema_registry, admin`. 
@@ -169,7 +172,9 @@ List of strings in JSON style identifying the event types to include in the audi *Default:* `[management, authenticate, admin]` --- +// end::audit_enabled_event_types[] +// tag::audit_excluded_principals[] === audit_excluded_principals List of user principals to exclude from auditing. @@ -183,6 +188,7 @@ List of user principals to exclude from auditing. *Default:* `null` --- +// end::audit_excluded_principals[] === audit_excluded_topics @@ -264,6 +270,7 @@ Defines the maximum amount of memory in bytes used by the audit buffer in each s --- +// tag::auto_create_topics_enabled[] === auto_create_topics_enabled Allow automatic topic creation. To prevent excess topics, this property is not supported on Redpanda Cloud BYOC and Dedicated clusters. You should explicitly manage topic creation for these Redpanda Cloud clusters. @@ -279,6 +286,7 @@ If you produce to a topic that doesn't exist, the topic will be created with def *Default:* `false` --- +// end::auto_create_topics_enabled[] === cluster_id @@ -604,6 +612,7 @@ Timeout, in milliseconds, to wait for new topic creation. --- +// tag::data_transforms_binary_max_size[] === data_transforms_binary_max_size The maximum size for a deployable WebAssembly binary that the broker can store. @@ -617,7 +626,9 @@ The maximum size for a deployable WebAssembly binary that the broker can store. *Default:* `10485760` --- +// end::data_transforms_binary_max_size[] +// tag::data_transforms_commit_interval_ms[] === data_transforms_commit_interval_ms The commit interval at which data transforms progress. @@ -635,7 +646,9 @@ The commit interval at which data transforms progress. *Default:* `3000` --- +// end::data_transforms_commit_interval_ms[] +// tag::data_transforms_enabled[] === data_transforms_enabled Enables WebAssembly-powered data transforms directly in the broker. 
When `data_transforms_enabled` is set to `true`, Redpanda reserves memory for data transforms, even if no transform functions are currently deployed. This memory reservation ensures that adequate resources are available for transform functions when they are needed, but it also means that some memory is allocated regardless of usage. @@ -649,6 +662,8 @@ Enables WebAssembly-powered data transforms directly in the broker. When `data_t *Default:* `false` --- +// end::data_transforms_enabled[] + === data_transforms_logging_buffer_capacity_bytes @@ -684,6 +699,7 @@ Flush interval for transform logs. When a timer expires, pending logs are collec --- +// tag::data_transforms_logging_line_max_bytes[] === data_transforms_logging_line_max_bytes Transform log lines truncate to this length. Truncation occurs after any character escaping. @@ -699,6 +715,7 @@ Transform log lines truncate to this length. Truncation occurs after any charact *Default:* `1024` --- +// end::data_transforms_logging_line_max_bytes[] === data_transforms_per_core_memory_reservation @@ -714,6 +731,7 @@ The amount of memory to reserve per core for data transform (Wasm) virtual machi --- +// tag::data_transforms_per_function_memory_limit[] === data_transforms_per_function_memory_limit The amount of memory to give an instance of a data transform (Wasm) virtual machine. The maximum number of functions that can be deployed to a cluster is equal to `data_transforms_per_core_memory_reservation` / `data_transforms_per_function_memory_limit`. @@ -727,6 +745,7 @@ The amount of memory to give an instance of a data transform (Wasm) virtual mach *Default:* `2097152` --- +// end::data_transforms_per_function_memory_limit[] === data_transforms_read_buffer_memory_percentage @@ -884,6 +903,7 @@ Default number of quota tracking windows. --- +// tag::default_topic_partitions[] === default_topic_partitions Default number of partitions per topic. @@ -901,6 +921,7 @@ Default number of partitions per topic. 
*Default:* `1` --- +// end::default_topic_partitions[] === default_topic_replications @@ -982,6 +1003,7 @@ Disables the cluster recovery loop. --- +// tag::disable_metrics[] === disable_metrics Disable registering the metrics exposed on the internal `/metrics` endpoint. @@ -995,7 +1017,9 @@ Disable registering the metrics exposed on the internal `/metrics` endpoint. *Default:* `false` --- +// end::disable_metrics[] +// tag::disable_public_metrics[] === disable_public_metrics Disable registering the metrics exposed on the `/public_metrics` endpoint. @@ -1009,6 +1033,7 @@ Disable registering the metrics exposed on the `/public_metrics` endpoint. *Default:* `false` --- +// end::disable_public_metrics[] === disk_reservation_percent @@ -1057,6 +1082,7 @@ Limits the write rate for the controller log. --- +// tag::enable_idempotence[] === enable_idempotence Enable idempotent producers. @@ -1070,6 +1096,7 @@ Enable idempotent producers. *Default:* `true` --- +// end::enable_idempotence[] === enable_leader_balancer @@ -1163,6 +1190,7 @@ Enable SASL authentication for Kafka connections. Authorization is required to m --- +// tag::enable_schema_id_validation[] === enable_schema_id_validation include::reference:partial$enterprise-licensed-property.adoc[] @@ -1188,7 +1216,9 @@ Mode to enable server-side schema ID validation. *Default:* `none` --- +// end::enable_schema_id_validation[] +// tag::enable_transactions[] === enable_transactions Enable transactions (atomic writes). @@ -1202,6 +1232,7 @@ Enable transactions (atomic writes). *Default:* `true` --- +// end::enable_transactions[] === enable_usage @@ -1231,6 +1262,7 @@ Whether new feature flags auto-activate after upgrades (true) or must wait for m --- +// tag::fetch_max_bytes[] === fetch_max_bytes Maximum number of bytes returned in a fetch request. @@ -1246,6 +1278,7 @@ Maximum number of bytes returned in a fetch request. 
*Default:* `57671680` --- +// end::fetch_max_bytes[] === fetch_pid_d_coeff @@ -1275,6 +1308,7 @@ Integral coefficient for fetch PID controller. --- +// tag::fetch_pid_max_debounce_ms[] === fetch_pid_max_debounce_ms The maximum debounce time the fetch PID controller will apply, in milliseconds. @@ -1292,6 +1326,7 @@ The maximum debounce time the fetch PID controller will apply, in milliseconds. *Default:* `100` --- +// end::fetch_pid_max_debounce_ms[] === fetch_pid_p_coeff @@ -1323,6 +1358,7 @@ A fraction, between 0 and 1, for the target reactor utilization of the fetch sch --- +// tag::fetch_read_strategy[] === fetch_read_strategy The strategy used to fulfill fetch requests. @@ -1342,7 +1378,9 @@ The strategy used to fulfill fetch requests. *Default:* `non_polling` --- +// end::fetch_read_strategy[] +// tag::fetch_reads_debounce_timeout[] === fetch_reads_debounce_timeout Time to wait for the next read in fetch requests when the requested minimum bytes was not reached. @@ -1360,6 +1398,7 @@ Time to wait for the next read in fetch requests when the requested minimum byte *Default:* `1` --- +// end::fetch_reads_debounce_timeout[] === fetch_session_eviction_timeout_ms @@ -1379,6 +1418,7 @@ Time duration after which the inactive fetch session is removed from the fetch s --- +// tag::group_initial_rebalance_delay[] === group_initial_rebalance_delay Delay added to the rebalance phase to wait for new members. @@ -1396,7 +1436,9 @@ Delay added to the rebalance phase to wait for new members. *Default:* `3000` --- +// end::group_initial_rebalance_delay[] +// tag::group_max_session_timeout_ms[] === group_max_session_timeout_ms The maximum allowed session timeout for registered consumers. Longer timeouts give consumers more time to process messages in between heartbeats at the cost of a longer time to detect failures. @@ -1414,7 +1456,9 @@ The maximum allowed session timeout for registered consumers. 
Longer timeouts gi *Default:* `300000` --- +// end::group_max_session_timeout_ms[] +// tag::group_min_session_timeout_ms[] === group_min_session_timeout_ms The minimum allowed session timeout for registered consumers. Shorter timeouts result in quicker failure detection at the cost of more frequent consumer heartbeating, which can overwhelm broker resources. @@ -1432,7 +1476,9 @@ The minimum allowed session timeout for registered consumers. Shorter timeouts r *Default:* `6000` --- +// end::group_min_session_timeout_ms[] +// tag::group_new_member_join_timeout[] === group_new_member_join_timeout Timeout for new member joins. @@ -1450,7 +1496,9 @@ Timeout for new member joins. *Default:* `30000` --- +// end::group_new_member_join_timeout[] +// tag::group_offset_retention_check_ms[] === group_offset_retention_check_ms Frequency rate at which the system should check for expired group offsets. @@ -1468,7 +1516,9 @@ Frequency rate at which the system should check for expired group offsets. *Default:* `600000` (10min) --- +// end::group_offset_retention_check_ms[] +// tag::group_offset_retention_sec[] === group_offset_retention_sec Consumer group offset retention seconds. To disable offset retention, set this to null. @@ -1486,7 +1536,9 @@ Consumer group offset retention seconds. To disable offset retention, set this t *Default:* `604800` (one week) --- +// end::group_offset_retention_sec[] +// tag::group_topic_partitions[] === group_topic_partitions Number of partitions in the internal group membership topic. @@ -1504,6 +1556,7 @@ Number of partitions in the internal group membership topic. *Default:* `16` --- +// end::group_topic_partitions[] === health_manager_tick_interval @@ -1574,6 +1627,7 @@ Base path for the object storage backed Iceberg catalog. After Iceberg is enable --- +// tag::iceberg_catalog_commit_interval_ms[] === iceberg_catalog_commit_interval_ms The frequency at which the Iceberg coordinator commits topic files to the catalog. 
This is the interval between commit transactions across all topics monitored by the coordinator, not the interval between individual commits. @@ -1591,7 +1645,9 @@ The frequency at which the Iceberg coordinator commits topic files to the catalo *Default:* `60000` --- +// end::iceberg_catalog_commit_interval_ms[] +// tag::iceberg_catalog_type[] === iceberg_catalog_type Iceberg catalog type that Redpanda will use to commit table metadata updates. Supported types: 'rest', 'object_storage'. @@ -1605,7 +1661,9 @@ Iceberg catalog type that Redpanda will use to commit table metadata updates. Su *Default:* `object_storage` --- +// end::iceberg_catalog_type[] +// tag::iceberg_delete[] === iceberg_delete Default value for the `redpanda.iceberg.delete` topic property that determines if the corresponding Iceberg table is deleted upon deleting the topic. @@ -1619,7 +1677,9 @@ Default value for the `redpanda.iceberg.delete` topic property that determines i *Default:* `true` --- +// end::iceberg_delete[] +// tag::iceberg_enabled[] === iceberg_enabled Enables the translation of topic data into Iceberg tables. Setting `iceberg_enabled` to `true` activates the feature at the cluster level, but each topic must also set the `redpanda.iceberg.enabled` topic-level property to `true` to use it. If `iceberg_enabled` is set to `false`, then the feature is disabled for all topics in the cluster, overriding any topic-level settings. @@ -1633,7 +1693,9 @@ Enables the translation of topic data into Iceberg tables. Setting `iceberg_enab *Default:* `false` --- +// end::iceberg_enabled[] +// tag::iceberg_rest_catalog_client_id[] === iceberg_rest_catalog_client_id Iceberg REST catalog user ID. This ID is used to query the catalog API for the OAuth token. Required if catalog type is set to `rest`. @@ -1647,7 +1709,9 @@ Iceberg REST catalog user ID. 
This ID is used to query the catalog API for the O *Default:* `null` --- +// end::iceberg_rest_catalog_client_id[] +// tag::iceberg_rest_catalog_client_secret[] === iceberg_rest_catalog_client_secret Secret to authenticate against Iceberg REST catalog. Required if catalog type is set to `rest`. @@ -1662,6 +1726,7 @@ Secret to authenticate against Iceberg REST catalog. Required if catalog type is --- + === iceberg_rest_catalog_crl_file Path to certificate revocation list for `iceberg_rest_catalog_trust_file`. @@ -1675,7 +1740,9 @@ Path to certificate revocation list for `iceberg_rest_catalog_trust_file`. *Default:* `null` --- +// end::iceberg_rest_catalog_client_secret[] +// tag::iceberg_rest_catalog_endpoint[] === iceberg_rest_catalog_endpoint URL of Iceberg REST catalog endpoint. @@ -1689,7 +1756,9 @@ URL of Iceberg REST catalog endpoint. *Default:* `null` --- +// end::iceberg_rest_catalog_endpoint +// tag::iceberg_rest_catalog_prefix[] === iceberg_rest_catalog_prefix Prefix part of the Iceberg REST catalog URL. Prefix is appended to the catalog path, for example `/v1/\{prefix}/namespaces`. @@ -1703,7 +1772,9 @@ Prefix part of the Iceberg REST catalog URL. Prefix is appended to the catalog p *Default:* `null` --- +// end::iceberg_rest_catalog_prefix[] +// tag::iceberg_rest_catalog_request_timeout_ms[] === iceberg_rest_catalog_request_timeout_ms Maximum length of time that Redpanda waits for a response from the REST catalog before aborting the request. @@ -1721,7 +1792,9 @@ Maximum length of time that Redpanda waits for a response from the REST catalog *Default:* `10000` --- +// end::iceberg_rest_catalog_request_timeout_ms[] +// tag::iceberg_rest_catalog_token[] === iceberg_rest_catalog_token Token used to access the REST Iceberg catalog. If the token is present, Redpanda ignores credentials stored in the properties <> and <>. @@ -1735,6 +1808,7 @@ Token used to access the REST Iceberg catalog. 
If the token is present, Redpanda *Default:* `null` --- +// end::iceberg_rest_catalog_token[] === iceberg_rest_catalog_trust_file @@ -1867,6 +1941,7 @@ Target quota rate (partition mutations per default_window_sec). --- +// tag::kafka_batch_max_bytes[] === kafka_batch_max_bytes Maximum size of a batch processed by the server. If the batch is compressed, the limit applies to the compressed batch size. @@ -1884,6 +1959,7 @@ Maximum size of a batch processed by the server. If the batch is compressed, the *Default:* `1048576` --- +// end::kafka_batch_max_bytes[] === kafka_client_group_byte_rate_quota @@ -2074,6 +2150,7 @@ Kafka group recovery timeout. --- +// tag::kafka_max_bytes_per_fetch[] === kafka_max_bytes_per_fetch Limit fetch responses to this many bytes, even if the total of partition bytes limits is higher. @@ -2087,6 +2164,7 @@ Limit fetch responses to this many bytes, even if the total of partition bytes l *Default:* `67108864` --- +// end::kafka_max_bytes_per_fetch[] === kafka_memory_batch_size_estimate_for_fetch @@ -2195,6 +2273,7 @@ Update frequency for Kafka queue depth control. --- +// tag::kafka_qdc_enable[] === kafka_qdc_enable Enable Kafka queue depth control. @@ -2208,6 +2287,7 @@ Enable Kafka queue depth control. *Default:* `false` --- +// end::kafka_qdc_enable[] === kafka_qdc_idle_depth @@ -2412,6 +2492,7 @@ Time window used to average current throughput measurement for quota balancer, i --- +// tag::kafka_request_max_bytes[] === kafka_request_max_bytes Maximum size of a single request processed using the Kafka API. @@ -2429,6 +2510,7 @@ Maximum size of a single request processed using the Kafka API. *Default:* `104857600` --- +// end::kafka_request_max_bytes[] === kafka_rpc_server_stream_recv_buf @@ -2516,6 +2598,7 @@ Per-shard capacity of the cache for validating schema IDs. --- +// tag::kafka_tcp_keepalive_timeout[] === kafka_tcp_keepalive_timeout TCP keepalive idle timeout in seconds for Kafka connections. 
This describes the timeout between TCP keepalive probes that the remote site successfully acknowledged. Refers to the TCP_KEEPIDLE socket option. When changed, applies to new connections only. @@ -2533,6 +2616,7 @@ TCP keepalive idle timeout in seconds for Kafka connections. This describes the *Default:* `120` --- +// end::kafka_tcp_keepalive_timeout[] === kafka_tcp_keepalive_probe_interval_seconds @@ -2847,6 +2931,7 @@ Period at which to log a warning about using unsafe strings containing control c --- +// tag::log_cleanup_policy[] === log_cleanup_policy Default cleanup policy for topic logs. @@ -2862,6 +2947,7 @@ The topic property xref:./topic-properties.adoc#cleanuppolicy[`cleanup.policy`] *Default:* `delete` --- +// end::log_cleanup_policy[] === log_compaction_interval_ms @@ -2977,6 +3063,7 @@ The topic property xref:./topic-properties.adoc#messagetimestamptype[`message.ti --- +// tag::log_retention_ms[] === log_retention_ms The amount of time to keep a log file before deleting it (in milliseconds). If set to `-1`, no time limit is applied. This is a cluster-wide default when a topic does not set or disable xref:./topic-properties.adoc#retentionms[`retention.ms`]. @@ -2992,7 +3079,9 @@ The amount of time to keep a log file before deleting it (in milliseconds). If s *Default:* `604800000` (one week) --- +// end::log_retention_ms[] +// tag::log_segment_ms[] === log_segment_ms Default lifetime of log segments. If `null`, the property is disabled, and no default lifetime is set. Any value under 60 seconds (60000 ms) is rejected. This property can also be set in the Kafka API using the Kafka-compatible alias, `log.roll.ms`. @@ -3017,6 +3106,7 @@ The topic property xref:./topic-properties.adoc#segmentms[`segment.ms`] override * <> --- +// end::log_segment_ms[] === log_segment_ms_max @@ -3054,6 +3144,7 @@ Lower bound on topic `segment.ms`: lower values will be clamped to this value. 
--- +// tag::log_segment_size[] === log_segment_size Default log segment size in bytes for topics which do not set `segment.bytes`. @@ -3069,6 +3160,7 @@ Default log segment size in bytes for topics which do not set `segment.bytes`. *Default:* `134217728` --- +// end::log_segment_size[] === log_segment_size_jitter_percent @@ -3886,6 +3978,7 @@ Raft I/O timeout. --- +// tag::raft_learner_recovery_rate[] === raft_learner_recovery_rate Raft learner recovery rate limit. Throttles the rate of data communicated to nodes (learners) that need to catch up to leaders. This rate limit is placed on a node sending data to a recovering node. Each sending node is limited to this rate. The recovering node accepts data as fast as possible according to the combined limits of all healthy nodes in the cluster. For example, if two nodes are sending data to the recovering node, and `raft_learner_recovery_rate` is 100 MB/sec, then the recovering node will recover at a rate of 200 MB/sec. @@ -3899,6 +3992,7 @@ Raft learner recovery rate limit. Throttles the rate of data communicated to nod *Default:* `104857600` --- +// end::raft_learner_recovery_rate[] === raft_max_concurrent_append_requests_per_follower @@ -4235,6 +4329,7 @@ Timeout for append entry requests issued while replicating entries. --- +// tag::retention_bytes[] === retention_bytes Default maximum number of bytes per partition on disk before triggering deletion of the oldest messages. If `null` (the default value), no limit is applied. @@ -4252,6 +4347,7 @@ The topic property xref:./topic-properties.adoc#retentionbytes[`retention.bytes` *Default:* `null` --- +// end::retention_bytes[] === retention_local_strict @@ -4281,6 +4377,7 @@ Trim log data when a cloud topic reaches its local retention limit. When this op --- +// tag::retention_local_target_bytes_default[] === retention_local_target_bytes_default Local retention size target for partitions of topics with object storage write enabled. If `null`, the property is disabled. 
@@ -4304,6 +4401,7 @@ NOTE: Both `retention_local_target_bytes_default` and `retention_local_target_ms *Default:* `null` --- +// end::retention_local_target_bytes_default[] === retention_local_target_capacity_bytes @@ -4343,6 +4441,7 @@ NOTE: Redpanda Data recommends setting only one of <>. @@ -5215,7 +5322,9 @@ Delete segments older than this age. To ensure transaction state is retained as *Default:* `604800000` (10080min) --- +// end::transaction_coordinator_delete_retention_ms[] +// tag::transaction_coordinator_log_segment_size[] === transaction_coordinator_log_segment_size The size (in bytes) each log segment should be. @@ -5231,7 +5340,9 @@ The size (in bytes) each log segment should be. *Default:* `1073741824` (1Gb) --- +// end::transaction_coordinator_log_segment_size[] +// tag::transaction_coordinator_partitions[] === transaction_coordinator_partitions Number of partitions for transactions coordinator. @@ -5249,7 +5360,9 @@ Number of partitions for transactions coordinator. *Default:* `50` --- +// end::transaction_coordinator_partitions[] +// tag::transaction_max_timeout_ms[] === transaction_max_timeout_ms The maximum allowed timeout for transactions. If a client-requested transaction timeout exceeds this configuration, the broker returns an error during transactional producer initialization. This guardrail prevents hanging transactions from blocking consumer progress. @@ -5267,7 +5380,9 @@ The maximum allowed timeout for transactions. If a client-requested transaction *Default:* `900000` --- +// end::transaction_max_timeout_ms[] +// tag::transactional_id_expiration_ms[] === transactional_id_expiration_ms Expiration time of producer IDs. Measured starting from the time of the last write until now for a given ID. @@ -5285,6 +5400,7 @@ Expiration time of producer IDs. 
Measured starting from the time of the last wri *Default:* `604800000` (10080min) --- +// end::transactional_id_expiration_ms[] === tx_timeout_delay_ms @@ -5416,6 +5532,7 @@ Timeout to wait for leadership in metadata cache. --- +// tag::write_caching_default[] === write_caching_default The default write caching mode to apply to user topics. Write caching acknowledges a message as soon as it is received and acknowledged on a majority of brokers, without waiting for it to be written to disk. With `acks=all`, this provides lower latency while still ensuring that a majority of brokers acknowledge the write. Fsyncs follow <> and <>, whichever is reached first. @@ -5439,7 +5556,9 @@ The `write_caching_default` cluster property can be overridden with the xref:top * xref:develop:config-topics.adoc#configure-write-caching[Write caching] --- +// end::write_caching_default[] +// tag::zstd_decompress_workspace_bytes[] === zstd_decompress_workspace_bytes Size of the zstd decompression workspace. @@ -5455,4 +5574,5 @@ Size of the zstd decompression workspace. 
*Default:* `8388608` --- +// end::zstd_decompress_workspace_bytes[] From c503200cf4d8f2ad480c0faa1720986c50950610 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Wed, 2 Apr 2025 21:40:43 -0600 Subject: [PATCH 12/43] add SS + conditionalizing --- modules/console/pages/ui/data-transforms.adoc | 4 ++-- .../develop/pages/data-transforms/configure.adoc | 11 ++++++++--- modules/develop/pages/data-transforms/deploy.adoc | 13 ++++++++++++- modules/develop/pages/data-transforms/monitor.adoc | 13 ++++++++++++- .../pages/properties/cluster-properties.adoc | 4 ++++ 5 files changed, 38 insertions(+), 7 deletions(-) diff --git a/modules/console/pages/ui/data-transforms.adoc b/modules/console/pages/ui/data-transforms.adoc index bf352d0ac3..d014757176 100644 --- a/modules/console/pages/ui/data-transforms.adoc +++ b/modules/console/pages/ui/data-transforms.adoc @@ -1,5 +1,5 @@ = Manage Data Transforms in {ui} -:description: You can use {ui} to monitor the status and performance metrics of your transform functions. You can also view detailed logs and delete transform functions when they are no longer needed. +:description: Use {ui} to monitor the status and performance metrics of your transform functions. You can also view detailed logs and delete transform functions when they are no longer needed. // tag::single-source[] {description} @@ -19,7 +19,7 @@ Before you begin, ensure that you have the following: To monitor transform functions: . Navigate to the *Transforms* menu. -. Click on the name of a transform function to view detailed information: +. 
Click the name of a transform function to view detailed information: - The partitions that the function is running on - The broker (node) ID - Any lag (the amount of pending records on the input topic that have yet to be processed by the transform) diff --git a/modules/develop/pages/data-transforms/configure.adoc b/modules/develop/pages/data-transforms/configure.adoc index 03d632a1df..9efefef910 100644 --- a/modules/develop/pages/data-transforms/configure.adoc +++ b/modules/develop/pages/data-transforms/configure.adoc @@ -63,21 +63,26 @@ This section covers how to configure the Wasm engine environment using Redpanda To use data transforms, you must enable it for a Redpanda cluster using the xref:reference:properties/cluster-properties.adoc#data_transforms_enabled[`data_transforms_enabled`] property. -ifndef::env-cloud[] + [[resources]] === Configure memory resources for data transforms Redpanda reserves memory for each transform function within the broker. You need enough memory for your input record and output record to be in memory at the same time. -Set the following properties based on the number of functions you have and the amount of memory you anticipate needing. +Set the following based on the number of functions you have and the amount of memory you anticipate needing. +ifndef::env-cloud[] - xref:reference:properties/cluster-properties.adoc#data_transforms_per_core_memory_reservation[`data_transforms_per_core_memory_reservation`]: Increase this setting if you plan to deploy a large number of data transforms or if your transforms are memory-intensive. Reducing it may limit the number of concurrent transforms. - +endif::[] - xref:reference:properties/cluster-properties.adoc#data_transforms_per_function_memory_limit[`data_transforms_per_function_memory_limit`]: Adjust this setting if individual transform functions require more memory to process records efficiently. Reducing it may cause memory errors in complex transforms. 
+endif::[] + +ifndef::env-cloud[] The maximum number of functions that can be deployed to a cluster is equal to `data_transforms_per_core_memory_reservation` / `data_transforms_per_function_memory_limit`. When that limit is hit, Redpanda cannot allocate memory for the VM and the transforms stay in `errored` states. endif::[] + [[binary-size]] === Configure maximum binary size diff --git a/modules/develop/pages/data-transforms/deploy.adoc b/modules/develop/pages/data-transforms/deploy.adoc index 77f4b775bc..9ca1d8405a 100644 --- a/modules/develop/pages/data-transforms/deploy.adoc +++ b/modules/develop/pages/data-transforms/deploy.adoc @@ -11,7 +11,12 @@ Before you begin, ensure that you have the following: - xref:develop:data-transforms/configure.adoc#enable-transforms[Data transforms enabled] in your Redpanda cluster. +ifndef::env-cloud[] - The xref:get-started:rpk-install.adoc[`rpk` command-line client] installed on your host machine and configured to connect to your Redpanda cluster. +endif::[] +ifdef::env-cloud[] +- The xref:manage:rpk/rpk-install.adoc[`rpk` command-line client]. +endif::[] - A xref:develop:data-transforms/build.adoc[data transform] project. [[build]] @@ -121,7 +126,14 @@ rpk transform delete For more details about this command, see xref:reference:rpk/rpk-transform/rpk-transform-delete.adoc[]. +ifndef::env-cloud[] TIP: You can also xref:console:ui/data-transforms.adoc#delete[delete transform functions in Redpanda Console]. +endif::[] + +ifdef::env-cloud[] +TIP: You can also xref:develop:data-transforms.adoc#delete[delete transform functions in Redpanda Console]. +endif::[] + == Troubleshoot @@ -145,7 +157,6 @@ Invalid WebAssembly - the binary is missing required transform functions. Check All transform functions must register a callback with the `OnRecordWritten()` method. For more details, see xref:develop:data-transforms/build.adoc[]. == Next steps - xref:develop:data-transforms/monitor.adoc[Set up monitoring] for data transforms. 
// end::single-source[] \ No newline at end of file diff --git a/modules/develop/pages/data-transforms/monitor.adoc b/modules/develop/pages/data-transforms/monitor.adoc index b0a3363d32..520a4019c2 100644 --- a/modules/develop/pages/data-transforms/monitor.adoc +++ b/modules/develop/pages/data-transforms/monitor.adoc @@ -46,7 +46,13 @@ If memory usage is consistently high or exceeds the maximum allocated memory: - Review and optimize your transform functions to reduce memory consumption. This step can involve optimizing data structures, reducing memory allocations, and ensuring efficient handling of records. +ifndef::env-cloud[] - Consider increasing the allocated memory for the Wasm engine. Adjust the xref:develop:data-transforms/configure.adoc#resources[`data_transforms_per_core_memory_reservation`] and xref:develop:data-transforms/configure.adoc#resources[`data_transforms_per_function_memory_limit settings`] to provide more memory to each function and the overall Wasm engine. +endif::[] + +ifdef::env-cloud[] +- Consider increasing the allocated memory for the Wasm engine. Adjust the `data_transforms_per_function_memory_limit` setting to provide more memory to each function and the overall Wasm engine. +endif::[] == Throughput @@ -69,13 +75,18 @@ rpk transform logs Replace `` with the xref:develop:data-transforms/configure.adoc[configured name] of the transform function. +ifndef::env-cloud[] TIP: You can also xref:console:ui/data-transforms.adoc#logs[view logs in {ui}]. +endif::[] + +ifdef::env-cloud[] +TIP: You can also xref:develop:data-transforms.adoc#logs[view logs in {ui}]. +endif::[] By default, Redpanda provides several settings to manage logging for data transforms, such as buffer capacity, flush interval, and maximum log line length. These settings ensure that logging operates efficiently without overwhelming the system. However, you may need to adjust these settings based on your specific requirements and workloads. 
For information on how to configure logging, see the xref:develop:data-transforms/configure.adoc#log[Configure transform logging] section of the configuration guide. == Suggested reading - xref:reference:public-metrics-reference.adoc#data_transform_metrics[Data transforms metrics] -- xref:console:ui/data-transforms.adoc[] // end::single-source[] diff --git a/modules/reference/pages/properties/cluster-properties.adoc b/modules/reference/pages/properties/cluster-properties.adoc index ea6f1bb999..5a9c91c468 100644 --- a/modules/reference/pages/properties/cluster-properties.adoc +++ b/modules/reference/pages/properties/cluster-properties.adoc @@ -141,7 +141,9 @@ Defines the number of bytes allocated by the internal audit client for audit mes // tag::audit_enabled[] === audit_enabled +ifndef::env-cloud[] include::reference:partial$enterprise-licensed-property.adoc[] +endif::[] Enables or disables audit logging. When you set this to true, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. @@ -1193,7 +1195,9 @@ Enable SASL authentication for Kafka connections. Authorization is required to m // tag::enable_schema_id_validation[] === enable_schema_id_validation +ifndef::env-cloud[] include::reference:partial$enterprise-licensed-property.adoc[] +endif::[] Mode to enable server-side schema ID validation. 
From 107c2eccdc58334e1f2c128b3b0b986f78b61a05 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Wed, 2 Apr 2025 22:57:00 -0600 Subject: [PATCH 13/43] clean up conditionalizing --- modules/develop/pages/data-transforms/configure.adoc | 11 ++--------- .../pages/properties/cluster-properties.adoc | 8 +++++++- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/modules/develop/pages/data-transforms/configure.adoc b/modules/develop/pages/data-transforms/configure.adoc index 9efefef910..b454fde984 100644 --- a/modules/develop/pages/data-transforms/configure.adoc +++ b/modules/develop/pages/data-transforms/configure.adoc @@ -63,25 +63,18 @@ This section covers how to configure the Wasm engine environment using Redpanda To use data transforms, you must enable it for a Redpanda cluster using the xref:reference:properties/cluster-properties.adoc#data_transforms_enabled[`data_transforms_enabled`] property. - -[[resources]] +ifndef::env-cloud[] === Configure memory resources for data transforms Redpanda reserves memory for each transform function within the broker. You need enough memory for your input record and output record to be in memory at the same time. Set the following based on the number of functions you have and the amount of memory you anticipate needing. -ifndef::env-cloud[] - xref:reference:properties/cluster-properties.adoc#data_transforms_per_core_memory_reservation[`data_transforms_per_core_memory_reservation`]: Increase this setting if you plan to deploy a large number of data transforms or if your transforms are memory-intensive. Reducing it may limit the number of concurrent transforms. -endif::[] - xref:reference:properties/cluster-properties.adoc#data_transforms_per_function_memory_limit[`data_transforms_per_function_memory_limit`]: Adjust this setting if individual transform functions require more memory to process records efficiently. Reducing it may cause memory errors in complex transforms. 
-endif::[] - -ifndef::env-cloud[] The maximum number of functions that can be deployed to a cluster is equal to `data_transforms_per_core_memory_reservation` / `data_transforms_per_function_memory_limit`. When that limit is hit, Redpanda cannot allocate memory for the VM and the transforms stay in `errored` states. -endif::[] - +endif::[] [[binary-size]] === Configure maximum binary size diff --git a/modules/reference/pages/properties/cluster-properties.adoc b/modules/reference/pages/properties/cluster-properties.adoc index 5a9c91c468..00103f6f71 100644 --- a/modules/reference/pages/properties/cluster-properties.adoc +++ b/modules/reference/pages/properties/cluster-properties.adoc @@ -5539,9 +5539,13 @@ Timeout to wait for leadership in metadata cache. // tag::write_caching_default[] === write_caching_default -The default write caching mode to apply to user topics. Write caching acknowledges a message as soon as it is received and acknowledged on a majority of brokers, without waiting for it to be written to disk. With `acks=all`, this provides lower latency while still ensuring that a majority of brokers acknowledge the write. Fsyncs follow <> and <>, whichever is reached first. +The default write caching mode to apply to user topics. Write caching acknowledges a message as soon as it is received and acknowledged on a majority of brokers, without waiting for it to be written to disk. With `acks=all`, this provides lower latency while still ensuring that a majority of brokers acknowledge the write. + +ifndef::env-cloud[] +Fsyncs follow <> and <>, whichever is reached first. The `write_caching_default` cluster property can be overridden with the xref:topic-properties.adoc#writecaching[`write.caching`] topic property. +endif::[] *Requires restart:* no @@ -5555,9 +5559,11 @@ The `write_caching_default` cluster property can be overridden with the xref:top *Default*: For clusters in production mode, the default is `false`. 
For clusters in development mode, the default is `true`. +ifndef::env-cloud[] *Related topics*: * xref:develop:config-topics.adoc#configure-write-caching[Write caching] +endif::[] --- // end::write_caching_default[] From 4aacff34706ce73ee4665c0e7d6baee8ecf99360 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Thu, 3 Apr 2025 10:48:23 -0600 Subject: [PATCH 14/43] Update modules/develop/pages/data-transforms/deploy.adoc Co-authored-by: Jake Cahill <45230295+JakeSCahill@users.noreply.github.com> --- modules/develop/pages/data-transforms/deploy.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/develop/pages/data-transforms/deploy.adoc b/modules/develop/pages/data-transforms/deploy.adoc index 9ca1d8405a..361584c93b 100644 --- a/modules/develop/pages/data-transforms/deploy.adoc +++ b/modules/develop/pages/data-transforms/deploy.adoc @@ -131,7 +131,7 @@ TIP: You can also xref:console:ui/data-transforms.adoc#delete[delete transform f endif::[] ifdef::env-cloud[] -TIP: You can also xref:develop:data-transforms.adoc#delete[delete transform functions in Redpanda Console]. +TIP: You can also xref:develop:data-transforms.adoc#delete[delete transform functions in Redpanda Cloud]. 
endif::[] From 3d6eb11ba8a1b4ff51f223ed4012ae19aab1a504 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Thu, 3 Apr 2025 12:39:43 -0600 Subject: [PATCH 15/43] fix tags --- .../reference/pages/properties/cluster-properties.adoc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/reference/pages/properties/cluster-properties.adoc b/modules/reference/pages/properties/cluster-properties.adoc index 00103f6f71..e396b540c3 100644 --- a/modules/reference/pages/properties/cluster-properties.adoc +++ b/modules/reference/pages/properties/cluster-properties.adoc @@ -174,7 +174,7 @@ List of strings in JSON style identifying the event types to include in the audi *Default:* `[management, authenticate, admin]` --- -// end::audit-enabled-event-types[] +// end::audit_enabled_event_types[] // tag::audit_excluded_principals[] === audit_excluded_principals @@ -288,7 +288,7 @@ If you produce to a topic that doesn't exist, the topic will be created with def *Default:* `false` --- -// end::auto-create-topics-enabled[] +// end::auto_create_topics_enabled[] === cluster_id @@ -923,7 +923,7 @@ Default number of partitions per topic. *Default:* `1` --- -// end::default-topic-partitions[] +// end::default_topic_partitions[] === default_topic_replications @@ -5223,7 +5223,9 @@ The retention time for tombstone records in a compacted topic. 
Cannot be enabled *Default:* `null` +ifndef::env-cloud[] *Related topics:* xref:manage:cluster-maintenance/compaction-settings.adoc#tombstone-record-removal[Tombstone record removal] +endif::[] --- // end::tombstone_retention_ms[] From 3e1904db2eef9a822c0b90bc7824e4aaaf9d4ee7 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Thu, 3 Apr 2025 22:27:02 -0600 Subject: [PATCH 16/43] add topic properties reference --- modules/develop/pages/config-topics.adoc | 4 -- .../pages/properties/cluster-properties.adoc | 59 +++++++++++++++++++ .../pages/properties/topic-properties.adoc | 21 +++++-- 3 files changed, 74 insertions(+), 10 deletions(-) diff --git a/modules/develop/pages/config-topics.adoc b/modules/develop/pages/config-topics.adoc index 347217b5cc..98bcca1e5c 100644 --- a/modules/develop/pages/config-topics.adoc +++ b/modules/develop/pages/config-topics.adoc @@ -126,8 +126,6 @@ endif::[] Only enable write caching on workloads that can tolerate some data loss in the case of multiple, simultaneous broker failures. Leaving write caching disabled safeguards your data against complete data center or availability zone failures. -ifndef::env-cloud[] - ==== Configure at cluster level To enable write caching by default in all user topics, set the cluster-level property xref:reference:cluster-properties.adoc#write_caching_default[`write_caching_default`]: @@ -136,8 +134,6 @@ To enable write caching by default in all user topics, set the cluster-level pro With `write_caching_default` set to true at the cluster level, Redpanda fsyncs to disk according to xref:reference:cluster-properties.adoc#raft_replica_max_pending_flush_bytes[`raft_replica_max_pending_flush_bytes`] and xref:reference:cluster-properties.adoc#raft_replica_max_flush_delay_ms[`raft_replica_max_flush_delay_ms`], whichever is reached first. 
-endif::[] - ==== Configure at topic level To override the cluster-level setting at the topic level, set the topic-level property `write.caching`: diff --git a/modules/reference/pages/properties/cluster-properties.adoc b/modules/reference/pages/properties/cluster-properties.adoc index e396b540c3..0560eff27a 100644 --- a/modules/reference/pages/properties/cluster-properties.adoc +++ b/modules/reference/pages/properties/cluster-properties.adoc @@ -158,6 +158,7 @@ Enables or disables audit logging. When you set this to true, Redpanda checks fo *Default:* `false` --- + // end::audit-enabled[] // tag::audit_enabled_event_types[] @@ -174,6 +175,7 @@ List of strings in JSON style identifying the event types to include in the audi *Default:* `[management, authenticate, admin]` --- + // end::audit_enabled_event_types[] // tag::audit_excluded_principals[] @@ -190,6 +192,7 @@ List of user principals to exclude from auditing. *Default:* `null` --- + // end::audit_excluded_principals[] === audit_excluded_topics @@ -288,6 +291,7 @@ If you produce to a topic that doesn't exist, the topic will be created with def *Default:* `false` --- + // end::auto_create_topics_enabled[] === cluster_id @@ -628,6 +632,7 @@ The maximum size for a deployable WebAssembly binary that the broker can store. *Default:* `10485760` --- + // end::data_transforms_binary_max_size[] // tag::data_transforms_commit_interval_ms[] @@ -648,6 +653,7 @@ The commit interval at which data transforms progress. *Default:* `3000` --- + // end::data_transforms_commit_interval_ms[] // tag::data_transforms_enabled[] @@ -664,6 +670,7 @@ Enables WebAssembly-powered data transforms directly in the broker. When `data_t *Default:* `false` --- + // end::data_transforms_enabled[] @@ -717,6 +724,7 @@ Transform log lines truncate to this length. 
Truncation occurs after any charact *Default:* `1024` --- + // end::::data_transforms_logging_line_max_bytes[] === data_transforms_per_core_memory_reservation @@ -747,6 +755,7 @@ The amount of memory to give an instance of a data transform (Wasm) virtual mach *Default:* `2097152` --- + // end::data_transforms_per_function_memory_limit[] === data_transforms_read_buffer_memory_percentage @@ -923,6 +932,7 @@ Default number of partitions per topic. *Default:* `1` --- + // end::default_topic_partitions[] === default_topic_replications @@ -1019,6 +1029,7 @@ Disable registering the metrics exposed on the internal `/metrics` endpoint. *Default:* `false` --- + // end::disable_metrics[] // tag::disable_public_metrics[] @@ -1035,6 +1046,7 @@ Disable registering the metrics exposed on the `/public_metrics` endpoint. *Default:* `false` --- + // end::disable_public_metrics[] === disk_reservation_percent @@ -1098,6 +1110,7 @@ Enable idempotent producers. *Default:* `true` --- + // end::enable_idempotence[] === enable_leader_balancer @@ -1220,6 +1233,7 @@ Mode to enable server-side schema ID validation. *Default:* `none` --- + // end::enable_schem_id_validation[] // tag::enable_transactions[] @@ -1236,6 +1250,7 @@ Enable transactions (atomic writes). *Default:* `true` --- + // end::enable_transactions[] === enable_usage @@ -1282,6 +1297,7 @@ Maximum number of bytes returned in a fetch request. *Default:* `57671680` --- + // end::fetch_max_bytes[] === fetch_pid_d_coeff @@ -1330,6 +1346,7 @@ The maximum debounce time the fetch PID controller will apply, in milliseconds. *Default:* `100` --- + // end::fetch_pid_max_debounce_ms[] === fetch_pid_p_coeff @@ -1382,6 +1399,7 @@ The strategy used to fulfill fetch requests. 
*Default:* `non_polling` --- + // end::fetch_read_strategy[] // tag::fetch_reads_debounce_timeout[] @@ -1402,6 +1420,7 @@ Time to wait for the next read in fetch requests when the requested minimum byte *Default:* `1` --- + // end::fetch_reads_debounce_timeout[] === fetch_session_eviction_timeout_ms @@ -1440,6 +1459,7 @@ Delay added to the rebalance phase to wait for new members. *Default:* `3000` --- + // end::group_initial_rebalance_delay[] // tag::group_max_session_timeout_ms[] @@ -1460,6 +1480,7 @@ The maximum allowed session timeout for registered consumers. Longer timeouts gi *Default:* `300000` --- + // end::group_max_session_timeout_ms[] // tag::group_min_session_timeout_ms[] @@ -1480,6 +1501,7 @@ The minimum allowed session timeout for registered consumers. Shorter timeouts r *Default:* `6000` --- + // end::group_min_session_timeout_ms[] // tag::group_new_member_join_timeout[] @@ -1500,6 +1522,7 @@ Timeout for new member joins. *Default:* `30000` --- + // end::group_new_member_join_timeout[] // tag::group_offset_retention_check_ms[] @@ -1520,6 +1543,7 @@ Frequency rate at which the system should check for expired group offsets. *Default:* `600000` (10min) --- + // end::group_offset_retention_check_ms[] // tag::group_offset_retention_sec[] @@ -1540,6 +1564,7 @@ Consumer group offset retention seconds. To disable offset retention, set this t *Default:* `604800` (one week) --- + // end::group_offset_retention_sec[] // tag::group_topic_partitions[] @@ -1560,6 +1585,7 @@ Number of partitions in the internal group membership topic. *Default:* `16` --- + // end::group_topic_partitions[] === health_manager_tick_interval @@ -1649,6 +1675,7 @@ The frequency at which the Iceberg coordinator commits topic files to the catalo *Default:* `60000` --- + // end::iceberg_catalog_commit_interval_ms[] // tag::iceberg_catalog_type[] @@ -1665,6 +1692,7 @@ Iceberg catalog type that Redpanda will use to commit table metadata updates. 
Su *Default:* `object_storage` --- + // end::iceberg_catalog_type[] // tag::iceberg_delete[] @@ -1681,6 +1709,7 @@ Default value for the `redpanda.iceberg.delete` topic property that determines i *Default:* `true` --- + // end::iceberg_delete[] // tag::iceberg_enabled[] @@ -1697,6 +1726,7 @@ Enables the translation of topic data into Iceberg tables. Setting `iceberg_enab *Default:* `false` --- + // end::iceberg_enabled[] // tag::iceberg_rest_catalog_client_id[] @@ -1713,6 +1743,7 @@ Iceberg REST catalog user ID. This ID is used to query the catalog API for the O *Default:* `null` --- + // end::iceberg_rest_catalog_client_id[] // tag::iceberg_rest_catalog_client_secret[] @@ -1744,6 +1775,7 @@ Path to certificate revocation list for `iceberg_rest_catalog_trust_file`. *Default:* `null` --- + // end::iceberg_rest_catalog_client_secret[] // tag::iceberg_rest_catalog_endpoint[] @@ -1760,6 +1792,7 @@ URL of Iceberg REST catalog endpoint. *Default:* `null` --- + // end::iceberg_rest_catalog_endpoint // tag::iceberg_rest_catalog_prefix[] @@ -1776,6 +1809,7 @@ Prefix part of the Iceberg REST catalog URL. Prefix is appended to the catalog p *Default:* `null` --- + // end::iceberg_rest_catalog_prefix[] // tag::iceberg_rest_catalog_request_timeout_ms[] @@ -1796,6 +1830,7 @@ Maximum length of time that Redpanda waits for a response from the REST catalog *Default:* `10000` --- + // end::iceberg_rest_catalog_request_timeout_ms[] // tag::iceberg_rest_catalog_token[] @@ -1812,6 +1847,7 @@ Token used to access the REST Iceberg catalog. If the token is present, Redpanda *Default:* `null` --- + // end::iceberg_rest_catalog_token[] === iceberg_rest_catalog_trust_file @@ -1963,6 +1999,7 @@ Maximum size of a batch processed by the server. 
If the batch is compressed, the *Default:* `1048576` --- + // end::kafka_batch_max_bytes[] === kafka_client_group_byte_rate_quota @@ -2168,6 +2205,7 @@ Limit fetch responses to this many bytes, even if the total of partition bytes l *Default:* `67108864` --- + // end::kafka_max_bytes_per_fetch[] === kafka_memory_batch_size_estimate_for_fetch @@ -2291,6 +2329,7 @@ Enable Kafka queue depth control. *Default:* `false` --- + // end::kafka_qdc_enable[] === kafka_qdc_idle_depth @@ -2514,6 +2553,7 @@ Maximum size of a single request processed using the Kafka API. *Default:* `104857600` --- + // end::kafka_request_max_bytes[] === kafka_rpc_server_stream_recv_buf @@ -2620,6 +2660,7 @@ TCP keepalive idle timeout in seconds for Kafka connections. This describes the *Default:* `120` --- + // end::kafka_tcp_keepalive_timeout[] === kafka_tcp_keepalive_probe_interval_seconds @@ -2951,6 +2992,7 @@ The topic property xref:./topic-properties.adoc#cleanuppolicy[`cleanup.policy`] *Default:* `delete` --- + // end::log_cleanup_policy[] === log_compaction_interval_ms @@ -3083,6 +3125,7 @@ The amount of time to keep a log file before deleting it (in milliseconds). If s *Default:* `604800000` (one week) --- + // end::log_retention_ms[] // tag::log_segment_ms[] @@ -3110,6 +3153,7 @@ The topic property xref:./topic-properties.adoc#segmentms[`segment.ms`] override * <> --- + // end::log_segment_ms[] === log_segment_ms_max @@ -3164,6 +3208,7 @@ Default log segment size in bytes for topics which do not set `segment.bytes`. *Default:* `134217728` --- + // end::log_segment_size[] === log_segment_size_jitter_percent @@ -3996,6 +4041,7 @@ Raft learner recovery rate limit. 
Throttles the rate of data communicated to nod *Default:* `104857600` --- + // end::raft_learner_recovery_rate[] === raft_max_concurrent_append_requests_per_follower @@ -4351,6 +4397,7 @@ The topic property xref:./topic-properties.adoc#retentionbytes[`retention.bytes` *Default:* `null` --- + // end::retention_bytes[] === retention_local_strict @@ -4405,6 +4452,7 @@ NOTE: Both `retention_local_target_bytes_default` and `retention_local_target_ms *Default:* `null` --- + // end::retention_local_target_bytes_default[] === retention_local_target_capacity_bytes @@ -4471,6 +4519,7 @@ NOTE: Both <> | xref:./cluster-properties.adoc#log_cleanup_policy[`log_cleanup_policy`] - +ifdev::env-cloud[] | <> | xref:./cluster-properties.adoc#raft_replica_max_pending_flush_bytes[`raft_replica_max_pending_flush_bytes`] @@ -22,7 +29,7 @@ NOTE: All topic properties take effect immediately after being set. | <> | xref:./cluster-properties.adoc#initial_retention_local_target_ms_default[`initial_retention_local_target_ms_default`] - +endif::[] | <> | xref:./cluster-properties.adoc#retention_bytes[`retention_bytes`] @@ -34,19 +41,19 @@ NOTE: All topic properties take effect immediately after being set. 
| <> | xref:reference:properties/cluster-properties.adoc#log_segment_size[`log_segment_size`] - +ifndef::env-cloud[] | <> | xref:./cluster-properties.adoc#log_compression_type[`log_compression_type`] | <> | xref:./cluster-properties.adoc#log_message_timestamp_type[`log_message_timestamp_type`] - +endif::[] | <> | xref:reference:properties/cluster-properties.adoc#kafka_batch_max_bytes[`kafka_batch_max_bytes`] - +ifndef::env-cloud[] | <> | xref:./cluster-properties.adoc#default_topic_replication[`default_topic_replication`] - +endif::[] | <> | xref:./cluster-properties.adoc#write_caching_default[`write_caching_default`] |=== @@ -604,3 +611,5 @@ If both `delete.retention.ms` and the cluster property config_ref:tombstone_rete - xref:develop:produce-data/configure-producers.adoc[Configure Producers] - xref:develop:config-topics.adoc[Manage Topics] + +// end::single-source[] From 792e2f7c31aa3177fba7bc804490da67b44e799d Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Thu, 3 Apr 2025 23:13:35 -0600 Subject: [PATCH 17/43] typo --- modules/reference/pages/properties/topic-properties.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/reference/pages/properties/topic-properties.adoc b/modules/reference/pages/properties/topic-properties.adoc index 52a2c8c4e5..79bb15992b 100644 --- a/modules/reference/pages/properties/topic-properties.adoc +++ b/modules/reference/pages/properties/topic-properties.adoc @@ -20,7 +20,7 @@ NOTE: All topic properties take effect immediately after being set. 
| <> | xref:./cluster-properties.adoc#log_cleanup_policy[`log_cleanup_policy`] -ifdev::env-cloud[] +ifndef::env-cloud[] | <> | xref:./cluster-properties.adoc#raft_replica_max_pending_flush_bytes[`raft_replica_max_pending_flush_bytes`] From 95abfe236c8fbae98395c30011a49b2bb08141b8 Mon Sep 17 00:00:00 2001 From: Jake Cahill <45230295+JakeSCahill@users.noreply.github.com> Date: Fri, 4 Apr 2025 09:13:35 +0100 Subject: [PATCH 18/43] Fix broken tag --- modules/reference/pages/properties/cluster-properties.adoc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/reference/pages/properties/cluster-properties.adoc b/modules/reference/pages/properties/cluster-properties.adoc index 0560eff27a..4347b7a843 100644 --- a/modules/reference/pages/properties/cluster-properties.adoc +++ b/modules/reference/pages/properties/cluster-properties.adoc @@ -159,7 +159,7 @@ Enables or disables audit logging. When you set this to true, Redpanda checks fo --- -// end::audit-enabled[] +// end::audit_enabled[] // tag::audit_enabled_event_types[] === audit_enabled_event_types @@ -1761,6 +1761,7 @@ Secret to authenticate against Iceberg REST catalog. Required if catalog type is --- +// end::iceberg_rest_catalog_client_secret[] === iceberg_rest_catalog_crl_file @@ -1776,8 +1777,6 @@ Path to certificate revocation list for `iceberg_rest_catalog_trust_file`. 
--- -// end::iceberg_rest_catalog_client_secret[] - // tag::iceberg_rest_catalog_endpoint[] === iceberg_rest_catalog_endpoint From f66f6fa55e910c949805e2a28188b0f1755f9632 Mon Sep 17 00:00:00 2001 From: Jake Cahill <45230295+JakeSCahill@users.noreply.github.com> Date: Fri, 4 Apr 2025 09:20:33 +0100 Subject: [PATCH 19/43] Fix tags --- modules/reference/pages/properties/cluster-properties.adoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/reference/pages/properties/cluster-properties.adoc b/modules/reference/pages/properties/cluster-properties.adoc index 4347b7a843..0186dc5774 100644 --- a/modules/reference/pages/properties/cluster-properties.adoc +++ b/modules/reference/pages/properties/cluster-properties.adoc @@ -725,7 +725,7 @@ Transform log lines truncate to this length. Truncation occurs after any charact --- -// end::::data_transforms_logging_line_max_bytes[] +// end::data_transforms_logging_line_max_bytes[] === data_transforms_per_core_memory_reservation @@ -1234,7 +1234,7 @@ Mode to enable server-side schema ID validation. --- -// end::enable_schem_id_validation[] +// end::enable_schema_id_validation[] // tag::enable_transactions[] === enable_transactions @@ -1792,7 +1792,7 @@ URL of Iceberg REST catalog endpoint. 
--- -// end::iceberg_rest_catalog_endpoint +// end::iceberg_rest_catalog_endpoint[] // tag::iceberg_rest_catalog_prefix[] === iceberg_rest_catalog_prefix From 49214c1b19bb06c4a30079c2b4fa773cfe556f6b Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Sat, 5 Apr 2025 09:39:09 -0600 Subject: [PATCH 20/43] unconditionalize fixes --- modules/develop/pages/config-topics.adoc | 4 ++++ .../pages/properties/cluster-properties.adoc | 7 +++++-- .../pages/properties/topic-properties.adoc | 19 ++++++------------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/modules/develop/pages/config-topics.adoc b/modules/develop/pages/config-topics.adoc index 98bcca1e5c..4e95f46b49 100644 --- a/modules/develop/pages/config-topics.adoc +++ b/modules/develop/pages/config-topics.adoc @@ -126,6 +126,8 @@ endif::[] Only enable write caching on workloads that can tolerate some data loss in the case of multiple, simultaneous broker failures. Leaving write caching disabled safeguards your data against complete data center or availability zone failures. +ifdef::env-cloud[] + ==== Configure at cluster level To enable write caching by default in all user topics, set the cluster-level property xref:reference:cluster-properties.adoc#write_caching_default[`write_caching_default`]: @@ -134,6 +136,8 @@ To enable write caching by default in all user topics, set the cluster-level pro With `write_caching_default` set to true at the cluster level, Redpanda fsyncs to disk according to xref:reference:cluster-properties.adoc#raft_replica_max_pending_flush_bytes[`raft_replica_max_pending_flush_bytes`] and xref:reference:cluster-properties.adoc#raft_replica_max_flush_delay_ms[`raft_replica_max_flush_delay_ms`], whichever is reached first. 
+endif::[] + ==== Configure at topic level To override the cluster-level setting at the topic level, set the topic-level property `write.caching`: diff --git a/modules/reference/pages/properties/cluster-properties.adoc b/modules/reference/pages/properties/cluster-properties.adoc index 0186dc5774..eecdf1d93c 100644 --- a/modules/reference/pages/properties/cluster-properties.adoc +++ b/modules/reference/pages/properties/cluster-properties.adoc @@ -195,6 +195,8 @@ List of user principals to exclude from auditing. // end::audit_excluded_principals[] + +// tag::audit_excluded_topics[] === audit_excluded_topics List of topics to exclude from auditing. @@ -209,6 +211,8 @@ List of topics to exclude from auditing. --- +// tag::audit_excluded_topics[] + === audit_log_num_partitions Defines the number of partitions used by a newly-created audit topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. This cannot be altered for existing audit log topics. @@ -275,7 +279,7 @@ Defines the maximum amount of memory in bytes used by the audit buffer in each s --- -// tag::auto_create_topics_enabled[] + === auto_create_topics_enabled Allow automatic topic creation. To prevent excess topics, this property is not supported on Redpanda Cloud BYOC and Dedicated clusters. You should explicitly manage topic creation for these Redpanda Cloud clusters. 
@@ -292,7 +296,6 @@ If you produce to a topic that doesn't exist, the topic will be created with def --- -// end::auto_create_topics_enabled[] === cluster_id diff --git a/modules/reference/pages/properties/topic-properties.adoc b/modules/reference/pages/properties/topic-properties.adoc index 79bb15992b..dd792c9fa5 100644 --- a/modules/reference/pages/properties/topic-properties.adoc +++ b/modules/reference/pages/properties/topic-properties.adoc @@ -1,17 +1,11 @@ = Topic Configuration Properties :page-aliases: reference:topic-properties.adoc -// tag::single-source[] :description: Reference of topic configuration properties. A topic-level property sets a Redpanda or Kafka configuration for a particular topic. -ifndef::env-cloud[] Many topic-level properties have corresponding xref:manage:cluster-maintenance/cluster-property-configuration.adoc[cluster properties] that set a default value for all topics of a cluster. To customize the value for a topic, you can set a topic-level property that overrides the value of the corresponding cluster property. -endif::[] -ifdev::env-cloud[] -Many topic-level properties have corresponding xref:manage:cluster-maintenance/config-cluster.adoc[cluster properties] that set a default value for all topics of a cluster. To customize the value for a topic, you can set a topic-level property that overrides the value of the corresponding cluster property. -endif::[] NOTE: All topic properties take effect immediately after being set. @@ -20,7 +14,7 @@ NOTE: All topic properties take effect immediately after being set. 
| <> | xref:./cluster-properties.adoc#log_cleanup_policy[`log_cleanup_policy`] -ifndef::env-cloud[] + | <> | xref:./cluster-properties.adoc#raft_replica_max_pending_flush_bytes[`raft_replica_max_pending_flush_bytes`] @@ -29,7 +23,7 @@ ifndef::env-cloud[] | <> | xref:./cluster-properties.adoc#initial_retention_local_target_ms_default[`initial_retention_local_target_ms_default`] -endif::[] + | <> | xref:./cluster-properties.adoc#retention_bytes[`retention_bytes`] @@ -41,19 +35,19 @@ endif::[] | <> | xref:reference:properties/cluster-properties.adoc#log_segment_size[`log_segment_size`] -ifndef::env-cloud[] + | <> | xref:./cluster-properties.adoc#log_compression_type[`log_compression_type`] | <> | xref:./cluster-properties.adoc#log_message_timestamp_type[`log_message_timestamp_type`] -endif::[] + | <> | xref:reference:properties/cluster-properties.adoc#kafka_batch_max_bytes[`kafka_batch_max_bytes`] -ifndef::env-cloud[] + | <> | xref:./cluster-properties.adoc#default_topic_replication[`default_topic_replication`] -endif::[] + | <> | xref:./cluster-properties.adoc#write_caching_default[`write_caching_default`] |=== @@ -612,4 +606,3 @@ If both `delete.retention.ms` and the cluster property config_ref:tombstone_rete - xref:develop:produce-data/configure-producers.adoc[Configure Producers] - xref:develop:config-topics.adoc[Manage Topics] -// end::single-source[] From 22cc20e50ddb91e0926781b6a29f5c72d9a99366 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Sat, 5 Apr 2025 10:39:20 -0600 Subject: [PATCH 21/43] fix conditionals --- modules/develop/pages/data-transforms/monitor.adoc | 6 +----- .../develop/pages/produce-data/configure-producers.adoc | 2 ++ modules/develop/partials/run-transforms.adoc | 8 +++++++- .../reference/pages/properties/cluster-properties.adoc | 7 ------- 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/modules/develop/pages/data-transforms/monitor.adoc b/modules/develop/pages/data-transforms/monitor.adoc index 520a4019c2..c555d8ceb0 
100644 --- a/modules/develop/pages/data-transforms/monitor.adoc +++ b/modules/develop/pages/data-transforms/monitor.adoc @@ -50,10 +50,6 @@ ifndef::env-cloud[] - Consider increasing the allocated memory for the Wasm engine. Adjust the xref:develop:data-transforms/configure.adoc#resources[`data_transforms_per_core_memory_reservation`] and xref:develop:data-transforms/configure.adoc#resources[`data_transforms_per_function_memory_limit settings`] to provide more memory to each function and the overall Wasm engine. endif::[] -ifdef::env-cloud[] -- Consider increasing the allocated memory for the Wasm engine. Adjust the [`data_transforms_per_function_memory_limit settings`] to provide more memory to each function and the overall Wasm engine. -endif::[] - == Throughput Keeping track of read and write bytes and processor lag helps in understanding the data flow through your transforms, enabling better capacity planning and scaling: @@ -80,7 +76,7 @@ TIP: You can also xref:console:ui/data-transforms.adoc#logs[view logs in {ui}]. endif::[] ifdef::env-cloud[] -TIP: You can also xref:develop/data-transforms.adoc#logs[view logs in {ui}]. +TIP: You can also view logs in Redpanda Cloud. endif::[] By default, Redpanda provides several settings to manage logging for data transforms, such as buffer capacity, flush interval, and maximum log line length. These settings ensure that logging operates efficiently without overwhelming the system. However, you may need to adjust these settings based on your specific requirements and workloads. For information on how to configure logging, see the xref:develop:data-transforms/configure.adoc#log[Configure transform logging] section of the configuration guide. 
diff --git a/modules/develop/pages/produce-data/configure-producers.adoc b/modules/develop/pages/produce-data/configure-producers.adoc index a0d138f2bb..8d0dbcd154 100644 --- a/modules/develop/pages/produce-data/configure-producers.adoc +++ b/modules/develop/pages/produce-data/configure-producers.adoc @@ -21,7 +21,9 @@ The `acks` property sets the number of acknowledgments the producer requires the Redpanda guarantees data safety with fsync, which means flushing to disk. * With `acks=all`, every write is fsynced by default. +ifndef::env-cloud[] * With other `acks` settings, or with `write_caching_default=true` at the cluster level, Redpanda fsyncs to disk according to `raft_replica_max_pending_flush_bytes` and `raft_replica_max_flush_delay_ms`, whichever is reached first. +endif::[] * With `write.caching` enabled at the topic level, Redpanda fsyncs to disk according to `flush.ms` and `flush.bytes`, whichever is reached first. === `acks=0` diff --git a/modules/develop/partials/run-transforms.adoc b/modules/develop/partials/run-transforms.adoc index b995d60ff5..9e800495c2 100644 --- a/modules/develop/partials/run-transforms.adoc +++ b/modules/develop/partials/run-transforms.adoc @@ -9,9 +9,10 @@ See also: xref:develop:data-transforms/how-transforms-work.adoc[]. == Prerequisites You must have the following: - +ifnef::env-cloud[] - xref:deploy:deployment-option/self-hosted/index.adoc[A Redpanda cluster] running at least version {page-component-version}. - External access to the Kafka API and the Admin API. +endif::[] ifdef::env-kubernetes[] + Ensure that your Redpanda cluster has xref:manage:kubernetes/networking/external/index.adoc[external access] enabled and is accessible from your host machine using the advertised addresses. @@ -21,7 +22,12 @@ endif::[] - Development tools installed on your host machine: ** For Golang, you must have at least version 1.20 of https://go.dev/doc/install[Go^]. 
** For Rust, you must have the latest stable version of https://rustup.rs/[Rust]. +ifndef::env-cloud[] - The xref:get-started:rpk-install.adoc[`rpk` command-line client] installed on your host machine and configured to connect to your Redpanda cluster. +endif::[] +ifdef::env-cloud[] +- The xref:manage:rpk/rpk-install.adoc[`rpk` command-line client] installed on your host machine and configured to connect to your Redpanda cluster. +endif::[] ** For JavaScript and TypeScript projects, you must have the https://nodejs.org/en/download/package-manager[latest long-term-support release of Node.js]. ifdef::env-kubernetes[] + diff --git a/modules/reference/pages/properties/cluster-properties.adoc b/modules/reference/pages/properties/cluster-properties.adoc index eecdf1d93c..fefc94f308 100644 --- a/modules/reference/pages/properties/cluster-properties.adoc +++ b/modules/reference/pages/properties/cluster-properties.adoc @@ -5597,16 +5597,13 @@ Timeout to wait for leadership in metadata cache. --- -// tag::write_caching_default[] === write_caching_default The default write caching mode to apply to user topics. Write caching acknowledges a message as soon as it is received and acknowledged on a majority of brokers, without waiting for it to be written to disk. With `acks=all`, this provides lower latency while still ensuring that a majority of brokers acknowledge the write. -ifndef::env-cloud[] Fsyncs follow <> and <>, whichever is reached first. The `write_caching_default` cluster property can be overridden with the xref:topic-properties.adoc#writecaching[`write.caching`] topic property. -endif::[] *Requires restart:* no @@ -5620,16 +5617,12 @@ endif::[] *Default*: For clusters in production mode, the default is `false`. For clusters in development mode, the default is `true`. 
-ifndef::env-cloud[] *Related topics*: * xref:develop:config-topics.adoc#configure-write-caching[Write caching] -endif::[] --- -// end::write_caching_default[] - // tag::zstd_decompress_workspace_bytes[] === zstd_decompress_workspace_bytes From 65959ceb81ff2ab890992082b2e46c7ab392017d Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Sat, 5 Apr 2025 21:25:24 -0600 Subject: [PATCH 22/43] fix conditionals --- modules/develop/pages/config-topics.adoc | 2 +- modules/develop/pages/data-transforms/deploy.adoc | 2 +- .../pages/produce-data/configure-producers.adoc | 12 ++++++++++++ modules/develop/partials/run-transforms.adoc | 6 +++++- .../pages/properties/cluster-properties.adoc | 3 ++- 5 files changed, 21 insertions(+), 4 deletions(-) diff --git a/modules/develop/pages/config-topics.adoc b/modules/develop/pages/config-topics.adoc index 4e95f46b49..347217b5cc 100644 --- a/modules/develop/pages/config-topics.adoc +++ b/modules/develop/pages/config-topics.adoc @@ -126,7 +126,7 @@ endif::[] Only enable write caching on workloads that can tolerate some data loss in the case of multiple, simultaneous broker failures. Leaving write caching disabled safeguards your data against complete data center or availability zone failures. -ifdef::env-cloud[] +ifndef::env-cloud[] ==== Configure at cluster level diff --git a/modules/develop/pages/data-transforms/deploy.adoc b/modules/develop/pages/data-transforms/deploy.adoc index 361584c93b..82e5332a20 100644 --- a/modules/develop/pages/data-transforms/deploy.adoc +++ b/modules/develop/pages/data-transforms/deploy.adoc @@ -131,7 +131,7 @@ TIP: You can also xref:console:ui/data-transforms.adoc#delete[delete transform f endif::[] ifdef::env-cloud[] -TIP: You can also xref:develop:data-transforms.adoc#delete[delete transform functions in Redpanda Cloud]. +TIP: You can also delete transform functions in Redpanda Cloud. 
endif::[] diff --git a/modules/develop/pages/produce-data/configure-producers.adoc b/modules/develop/pages/produce-data/configure-producers.adoc index 8d0dbcd154..46a2db7d4e 100644 --- a/modules/develop/pages/produce-data/configure-producers.adoc +++ b/modules/develop/pages/produce-data/configure-producers.adoc @@ -68,6 +68,7 @@ to the majority of the brokers responsible for the partition in the cluster. As soon as the fsync call is complete, the message is considered acknowledged and is made visible to readers. +ifndef::env-cloud[] NOTE: This property has an important distinction compared to Kafka's behavior. In Kafka, a message is considered acknowledged without the requirement that it has been fsynced. Messages that have not been fsynced to disk may be lost in the @@ -75,6 +76,17 @@ event of a broker crash. So when using `acks=all`, the Redpanda default configuration is more resilient than Kafka's. You can also consider using xref:develop:config-topics.adoc#configure-write-caching[write caching], which is a relaxed mode of `acks=all` that acknowledges a message as soon as it is received and acknowledged on a majority of brokers, without waiting for it to fsync to disk. This provides lower latency while still ensuring that a majority of brokers acknowledge the write. +endif::[] + +ifdef::env-cloud[] +NOTE: This property has an important distinction compared to Kafka's behavior. In +Kafka, a message is considered acknowledged without the requirement that it has +been fsynced. Messages that have not been fsynced to disk may be lost in the +event of a broker crash. So when using `acks=all`, the Redpanda default +configuration is more resilient than Kafka's. You can also consider +using write caching, which is a relaxed mode of `acks=all` that acknowledges a message as soon as it is received and acknowledged on a majority of brokers, without waiting for it to fsync to disk. 
This provides lower latency while still ensuring that a majority of brokers acknowledge the write. +endif::[] + === `retries` This property controls the number of times a message is re-sent to the broker diff --git a/modules/develop/partials/run-transforms.adoc b/modules/develop/partials/run-transforms.adoc index 9e800495c2..18f390ac22 100644 --- a/modules/develop/partials/run-transforms.adoc +++ b/modules/develop/partials/run-transforms.adoc @@ -1,3 +1,5 @@ +// tag::single-source[] + Data transforms let you run common data streaming tasks, like filtering, scrubbing, and transcoding, within Redpanda. For example, you may have consumers that require you to redact credit card numbers or convert JSON to Avro. Data transforms can also interact with the Redpanda Schema Registry to work with encoded data types. Data transforms use a WebAssembly (Wasm) engine inside a Redpanda broker. A Wasm function acts on a single record in an input topic. You can develop and manage data transforms with xref:reference:rpk/rpk-transform/rpk-transform.adoc[`rpk transform`] commands. @@ -9,7 +11,7 @@ See also: xref:develop:data-transforms/how-transforms-work.adoc[]. == Prerequisites You must have the following: -ifnef::env-cloud[] +ifndef::env-cloud[] - xref:deploy:deployment-option/self-hosted/index.adoc[A Redpanda cluster] running at least version {page-component-version}. - External access to the Kafka API and the Admin API. 
endif::[] @@ -688,3 +690,5 @@ rpk transform delete data-transforms-tutorial --no-confirm - xref:reference:data-transforms/golang-sdk.adoc[] - xref:reference:data-transforms/rust-sdk.adoc[] - xref:reference:rpk/rpk-transform/rpk-transform.adoc[`rpk transform` commands] + +// end::single-source[] \ No newline at end of file diff --git a/modules/reference/pages/properties/cluster-properties.adoc b/modules/reference/pages/properties/cluster-properties.adoc index fefc94f308..01c51016a1 100644 --- a/modules/reference/pages/properties/cluster-properties.adoc +++ b/modules/reference/pages/properties/cluster-properties.adoc @@ -211,7 +211,7 @@ List of topics to exclude from auditing. --- -// tag::audit_excluded_topics[] +// end::audit_excluded_topics[] === audit_log_num_partitions @@ -5597,6 +5597,7 @@ Timeout to wait for leadership in metadata cache. --- + === write_caching_default The default write caching mode to apply to user topics. Write caching acknowledges a message as soon as it is received and acknowledged on a majority of brokers, without waiting for it to be written to disk. With `acks=all`, this provides lower latency while still ensuring that a majority of brokers acknowledge the write. From 31787e3d92b1a0a55c38243eeee470d40ad1f56d Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Sat, 5 Apr 2025 22:51:22 -0600 Subject: [PATCH 23/43] conditionalize console --- modules/console/pages/ui/data-transforms.adoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/console/pages/ui/data-transforms.adoc b/modules/console/pages/ui/data-transforms.adoc index d014757176..c404ce0da7 100644 --- a/modules/console/pages/ui/data-transforms.adoc +++ b/modules/console/pages/ui/data-transforms.adoc @@ -8,8 +8,9 @@ Before you begin, ensure that you have the following: - +ifndef::env-cloud[] - Redpanda Console installed. +endif::[] - xref:develop:data-transforms/configure.adoc#enable-transforms[Data transforms enabled] in your Redpanda cluster. 
- At least one transform function deployed to your Redpanda cluster. From 3e23dd1c3eb830c851743de5e58f90bde6db462d Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Sun, 6 Apr 2025 17:24:04 -0600 Subject: [PATCH 24/43] conditionalize wasm properties in text --- .../pages/data-transforms/configure.adoc | 9 +++++--- .../data-transforms/how-transforms-work.adoc | 2 ++ modules/develop/partials/run-transforms.adoc | 22 ++++++++++++++----- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/modules/develop/pages/data-transforms/configure.adoc b/modules/develop/pages/data-transforms/configure.adoc index b454fde984..71ac0e5030 100644 --- a/modules/develop/pages/data-transforms/configure.adoc +++ b/modules/develop/pages/data-transforms/configure.adoc @@ -42,7 +42,7 @@ env: You can set the name of the transform function, environment variables, and input and output topics on the command-line when you deploy the transform. These command-line settings take precedence over those specified in the `transform.yaml` file. -See xref:develop:data-transforms/deploy.adoc[]. +See xref:develop:data-transforms/deploy.adoc[] [[built-in]] === Built-In environment variables @@ -76,6 +76,8 @@ Set the following based on the number of functions you have and the amount of me The maximum number of functions that can be deployed to a cluster is equal to `data_transforms_per_core_memory_reservation` / `data_transforms_per_function_memory_limit`. When that limit is hit, Redpanda cannot allocate memory for the VM and the transforms stay in `errored` states. endif::[] +ifndef::env-cloud[] +[[max-threads]] [[binary-size]] === Configure maximum binary size @@ -89,12 +91,13 @@ Increase this setting if your Wasm binaries are larger than the default limit. S You can set the interval at which data transforms commit their progress using the xref:reference:properties/cluster-properties.adoc#data_transforms_commit_interval_ms[`data_transforms_commit_interval_ms`] property. 
Adjust this setting to control how frequently the transform function's progress is committed. Shorter intervals may provide more frequent progress updates but can increase load. Longer intervals reduce load but may delay progress updates. +endif::[] [[log]] === Configure transform logging -ifndef::env-cloud[] -Redpanda provides several properties to configure logging for data transforms: +The following properties configure logging for data transforms: +ifndef::env-cloud[] - xref:reference:properties/cluster-properties.adoc#data_transforms_logging_buffer_capacity_bytes[`data_transforms_logging_buffer_capacity_bytes`]: Increase this value if your transform logs are large or if you need to buffer more log data before flushing. Reducing this value may cause more frequent log flushing. - xref:reference:properties/cluster-properties.adoc#data_transforms_logging_flush_interval_ms[`data_transforms_logging_flush_interval_ms`]: Adjust this value to control how frequently logs are flushed to the `transform_logs` topic. Shorter intervals provide more frequent log updates but can increase load. Longer intervals reduce load but may delay log updates. diff --git a/modules/develop/pages/data-transforms/how-transforms-work.adoc b/modules/develop/pages/data-transforms/how-transforms-work.adoc index 833f489814..0bb1f79f21 100644 --- a/modules/develop/pages/data-transforms/how-transforms-work.adoc +++ b/modules/develop/pages/data-transforms/how-transforms-work.adoc @@ -22,7 +22,9 @@ To execute a transform function, Redpanda uses just-in-time (JIT) compilation to When you deploy a data transform to a Redpanda broker, it stores the Wasm bytecode and associated metadata, such as input and output topics and environment variables. The broker then replicates this data across the cluster using internal Kafka topics. When the data is distributed, each shard runs its own instance of the transform function. 
This process includes several resource management features: - Each shard can run only one instance of the transform function at a time to ensure efficient resource utilization and prevent overload. +ifndef::env-cloud[] - Memory for each function is reserved within the broker with the `data_transforms_per_core_memory_reservation` and `data_transforms_per_function_memory_limit` properties. See xref:develop:data-transforms/configure.adoc#resources[Configure memory for data transforms]. +endif::[] - CPU time is dynamically allocated to the Wasm runtime to ensure that the code does not run forever and cannot block the broker from handling traffic or doing other work, such as Tiered Storage uploads. == Flow of data transforms diff --git a/modules/develop/partials/run-transforms.adoc b/modules/develop/partials/run-transforms.adoc index 18f390ac22..8b92d76adf 100644 --- a/modules/develop/partials/run-transforms.adoc +++ b/modules/develop/partials/run-transforms.adoc @@ -6,11 +6,12 @@ Data transforms use a WebAssembly (Wasm) engine inside a Redpanda broker. A Wasm NOTE: You should build and deploy transforms from a separate, non-production machine (host machine). Using a separate host machine avoids potential resource conflicts and stability issues on the nodes that run your brokers. -See also: xref:develop:data-transforms/how-transforms-work.adoc[]. +See also: xref:develop:data-transforms/how-transforms-work.adoc[] == Prerequisites You must have the following: + ifndef::env-cloud[] - xref:deploy:deployment-option/self-hosted/index.adoc[A Redpanda cluster] running at least version {page-component-version}. - External access to the Kafka API and the Admin API. @@ -21,16 +22,17 @@ Ensure that your Redpanda cluster has xref:manage:kubernetes/networking/external + TIP: For a tutorial on setting up a Redpanda cluster with external access, see xref:deploy:deployment-option/self-hosted/kubernetes/get-started-dev.adoc[]. 
endif::[] + - Development tools installed on your host machine: -** For Golang, you must have at least version 1.20 of https://go.dev/doc/install[Go^]. -** For Rust, you must have the latest stable version of https://rustup.rs/[Rust]. + * For Golang, you must have at least version 1.20 of https://go.dev/doc/install[Go^]. + * For Rust, you must have the latest stable version of https://rustup.rs/[Rust]. ifndef::env-cloud[] - The xref:get-started:rpk-install.adoc[`rpk` command-line client] installed on your host machine and configured to connect to your Redpanda cluster. endif::[] ifdef::env-cloud[] - The xref:manage:rpk/rpk-install.adoc[`rpk` command-line client] installed on your host machine and configured to connect to your Redpanda cluster. endif::[] -** For JavaScript and TypeScript projects, you must have the https://nodejs.org/en/download/package-manager[latest long-term-support release of Node.js]. + * For JavaScript and TypeScript projects, you must have the https://nodejs.org/en/download/package-manager[latest long-term-support release of Node.js]. ifdef::env-kubernetes[] + You can use a xref:manage:kubernetes/networking/k-connect-to-redpanda.adoc#rpk-profile[pre-configured `rpk` profile]: @@ -321,8 +323,12 @@ rpk transform deploy --input-topic=input-topic --output-topic=output-topic ```bash echo "hello\nworld" | rpk topic produce input-topic ``` - +ifdef::env-cloud[] +. In Redpanda Cloud, check the records in both the input topic and the output topic. They should be the same. +endif::[] +ifndef::env-cloud[] . http://localhost:8080/topics[Open Redpanda Console] and check the records in both the input topic and the output topic. They should be the same. +endif::[] + You can also verify the content of the output topic in the command-line: + @@ -531,8 +537,12 @@ rpk transform deploy --input-topic=input-topic --output-topic=output-topic ```bash echo "apples,10\npears,11\noranges,5" | rpk topic produce input-topic -k market-stock ``` - +ifdef::env-cloud[] +. 
In Redpanda Cloud, check the records in both the input topic and the output topic. You should see the following values: +endif::[] +ifndef::env-cloud[] . http://localhost:8080/topics[Open Redpanda Console] and check the records in both the input topic and the output topic. You should see the following values: +endif::[] + [source,json,role="no-copy"] ---- From c79d85b32a4ef14cdcee27f32306bebebea3b469 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Sun, 6 Apr 2025 18:19:07 -0600 Subject: [PATCH 25/43] fix audit_excluded_topics --- modules/manage/partials/audit-logging.adoc | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index f225b64033..33e18e901b 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -33,6 +33,7 @@ Redpanda's audit logging mechanism supports several options to control the volum * `auditLogging.enabled`: Sets the value of the `audit_enabled` cluster property to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. * `auditLogging.enabledEventTypes`: Sets the value of the `audit_enabled_event_types` cluster property. This option is a list of JSON strings identifying the <> to include in the audit log. Valid values include any of the following - `management`, `produce`, `consume`, `describe`, `heartbeat`, `authenticate`, `schema_registry`, `admin`. Default: `'["management","authenticate","admin"]'`. +* `auditLogging.excludedTopics`: Sets the value of the xref:reference:cluster-properties.adoc#audit_excluded_topics[`audit_excluded_topics`] cluster property. This option is a list of JSON strings identifying the topics the audit logging system should ignore. This list cannot include the `_redpanda.audit_log` topic. 
Redpanda will reject the command if you do attempt to include that topic. Default: `null`. * `auditLogging.excludedPrincipals`: Sets the value of the `audit_excluded_principals` cluster property. This option is a list of JSON strings identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. To configure audit logging, see xref:manage:cluster-maintenance/config-cluster.adoc[]. From 4d9e3d6fa4a98675597a6dce93f8d9f21bd6f042 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 7 Apr 2025 10:47:01 -0600 Subject: [PATCH 26/43] rename topic-iceberg-integration to about-iceberg-topics --- modules/ROOT/nav.adoc | 2 +- .../pages/release-notes/redpanda.adoc | 2 +- ...gration.adoc => about-iceberg-topics.adoc} | 2 +- .../pages/iceberg/query-iceberg-topics.adoc | 2 +- ...anda-topics-iceberg-snowflake-catalog.adoc | 2 +- .../pages/kubernetes/k-manage-topics.adoc | 2 +- .../pages/properties/cluster-properties.adoc | 102 ++++++++++-------- .../pages/properties/topic-properties.adoc | 8 +- 8 files changed, 67 insertions(+), 55 deletions(-) rename modules/manage/pages/iceberg/{topic-iceberg-integration.adoc => about-iceberg-topics.adoc} (85%) diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index d257a17984..95bc056c4f 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -180,7 +180,7 @@ *** xref:manage:topic-recovery.adoc[Topic Recovery] *** xref:manage:whole-cluster-restore.adoc[Whole Cluster Restore] ** xref:manage:iceberg/index.adoc[Iceberg] -*** xref:manage:iceberg/topic-iceberg-integration.adoc[About Iceberg Topics] +*** xref:manage:iceberg/about-iceberg-topics.adoc[About Iceberg Topics] *** xref:manage:iceberg/use-iceberg-catalogs.adoc[Use Iceberg Catalogs] *** xref:manage:iceberg/query-iceberg-topics.adoc[Query Iceberg Topics] *** xref:manage:iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc[Query Iceberg Topics with Snowflake] diff --git 
a/modules/get-started/pages/release-notes/redpanda.adoc b/modules/get-started/pages/release-notes/redpanda.adoc index 71208189ad..4693d528ea 100644 --- a/modules/get-started/pages/release-notes/redpanda.adoc +++ b/modules/get-started/pages/release-notes/redpanda.adoc @@ -58,7 +58,7 @@ The admin panel has been removed from the Redpanda Console UI. To manage users, == Iceberg improvements -xref:manage:iceberg/topic-iceberg-integration.adoc[Iceberg-enabled topics] now support custom partitioning for improved query performance, snapshot expiry, and a dead-letter queue for invalid records. Schema evolution is also supported with schema mutations implemented according to the Iceberg specification. +xref:manage:iceberg/about-iceberg-topics.adoc[Iceberg-enabled topics] now support custom partitioning for improved query performance, snapshot expiry, and a dead-letter queue for invalid records. Schema evolution is also supported with schema mutations implemented according to the Iceberg specification. == Protobuf normalization in Schema Registry diff --git a/modules/manage/pages/iceberg/topic-iceberg-integration.adoc b/modules/manage/pages/iceberg/about-iceberg-topics.adoc similarity index 85% rename from modules/manage/pages/iceberg/topic-iceberg-integration.adoc rename to modules/manage/pages/iceberg/about-iceberg-topics.adoc index dd43de60fe..65f92a50e4 100644 --- a/modules/manage/pages/iceberg/topic-iceberg-integration.adoc +++ b/modules/manage/pages/iceberg/about-iceberg-topics.adoc @@ -1,7 +1,7 @@ = About Iceberg Topics :description: Learn how Redpanda can integrate topics with Apache Iceberg. 
:page-categories: Iceberg, Tiered Storage, Management, High Availability, Data Replication, Integration -:page-aliases: manage:topic-iceberg-integration.adoc +:page-aliases: manage:topic-iceberg-integration.adoc, manage:iceberg/topic-iceberg-integration.adoc [NOTE] ==== diff --git a/modules/manage/pages/iceberg/query-iceberg-topics.adoc b/modules/manage/pages/iceberg/query-iceberg-topics.adoc index 3f64830431..7941b28fd3 100644 --- a/modules/manage/pages/iceberg/query-iceberg-topics.adoc +++ b/modules/manage/pages/iceberg/query-iceberg-topics.adoc @@ -7,7 +7,7 @@ include::shared:partial$enterprise-license.adoc[] ==== -When you access Iceberg topics from a data lakehouse or other Iceberg-compatible tools, how you consume the data depends on the topic xref:manage:iceberg/topic-iceberg-integration.adoc#enable-iceberg-integration[Iceberg mode] and whether you've registered a schema for the topic in the xref:manage:schema-reg/schema-reg-overview.adoc[Redpanda Schema Registry]. In either mode, you do not need to rely on complex ETL jobs or pipelines to access real-time data from Redpanda. +When you access Iceberg topics from a data lakehouse or other Iceberg-compatible tools, how you consume the data depends on the topic xref:manage:iceberg/about-iceberg-topics.adoc#enable-iceberg-integration[Iceberg mode] and whether you've registered a schema for the topic in the xref:manage:schema-reg/schema-reg-overview.adoc[Redpanda Schema Registry]. In either mode, you do not need to rely on complex ETL jobs or pipelines to access real-time data from Redpanda. 
include::manage:partial$iceberg/query-iceberg-topics.adoc[] diff --git a/modules/manage/pages/iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc b/modules/manage/pages/iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc index 165fa662bd..aebd7a7e16 100644 --- a/modules/manage/pages/iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc +++ b/modules/manage/pages/iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc @@ -92,7 +92,7 @@ Successfully updated configuration. New configuration version is 2. . You must restart your cluster so that the configuration changes take effect. -. Enable the integration for a topic by configuring the topic property `redpanda.iceberg.mode`. This mode creates an Iceberg table for the topic consisting of two columns, one for the record metadata including the key, and another binary column for the record's value. See xref:manage:iceberg/topic-iceberg-integration.adoc#enable-iceberg-integration[Enable Iceberg integration] for more details on Iceberg modes. The following examples show how to use xref:get-started:rpk-install.adoc[`rpk`] to either create a new topic, or alter the configuration for an existing topic, to set the Iceberg mode to `key_value`. +. Enable the integration for a topic by configuring the topic property `redpanda.iceberg.mode`. This mode creates an Iceberg table for the topic consisting of two columns, one for the record metadata including the key, and another binary column for the record's value. See xref:manage:iceberg/about-iceberg-topics.adoc#enable-iceberg-integration[Enable Iceberg integration] for more details on Iceberg modes. The following examples show how to use xref:get-started:rpk-install.adoc[`rpk`] to either create a new topic, or alter the configuration for an existing topic, to set the Iceberg mode to `key_value`. 
+ .Create a new topic and set `redpanda.iceberg.mode`: [,bash] diff --git a/modules/manage/pages/kubernetes/k-manage-topics.adoc b/modules/manage/pages/kubernetes/k-manage-topics.adoc index e204360cf7..7908776a64 100644 --- a/modules/manage/pages/kubernetes/k-manage-topics.adoc +++ b/modules/manage/pages/kubernetes/k-manage-topics.adoc @@ -139,7 +139,7 @@ include::shared:partial$enterprise-license.adoc[] In addition to the general prerequisites for managing topics, you must have the following: -- xref:manage:topic-iceberg-integration.adoc[Iceberg support] must be enabled on your Redpanda cluster. +- xref:manage:about-iceberg-topics.adoc[Iceberg support] must be enabled on your Redpanda cluster. - xref:manage:kubernetes/tiered-storage/k-tiered-storage.adoc[Tiered Storage] must be enabled on your Redpanda cluster. To create an Iceberg topic, set the `redpanda.iceberg.mode` configuration in the `additionalConfig` property of the `Topic` resource. diff --git a/modules/reference/pages/properties/cluster-properties.adoc b/modules/reference/pages/properties/cluster-properties.adoc index 716cae460d..fe50d6f70c 100644 --- a/modules/reference/pages/properties/cluster-properties.adoc +++ b/modules/reference/pages/properties/cluster-properties.adoc @@ -1784,10 +1784,7 @@ Proportional coefficient for the Iceberg backlog controller. Number of shares as **Related topics**: -- xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/query-iceberg-topics.adoc[] -- xref:manage:iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc[] -- xref:manage:iceberg/topic-iceberg-integration.adoc[] +- xref:manage:iceberg/about-iceberg-topics.adoc[] --- @@ -1806,7 +1803,7 @@ Base path for the object-storage-backed Iceberg catalog. 
After Iceberg is enable **Related topics**: - xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/topic-iceberg-integration.adoc[] +- xref:manage:iceberg/about-iceberg-topics.adoc[] --- @@ -1830,8 +1827,6 @@ The frequency at which the Iceberg coordinator commits topic files to the catalo **Related topics**: - xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc[] -- xref:manage:iceberg/topic-iceberg-integration.adoc[] --- @@ -1840,7 +1835,7 @@ The frequency at which the Iceberg coordinator commits topic files to the catalo // tag::iceberg_catalog_type[] === iceberg_catalog_type -Iceberg catalog type that Redpanda will use to commit table metadata updates. Supported types: 'rest', 'object_storage'. +Iceberg catalog type that Redpanda will use to commit table metadata updates. Supported types: `rest`, `object_storage`. *Requires restart:* Yes @@ -1853,14 +1848,21 @@ Iceberg catalog type that Redpanda will use to commit table metadata updates. Su **Related topics**: - xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc[] -- xref:manage:iceberg/topic-iceberg-integration.adoc[] --- +// end::iceberg_catalog_type[] + +// tag::iceberg_default_partition_spec[] === iceberg_default_partition_spec +ifndef::env-cloud[] Default value for the xref:reference:properties/topic-properties.adoc#redpanda-iceberg-partition-spec[`redpanda.iceberg.partition.spec`] topic property that determines the partition spec for the Iceberg table corresponding to the topic. +endif::[] + +ifdef::env-cloud[] +Default value for the `redpanda.iceberg.partition.spec` topic property that determines the partition spec for the Iceberg table corresponding to the topic. 
+endif::[] *Requires restart:* No @@ -1874,11 +1876,11 @@ Partitions the topic by extracting the hour from `redpanda.timestamp`, grouping **Related topics**: -- xref:manage:iceberg/topic-iceberg-integration.adoc#enable-iceberg-integration[Enable Iceberg] +- xref:manage:iceberg/about-iceberg-topics.adoc#enable-iceberg-integration[Enable Iceberg] --- -// end::iceberg_catalog_type[] +// end::iceberg_default_partition_spec[] // tag::iceberg_delete[] === iceberg_delete @@ -1899,6 +1901,8 @@ Default value for the `redpanda.iceberg.delete` topic property that determines i --- +// end::iceberg_delete[] + === iceberg_disable_automatic_snapshot_expiry Whether to disable automatic Iceberg snapshot expiry. This property may be useful if the Iceberg catalog expects to perform snapshot expiry on its own. @@ -1933,12 +1937,16 @@ Whether to disable tagging of Iceberg snapshots. These tags are used to ensure t --- -// end::iceberg_delete[] - // tag::iceberg_enabled[] === iceberg_enabled +ifndef::env-cloud[] Enables the translation of topic data into Iceberg tables. Setting `iceberg_enabled` to `true` activates the feature at the cluster level, but each topic must also set the xref:reference:properties/topic-properties.adoc#redpanda-iceberg-enabled[`redpanda.iceberg.enabled`] topic-level property to `true` to use it. If `iceberg_enabled` is set to `false`, then the feature is disabled for all topics in the cluster, overriding any topic-level settings. +endif::[] + +ifdef::env-cloud[] +Enables the translation of topic data into Iceberg tables. Setting `iceberg_enabled` to `true` activates the feature at the cluster level, but each topic must also set the `redpanda.iceberg.enabled` topic-level property to `true` to use it. If `iceberg_enabled` is set to `false`, then the feature is disabled for all topics in the cluster, overriding any topic-level settings. +endif::[] *Requires restart:* Yes @@ -1950,16 +1958,22 @@ Enables the translation of topic data into Iceberg tables. 
Setting `iceberg_enab **Related topics**: -- xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/query-iceberg-topics.adoc[] -- xref:manage:iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc[] - xref:manage:iceberg/topic-iceberg-integration.adoc[] --- +// end::iceberg_enabled[] + +// tag::iceberg_invalid_record_action[] === iceberg_invalid_record_action +ifndef::env-cloud[] Default value for the xref:reference:properties/topic-properties.adoc#redpanda-iceberg-invalid-record-action[`redpanda.iceberg.invalid.record.action`] topic property. +endif::[] + +ifdef::env-cloud[] +Default value for the `redpanda.iceberg.invalid.record.action` topic property. +endif::[] *Requires restart:* No @@ -1973,6 +1987,9 @@ Default value for the xref:reference:properties/topic-properties.adoc#redpanda-i --- +// end::iceberg_invalid_record_action[] + +// tag::iceberg_rest_catalog_authentication_mode[] === iceberg_rest_catalog_authentication_mode The authentication mode for client requests made to the Iceberg catalog. Choose from: `none`, `bearer`, and `oauth2`. In `bearer` mode, the token specified in `iceberg_rest_catalog_token` is used unconditionally, and no attempts are made to refresh the token. In `oauth2` mode, the credentials specified in `iceberg_rest_catalog_client_id` and `iceberg_rest_catalog_client_secret` are used to obtain a bearer token from the URI defined by `iceberg_rest_catalog_oauth2_server_uri.`. @@ -1986,12 +2003,10 @@ The authentication mode for client requests made to the Iceberg catalog. Choose **Related topics**: - xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc[] -- xref:manage:iceberg/topic-iceberg-integration.adoc[] --- -// end::iceberg_enabled[] +// end::iceberg_rest_catalog_authentication_mode[] // tag::iceberg_rest_catalog_client_id[] === iceberg_rest_catalog_client_id @@ -2009,8 +2024,6 @@ Iceberg REST catalog user ID. 
This ID is used to query the catalog API for the O **Related topics**: - xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc[] -- xref:manage:iceberg/topic-iceberg-integration.adoc[] --- @@ -2032,11 +2045,12 @@ Secret to authenticate against Iceberg REST catalog. Required if catalog type is **Related topics**: - xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc[] -- xref:manage:iceberg/topic-iceberg-integration.adoc[] --- +// end::iceberg_rest_catalog_client_secret[] + +// tag::iceberg_rest_catalog_crl[] === iceberg_rest_catalog_crl The contents of a certificate revocation list for `iceberg_rest_catalog_trust`. Takes precedence over `iceberg_rest_catalog_crl_file`. @@ -2051,7 +2065,7 @@ The contents of a certificate revocation list for `iceberg_rest_catalog_trust`. --- -// end::iceberg_rest_catalog_client_secret[] +// end::iceberg_rest_catalog_crl[] === iceberg_rest_catalog_crl_file @@ -2068,8 +2082,6 @@ Path to certificate revocation list for `iceberg_rest_catalog_trust_file`. **Related topics**: - xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc[] -- xref:manage:iceberg/topic-iceberg-integration.adoc[] --- @@ -2089,11 +2101,10 @@ URL of Iceberg REST catalog endpoint. **Related topics**: - xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc[] -- xref:manage:iceberg/topic-iceberg-integration.adoc[] --- +// tag::iceberg_rest_catalog_oauth2_server_uri[] === iceberg_rest_catalog_oauth2_server_uri The OAuth URI used to retrieve access tokens for Iceberg catalog authentication. If left undefined, the deprecated Iceberg catalog endpoint `/v1/oauth/tokens` is used instead. @@ -2109,12 +2120,10 @@ The OAuth URI used to retrieve access tokens for Iceberg catalog authentication. 
**Related topics**: - xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc[] -- xref:manage:iceberg/topic-iceberg-integration.adoc[] --- -// end::iceberg_rest_catalog_endpoint[] +// end::iceberg_rest_catalog_oauth2_server_uri[] // tag::iceberg_rest_catalog_prefix[] === iceberg_rest_catalog_prefix @@ -2132,8 +2141,6 @@ Prefix part of the Iceberg REST catalog URL. Prefix is appended to the catalog p **Related topics**: - xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc[] -- xref:manage:iceberg/topic-iceberg-integration.adoc[] --- @@ -2159,8 +2166,6 @@ Maximum length of time that Redpanda waits for a response from the REST catalog **Related topics**: - xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc[] -- xref:manage:iceberg/topic-iceberg-integration.adoc[] --- @@ -2184,14 +2189,19 @@ Required if <>. +The contents of a certificate chain to trust for the REST Iceberg catalog. + +ifndef::env-cloud[] +Takes precedence over <>. +endif::[] *Requires restart:* Yes @@ -2203,7 +2213,7 @@ The contents of a certificate chain to trust for the REST Iceberg catalog. 
Takes --- -// end::iceberg_rest_catalog_token[] +// end::iceberg_rest_catalog_trust[] === iceberg_rest_catalog_trust_file @@ -2220,8 +2230,6 @@ Path to a file containing a certificate chain to trust for the REST Iceberg cata **Related topics**: - xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc[] -- xref:manage:iceberg/topic-iceberg-integration.adoc[] --- @@ -2245,9 +2253,16 @@ Average size per partition of the datalake translation backlog that the backlog --- +// tag::iceberg_target_lag_ms[] === iceberg_target_lag_ms +ifndef::env-cloud[] Default value for the xref:reference:properties/topic-properties.adoc#redpanda-iceberg-target-lag-ms[`redpanda.iceberg.target.lag.ms`] topic property, which controls how often the data in an Iceberg table is refreshed with new data from the corresponding Redpanda topic. Redpanda attempts to commit all data produced to the topic within the lag target, subject to resource availability. +endif::[] + +ifdef::env-cloud[] +Default value for the `redpanda.iceberg.target.lag.ms` topic property, which controls how often the data in an Iceberg table is refreshed with new data from the corresponding Redpanda topic. Redpanda attempts to commit all data produced to the topic within the lag target, subject to resource availability. 
+endif::[] *Unit:* milliseconds @@ -2263,9 +2278,6 @@ Default value for the xref:reference:properties/topic-properties.adoc#redpanda-i **Related topics**: -- xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/query-iceberg-topics.adoc[] -- xref:manage:iceberg/redpanda-topics-iceberg-snowflake-catalog.adoc[] - xref:manage:iceberg/topic-iceberg-integration.adoc[] --- diff --git a/modules/reference/pages/properties/topic-properties.adoc b/modules/reference/pages/properties/topic-properties.adoc index 1f57b30787..90979803d7 100644 --- a/modules/reference/pages/properties/topic-properties.adoc +++ b/modules/reference/pages/properties/topic-properties.adoc @@ -543,7 +543,7 @@ Enable the Iceberg integration for the topic. You can choose one of three modes. **Related topics**: -- xref:manage:topic-iceberg-integration.adoc[] +- xref:manage:about-iceberg-topics.adoc[] ==== redpanda.iceberg.delete @@ -553,7 +553,7 @@ Whether the corresponding Iceberg table is deleted upon deleting the topic. **Related topics**: -- xref:manage:topic-iceberg-integration.adoc[] +- xref:manage:about-iceberg-topics.adoc[] ==== redpanda.iceberg.partition.spec @@ -563,7 +563,7 @@ The https://iceberg.apache.org/docs/nightly/partitioning/[partitioning^] specifi **Related topics**: -- xref:manage:topic-iceberg-integration.adoc[] +- xref:manage:about-iceberg-topics.adoc[] ==== redpanda.iceberg.invalid.record.action @@ -578,7 +578,7 @@ Whether to write invalid records to a dead-letter queue (DLQ). 
**Related topics**: -- xref:manage:topic-iceberg-integration.adoc[] +- xref:manage:about-iceberg-topics.adoc[] === Redpanda topic properties From 9dfe691bb2a5312db490fb76bdf13a323d1ce91b Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 7 Apr 2025 11:07:55 -0600 Subject: [PATCH 27/43] rename about-iceberg-topics.adoc --- .../pages/properties/cluster-properties.adoc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/reference/pages/properties/cluster-properties.adoc b/modules/reference/pages/properties/cluster-properties.adoc index fe50d6f70c..205a0881b1 100644 --- a/modules/reference/pages/properties/cluster-properties.adoc +++ b/modules/reference/pages/properties/cluster-properties.adoc @@ -1897,7 +1897,7 @@ Default value for the `redpanda.iceberg.delete` topic property that determines i **Related topics**: -- xref:manage:iceberg/topic-iceberg-integration.adoc[] +- xref:manage:iceberg/about-iceberg-topics.adoc[] --- @@ -1931,9 +1931,7 @@ Whether to disable tagging of Iceberg snapshots. 
These tags are used to ensure t **Related topics**: -- xref:manage:iceberg/use-iceberg-catalogs.adoc[] -- xref:manage:iceberg/query-iceberg-topics.adoc[] -- xref:manage:iceberg/topic-iceberg-integration.adoc[] +- xref:manage:iceberg/about-iceberg-topics.adoc[] --- @@ -1958,7 +1956,7 @@ endif::[] **Related topics**: -- xref:manage:iceberg/topic-iceberg-integration.adoc[] +- xref:manage:iceberg/about-iceberg-topics.adoc[] --- @@ -1983,7 +1981,7 @@ endif::[] **Related topics**: -- xref:manage:iceberg/topic-iceberg-integration.adoc#manage-dead-letter-queue[Manage dead-letter queue] +- xref:manage:iceberg/about-iceberg-topics.adoc#manage-dead-letter-queue[Manage dead-letter queue] --- @@ -2249,7 +2247,7 @@ Average size per partition of the datalake translation backlog that the backlog **Related topics**: -- xref:manage:iceberg/topic-iceberg-integration.adoc[] +- xref:manage:iceberg/about-iceberg-topics.adoc[] --- @@ -2258,10 +2256,12 @@ Average size per partition of the datalake translation backlog that the backlog ifndef::env-cloud[] Default value for the xref:reference:properties/topic-properties.adoc#redpanda-iceberg-target-lag-ms[`redpanda.iceberg.target.lag.ms`] topic property, which controls how often the data in an Iceberg table is refreshed with new data from the corresponding Redpanda topic. Redpanda attempts to commit all data produced to the topic within the lag target, subject to resource availability. + endif::[] ifdef::env-cloud[] Default value for the `redpanda.iceberg.target.lag.ms` topic property, which controls how often the data in an Iceberg table is refreshed with new data from the corresponding Redpanda topic. Redpanda attempts to commit all data produced to the topic within the lag target, subject to resource availability. 
+ endif::[] *Unit:* milliseconds @@ -2278,7 +2278,7 @@ endif::[] **Related topics**: -- xref:manage:iceberg/topic-iceberg-integration.adoc[] +- xref:manage:iceberg/about-iceberg-topics.adoc[] --- From 0835a1d0dd2d656fa40c1c6de3373e2f71d4bec9 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 7 Apr 2025 12:10:40 -0600 Subject: [PATCH 28/43] fix errors --- .../produce-data/configure-producers.adoc | 1 + .../pages/kubernetes/k-manage-topics.adoc | 2 +- modules/manage/partials/audit-logging.adoc | 1 - .../iceberg/about-iceberg-topics.adoc | 28 ++++++++++++++++++- .../pages/properties/cluster-properties.adoc | 7 ++++- .../pages/properties/topic-properties.adoc | 8 +++--- 6 files changed, 39 insertions(+), 8 deletions(-) diff --git a/modules/develop/pages/produce-data/configure-producers.adoc b/modules/develop/pages/produce-data/configure-producers.adoc index 46a2db7d4e..6c954b6564 100644 --- a/modules/develop/pages/produce-data/configure-producers.adoc +++ b/modules/develop/pages/produce-data/configure-producers.adoc @@ -85,6 +85,7 @@ been fsynced. Messages that have not been fsynced to disk may be lost in the event of a broker crash. So when using `acks=all`, the Redpanda default configuration is more resilient than Kafka's. You can also consider using write caching, which is a relaxed mode of `acks=all` that acknowledges a message as soon as it is received and acknowledged on a majority of brokers, without waiting for it to fsync to disk. This provides lower latency while still ensuring that a majority of brokers acknowledge the write. 
+ endif::[] === `retries` diff --git a/modules/manage/pages/kubernetes/k-manage-topics.adoc b/modules/manage/pages/kubernetes/k-manage-topics.adoc index 7908776a64..b89d859bce 100644 --- a/modules/manage/pages/kubernetes/k-manage-topics.adoc +++ b/modules/manage/pages/kubernetes/k-manage-topics.adoc @@ -139,7 +139,7 @@ include::shared:partial$enterprise-license.adoc[] In addition to the general prerequisites for managing topics, you must have the following: -- xref:manage:about-iceberg-topics.adoc[Iceberg support] must be enabled on your Redpanda cluster. +- xref:manage:iceberg/about-iceberg-topics.adoc[Iceberg support] must be enabled on your Redpanda cluster. - xref:manage:kubernetes/tiered-storage/k-tiered-storage.adoc[Tiered Storage] must be enabled on your Redpanda cluster. To create an Iceberg topic, set the `redpanda.iceberg.mode` configuration in the `additionalConfig` property of the `Topic` resource. diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index 8981b724bf..da137ce5a4 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -33,7 +33,6 @@ Redpanda's audit logging mechanism supports several options to control the volum * `auditLogging.enabled`: Sets the value of the `audit_enabled` cluster property to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. * `auditLogging.enabledEventTypes`: Sets the value of the `audit_enabled_event_types` cluster property. This option is a list of JSON strings identifying the <> to include in the audit log. Valid values include any of the following - `management`, `produce`, `consume`, `describe`, `heartbeat`, `authenticate`, `schema_registry`, `admin`. Default: `'["management","authenticate","admin"]'`. 
-* `auditLogging.excludedTopics`: Sets the value of the xref:reference:cluster-properties.adoc#audit_excluded_topics[`audit_excluded_topics`] cluster property. This option is a list of JSON strings identifying the topics the audit logging system should ignore. This list cannot include the `_redpanda.audit_log` topic. Redpanda will reject the command if you do attempt to include that topic. Default: `null`. * `auditLogging.excludedPrincipals`: Sets the value of the `audit_excluded_principals` cluster property. This option is a list of JSON strings identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. To configure audit logging, see xref:manage:cluster-maintenance/config-cluster.adoc[]. diff --git a/modules/manage/partials/iceberg/about-iceberg-topics.adoc b/modules/manage/partials/iceberg/about-iceberg-topics.adoc index ed217b7c05..dab52d2ac6 100644 --- a/modules/manage/partials/iceberg/about-iceberg-topics.adoc +++ b/modules/manage/partials/iceberg/about-iceberg-topics.adoc @@ -62,7 +62,13 @@ endif::[] == Enable Iceberg integration +ifndef::env-cloud[] To create an Iceberg table for a Redpanda topic, you must set the cluster configuration property config_ref:iceberg_enabled,true,properties/cluster-properties[`iceberg_enabled`] to `true`, and also configure the topic property xref:reference:properties/topic-properties.adoc#redpanda-iceberg-mode[`redpanda.iceberg.mode`]. You can choose to provide a schema if you need the Iceberg table to be structured with defined columns. +endif::[] + +ifdef::env-cloud[] +To create an Iceberg table for a Redpanda topic, you must set the cluster configuration property config_ref:iceberg_enabled,true,properties/cluster-properties[`iceberg_enabled`] to `true`, and also configure the topic property `redpanda.iceberg.mode`. You can choose to provide a schema if you need the Iceberg table to be structured with defined columns. +endif::[] . 
Set the `iceberg_enabled` configuration option on your cluster to `true`. You must restart your cluster if you change this configuration for a running cluster. ifdef::env-cloud[] @@ -115,7 +121,12 @@ TOPIC STATUS OK ---- + +ifndef::env-cloud[] To improve query performance, consider implementing custom https://iceberg.apache.org/docs/nightly/partitioning/[partitioning^] for the Iceberg topic. Use the xref:reference:properties/topic-properties.adoc#redpanda-iceberg-partition-spec[`redpanda.iceberg.partition.spec`] topic property to define the partitioning scheme: +endif::[] +ifdef::env-cloud[] +To improve query performance, consider implementing custom https://iceberg.apache.org/docs/nightly/partitioning/[partitioning^] for the Iceberg topic. Use the `redpanda.iceberg.partition.spec` topic property to define the partitioning scheme: +endif::[] + [,bash,] ---- @@ -183,7 +194,12 @@ The Iceberg table resides in a namespace called `redpanda` and has the same name == About schema support and translation to Iceberg format -The xref:reference:properties/topic-properties.adoc#redpanda-iceberg-mode[`redpanda.iceberg.mode`] property determines how Redpanda maps the topic data to the Iceberg table structure. You can have the generated Iceberg table match the structure of a Avro or Protobuf schema in the Schema Registry, or you can use the `key_value` mode where Redpanda stores the record values as-is in the table. +ifndef::env-cloud[] +The xref:reference:properties/topic-properties.adoc#redpanda-iceberg-mode[`redpanda.iceberg.mode`] topic property determines how Redpanda maps the topic data to the Iceberg table structure. You can have the generated Iceberg table match the structure of an Avro or Protobuf schema in the Schema Registry, or you can use the `key_value` mode where Redpanda stores the record values as-is in the table. +endif::[] +ifdef::env-cloud[] +The `redpanda.iceberg.mode` topic property determines how Redpanda maps the topic data to the Iceberg table structure.
You can have the generated Iceberg table match the structure of an Avro or Protobuf schema in the Schema Registry, or you can use the `key_value` mode where Redpanda stores the record values as-is in the table. +endif::[] The JSON Schema format is not supported. If your topic data is in JSON, use the `key_value` mode. @@ -410,7 +426,12 @@ Querying the Iceberg table for `demo-topic` includes the new column `ts`: Errors may occur when translating records in the `value_schema_id_prefix` mode to the Iceberg table format; for example, if you do not use the Schema Registry wire format with the magic byte, if the schema ID in the record is not found in the Schema Registry, or if an Avro or Protobuf data type cannot be translated to an Iceberg type. +ifndef::env-cloud[] If Redpanda encounters an error while writing a record to the Iceberg table, Redpanda writes the record to a separate dead-letter queue (DLQ) Iceberg table named `~dlq`. To disable the default behavior for a topic and drop the record, set the xref:reference:properties/topic-properties.adoc#redpanda-iceberg-invalid-record-action[`redpanda.iceberg.invalid.record.action`] topic property to `drop`. You can also configure the default cluster-wide behavior for invalid records by setting the `iceberg_invalid_record_action` property. +endif::[] +ifdef::env-cloud[] +If Redpanda encounters an error while writing a record to the Iceberg table, Redpanda writes the record to a separate dead-letter queue (DLQ) Iceberg table named `~dlq`. To disable the default behavior for a topic and drop the record, set the `redpanda.iceberg.invalid.record.action` topic property to `drop`. You can also configure the default cluster-wide behavior for invalid records by setting the `iceberg_invalid_record_action` property. +endif::[] The DLQ table itself uses the `key_value` schema, consisting of two columns: the record metadata including the key, and a binary column for the record's value.
@@ -484,7 +505,12 @@ You may need to increase the size of your Redpanda cluster to accommodate the ad === Use custom partitioning +ifndef::env-cloud[] To improve query performance, consider implementing custom https://iceberg.apache.org/docs/nightly/partitioning/[partitioning^] for the Iceberg topic. Use the xref:reference:properties/topic-properties.adoc#redpanda-iceberg-partition-spec[`redpanda.iceberg.partition.spec`] topic property to define the partitioning scheme: +endif::[] +ifdef::env-cloud[] +To improve query performance, consider implementing custom https://iceberg.apache.org/docs/nightly/partitioning/[partitioning^] for the Iceberg topic. Use the `redpanda.iceberg.partition.spec` topic property to define the partitioning scheme: +endif::[] [,bash,] ---- diff --git a/modules/reference/pages/properties/cluster-properties.adoc b/modules/reference/pages/properties/cluster-properties.adoc index 205a0881b1..88e967d41d 100644 --- a/modules/reference/pages/properties/cluster-properties.adoc +++ b/modules/reference/pages/properties/cluster-properties.adoc @@ -1901,7 +1901,7 @@ Default value for the `redpanda.iceberg.delete` topic property that determines i --- -end::iceberg_delete[] +// end::iceberg_delete[] === iceberg_disable_automatic_snapshot_expiry @@ -2102,6 +2102,8 @@ URL of Iceberg REST catalog endpoint. --- +// end::iceberg_rest_catalog_endpoint[] + // tag::iceberg_rest_catalog_oauth2_server_uri[] === iceberg_rest_catalog_oauth2_server_uri @@ -2282,6 +2284,8 @@ endif::[] --- +// end::iceberg_target_lag_ms[] + === id_allocator_batch_size The ID allocator allocates messages in batches (each batch is a one log record) and then serves requests from memory without touching the log until the batch is exhausted. 
@@ -3256,6 +3260,7 @@ The topic property xref:./topic-properties.adoc#cleanuppolicy[`cleanup.policy`] *Default:* `delete` --- + // end::log_cleanup_policy[] === log_compaction_adjacent_merge_self_compaction_count diff --git a/modules/reference/pages/properties/topic-properties.adoc b/modules/reference/pages/properties/topic-properties.adoc index 90979803d7..c8fd9b57b3 100644 --- a/modules/reference/pages/properties/topic-properties.adoc +++ b/modules/reference/pages/properties/topic-properties.adoc @@ -543,7 +543,7 @@ Enable the Iceberg integration for the topic. You can choose one of three modes. **Related topics**: -- xref:manage:about-iceberg-topics.adoc[] +- xref:manage:iceberg/about-iceberg-topics.adoc[] ==== redpanda.iceberg.delete @@ -553,7 +553,7 @@ Whether the corresponding Iceberg table is deleted upon deleting the topic. **Related topics**: -- xref:manage:about-iceberg-topics.adoc[] +- xref:manage:iceberg/about-iceberg-topics.adoc[] ==== redpanda.iceberg.partition.spec @@ -563,7 +563,7 @@ The https://iceberg.apache.org/docs/nightly/partitioning/[partitioning^] specifi **Related topics**: -- xref:manage:about-iceberg-topics.adoc[] +- xref:manage:iceberg/about-iceberg-topics.adoc[] ==== redpanda.iceberg.invalid.record.action @@ -578,7 +578,7 @@ Whether to write invalid records to a dead-letter queue (DLQ). 
**Related topics**: -- xref:manage:about-iceberg-topics.adoc[] +- xref:manage:iceberg/about-iceberg-topics.adoc[] === Redpanda topic properties From 17600580d55140c182d574930f806fa13184100c Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 7 Apr 2025 16:50:19 -0600 Subject: [PATCH 29/43] Update modules/console/pages/ui/edit-topic-configuration.adoc Co-authored-by: Kat Batuigas <36839689+kbatuigas@users.noreply.github.com> --- modules/console/pages/ui/edit-topic-configuration.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/console/pages/ui/edit-topic-configuration.adoc b/modules/console/pages/ui/edit-topic-configuration.adoc index 6fb1fd1c4b..695fe3337d 100644 --- a/modules/console/pages/ui/edit-topic-configuration.adoc +++ b/modules/console/pages/ui/edit-topic-configuration.adoc @@ -1,4 +1,4 @@ -= Edit Topic Configuration in the {ui} += Edit Topic Configuration in {ui} :page-aliases: manage:console/edit-topic-configuration.adoc :description: Use {ui} to edit the configuration of existing topics in a cluster. // tag::single-source[] From af96d92d72ab0723b0b17dd9238b8a9ea79cd76c Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 7 Apr 2025 17:08:13 -0600 Subject: [PATCH 30/43] incorporate feedback from code review --- modules/console/pages/ui/schema-reg.adoc | 2 +- modules/develop/pages/data-transforms/configure.adoc | 1 - modules/develop/pages/data-transforms/monitor.adoc | 2 +- modules/manage/pages/schema-reg/schema-reg-api.adoc | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/modules/console/pages/ui/schema-reg.adoc b/modules/console/pages/ui/schema-reg.adoc index 34a549381e..8a5668efdd 100644 --- a/modules/console/pages/ui/schema-reg.adoc +++ b/modules/console/pages/ui/schema-reg.adoc @@ -8,7 +8,7 @@ In {ui}, the *Schema Registry* menu lists registered and verified schemas, inclu [NOTE] ==== -The Schema Registry is built into Redpanda, and you can use it with the Schema Registry API or with {ui}. 
This section describes Schema Registry operations available in {ui}. +The Schema Registry is built into Redpanda, and you can use it with the Schema Registry API or with the UI. This section describes Schema Registry operations available in the UI. ==== ifndef::env-cloud[] diff --git a/modules/develop/pages/data-transforms/configure.adoc b/modules/develop/pages/data-transforms/configure.adoc index 71ac0e5030..3b5b8ffa54 100644 --- a/modules/develop/pages/data-transforms/configure.adoc +++ b/modules/develop/pages/data-transforms/configure.adoc @@ -77,7 +77,6 @@ The maximum number of functions that can be deployed to a cluster is equal to `d endif::[] ifndef::env-cloud[] -[[max-threads]] [[binary-size]] === Configure maximum binary size diff --git a/modules/develop/pages/data-transforms/monitor.adoc b/modules/develop/pages/data-transforms/monitor.adoc index c555d8ceb0..afd30a6b03 100644 --- a/modules/develop/pages/data-transforms/monitor.adoc +++ b/modules/develop/pages/data-transforms/monitor.adoc @@ -76,7 +76,7 @@ TIP: You can also xref:console:ui/data-transforms.adoc#logs[view logs in {ui}]. endif::[] ifdef::env-cloud[] -TIP: You can also view logs in Redpanda Cloud]. +TIP: You can also view logs in the UI. endif::[] By default, Redpanda provides several settings to manage logging for data transforms, such as buffer capacity, flush interval, and maximum log line length. These settings ensure that logging operates efficiently without overwhelming the system. However, you may need to adjust these settings based on your specific requirements and workloads. For information on how to configure logging, see the xref:develop:data-transforms/configure.adoc#log[Configure transform logging] section of the configuration guide. 
diff --git a/modules/manage/pages/schema-reg/schema-reg-api.adoc b/modules/manage/pages/schema-reg/schema-reg-api.adoc index 1d3c9e195a..89a78c3c5f 100644 --- a/modules/manage/pages/schema-reg/schema-reg-api.adoc +++ b/modules/manage/pages/schema-reg/schema-reg-api.adoc @@ -7,7 +7,7 @@ Schemas provide human-readable documentation for an API. They verify that data c [NOTE] ==== -The Schema Registry is built into Redpanda, and you can use it with the API or {ui}. This section describes operations available in the xref:api:ROOT:pandaproxy-schema-registry.adoc[Schema Registry API]. +The Schema Registry is built into Redpanda, and you can use it with the API or the UI. This section describes operations available in the xref:api:ROOT:pandaproxy-schema-registry.adoc[Schema Registry API]. ==== The Redpanda Schema Registry has API endpoints that allow you to perform the following tasks: From d9f6e106e0f01ad7d9a6bb5f3bfc45eb2754f0a3 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 7 Apr 2025 17:22:01 -0600 Subject: [PATCH 31/43] update to latest description --- modules/console/pages/ui/programmable-push-filters.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/console/pages/ui/programmable-push-filters.adoc b/modules/console/pages/ui/programmable-push-filters.adoc index cf1618a08c..00381edf3e 100644 --- a/modules/console/pages/ui/programmable-push-filters.adoc +++ b/modules/console/pages/ui/programmable-push-filters.adoc @@ -1,7 +1,7 @@ = Filter Messages with JavaScript in {ui} :page-aliases: console:features/programmable-push-filters.adoc, reference:console/programmable-push-filters.adoc // Do not put page aliases in the single-sourced content -:description: Learn how to filter Kafka records in {ui} based on your provided JavaScript code. +:description: Learn how to filter Kafka records using custom JavaScript code within {ui}. 
// tag::single-source[] You can use push-down filters in {ui} to search through large Kafka topics that may contain millions of records. Filters are JavaScript functions executed on the backend, evaluating each record individually. Your function must return a boolean: From beac26f513c7306edd444f900f73ae9ff6ea3db6 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 7 Apr 2025 18:18:01 -0600 Subject: [PATCH 32/43] contact RP to configure auditable events --- modules/manage/partials/audit-logging.adoc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index da137ce5a4..ea659e24af 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -35,6 +35,8 @@ Redpanda's audit logging mechanism supports several options to control the volum * `auditLogging.enabledEventTypes`: Sets the value of the `audit_enabled_event_types` cluster property. This option is a list of JSON strings identifying the <> to include in the audit log. Valid values include any of the following - `management`, `produce`, `consume`, `describe`, `heartbeat`, `authenticate`, `schema_registry`, `admin`. Default: `'["management","authenticate","admin"]'`. * `auditLogging.excludedPrincipals`: Sets the value of the `audit_excluded_principals` cluster property. This option is a list of JSON strings identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. +NOTE: In Redpanda Cloud, audit logging is enabled by default. Cluster administrators can configure the audited topics and principals. However, only the Redpanda team can configure the type of audited events. For more information or support, contact your Redpanda account team. + To configure audit logging, see xref:manage:cluster-maintenance/config-cluster.adoc[].
endif::[] From c4645d3963fa3cba3432f429eb186a853c375314 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 7 Apr 2025 18:19:22 -0600 Subject: [PATCH 33/43] minor edit --- modules/manage/partials/audit-logging.adoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index ea659e24af..d3d69ebc5e 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -35,9 +35,10 @@ Redpanda's audit logging mechanism supports several options to control the volum * `auditLogging.enabledEventTypes`: Sets the value of the `audit_enabled_event_types` cluster property. This option is a list of JSON strings identifying the <> to include in the audit log. Valid values include any of the following - `management`, `produce`, `consume`, `describe`, `heartbeat`, `authenticate`, `schema_registry`, `admin`. Default: `'["management","authenticate","admin"]'`. * `auditLogging.excludedPrincipals`: Sets the value of the `audit_excluded_principals` cluster property. This option is a list of JSON strings identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. +To configure audit logging, see xref:manage:cluster-maintenance/config-cluster.adoc[]. + NOTE: In Redpanda Cloud, audit logging is enabled by default. Cluster administrators can configure the audited topics and principals. However, only the Redpanda team can configure the type of audited events. For more information or support, contact your Redpanda account team. -To configure audit logging, see xref:manage:cluster-maintenance/config-cluster.adoc[].
endif::[] ifndef::env-cloud[] From f5610995f627690fd9eb07558e2e403122830967 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 7 Apr 2025 21:33:44 -0600 Subject: [PATCH 34/43] fix conditionals in audit-logging.adoc --- modules/manage/partials/audit-logging.adoc | 64 +++++++++++++--------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index d3d69ebc5e..5546cad9ac 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -1,3 +1,5 @@ +// tag::single-source[] + ifndef::env-cloud[] [NOTE] ==== @@ -11,9 +13,10 @@ Redpanda's auditing capabilities support recording both administrative and opera With audit logging enabled, there should be no noticeable changes in performance other than slightly elevated CPU usage. +ifndef::env-cloud[] NOTE: Audit logging is configured at the cluster level. Redpanda supports excluding specific topics or principals from auditing to help reduce noise in the log. Audit logging is disabled by default. - +endif::[] == Audit log flow @@ -27,30 +30,42 @@ Messages recorded to the audit log topic comply with the https://schema.ocsf.io/ == Audit log configuration options -ifdef::env-cloud[] +Redpanda's audit logging mechanism supports several options to control the volume and availability of audit records. Configuration is applied at the cluster level. -Redpanda's audit logging mechanism supports several options to control the volume and availability of audit records. +ifdef::env-cloud[] * `auditLogging.enabled`: Sets the value of the `audit_enabled` cluster property to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. -* `auditLogging.enabledEventTypes`: Sets the value of the `audit_enabled_event_types` cluster property. 
This option is a list of JSON strings identifying the <> to include in the audit log. Valid values include any of the following - `management`, `produce`, `consume`, `describe`, `heartbeat`, `authenticate`, `schema_registry`, `admin`. Default: `'["management","authenticate","admin"]'`. +* `auditLogging.enabledEventTypes`: Sets the value of the `audit_enabled_event_types` cluster property. This option is a list of JSON strings identifying the <> to include in the audit log. Valid values include any of the following: `management`, `produce`, `consume`, `describe`, `heartbeat`, `authenticate`, `schema_registry`, `admin`. Default: `'["management","authenticate","admin"]'`. * `auditLogging.excludedPrincipals`: Sets the value of the `audit_excluded_principals` cluster property. This option is a list of JSON strings identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. To configure audit logging, see xref:manage:cluster-maintenance/config-cluster.adoc[]. NOTE: In Redpanda Cloud, audit logging is enabled by default. Cluster administrators can configure the audited topics and principals. However, only the Redpanda team can configure the type of audited events. For more information or support, contact your Redpanda account team. +Use the `rpk cluster config` command to configure audit logs. Some options will require a cluster restart. You can verify this using `rpk cluster config status`. + +Some key tuning recommendations for your audit logging settings include: + +* Choose the type of events needed by setting `audit_enabled_event_types` to the desired list of event categories. Keep this as restrictive as possible based on your compliance and security needs to avoid excessive noise in your audit logs. +* Identify non-sensitive principals so that you can exclude them from auditing. Specify this list of principals in `audit_excluded_principals`.
This command accepts names in the form of `name` or `User:name`. +* Set `audit_enabled` to `true`. + +The sequence of commands in `rpk` for this audit log configuration is: + + rpk cluster config set audit_enabled_event_types '["management","describe","authenticate"]' + rpk cluster config set audit_excluded_principals '["User:principal1", "principal2"]' + rpk cluster config set audit_enabled true + endif::[] -ifndef::env-cloud[] -Redpanda's audit logging mechanism supports several options to control the volume and availability of audit records. Configuration is applied at the cluster level using the standard xref:manage:cluster-maintenance/cluster-property-configuration.adoc[cluster configuration mechanism]. ifdef::env-kubernetes[You can configure these options directly in either the Helm values or the Redpanda resource.] -ifdef::env-kubernetes[] +ifndef::env-cloud[] * `auditLogging.enabled`: Sets the value of the xref:reference:cluster-properties.adoc#audit_enabled[`audit_enabled`] cluster property to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. * `auditLogging.partitions`: Sets the value of the xref:reference:cluster-properties.adoc#audit_log_num_partitions[`audit_log_num_partitions`] cluster property to define the number of partitions used by a newly created audit topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. This cannot be altered for an existing audit log topic. Default: `12`. * `auditLogging.replicationFactor`: Sets the value of the xref:reference:cluster-properties.adoc#audit_log_replication_factor[`audit_log_replication_factor`] cluster property to define the replication factor for a newly created audit log topic. 
This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. This cannot be altered for existing audit log topics. If a value is not provided, Redpanda will use the `internal_topic_replication_factor` cluster config value. Default: `null`. -* `auditLogging.enabledEventTypes`: Sets the value of the xref:reference:cluster-properties.adoc#audit_enabled_event_types[`audit_enabled_event_types`] cluster property. This option is a list of JSON strings identifying the <> to include in the audit log. Valid values include any of the following - `management`, `produce`, `consume`, `describe`, `heartbeat`, `authenticate`, `schema_registry`, `admin`. Default: `'["management","authenticate","admin"]'`. +* `auditLogging.enabledEventTypes`: Sets the value of the xref:reference:cluster-properties.adoc#audit_enabled_event_types[`audit_enabled_event_types`] cluster property. This option is a list of JSON strings identifying the <> to include in the audit log. Valid values include any of the following: `management`, `produce`, `consume`, `describe`, `heartbeat`, `authenticate`, `schema_registry`, `admin`. Default: `'["management","authenticate","admin"]'`. * `auditLogging.excludedTopics`: Sets the value of the xref:reference:cluster-properties.adoc#audit_excluded_topics[`audit_excluded_topics`] cluster property. This option is a list of JSON strings identifying the topics the audit logging system should ignore. This list cannot include the `_redpanda.audit_log` topic. Redpanda will reject the command if you do attempt to include that topic. Default: `null`. * `auditLogging.excludedPrincipals`: Sets the value of the xref:reference:cluster-properties.adoc#audit_excluded_principals[`audit_excluded_principals`] cluster property. This option is a list of JSON strings identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. 
* `auditLogging.clientMaxBufferSize`: Sets the value of the xref:reference:cluster-properties.adoc#audit_client_max_buffer_size[`audit_client_max_buffer_size`] cluster property to define the number of bytes allocated by the internal audit client for audit messages. When changing this, you must disable audit logging and then re-enable it for the change to take effect. Consider increasing this if your system generates a very large number of audit records in a short amount of time. Default: `16777216`. @@ -60,15 +75,16 @@ ifdef::env-kubernetes[] Even though audited event messages are stored to a specialized immutable topic, standard topic settings still apply. For example, you can apply the same Tiered Storage, retention time, and replication settings available to normal topics. These particular options are important for controlling the amount of disk space utilized by your audit topics. IMPORTANT: You cannot change the values of `auditLogging.partitions` and `auditLogging.replicationFactor` after enabling audit logging because these settings impact the creation of the `_redpanda.audit_log` topic. The Kafka API allows you to add partitions or alter the replication factor after enabling audit logging, but Redpanda prevents you from altering these two configuration values directly. + endif::[] -ifndef::env-kubernetes[] +ifndef::env-cloud,env-kubernetes[] * xref:reference:cluster-properties.adoc#audit_enabled[`audit_enabled`]: Boolean value to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. * xref:reference:cluster-properties.adoc#audit_log_num_partitions[`audit_log_num_partitions`]: Integer value defining the number of partitions used by a newly created audit topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. 
This cannot be altered for an existing audit log topic. Default: `12`. * xref:reference:cluster-properties.adoc#audit_log_replication_factor[`audit_log_replication_factor`]: Optional Integer value defining the replication factor for a newly created audit log topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. This cannot be altered for existing audit log topics. If a value is not provided, Redpanda will use the `internal_topic_replication_factor` cluster config value. Default: `null`. * xref:reference:cluster-properties.adoc#audit_client_max_buffer_size[`audit_client_max_buffer_size`]: Integer value defining the number of bytes allocated by the internal audit client for audit messages. When changing this, you must disable audit logging and then re-enable it for the change to take effect. Consider increasing this if your system generates a very large number of audit records in a short amount of time. Default: `16777216`. * xref:reference:cluster-properties.adoc#audit_queue_max_buffer_size_per_shard[`audit_queue_max_buffer_size_per_shard`]: Integer value defining the maximum amount of memory in bytes used by the audit buffer in each shard. Once this size is reached, requests to log additional audit messages will return a non-retryable error. You must restart the cluster when changing this value. Default: `1048576`. -* xref:reference:cluster-properties.adoc#audit_enabled_event_types[`audit_enabled_event_types`]: List of strings in JSON style identifying the event types to include in the audit log. This may include any of the following - `management, produce, consume, describe, heartbeat, authenticate, schema_registry, admin`. Default: `'["management","authenticate","admin"]'`. +* xref:reference:cluster-properties.adoc#audit_enabled_event_types[`audit_enabled_event_types`]: List of strings in JSON style identifying the event types to include in the audit log. 
This may include any of the following: `management, produce, consume, describe, heartbeat, authenticate, schema_registry, admin`. Default: `'["management","authenticate","admin"]'`. * xref:reference:cluster-properties.adoc#audit_excluded_topics[`audit_excluded_topics`]: List of strings in JSON style identifying the topics the audit logging system should ignore. This list cannot include the `_redpanda.audit_log` topic. Redpanda will reject the command if you do attempt to include that topic. Default: `null`. * xref:reference:cluster-properties.adoc#audit_queue_drain_interval_ms[`audit_queue_drain_interval_ms`]: Internally, Redpanda batches audit log messages in memory and periodically writes them to the audit log topic. This defines the period in milliseconds between draining this queue to the audit log topic. Longer intervals may help prevent duplicate messages, especially in high throughput scenarios, but they also increase the risk of data loss during hard shutdowns where the queue is lost. Default: `500`. * xref:reference:cluster-properties.adoc#audit_excluded_principals[`audit_excluded_principals`]: List of strings in JSON style identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. @@ -76,6 +92,7 @@ ifndef::env-kubernetes[] Even though audited event messages are stored to a specialized immutable topic, standard topic settings still apply. For example, you can apply the same Tiered Storage, retention time, and replication settings available to normal topics. These particular options are important for controlling the amount of disk space utilized by your audit topics. IMPORTANT: You must configure certain audit logging properties before enabling audit logging because these settings impact the creation of the `_redpanda.audit_log` topic itself. These properties include: `audit_log_num_partitions` and `audit_log_replication_factor`. 
The Kafka API allows you to add partitions or alter the replication factor after enabling audit logging, but Redpanda prevents you from altering these two configuration values directly. + endif::[] == Audit logging event types @@ -148,11 +165,12 @@ All audit log settings are applied at the cluster level. ifdef::env-cloud[] See xref:manage:cluster-maintenance/config-cluster.adoc[] + endif::[] +ifdef::env-kubernetes[] You can configure audit log settings in the Redpanda Helm chart, using Helm values or the Redpanda resource with the Redpanda Operator. -ifdef::env-kubernetes[] [tabs] ====== Operator:: @@ -311,9 +329,6 @@ For details, see xref:manage:kubernetes/security/authentication/k-authentication - `auditLogging.enabled`: Enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If the topic is not found, Redpanda automatically creates one for you. Default: `false`. -endif::[] - -ifndef::env-kubernetes[] Use the `rpk cluster config` to configure audit logs. Some options will require a cluster restart. You can verify this using `rpk cluster config status`. 
Some key tuning recommendations for your audit logging settings include: @@ -335,17 +350,9 @@ The sequence of commands in `rpk` for this audit log configuration is: rpk cluster config set audit_enabled true rpk topic alter-config _redpanda.audit_log --set retention.ms=259200000 -The sequence of commands in `rpk` for this audit log configuration is: - - rpk cluster config set audit_log_num_partitions 6 - rpk cluster config set audit_log_replication_factor 5 - rpk cluster config set audit_enabled_event_types '["management","describe","authenticate"]' - rpk cluster config set audit_excluded_topics '["topic1","topic2"]' - rpk cluster config set audit_excluded_principals '["User:principal1", "principal2"]' - rpk cluster config set audit_enabled true - rpk topic alter-config _redpanda.audit_log --set retention.ms=259200000 endif::[] +ifndef::env-cloud[] == Optimize costs for audit logging When enabled, audit logging can quickly generate a very large amount of data, especially if all event types are selected. Proper configuration of audit logging is critical to avoid filling your disk or using excess Tiered Storage. The configuration options available help ensure your audit logs contain only the volume of data necessary to meeting your regulatory or legal requirements. @@ -354,22 +361,25 @@ With audit logging, the pattern of message generation may be very different from A typical scenario with audit logging is to route the messages to an analytics platform like Splunk. If your retention period is too long, you will find that you are storing excessive amounts of replicated messages in both Redpanda and in your analytics suite. Identifying the right balance of retention and replication settings minimizes this duplication while retaining your data in a system that provides actionable intelligence. -ifndef::env-cloud[] Assess the retention needs for your audit logs. You may not need to keep the logs around for the default seven days. 
This is controlled by setting xref:reference:topic-properties.adoc#retentionms[`retention.ms`] for the `_redpanda.audit_log` topic or by setting xref:reference:cluster-properties.adoc#delete_retention_ms[`delete_retention_ms`] at the cluster level. == Next steps -xref:manage:audit-logging/audit-log-samples.adoc[See samples of audit log messages]. +xref:manage:audit-logging/audit-log-samples.adoc[See samples of audit log messages] include::shared:partial$suggested-reading.adoc[] - xref:reference:topic-properties.adoc[] - xref:develop:config-topics.adoc[] + endif::[] ifdef::env-cloud[] Assess the retention needs for your audit logs. You may not need to keep the logs around for the default seven days. This is controlled by setting `retention.ms` for the `_redpanda.audit_log` topic. +endif::[] + == Next steps -xref:manage:cluster-maintenance/audit-log-samples.adoc[See samples of audit log messages]. -endif::[] \ No newline at end of file +xref:manage:audit-logging/audit-log-samples.adoc[See samples of audit log messages] + +// end::single-source[] \ No newline at end of file From 4f190b33edf2a7e55d9f0532bc06c45e50604839 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 7 Apr 2025 22:01:42 -0600 Subject: [PATCH 35/43] conditionalize rpk cluster config in auditing --- modules/manage/partials/audit-logging.adoc | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index 5546cad9ac..4d2509e8bc 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -35,27 +35,19 @@ Redpanda's audit logging mechanism supports several options to control the volum ifdef::env-cloud[] * `auditLogging.enabled`: Sets the value of the `audit_enabled` cluster property to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. 
If none is found, Redpanda automatically creates one for you. Default: `false`. -* `auditLogging.enabledEventTypes`: Sets the value of the `audit_enabled_event_types` cluster property. This option is a list of JSON strings identifying the <> to include in the audit log. Valid values include any of the following: `management`, `produce`, `consume`, `describe`, `heartbeat`, `authenticate`, `schema_registry`, `admin`. Default: `'["management","authenticate","admin"]'`. +* `auditLogging.enabledEventTypes`: Sets the value of the `audit_enabled_event_types` cluster property. This option is a list of JSON strings identifying the event types to include in the audit log. Valid values include any of the following: `management`, `produce`, `consume`, `describe`, `heartbeat`, `authenticate`, `schema_registry`, `admin`. Default: `'["management","authenticate","admin"]'`. * `auditLogging.excludedPrincipals`: Sets the value of the `audit_excluded_principals` cluster property. This option is a list of JSON strings identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. To configure audit logging, see xref:manage:cluster-maintenance/config-cluster.adoc[]. NOTE: In Redpanda Cloud, audit logging in enabled by default. Cluster administrators can configure the audited topics and principals. However, only the Redpanda team can configure the type of audited events. For more information or support, contact your Redpanda account team. -Use the `rpk cluster config` to configure audit logs. Some options will require a cluster restart. You can verify this using `rpk cluster config status`. - Some key tuning recommendations for your audit logging settings include: * Choose the type of events needed by setting `audit_enabled_event_types` to the desired list of event categories. 
Keep this as restrictive as possible based on your compliance and security needs to avoid excessive noise in your audit logs. * Identify non-sensitive principals so that you can exclude them from auditing. Specify this list of principals in `audit_excluded_principals`. This command accepts names in the form of `name` or `User:name`. * Set `audit_enabled` to `true`. -The sequence of commands in `rpk` for this audit log configuration is: - - rpk cluster config set audit_enabled_event_types '["management","describe","authenticate"]' - rpk cluster config set audit_excluded_principals '["User:principal1", "principal2"]' - rpk cluster config set audit_enabled true - endif::[] @@ -163,11 +155,6 @@ a|* All Admin API calls All audit log settings are applied at the cluster level. -ifdef::env-cloud[] -See xref:manage:cluster-maintenance/config-cluster.adoc[] - -endif::[] - ifdef::env-kubernetes[] You can configure audit log settings in the Redpanda Helm chart, using Helm values or the Redpanda resource with the Redpanda Operator. From 2d2a18a703df462fab2318f95e60da3cdc12db7a Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 7 Apr 2025 22:34:09 -0600 Subject: [PATCH 36/43] conditionalize audit-loggin --- modules/manage/partials/audit-logging.adoc | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index 4d2509e8bc..6bf05e16af 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -1,10 +1,12 @@ // tag::single-source[] ifndef::env-cloud[] + [NOTE] ==== include::shared:partial$enterprise-license.adoc[] ==== + endif::[] Many scenarios for streaming data include the need for fine-grained auditing of user activity related to the system. This is especially true for regulated industries such as finance, healthcare, and the public sector. 
Complying with https://pcidssguide.com/whats-new-in-pci-dss-v4-0/[PCI DSS v4] standards, for example, requires verbose and detailed activity auditing, alerting, and analysis capabilities. @@ -42,12 +44,6 @@ To configure audit logging, see xref:manage:cluster-maintenance/config-cluster.a NOTE: In Redpanda Cloud, audit logging in enabled by default. Cluster administrators can configure the audited topics and principals. However, only the Redpanda team can configure the type of audited events. For more information or support, contact your Redpanda account team. -Some key tuning recommendations for your audit logging settings include: - -* Choose the type of events needed by setting `audit_enabled_event_types` to the desired list of event categories. Keep this as restrictive as possible based on your compliance and security needs to avoid excessive noise in your audit logs. -* Identify non-sensitive principals so that you can exclude them from auditing. Specify this list of principals in `audit_excluded_principals`. This command accepts names in the form of `name` or `User:name`. -* Set `audit_enabled` to `true`. - endif::[] @@ -87,6 +83,7 @@ IMPORTANT: You must configure certain audit logging properties before enabling a endif::[] +ifndef::cloud-env == Audit logging event types Redpanda's auditable events fall into one of eight different event types. The APIs associated with each event type are as follows. @@ -149,7 +146,7 @@ a|* All Schema Registry API calls |admin a|* All Admin API calls |=== - +endif::[] == Enable audit logging @@ -320,7 +317,7 @@ Use the `rpk cluster config` to configure audit logs. Some options will require Some key tuning recommendations for your audit logging settings include: -* If you wish to change the number of partitions or the replication factor for your audit log topic, set the `audit_log_num_partitions` and `audit_log_replication_factor` properties respectively. 
+* To change the number of partitions or the replication factor for your audit log topic, set the `audit_log_num_partitions` and `audit_log_replication_factor` properties respectively. * Choose the type of events needed by setting `audit_enabled_event_types` to the desired list of event categories. Keep this as restrictive as possible based on your compliance and security needs to avoid excessive noise in your audit logs. * Identify non-sensitive topics so that you can exclude them from auditing. Specify this list of topics in `audit_excluded_topics`. * Identify non-sensitive principals so that you can exclude them from auditing. Specify this list of principals in `audit_excluded_principals`. This command accepts names in the form of `name` or `User:name`. From 65517c228974f1fbd2fe6acb3555d3ab9f196c9f Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Mon, 7 Apr 2025 22:40:29 -0600 Subject: [PATCH 37/43] fix condition --- modules/manage/partials/audit-logging.adoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index 6bf05e16af..58a039b627 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -83,7 +83,7 @@ IMPORTANT: You must configure certain audit logging properties before enabling a endif::[] -ifndef::cloud-env +ifndef::env-cloud[] == Audit logging event types Redpanda's auditable events fall into one of eight different event types. The APIs associated with each event type are as follows. 
@@ -146,6 +146,7 @@ a|* All Schema Registry API calls |admin a|* All Admin API calls |=== + endif::[] == Enable audit logging From 240d4034530f832b0ce695cef1eb3c5f96ca70d6 Mon Sep 17 00:00:00 2001 From: JakeSCahill Date: Tue, 8 Apr 2025 09:03:13 +0100 Subject: [PATCH 38/43] Fix single-sourcing --- .../pages/data-transforms/configure.adoc | 2 +- modules/manage/partials/audit-logging.adoc | 37 +++++++++++-------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/modules/develop/pages/data-transforms/configure.adoc b/modules/develop/pages/data-transforms/configure.adoc index 3b5b8ffa54..984f193412 100644 --- a/modules/develop/pages/data-transforms/configure.adoc +++ b/modules/develop/pages/data-transforms/configure.adoc @@ -74,7 +74,7 @@ Set the following based on the number of functions you have and the amount of me - xref:reference:properties/cluster-properties.adoc#data_transforms_per_function_memory_limit[`data_transforms_per_function_memory_limit`]: Adjust this setting if individual transform functions require more memory to process records efficiently. Reducing it may cause memory errors in complex transforms. The maximum number of functions that can be deployed to a cluster is equal to `data_transforms_per_core_memory_reservation` / `data_transforms_per_function_memory_limit`. When that limit is hit, Redpanda cannot allocate memory for the VM and the transforms stay in `errored` states. 
-endif::[] +endif::[] ifndef::env-cloud[] [[binary-size]] diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index 58a039b627..68bd009e44 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -1,5 +1,7 @@ // tag::single-source[] +:env-cloud: true + ifndef::env-cloud[] [NOTE] @@ -34,22 +36,18 @@ Messages recorded to the audit log topic comply with the https://schema.ocsf.io/ Redpanda's audit logging mechanism supports several options to control the volume and availability of audit records. Configuration is applied at the cluster level. -ifdef::env-cloud[] - -* `auditLogging.enabled`: Sets the value of the `audit_enabled` cluster property to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. -* `auditLogging.enabledEventTypes`: Sets the value of the `audit_enabled_event_types` cluster property. This option is a list of JSON strings identifying the event types to include in the audit log. Valid values include any of the following: `management`, `produce`, `consume`, `describe`, `heartbeat`, `authenticate`, `schema_registry`, `admin`. Default: `'["management","authenticate","admin"]'`. -* `auditLogging.excludedPrincipals`: Sets the value of the `audit_excluded_principals` cluster property. This option is a list of JSON strings identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. - +ifdef::env-cloud[] To configure audit logging, see xref:manage:cluster-maintenance/config-cluster.adoc[]. -NOTE: In Redpanda Cloud, audit logging in enabled by default. Cluster administrators can configure the audited topics and principals. However, only the Redpanda team can configure the type of audited events. 
For more information or support, contact your Redpanda account team. +* xref:reference:cluster-properties.adoc#audit_enabled[`audit_enabled`]: Boolean value to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `true`. +* xref:reference:cluster-properties.adoc#audit_enabled_event_types[`audit_enabled_event_types`]: List of strings in JSON style identifying the event types to include in the audit log. This may include any of the following: `management, produce, consume, describe, heartbeat, authenticate, schema_registry, admin`. Default: `'["management","authenticate","admin"]'`. +* xref:reference:cluster-properties.adoc#audit_excluded_principals[`audit_excluded_principals`]: List of strings in JSON style identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. endif::[] +ifdef::env-kubernetes[] +You can configure these options directly in either the Helm values or the Redpanda resource. -ifdef::env-kubernetes[You can configure these options directly in either the Helm values or the Redpanda resource.] - -ifndef::env-cloud[] * `auditLogging.enabled`: Sets the value of the xref:reference:cluster-properties.adoc#audit_enabled[`audit_enabled`] cluster property to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. * `auditLogging.partitions`: Sets the value of the xref:reference:cluster-properties.adoc#audit_log_num_partitions[`audit_log_num_partitions`] cluster property to define the number of partitions used by a newly created audit topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. 
This cannot be altered for an existing audit log topic. Default: `12`. * `auditLogging.replicationFactor`: Sets the value of the xref:reference:cluster-properties.adoc#audit_log_replication_factor[`audit_log_replication_factor`] cluster property to define the replication factor for a newly created audit log topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. This cannot be altered for existing audit log topics. If a value is not provided, Redpanda will use the `internal_topic_replication_factor` cluster config value. Default: `null`. @@ -151,7 +149,13 @@ endif::[] == Enable audit logging +ifdef::env-cloud[] +Audit logging is enabled by default. Cluster administrators can configure the audited topics and principals. However, only the Redpanda team can configure the type of audited events. For more information or support, contact your Redpanda account team. +endif::[] + +ifndef::env-cloud[] All audit log settings are applied at the cluster level. +endif::[] ifdef::env-kubernetes[] You can configure audit log settings in the Redpanda Helm chart, using Helm values or the Redpanda resource with the Redpanda Operator. @@ -337,6 +341,14 @@ The sequence of commands in `rpk` for this audit log configuration is: endif::[] +ifdef::env-cloud[] + +== Configure retention for audit logs + +Assess the retention needs for your audit logs. You may not need to keep the logs around for the default seven days. This is controlled by setting xref:reference:topic-properties.adoc#retentionms[`retention.ms`] for the `_redpanda.audit_log` topic. + +endif::[] + ifndef::env-cloud[] == Optimize costs for audit logging @@ -359,11 +371,6 @@ include::shared:partial$suggested-reading.adoc[] endif::[] -ifdef::env-cloud[] -Assess the retention needs for your audit logs. You may not need to keep the logs around for the default seven days. This is controlled by setting `retention.ms` for the `_redpanda.audit_log` topic. 
- -endif::[] - == Next steps xref:manage:audit-logging/audit-log-samples.adoc[See samples of audit log messages] From 572dab6faacaaea71762b431c68e435c2a5b8d06 Mon Sep 17 00:00:00 2001 From: Jake Cahill <45230295+JakeSCahill@users.noreply.github.com> Date: Tue, 8 Apr 2025 09:22:53 +0100 Subject: [PATCH 39/43] Update audit-logging.adoc --- modules/manage/partials/audit-logging.adoc | 9 --------- 1 file changed, 9 deletions(-) diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index 68bd009e44..7da561205e 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -1,7 +1,3 @@ -// tag::single-source[] - -:env-cloud: true - ifndef::env-cloud[] [NOTE] @@ -370,8 +366,3 @@ include::shared:partial$suggested-reading.adoc[] - xref:develop:config-topics.adoc[] endif::[] - -== Next steps -xref:manage:audit-logging/audit-log-samples.adoc[See samples of audit log messages] - -// end::single-source[] \ No newline at end of file From 7d24f98905bc108487419115e47e833cc95d3a81 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Tue, 8 Apr 2025 08:05:45 -0600 Subject: [PATCH 40/43] fix links for Cloud properties --- modules/manage/partials/audit-logging.adoc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index 7da561205e..c30627dc53 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -35,9 +35,9 @@ Redpanda's audit logging mechanism supports several options to control the volum ifdef::env-cloud[] To configure audit logging, see xref:manage:cluster-maintenance/config-cluster.adoc[]. -* xref:reference:cluster-properties.adoc#audit_enabled[`audit_enabled`]: Boolean value to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. 
If none is found, Redpanda automatically creates one for you. Default: `true`. -* xref:reference:cluster-properties.adoc#audit_enabled_event_types[`audit_enabled_event_types`]: List of strings in JSON style identifying the event types to include in the audit log. This may include any of the following: `management, produce, consume, describe, heartbeat, authenticate, schema_registry, admin`. Default: `'["management","authenticate","admin"]'`. -* xref:reference:cluster-properties.adoc#audit_excluded_principals[`audit_excluded_principals`]: List of strings in JSON style identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. +* xref:reference:properties/cluster-properties.adoc#audit_enabled[`audit_enabled`]: Boolean value to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `true`. +* xref:reference:properties/cluster-properties.adoc#audit_enabled_event_types[`audit_enabled_event_types`]: List of strings in JSON style identifying the event types to include in the audit log. This may include any of the following: `management, produce, consume, describe, heartbeat, authenticate, schema_registry, admin`. Default: `'["management","authenticate","admin"]'`. +* xref:reference:properties/cluster-properties.adoc#audit_excluded_principals[`audit_excluded_principals`]: List of strings in JSON style identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. endif::[] @@ -341,7 +341,7 @@ ifdef::env-cloud[] == Configure retention for audit logs -Assess the retention needs for your audit logs. You may not need to keep the logs around for the default seven days. 
This is controlled by setting xref:reference:topic-properties.adoc#retentionms[`retention.ms`] for the `_redpanda.audit_log` topic. +Assess the retention needs for your audit logs. You may not need to keep the logs around for the default seven days. This is controlled by setting the `retention.ms` property for the `_redpanda.audit_log` topic. endif::[] From f7609a971140d781778395307aea549e5f9e9af9 Mon Sep 17 00:00:00 2001 From: Jake Cahill <45230295+JakeSCahill@users.noreply.github.com> Date: Tue, 8 Apr 2025 15:13:42 +0100 Subject: [PATCH 41/43] Update audit-logging.adoc --- modules/manage/partials/audit-logging.adoc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index c30627dc53..71ccd6e5c4 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -314,14 +314,16 @@ For details, see xref:manage:kubernetes/security/authentication/k-authentication - `auditLogging.enabled`: Enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If the topic is not found, Redpanda automatically creates one for you. Default: `false`. +endif::[] +ifndef::env-cloud,env-kubernetes[] Use the `rpk cluster config` to configure audit logs. Some options will require a cluster restart. You can verify this using `rpk cluster config status`. Some key tuning recommendations for your audit logging settings include: -* To change the number of partitions or the replication factor for your audit log topic, set the `audit_log_num_partitions` and `audit_log_replication_factor` properties respectively. +* To change the number of partitions or the replication factor for your audit log topic, set the `audit_log_num_partitions` and `audit_log_replication_factor` properties, respectively. 
* Choose the type of events needed by setting `audit_enabled_event_types` to the desired list of event categories. Keep this as restrictive as possible based on your compliance and security needs to avoid excessive noise in your audit logs. * Identify non-sensitive topics so that you can exclude them from auditing. Specify this list of topics in `audit_excluded_topics`. -* Identify non-sensitive principals so that you can exclude them from auditing. Specify this list of principals in `audit_excluded_principals`. This command accepts names in the form of `name` or `User:name`. +* Identify non-sensitive principals so that you can exclude them from auditing. Specify this list of principals in `audit_excluded_principals`. This command accepts names as `name` or `User:name`. * Set `audit_enabled` to `true`. * <>. From d19fe4d0f8c0de445f09a09c8cf0664d21be0c1d Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Tue, 8 Apr 2025 08:52:25 -0600 Subject: [PATCH 42/43] style edit, fix typos --- modules/manage/partials/audit-logging.adoc | 26 +++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index 71ccd6e5c4..63b5f607de 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -20,7 +20,7 @@ endif::[] == Audit log flow -The Redpanda audit log mechanism functions similar to the Kafka flow you may be familiar with. When a user interacts with another user or with a topics, Redpanda writes an event to a specialized audit topic. The audit topic is immutable. Only Redpanda can write to it. Users are prevented from writing to the audit topic directly and the Kafka API cannot create or delete it. +The Redpanda audit log mechanism functions similar to the Kafka flow. When a user interacts with another user or with a topic, Redpanda writes an event to a specialized audit topic. The audit topic is immutable. 
Only Redpanda can write to it. Users are prevented from writing to the audit topic directly and the Kafka API cannot create or delete it. image:shared:audit-logging-flow.png[Audit log flow] @@ -46,13 +46,13 @@ You can configure these options directly in either the Helm values or the Redpan * `auditLogging.enabled`: Sets the value of the xref:reference:cluster-properties.adoc#audit_enabled[`audit_enabled`] cluster property to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. * `auditLogging.partitions`: Sets the value of the xref:reference:cluster-properties.adoc#audit_log_num_partitions[`audit_log_num_partitions`] cluster property to define the number of partitions used by a newly created audit topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. This cannot be altered for an existing audit log topic. Default: `12`. -* `auditLogging.replicationFactor`: Sets the value of the xref:reference:cluster-properties.adoc#audit_log_replication_factor[`audit_log_replication_factor`] cluster property to define the replication factor for a newly created audit log topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. This cannot be altered for existing audit log topics. If a value is not provided, Redpanda will use the `internal_topic_replication_factor` cluster config value. Default: `null`. +* `auditLogging.replicationFactor`: Sets the value of the xref:reference:cluster-properties.adoc#audit_log_replication_factor[`audit_log_replication_factor`] cluster property to define the replication factor for a newly created audit log topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. 
This cannot be altered for existing audit log topics. If a value is not provided, Redpanda uses the `internal_topic_replication_factor` cluster property value. Default: `null`. * `auditLogging.enabledEventTypes`: Sets the value of the xref:reference:cluster-properties.adoc#audit_enabled_event_types[`audit_enabled_event_types`] cluster property. This option is a list of JSON strings identifying the <<audit-logging-event-types,event types>> to include in the audit log. Valid values include any of the following: `management`, `produce`, `consume`, `describe`, `heartbeat`, `authenticate`, `schema_registry`, `admin`. Default: `'["management","authenticate","admin"]'`. -* `auditLogging.excludedTopics`: Sets the value of the xref:reference:cluster-properties.adoc#audit_excluded_topics[`audit_excluded_topics`] cluster property. This option is a list of JSON strings identifying the topics the audit logging system should ignore. This list cannot include the `_redpanda.audit_log` topic. Redpanda will reject the command if you do attempt to include that topic. Default: `null`. +* `auditLogging.excludedTopics`: Sets the value of the xref:reference:cluster-properties.adoc#audit_excluded_topics[`audit_excluded_topics`] cluster property. This option is a list of JSON strings identifying the topics the audit logging system should ignore. This list cannot include the `_redpanda.audit_log` topic. Redpanda rejects the command if you do attempt to include that topic. Default: `null`. * `auditLogging.excludedPrincipals`: Sets the value of the xref:reference:cluster-properties.adoc#audit_excluded_principals[`audit_excluded_principals`] cluster property. This option is a list of JSON strings identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`.
* `auditLogging.clientMaxBufferSize`: Sets the value of the xref:reference:cluster-properties.adoc#audit_client_max_buffer_size[`audit_client_max_buffer_size`] cluster property to define the number of bytes allocated by the internal audit client for audit messages. When changing this, you must disable audit logging and then re-enable it for the change to take effect. Consider increasing this if your system generates a very large number of audit records in a short amount of time. Default: `16777216`. * `auditLogging.queueDrainIntervalMs`: Sets the value of the xref:reference:cluster-properties.adoc#audit_queue_drain_interval_ms[`audit_queue_drain_interval_ms`] cluster property. Internally, Redpanda batches audit log messages in memory and periodically writes them to the audit log topic. This option defines the period in milliseconds between draining this queue to the audit log topic. Longer intervals may help prevent duplicate messages, especially in high throughput scenarios, but they also increase the risk of data loss during hard shutdowns where the queue is lost. Default: `500`. -* `auditLogging.queueMaxBufferSizePerShard`: Sets the value of the xref:reference:cluster-properties.adoc#audit_queue_max_buffer_size_per_shard[`audit_queue_max_buffer_size_per_shard`] cluster property to define the maximum amount of memory in bytes used by the audit buffer in each shard. Once this size is reached, requests to log additional audit messages will return a non-retryable error. Default: `1048576`. +* `auditLogging.queueMaxBufferSizePerShard`: Sets the value of the xref:reference:cluster-properties.adoc#audit_queue_max_buffer_size_per_shard[`audit_queue_max_buffer_size_per_shard`] cluster property to define the maximum amount of memory in bytes used by the audit buffer in each shard. When this size is reached, requests to log additional audit messages return a non-retryable error. Default: `1048576`. 
Even though audited event messages are stored to a specialized immutable topic, standard topic settings still apply. For example, you can apply the same Tiered Storage, retention time, and replication settings available to normal topics. These particular options are important for controlling the amount of disk space utilized by your audit topics. @@ -63,11 +63,11 @@ endif::[] ifndef::env-cloud,env-kubernetes[] * xref:reference:cluster-properties.adoc#audit_enabled[`audit_enabled`]: Boolean value to enable audit logging. When you set this to `true`, Redpanda checks for an existing topic named `_redpanda.audit_log`. If none is found, Redpanda automatically creates one for you. Default: `false`. * xref:reference:cluster-properties.adoc#audit_log_num_partitions[`audit_log_num_partitions`]: Integer value defining the number of partitions used by a newly created audit topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. This cannot be altered for an existing audit log topic. Default: `12`. -* xref:reference:cluster-properties.adoc#audit_log_replication_factor[`audit_log_replication_factor`]: Optional Integer value defining the replication factor for a newly created audit log topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. This cannot be altered for existing audit log topics. If a value is not provided, Redpanda will use the `internal_topic_replication_factor` cluster config value. Default: `null`. +* xref:reference:cluster-properties.adoc#audit_log_replication_factor[`audit_log_replication_factor`]: Optional Integer value defining the replication factor for a newly created audit log topic. This configuration applies only to the audit log topic and may be different from the cluster or other topic configurations. This cannot be altered for existing audit log topics. 
If a value is not provided, Redpanda uses the `internal_topic_replication_factor` cluster property value. Default: `null`. * xref:reference:cluster-properties.adoc#audit_client_max_buffer_size[`audit_client_max_buffer_size`]: Integer value defining the number of bytes allocated by the internal audit client for audit messages. When changing this, you must disable audit logging and then re-enable it for the change to take effect. Consider increasing this if your system generates a very large number of audit records in a short amount of time. Default: `16777216`. -* xref:reference:cluster-properties.adoc#audit_queue_max_buffer_size_per_shard[`audit_queue_max_buffer_size_per_shard`]: Integer value defining the maximum amount of memory in bytes used by the audit buffer in each shard. Once this size is reached, requests to log additional audit messages will return a non-retryable error. You must restart the cluster when changing this value. Default: `1048576`. +* xref:reference:cluster-properties.adoc#audit_queue_max_buffer_size_per_shard[`audit_queue_max_buffer_size_per_shard`]: Integer value defining the maximum amount of memory in bytes used by the audit buffer in each shard. When this size is reached, requests to log additional audit messages return a non-retryable error. You must restart the cluster when changing this value. Default: `1048576`. * xref:reference:cluster-properties.adoc#audit_enabled_event_types[`audit_enabled_event_types`]: List of strings in JSON style identifying the event types to include in the audit log. This may include any of the following: `management, produce, consume, describe, heartbeat, authenticate, schema_registry, admin`. Default: `'["management","authenticate","admin"]'`. -* xref:reference:cluster-properties.adoc#audit_excluded_topics[`audit_excluded_topics`]: List of strings in JSON style identifying the topics the audit logging system should ignore. This list cannot include the `_redpanda.audit_log` topic. 
Redpanda will reject the command if you do attempt to include that topic. Default: `null`. +* xref:reference:cluster-properties.adoc#audit_excluded_topics[`audit_excluded_topics`]: List of strings in JSON style identifying the topics the audit logging system should ignore. This list cannot include the `_redpanda.audit_log` topic. Redpanda rejects the command if you do attempt to include that topic. Default: `null`. * xref:reference:cluster-properties.adoc#audit_queue_drain_interval_ms[`audit_queue_drain_interval_ms`]: Internally, Redpanda batches audit log messages in memory and periodically writes them to the audit log topic. This defines the period in milliseconds between draining this queue to the audit log topic. Longer intervals may help prevent duplicate messages, especially in high throughput scenarios, but they also increase the risk of data loss during hard shutdowns where the queue is lost. Default: `500`. * xref:reference:cluster-properties.adoc#audit_excluded_principals[`audit_excluded_principals`]: List of strings in JSON style identifying the principals the audit logging system should ignore. Principals can be listed as `User:name` or `name`, both are accepted. Default: `null`. @@ -240,7 +240,7 @@ spec: enabled: true ---- -If you don't want to use the Topic resource, you can enable audit logging and Redpanda will create the audit topic for you: +If you don't want to use the Topic resource, you can enable audit logging and Redpanda creates the audit topic for you: .`redpanda-cluster.yaml` [,yaml,lines=9-22] @@ -316,7 +316,7 @@ For details, see xref:manage:kubernetes/security/authentication/k-authentication endif::[] ifndef::env-cloud,env-kubernetes[] -Use the `rpk cluster config` to configure audit logs. Some options will require a cluster restart. You can verify this using `rpk cluster config status`. +Use `rpk cluster config` to configure audit logs. Some options require a cluster restart. You can verify this using `rpk cluster config status`. 
Some key tuning recommendations for your audit logging settings include: @@ -343,20 +343,20 @@ ifdef::env-cloud[] == Configure retention for audit logs -Assess the retention needs for your audit logs. You may not need to keep the logs around for the default seven days. This is controlled by setting the `retention.ms` property for the `_redpanda.audit_log` topic. +Assess the retention needs for your audit logs. You may not need to keep the logs for the default seven days. This is controlled by setting the `retention.ms` property for the `_redpanda.audit_log` topic. endif::[] ifndef::env-cloud[] == Optimize costs for audit logging -When enabled, audit logging can quickly generate a very large amount of data, especially if all event types are selected. Proper configuration of audit logging is critical to avoid filling your disk or using excess Tiered Storage. The configuration options available help ensure your audit logs contain only the volume of data necessary to meeting your regulatory or legal requirements. +When enabled, audit logging can quickly generate a very large amount of data, especially if all event types are selected. Proper configuration of audit logging is critical to avoid filling your disk or using excess Tiered Storage. The configuration options available help ensure your audit logs contain only the volume of data necessary to meet your regulatory or legal requirements. With audit logging, the pattern of message generation may be very different from your typical sources of data. These messages reflect usage of your system as opposed to the operational data your topics typically process. As a result, your retention, replication, and Tiered Storage requirements may differ from your other topics. -A typical scenario with audit logging is to route the messages to an analytics platform like Splunk. 
If your retention period is too long, you will find that you are storing excessive amounts of replicated messages in both Redpanda and in your analytics suite. Identifying the right balance of retention and replication settings minimizes this duplication while retaining your data in a system that provides actionable intelligence. +A typical scenario with audit logging is to route the messages to an analytics platform like Splunk. If your retention period is too long, you may find that you are storing excessive amounts of replicated messages in both Redpanda and your analytics suite. Identifying the right balance of retention and replication settings minimizes this duplication while retaining your data in a system that provides actionable intelligence. -Assess the retention needs for your audit logs. You may not need to keep the logs around for the default seven days. This is controlled by setting xref:reference:topic-properties.adoc#retentionms[`retention.ms`] for the `_redpanda.audit_log` topic or by setting xref:reference:cluster-properties.adoc#delete_retention_ms[`delete_retention_ms`] at the cluster level. +Assess the retention needs for your audit logs. You may not need to keep the logs for the default seven days. This is controlled by setting xref:reference:topic-properties.adoc#retentionms[`retention.ms`] for the `_redpanda.audit_log` topic or by setting xref:reference:cluster-properties.adoc#delete_retention_ms[`delete_retention_ms`] at the cluster level. 
== Next steps From 0f373ecdf827f31e55819912331117d7726030a2 Mon Sep 17 00:00:00 2001 From: Michele Cyran Date: Tue, 8 Apr 2025 14:11:17 -0600 Subject: [PATCH 43/43] fix Next steps + local-antora-playbook --- local-antora-playbook.yml | 2 +- modules/manage/partials/audit-logging.adoc | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/local-antora-playbook.yml b/local-antora-playbook.yml index 858e2d9bef..0f53a55d6e 100644 --- a/local-antora-playbook.yml +++ b/local-antora-playbook.yml @@ -17,7 +17,7 @@ content: - url: https://github.com/redpanda-data/docs branches: [v/*, api, shared, site-search,'!v-end-of-life/*'] - url: https://github.com/redpanda-data/cloud-docs - branches: 'DOC-666-Document-feature-Manage-form-factor-appropriate-cluster-configuration-properties-in-Console' + branches: 'main' - url: https://github.com/redpanda-data/redpanda-labs branches: main start_paths: [docs,'*/docs'] diff --git a/modules/manage/partials/audit-logging.adoc b/modules/manage/partials/audit-logging.adoc index 63b5f607de..40338fbaba 100644 --- a/modules/manage/partials/audit-logging.adoc +++ b/modules/manage/partials/audit-logging.adoc @@ -147,6 +147,7 @@ endif::[] ifdef::env-cloud[] Audit logging is enabled by default. Cluster administrators can configure the audited topics and principals. However, only the Redpanda team can configure the type of audited events. For more information or support, contact your Redpanda account team. + endif::[] ifndef::env-cloud[] @@ -345,6 +346,10 @@ ifdef::env-cloud[] Assess the retention needs for your audit logs. You may not need to keep the logs for the default seven days. This is controlled by setting the `retention.ms` property for the `_redpanda.audit_log` topic. +== Next steps + +xref:manage:audit-logging/audit-log-samples.adoc[See samples of audit log messages] + endif::[] ifndef::env-cloud[]