From 8ac115a0b7c882418c1cab06f494f11a20318e59 Mon Sep 17 00:00:00 2001
From: aishwarya24
Date: Fri, 12 Sep 2025 14:56:53 -0400
Subject: [PATCH 01/15] Create v2.25 from current preview content

---
 docs/content/v2.25/_index.md | 80 +
 .../v2.25/additional-features/_index.md | 35 +
 .../change-data-capture/_index.md | 57 +
 .../using-logical-replication/_index.md | 126 +
 .../advanced-configuration.md | 43 +
 .../advanced-topic.md | 244 ++
 .../best-practices.md | 30 +
 .../using-logical-replication/get-started.md | 461 +++
 .../using-logical-replication/key-concepts.md | 136 +
 .../using-logical-replication/monitor.md | 119 +
 .../using-logical-replication/transformers.md | 190 ++
 .../yugabytedb-connector-properties.md | 783 +++++
 .../yugabytedb-connector.md | 1734 ++++++++++
 .../_index.md | 92 +
 .../cdc-get-started.md | 597 ++++
 .../cdc-monitor.md | 132 +
 .../debezium-connector-yugabytedb.md | 1293 ++++++++
 .../yugabytedb-grpc-transformers.md | 220 ++
 .../v2.25/additional-features/colocation.md | 537 +++
 .../connection-manager-ysql/_index.md | 51 +
 .../ycm-best-practices.md | 63 +
 .../connection-manager-ysql/ycm-migrate.md | 74 +
 .../connection-manager-ysql/ycm-monitor.md | 117 +
 .../connection-manager-ysql/ycm-setup.md | 173 +
 .../ycm-troubleshoot.md | 64 +
 docs/content/v2.25/admin/_index.md | 67 +
 docs/content/v2.25/admin/yb-admin.md | 2919 +++++++++++++++++
 docs/content/v2.25/admin/yb-ctl.md | 566 ++++
 docs/content/v2.25/admin/yb-docker-ctl.md | 248 ++
 docs/content/v2.25/admin/yb-ts-cli.md | 339 ++
 docs/content/v2.25/admin/ysql-dump.md | 365 +++
 docs/content/v2.25/admin/ysql-dumpall.md | 252 ++
 docs/content/v2.25/api/_index.md | 80 +
 docs/content/v2.25/api/ycql/_index.md | 137 +
 docs/content/v2.25/api/ycql/batch.md | 168 +
 .../v2.25/api/ycql/ddl_alter_keyspace.md | 95 +
 docs/content/v2.25/api/ycql/ddl_alter_role.md | 72 +
 .../content/v2.25/api/ycql/ddl_alter_table.md | 156 +
 .../v2.25/api/ycql/ddl_create_index.md | 437 +++
 .../v2.25/api/ycql/ddl_create_keyspace.md | 88 +
 .../content/v2.25/api/ycql/ddl_create_role.md | 87 +
 .../v2.25/api/ycql/ddl_create_table.md | 273 ++
 .../content/v2.25/api/ycql/ddl_create_type.md | 87 +
 docs/content/v2.25/api/ycql/ddl_drop_index.md | 58 +
 .../v2.25/api/ycql/ddl_drop_keyspace.md | 59 +
 docs/content/v2.25/api/ycql/ddl_drop_role.md | 63 +
 docs/content/v2.25/api/ycql/ddl_drop_table.md | 58 +
 docs/content/v2.25/api/ycql/ddl_drop_type.md | 54 +
 .../v2.25/api/ycql/ddl_grant_permission.md | 119 +
 docs/content/v2.25/api/ycql/ddl_grant_role.md | 61 +
 .../v2.25/api/ycql/ddl_revoke_permission.md | 74 +
 .../content/v2.25/api/ycql/ddl_revoke_role.md | 59 +
 docs/content/v2.25/api/ycql/ddl_use.md | 89 +
 docs/content/v2.25/api/ycql/dml_delete.md | 294 ++
 docs/content/v2.25/api/ycql/dml_insert.md | 275 ++
 docs/content/v2.25/api/ycql/dml_select.md | 315 ++
 .../content/v2.25/api/ycql/dml_transaction.md | 156 +
 docs/content/v2.25/api/ycql/dml_truncate.md | 96 +
 docs/content/v2.25/api/ycql/dml_update.md | 283 ++
 docs/content/v2.25/api/ycql/explain.md | 217 ++
 docs/content/v2.25/api/ycql/expr_fcall.md | 196 ++
 docs/content/v2.25/api/ycql/expr_ocall.md | 51 +
 docs/content/v2.25/api/ycql/expr_simple.md | 39 +
 docs/content/v2.25/api/ycql/expr_subscript.md | 34 +
 .../v2.25/api/ycql/function_datetime.md | 432 +++
 .../v2.25/api/ycql/grammar_diagrams.md | 436 +++
 .../ycql/syntax_resources/ycql_grammar.ebnf | 148 +
 docs/content/v2.25/api/ycql/type_blob.md | 70 +
 docs/content/v2.25/api/ycql/type_bool.md | 66 +
 .../content/v2.25/api/ycql/type_collection.md | 253 ++
docs/content/v2.25/api/ycql/type_datetime.md | 251 ++ docs/content/v2.25/api/ycql/type_frozen.md | 68 + docs/content/v2.25/api/ycql/type_inet.md | 66 + docs/content/v2.25/api/ycql/type_int.md | 114 + docs/content/v2.25/api/ycql/type_jsonb.md | 248 ++ docs/content/v2.25/api/ycql/type_number.md | 81 + docs/content/v2.25/api/ycql/type_text.md | 70 + docs/content/v2.25/api/ycql/type_uuid.md | 80 + docs/content/v2.25/api/ycqlsh.md | 413 +++ docs/content/v2.25/api/ysql/_index.md | 81 + docs/content/v2.25/api/ysql/cursors.md | 530 +++ .../v2.25/api/ysql/datatypes/_index.md | 72 + .../api/ysql/datatypes/type_array/_index.md | 373 +++ .../datatypes/type_array/array-constructor.md | 191 ++ .../datatypes/type_array/array-of-domains.md | 656 ++++ .../type_array/functions-operators/_index.md | 117 + .../type_array/functions-operators/any-all.md | 402 +++ .../functions-operators/array-agg-unnest.md | 1033 ++++++ .../functions-operators/array-fill.md | 122 + .../functions-operators/array-position.md | 129 + .../functions-operators/array-remove.md | 45 + .../functions-operators/array-to-string.md | 77 + .../functions-operators/comparison.md | 454 +++ .../functions-operators/concatenation.md | 231 ++ .../functions-operators/properties.md | 417 +++ .../functions-operators/replace-a-value.md | 318 ++ .../functions-operators/slice-operator.md | 99 + .../functions-operators/string-to-array.md | 116 + .../datatypes/type_array/literals/_index.md | 97 + .../literals/array-of-primitive-values.md | 389 +++ .../type_array/literals/array-of-rows.md | 241 ++ .../ysql/datatypes/type_array/literals/row.md | 350 ++ .../literals/text-typecasting-and-literals.md | 269 ++ .../type_array/looping-through-arrays.md | 697 ++++ .../v2.25/api/ysql/datatypes/type_binary.md | 36 + .../v2.25/api/ysql/datatypes/type_bool.md | 25 + .../api/ysql/datatypes/type_character.md | 40 + .../ysql/datatypes/type_datetime/_index.md | 174 + .../type_datetime/conceptual-background.md | 376 +++ .../date-time-data-types-semantics/_index.md | 19 + .../type-date.md | 46 + .../type-interval/_index.md | 214 ++ .../type-interval/custom-interval-domains.md | 1343 ++++++++ .../type-interval/declaring-intervals.md | 427 +++ .../interval-arithmetic/_index.md | 248 ++ .../interval-interval-addition.md | 169 + .../interval-interval-comparison.md | 288 ++ .../interval-number-multiplication.md | 318 ++ ...nt-interval-overloads-of-plus-and-minus.md | 660 ++++ .../moment-moment-overloads-of-minus.md | 81 + .../type-interval/interval-limits.md | 312 ++ .../interval-representation/_index.md | 112 + .../ad-hoc-examples.md | 240 ++ .../internal-representation-model.md | 363 ++ .../type-interval/interval-utilities.md | 905 +++++ .../type-interval/justfy-and-extract-epoch.md | 523 +++ .../type-time.md | 85 + .../type-timestamp.md | 592 ++++ .../download-date-time-utilities.md | 29 + .../type_datetime/formatting-functions.md | 1193 +++++++ .../type_datetime/functions/_index.md | 157 + .../functions/creating-date-time-values.md | 429 +++ .../functions/current-date-time-moment.md | 313 ++ .../functions/delaying-execution.md | 46 + .../manipulating-date-time-values.md | 153 + .../functions/miscellaneous/_index.md | 599 ++++ .../functions/miscellaneous/age.md | 451 +++ .../functions/miscellaneous/extract.md | 363 ++ .../functions/miscellaneous/overlaps.md | 414 +++ .../type_datetime/operators/_index.md | 239 ++ .../test-date-time-addition-overloads.md | 213 ++ .../test-date-time-comparison-overloads.md | 173 + .../test-date-time-division-overloads.md | 148 + 
...test-date-time-multiplication-overloads.md | 148 + .../test-date-time-subtraction-overloads.md | 215 ++ .../ysql/datatypes/type_datetime/stopwatch.md | 624 ++++ .../type_datetime/timezones/_index.md | 53 + .../type_datetime/timezones/catalog-views.md | 497 +++ .../extended-timezone-names/_index.md | 119 + .../canonical-no-country-no-dst.md | 139 + .../canonical-real-country-no-dst.md | 264 ++ .../canonical-real-country-with-dst.md | 184 ++ .../unrestricted-full-projection.md | 595 ++++ .../type_datetime/timezones/recommendation.md | 458 +++ .../syntax-contexts-to-spec-offset.md | 145 + .../timezone-sensitive-operations/_index.md | 53 + .../timestamptz-interval-day-arithmetic.md | 329 ++ .../timestamptz-plain-timestamp-conversion.md | 431 +++ .../timezones/ways-to-spec-offset/_index.md | 151 + .../name-res-rules/_index.md | 84 + .../name-res-rules/helper-functions.md | 185 ++ .../name-res-rules/rule-1.md | 95 + .../name-res-rules/rule-2.md | 131 + .../name-res-rules/rule-3.md | 55 + .../name-res-rules/rule-4.md | 320 ++ .../api/ysql/datatypes/type_datetime/toc.md | 120 + .../typecasting-between-date-time-and-text.md | 496 +++ .../typecasting-between-date-time-values.md | 981 ++++++ .../api/ysql/datatypes/type_json/_index.md | 38 + .../type_json/code-example-conventions.md | 56 + .../create-indexes-check-constraints.md | 161 + .../type_json/functions-operators/_index.md | 85 + .../functions-operators/array-to-json.md | 49 + .../concatenation-operator.md | 103 + .../containment-operators.md | 42 + .../functions-operators/equality-operator.md | 67 + .../functions-operators/jsonb-agg.md | 81 + .../jsonb-array-elements-text.md | 118 + .../jsonb-array-elements.md | 55 + .../functions-operators/jsonb-array-length.md | 47 + .../functions-operators/jsonb-build-array.md | 89 + .../functions-operators/jsonb-build-object.md | 108 + .../functions-operators/jsonb-each-text.md | 63 + .../functions-operators/jsonb-each.md | 58 + .../jsonb-extract-path-text.md | 23 + .../functions-operators/jsonb-extract-path.md | 52 + .../functions-operators/jsonb-object-agg.md | 93 + .../functions-operators/jsonb-object-keys.md | 52 + .../functions-operators/jsonb-object.md | 96 + .../jsonb-populate-record.md | 48 + .../jsonb-populate-recordset.md | 73 + .../functions-operators/jsonb-pretty.md | 59 + .../jsonb-set-jsonb-insert.md | 223 ++ .../functions-operators/jsonb-strip-nulls.md | 50 + .../functions-operators/jsonb-to-record.md | 90 + .../functions-operators/jsonb-to-recordset.md | 82 + .../functions-operators/jsonb-typeof.md | 45 + .../key-or-value-existence-operators.md | 249 ++ .../functions-operators/remove-operators.md | 128 + .../functions-operators/row-to-json.md | 47 + .../functions-operators/subvalue-operators.md | 250 ++ .../type_json/functions-operators/to-jsonb.md | 64 + .../functions-operators/typecast-operators.md | 139 + .../ysql/datatypes/type_json/json-literals.md | 77 + .../primitive-and-compound-data-types.md | 166 + .../v2.25/api/ysql/datatypes/type_money.md | 29 + .../v2.25/api/ysql/datatypes/type_numeric.md | 73 + .../v2.25/api/ysql/datatypes/type_range.md | 487 +++ .../v2.25/api/ysql/datatypes/type_serial.md | 29 + .../v2.25/api/ysql/datatypes/type_uuid.md | 31 + docs/content/v2.25/api/ysql/exprs/_index.md | 30 + .../ysql/exprs/aggregate_functions/_index.md | 109 + .../case-study-the-68-95-997-rule.md | 160 + .../covid-data-case-study/_index.md | 82 + .../analyze-the-covidcast-data/_index.md | 97 + .../analysis-scripts/_index.md | 19 + .../analysis-scripts/analysis-queries-sql.md | 106 + 
.../analysis-scripts/synthetic-data-sql.md | 98 + .../daily-regression-analysis.md | 147 + .../scatter-plot-for-2020-10-21.md | 151 + .../symptoms-vs-mask-wearing-by-state.md | 97 + .../download-the-covidcast-data.md | 41 + .../ingest-the-covidcast-data/_index.md | 27 + .../check-data-conforms-to-the-rules.md | 297 ++ .../ingest-scripts/_index.md | 26 + .../cr-assert-assumptions-ok-sql.md | 236 ++ .../cr-cr-copy-from-csv-scripts-sql.md | 50 + .../cr-cr-staging-tables-sql.md | 56 + .../cr-xform-to-joined-table-sql.md | 82 + .../ingest-scripts/ingest-the-data-sql.md | 74 + .../inspect-the-csv-files.md | 163 + .../join-the-staged-data.md | 110 + .../stage-the-csv-files.md | 137 + .../function-syntax-semantics/_index.md | 110 + .../array-string-jsonb-jsonb-object-agg.md | 284 ++ .../avg-count-max-min-sum.md | 288 ++ .../bit-and-or-bool-and-or.md | 170 + .../linear-regression/_index.md | 176 + .../linear-regression/covar-corr.md | 254 ++ .../linear-regression/regr.md | 276 ++ .../mode-percentile-disc-percentile-cont.md | 663 ++++ .../rank-dense-rank-percent-rank-cume-dist.md | 339 ++ .../variance-stddev.md | 229 ++ .../functionality-overview.md | 464 +++ .../grouping-sets-rollup-cube.md | 548 ++++ .../invocation-syntax-semantics.md | 400 +++ .../api/ysql/exprs/func_gen_random_uuid.md | 40 + .../v2.25/api/ysql/exprs/func_yb_hash_code.md | 226 ++ .../api/ysql/exprs/func_yb_index_check.md | 183 ++ .../_index.md | 24 + .../func_yb_is_local_table.md | 221 ++ .../func_yb_server_cloud.md | 36 + .../func_yb_server_region.md | 160 + .../func_yb_server_zone.md | 40 + .../ysql/exprs/sequence_functions/_index.md | 32 + .../exprs/sequence_functions/func_currval.md | 84 + .../exprs/sequence_functions/func_lastval.md | 82 + .../exprs/sequence_functions/func_nextval.md | 116 + .../exprs/sequence_functions/func_setval.md | 92 + .../api/ysql/exprs/window_functions/_index.md | 90 + .../analyzing-a-normal-distribution/_index.md | 303 ++ .../bucket-allocation.md | 88 + .../cr-bucket-dedicated-code.md | 78 + .../cr-bucket-using-width-bucket.md | 56 + .../cr-do-cume-dist.md | 42 + .../cr-do-ntile.md | 36 + .../cr-do-percent-rank.md | 42 + .../cr-dp-views.md | 25 + .../cr-histogram.md | 35 + .../cr-int-views.md | 25 + .../cr-pr-cd-equality-report.md | 64 + .../cr-show-t4.md | 62 + .../do-assert-bucket-ok.md | 82 + .../do-clean-start.md | 40 + .../do-compare-dp-results.md | 46 + .../do-demo.md | 114 + .../do-populate-results.md | 25 + .../do-report-results.md | 49 + .../reports/_index.md | 19 + .../reports/compare-dp-results.md | 37 + .../reports/dp-results.md | 85 + .../reports/histogram-report.md | 65 + .../reports/int-results.md | 85 + .../function-syntax-semantics/_index.md | 106 + .../data-sets/_index.md | 107 + .../data-sets/table-t1.md | 209 ++ .../data-sets/table-t2.md | 131 + .../data-sets/table-t3.md | 124 + .../data-sets/table-t4.md | 105 + .../first-value-nth-value-last-value.md | 164 + .../function-syntax-semantics/lag-lead.md | 300 ++ .../percent-rank-cume-dist-ntile.md | 202 ++ .../row-number-rank-dense-rank.md | 101 + .../functionality-overview.md | 406 +++ .../invocation-syntax-semantics.md | 294 ++ docs/content/v2.25/api/ysql/keywords.md | 448 +++ .../ysql/name-resolution-in-top-level-sql.md | 410 +++ .../v2.25/api/ysql/names-and-identifiers.md | 325 ++ docs/content/v2.25/api/ysql/pg15-features.md | 226 ++ docs/content/v2.25/api/ysql/reserved_names.md | 28 + .../v2.25/api/ysql/sql-feature-support.md | 154 + .../ysql/syntax_resources/grammar_diagrams.md | 8 + .../ysql/syntax_resources/ysql_grammar.ebnf 
| 1809 ++++++++++ .../v2.25/api/ysql/the-sql-language/_index.md | 56 + .../_index.md | 41 + ...g-temporary-schema-objects-of-all-kinds.md | 233 ++ ...data-and-privacy-of-use-of-temp-objects.md | 242 ++ ...paradigm-for-creating-temporary-objects.md | 229 ++ ...rary-tables-views-sequences-and-indexes.md | 92 + .../the-sql-language/statements/_index.md | 174 + .../statements/cmd_analyze.md | 168 + .../the-sql-language/statements/cmd_call.md | 216 ++ .../the-sql-language/statements/cmd_copy.md | 225 ++ .../the-sql-language/statements/cmd_do.md | 117 + .../the-sql-language/statements/cmd_reset.md | 36 + .../the-sql-language/statements/cmd_set.md | 134 + .../the-sql-language/statements/cmd_show.md | 42 + .../dcl_alter_default_privileges.md | 63 + .../statements/dcl_alter_group.md | 39 + .../statements/dcl_alter_policy.md | 59 + .../statements/dcl_alter_role.md | 83 + .../statements/dcl_alter_user.md | 37 + .../statements/dcl_create_group.md | 40 + .../statements/dcl_create_policy.md | 73 + .../statements/dcl_create_role.md | 85 + .../statements/dcl_create_user.md | 78 + .../statements/dcl_drop_group.md | 41 + .../statements/dcl_drop_owned.md | 50 + .../statements/dcl_drop_policy.md | 43 + .../statements/dcl_drop_role.md | 48 + .../statements/dcl_drop_user.md | 42 + .../the-sql-language/statements/dcl_grant.md | 146 + .../statements/dcl_reassign_owned.md | 43 + .../the-sql-language/statements/dcl_revoke.md | 63 + .../statements/dcl_set_role.md | 77 + .../dcl_set_session_authorization.md | 56 + .../statements/ddl_alter_db.md | 55 + .../statements/ddl_alter_domain.md | 61 + .../ddl_alter_foreign_data_wrapper.md | 68 + .../statements/ddl_alter_foreign_table.md | 64 + .../statements/ddl_alter_function.md | 156 + .../statements/ddl_alter_index.md | 91 + .../statements/ddl_alter_matview.md | 64 + .../statements/ddl_alter_procedure.md | 158 + .../statements/ddl_alter_publication.md | 91 + .../statements/ddl_alter_schema.md | 128 + .../statements/ddl_alter_sequence.md | 178 + .../statements/ddl_alter_server.md | 42 + .../statements/ddl_alter_table.md | 412 +++ .../statements/ddl_comment.md | 53 + .../statements/ddl_create_aggregate.md | 104 + .../statements/ddl_create_cast.md | 85 + .../statements/ddl_create_database.md | 90 + .../statements/ddl_create_domain.md | 75 + .../statements/ddl_create_extension.md | 51 + .../ddl_create_foreign_data_wrapper.md | 58 + .../statements/ddl_create_foreign_table.md | 53 + .../statements/ddl_create_function.md | 212 ++ .../statements/ddl_create_index.md | 418 +++ .../statements/ddl_create_matview.md | 76 + .../statements/ddl_create_operator.md | 52 + .../statements/ddl_create_operator_class.md | 49 + .../statements/ddl_create_procedure.md | 183 ++ .../statements/ddl_create_publication.md | 81 + .../statements/ddl_create_rule.md | 65 + .../statements/ddl_create_schema.md | 64 + .../statements/ddl_create_sequence.md | 188 ++ .../statements/ddl_create_server.md | 54 + .../statements/ddl_create_table.md | 361 ++ .../statements/ddl_create_table_as.md | 73 + .../statements/ddl_create_tablespace.md | 41 + .../statements/ddl_create_trigger.md | 132 + .../statements/ddl_create_type.md | 139 + .../statements/ddl_create_user_mapping.md | 46 + .../statements/ddl_create_view.md | 82 + .../statements/ddl_drop_aggregate.md | 79 + .../statements/ddl_drop_cast.md | 45 + .../statements/ddl_drop_database.md | 39 + .../statements/ddl_drop_domain.md | 74 + .../statements/ddl_drop_extension.md | 66 + .../ddl_drop_foreign_data_wrapper.md | 41 + .../statements/ddl_drop_foreign_table.md | 41 + 
.../statements/ddl_drop_function.md | 53 + .../statements/ddl_drop_index.md | 93 + .../statements/ddl_drop_matview.md | 59 + .../statements/ddl_drop_operator.md | 46 + .../statements/ddl_drop_operator_class.md | 46 + .../statements/ddl_drop_procedure.md | 53 + .../statements/ddl_drop_publication.md | 48 + .../statements/ddl_drop_rule.md | 45 + .../statements/ddl_drop_schema.md | 76 + .../statements/ddl_drop_sequence.md | 107 + .../statements/ddl_drop_server.md | 41 + .../statements/ddl_drop_table.md | 97 + .../statements/ddl_drop_tablespace.md | 46 + .../statements/ddl_drop_trigger.md | 42 + .../statements/ddl_drop_type.md | 69 + .../statements/ddl_drop_view.md | 58 + .../statements/ddl_import_foreign_schema.md | 40 + .../statements/ddl_refresh_matview.md | 77 + .../statements/ddl_truncate.md | 164 + .../the-sql-language/statements/dml_close.md | 97 + .../statements/dml_declare.md | 145 + .../the-sql-language/statements/dml_delete.md | 115 + .../the-sql-language/statements/dml_fetch.md | 114 + .../the-sql-language/statements/dml_insert.md | 337 ++ .../the-sql-language/statements/dml_move.md | 76 + .../the-sql-language/statements/dml_select.md | 134 + .../the-sql-language/statements/dml_update.md | 125 + .../the-sql-language/statements/dml_values.md | 125 + .../statements/perf_deallocate.md | 55 + .../statements/perf_execute.md | 75 + .../statements/perf_explain.md | 174 + .../statements/perf_prepare.md | 70 + .../statements/savepoint_create.md | 101 + .../statements/savepoint_release.md | 70 + .../statements/savepoint_rollback.md | 102 + .../statements/streaming_create_repl_slot.md | 69 + .../statements/streaming_drop_repl_slot.md | 54 + .../statements/streaming_start_replication.md | 86 + .../the-sql-language/statements/txn_abort.md | 140 + .../the-sql-language/statements/txn_begin.md | 161 + .../the-sql-language/statements/txn_commit.md | 140 + .../the-sql-language/statements/txn_end.md | 47 + .../the-sql-language/statements/txn_lock.md | 56 + .../statements/txn_rollback.md | 45 + .../the-sql-language/statements/txn_set.md | 202 ++ .../statements/txn_set_constraints.md | 56 + .../the-sql-language/statements/txn_show.md | 41 + .../the-sql-language/statements/txn_start.md | 36 + .../the-sql-language/with-clause/_index.md | 161 + .../with-clause/bacon-numbers/_index.md | 233 ++ .../with-clause/bacon-numbers/imdb-data.md | 264 ++ .../bacon-numbers/synthetic-data.md | 399 +++ .../with-clause/emps-hierarchy.md | 738 +++++ .../with-clause/recursive-cte.md | 522 +++ .../traversing-general-graphs/_index.md | 110 + .../traversing-general-graphs/common-code.md | 520 +++ .../directed-acyclic-graph.md | 158 + .../directed-cyclic-graph.md | 81 + .../graph-representation.md | 45 + .../traversing-general-graphs/rooted-tree.md | 90 + .../traversing-general-graphs/stress-test.md | 482 +++ .../undirected-cyclic-graph.md | 619 ++++ .../unq-containing-paths.md | 93 + .../with-clause-syntax-semantics.md | 227 ++ .../api/ysql/txn-model-for-top-level-sql.md | 167 + .../_index.md | 148 + .../commit-in-user-defined-subprograms.md | 241 ++ .../language-plpgsql-subprograms/_index.md | 204 ++ .../plpgsql-execution-model.md | 483 +++ .../plpgsql-syntax-and-semantics/_index.md | 103 + .../declaration-section.md | 304 ++ .../exception-section.md | 674 ++++ .../executable-section/_index.md | 160 + .../basic-statements/_index.md | 38 + .../basic-statements/assert.md | 164 + .../basic-statements/cursor-manipulation.md | 435 +++ .../doing-sql-from-plpgsql.md | 412 +++ .../basic-statements/get-diagnostics.md | 166 + 
.../basic-statements/raise.md | 954 ++++++ .../basic-statements/return-statement.md | 43 + .../compound-statements/_index.md | 25 + .../compound-statements/case-statement.md | 93 + .../compound-statements/if-statement.md | 190 ++ .../loop-exit-continue/_index.md | 86 + .../loop-exit-continue/array-foreach-loop.md | 431 +++ .../exit-from-block-statememt.md | 144 + .../infinite-and-while-loops.md | 173 + .../loop-exit-continue/integer-for-loop.md | 67 + .../loop-exit-continue/query-for-loop.md | 370 +++ .../loop-exit-continue/two-case-studies.md | 410 +++ ...provisioning-roles-for-current-database.md | 194 ++ .../language-sql-subprograms.md | 1075 ++++++ .../name-resolution-in-subprograms.md | 280 ++ .../pg-proc-catalog-table.md | 205 ++ .../subprogram-attributes/_index.md | 86 + .../_index.md | 384 +++ .../immutable-function-examples.md | 335 ++ .../alterable-subprogram-attributes.md | 149 + .../depends-on-extension-semantics.md | 76 + .../subprogram-overloading.md | 342 ++ .../variadic-and-polymorphic-subprograms.md | 457 +++ .../content/v2.25/api/ysqlsh-meta-commands.md | 683 ++++ .../content/v2.25/api/ysqlsh-meta-examples.md | 265 ++ docs/content/v2.25/api/ysqlsh-pset-options.md | 130 + docs/content/v2.25/api/ysqlsh.md | 719 ++++ docs/content/v2.25/architecture/_index.md | 85 + .../v2.25/architecture/design-goals.md | 180 + .../architecture/docdb-replication/_index.md | 59 + .../docdb-replication/async-replication.md | 410 +++ .../cdc-logical-replication.md | 91 + .../docdb-replication/change-data-capture.md | 82 + .../architecture/docdb-replication/raft.md | 79 + .../docdb-replication/read-replicas.md | 41 + .../docdb-replication/replication.md | 96 + .../architecture/docdb-sharding/_index.md | 31 + .../architecture/docdb-sharding/sharding.md | 122 + .../docdb-sharding/tablet-splitting.md | 395 +++ .../v2.25/architecture/docdb/_index.md | 44 + .../v2.25/architecture/docdb/data-model.md | 276 ++ .../v2.25/architecture/docdb/lsm-sst.md | 88 + .../v2.25/architecture/docdb/packed-rows.md | 71 + .../v2.25/architecture/docdb/performance.md | 88 + .../v2.25/architecture/key-concepts.md | 222 ++ .../v2.25/architecture/query-layer/_index.md | 112 + .../query-layer/join-strategies.md | 292 ++ .../query-layer/planner-optimizer.md | 95 + .../v2.25/architecture/system-catalog.md | 192 ++ .../v2.25/architecture/transactions/_index.md | 97 + .../transactions/concurrency-control.md | 1273 +++++++ .../transactions/distributed-txns.md | 97 + .../transactions/isolation-levels.md | 187 ++ .../transactions/read-committed.md | 1449 ++++++++ .../transactions/read-restart-error.md | 117 + .../transactions/single-row-transactions.md | 128 + .../transactions/transaction-priorities.md | 193 ++ .../transactions/transactional-io-path.md | 118 + .../transactions/transactions-overview.md | 61 + docs/content/v2.25/architecture/yb-master.md | 77 + docs/content/v2.25/architecture/yb-tserver.md | 81 + docs/content/v2.25/benchmark/_index.md | 67 + .../benchmark/key-value-workload-ycql.md | 137 + .../benchmark/key-value-workload-ysql.md | 17 + .../v2.25/benchmark/large-datasets-ycql.md | 128 + .../v2.25/benchmark/large-datasets-ysql.md | 17 + .../v2.25/benchmark/resilience/_index.md | 39 + .../benchmark/resilience/jepsen-testing.md | 160 + .../v2.25/benchmark/scalability/_index.md | 28 + .../scalability/scaling-queries-ycql.md | 90 + .../scalability/scaling-queries-ysql.md | 150 + docs/content/v2.25/benchmark/sysbench-ysql.md | 230 ++ docs/content/v2.25/benchmark/tpcc/_index.md | 66 + 
.../benchmark/tpcc/high-scale-workloads.md | 303 ++ .../benchmark/tpcc/horizontal-scaling.md | 309 ++ .../v2.25/benchmark/tpcc/running-tpcc.md | 288 ++ docs/content/v2.25/benchmark/ycsb-jdbc.md | 205 ++ docs/content/v2.25/benchmark/ycsb-ycql.md | 190 ++ docs/content/v2.25/benchmark/ycsb-ysql.md | 191 ++ .../v2.25/best-practices-operations/_index.md | 35 + .../administration.md | 69 + .../ysql-catalog-cache-tuning-guide.md | 564 ++++ .../ysql-yb-enable-cbo.md | 103 + docs/content/v2.25/contribute/_index.md | 60 + .../v2.25/contribute/core-database/_index.md | 54 + .../core-database/build-and-test.md | 251 ++ .../core-database/build-from-src-almalinux.md | 206 ++ .../core-database/build-from-src-macos.md | 114 + .../core-database/build-from-src-ubuntu.md | 244 ++ .../contribute/core-database/checklist.md | 50 + .../contribute/core-database/clion-setup.md | 63 + .../contribute/core-database/coding-style.md | 651 ++++ .../core-database/includes/build-the-code.md | 38 + .../core-database/includes/ccache.md | 10 + .../core-database/includes/cmake.md | 19 + .../contribute/core-database/includes/java.md | 12 + .../core-database/includes/ninja.md | 10 + .../core-database/includes/opt-yb-build.md | 22 + .../core-database/includes/python.md | 8 + .../contribute/core-database/includes/tldr.md | 5 + .../core-database/includes/ulimit.md | 19 + .../core-database/includes/yugabyted-ui.md | 14 + .../merge-with-upstream-repositories.md | 1023 ++++++ docs/content/v2.25/contribute/docs/_index.md | 48 + .../contribute/docs/all-page-elements.md | 323 ++ .../v2.25/contribute/docs/docs-build.md | 152 + .../v2.25/contribute/docs/docs-checklist.md | 55 + .../v2.25/contribute/docs/docs-edit.md | 99 + .../contribute/docs/docs-editor-setup.md | 56 + .../v2.25/contribute/docs/docs-layout.md | 54 + .../contribute/docs/docs-page-structure.md | 68 + .../v2.25/contribute/docs/docs-style.md | 100 + .../v2.25/contribute/docs/include-file.md | 22 + .../v2.25/contribute/docs/include-markdown.md | 22 + docs/content/v2.25/contribute/docs/macos.md | 31 + .../v2.25/contribute/docs/syntax-diagrams.md | 221 ++ docs/content/v2.25/contribute/docs/ubuntu.md | 15 + .../contribute/docs/widgets-and-shortcodes.md | 351 ++ docs/content/v2.25/deploy/_index.md | 61 + docs/content/v2.25/deploy/checklist.md | 248 ++ .../content/v2.25/deploy/kubernetes/_index.md | 49 + .../v2.25/deploy/kubernetes/best-practices.md | 61 + .../v2.25/deploy/kubernetes/clients.md | 275 ++ .../deploy/kubernetes/multi-cluster/_index.md | 25 + .../multi-cluster/gke/helm-chart.md | 534 +++ .../deploy/kubernetes/multi-zone/_index.md | 31 + .../kubernetes/multi-zone/eks/helm-chart.md | 366 +++ .../kubernetes/multi-zone/gke/helm-chart.md | 375 +++ .../deploy/kubernetes/single-zone/_index.md | 41 + .../kubernetes/single-zone/aks/helm-chart.md | 290 ++ .../single-zone/aks/statefulset-yaml.md | 199 ++ .../kubernetes/single-zone/eks/helm-chart.md | 30 + .../kubernetes/single-zone/gke/helm-chart.md | 267 ++ .../gke/statefulset-yaml-local-ssd.md | 289 ++ .../single-zone/gke/statefulset-yaml.md | 181 + .../kubernetes/single-zone/oss/helm-chart.md | 455 +++ .../single-zone/oss/yugabyte-operator.md | 38 + .../v2.25/deploy/manual-deployment/_index.md | 45 + .../manual-deployment/install-software.md | 60 + .../deploy/manual-deployment/start-masters.md | 275 ++ .../manual-deployment/start-yugabyted.md | 130 + .../deploy/manual-deployment/system-config.md | 303 ++ .../verify-deployment-yugabyted.md | 48 + .../manual-deployment/verify-deployment.md | 80 + 
.../v2.25/deploy/multi-dc/3dc-deployment.md | 202 ++ docs/content/v2.25/deploy/multi-dc/_index.md | 70 + .../multi-dc/async-replication/_index.md | 56 + .../async-replication/async-deployment.md | 626 ++++ .../async-replication-transactional.md | 44 + .../async-transactional-failover.md | 355 ++ .../async-transactional-setup-automatic.md | 109 + .../async-transactional-setup-manual.md | 268 ++ ...sync-transactional-setup-semi-automatic.md | 85 + .../async-transactional-switchover.md | 87 + .../async-transactional-tables.md | 140 + .../includes/automatic-setup.md | 184 ++ .../includes/semi-automatic-setup.md | 163 + .../includes/transactional-add-db.md | 118 + .../includes/transactional-drop.md | 54 + .../includes/transactional-remove-db.md | 54 + .../deploy/multi-dc/read-replica-clusters.md | 159 + .../v2.25/deploy/public-clouds/_index.md | 58 + .../public-clouds/aws/cloudformation.md | 107 + .../public-clouds/aws/manual-deployment.md | 796 +++++ .../deploy/public-clouds/aws/terraform.md | 174 + .../v2.25/deploy/public-clouds/azure/aks.md | 37 + .../deploy/public-clouds/azure/azure-arm.md | 151 + .../deploy/public-clouds/azure/terraform.md | 174 + .../gcp/gcp-deployment-manager.md | 64 + .../v2.25/deploy/public-clouds/gcp/gke.md | 39 + .../deploy/public-clouds/gcp/terraform.md | 171 + docs/content/v2.25/develop/_index.md | 82 + .../develop/best-practices-develop/_index.md | 45 + .../best-practices-ycql.md | 111 + .../develop/best-practices-develop/clients.md | 50 + .../data-modeling-perf.md | 257 ++ .../v2.25/develop/build-global-apps/_index.md | 125 + .../active-active-multi-master.md | 51 + .../active-active-single-master.md | 56 + .../build-global-apps/duplicate-indexes.md | 166 + .../build-global-apps/follower-reads.md | 78 + .../build-global-apps/global-database.md | 140 + .../latency-optimized-geo-partition.md | 116 + .../locality-optimized-geo-partition.md | 128 + .../build-global-apps/read-replicas.md | 72 + .../real-world-scenarios/_index.md | 26 + .../real-world-scenarios/globally-local.md | 176 + .../v2.25/develop/data-modeling/_index.md | 65 + .../data-modeling/common-patterns/_index.md | 85 + .../data-modeling/common-patterns/jobqueue.md | 185 ++ .../data-modeling/common-patterns/keyvalue.md | 236 ++ .../common-patterns/timeseries/_index.md | 53 + .../common-patterns/timeseries/data-expiry.md | 108 + .../timeseries/global-ordering.md | 187 ++ .../timeseries/ordering-by-entity.md | 194 ++ .../timeseries/partitioning-by-time.md | 147 + .../develop/data-modeling/hot-shards-ysql.md | 130 + .../develop/data-modeling/partitioning.md | 46 + .../data-modeling/primary-keys-ycql.md | 227 ++ .../data-modeling/primary-keys-ysql.md | 223 ++ .../data-modeling/secondary-indexes-ycql.md | 180 + .../data-modeling/secondary-indexes-ysql.md | 214 ++ docs/content/v2.25/develop/gitdev/_index.md | 39 + .../v2.25/develop/gitdev/codespaces.md | 144 + docs/content/v2.25/develop/gitdev/gitpod.md | 171 + docs/content/v2.25/develop/learn/_index.md | 70 + .../v2.25/develop/learn/aggregations-ycql.md | 134 + .../v2.25/develop/learn/aggregations-ysql.md | 21 + .../develop/learn/batch-operations-ycql.md | 127 + .../develop/learn/batch-operations-ysql.md | 19 + .../v2.25/develop/learn/date-and-time-ycql.md | 17 + .../v2.25/develop/learn/date-and-time-ysql.md | 1053 ++++++ .../develop/learn/strings-and-text-ycql.md | 16 + .../develop/learn/strings-and-text-ysql.md | 907 +++++ .../v2.25/develop/learn/text-search/_index.md | 111 + .../learn/text-search/full-text-search.md | 297 ++ 
.../learn/text-search/pattern-matching.md | 186 ++ .../learn/text-search/phonetic-matching.md | 117 + .../learn/text-search/similarity-matching.md | 147 + .../transactions/acid-transactions-ycql.md | 286 ++ .../transactions/acid-transactions-ysql.md | 227 ++ .../transactions-errorcodes-ysql.md | 165 + .../transactions/transactions-global-apps.md | 74 + .../transactions-performance-ysql.md | 181 + .../transactions/transactions-retries-ysql.md | 258 ++ .../develop/learn/ttl-data-expiration-ycql.md | 168 + .../develop/learn/ttl-data-expiration-ysql.md | 21 + .../v2.25/develop/multi-cloud/_index.md | 62 + .../v2.25/develop/multi-cloud/hybrid-cloud.md | 91 + .../multi-cloud/multicloud-migration.md | 132 + .../develop/multi-cloud/multicloud-setup.md | 132 + .../develop/quality-of-service/_index.md | 30 + .../limiting-connections.md | 126 + .../transaction-priority.md | 246 ++ .../write-heavy-workloads.md | 94 + docs/content/v2.25/drivers-orms/_index.md | 124 + docs/content/v2.25/drivers-orms/c/_index.md | 39 + docs/content/v2.25/drivers-orms/c/ysql.md | 171 + docs/content/v2.25/drivers-orms/cpp/_index.md | 40 + docs/content/v2.25/drivers-orms/cpp/ycql.md | 258 ++ docs/content/v2.25/drivers-orms/cpp/ysql.md | 162 + .../v2.25/drivers-orms/csharp/_index.md | 60 + .../drivers-orms/csharp/compatibility.md | 20 + .../drivers-orms/csharp/entityframework.md | 149 + .../csharp/postgres-npgsql-reference.md | 220 ++ .../drivers-orms/csharp/postgres-npgsql.md | 292 ++ .../csharp/yb-npgsql-reference.md | 208 ++ .../content/v2.25/drivers-orms/csharp/ycql.md | 138 + .../content/v2.25/drivers-orms/csharp/ysql.md | 206 ++ .../v2.25/drivers-orms/elixir/_index.md | 41 + .../v2.25/drivers-orms/elixir/phoenix.md | 267 ++ docs/content/v2.25/drivers-orms/go/_index.md | 54 + .../v2.25/drivers-orms/go/compatibility.md | 34 + docs/content/v2.25/drivers-orms/go/gorm.md | 120 + docs/content/v2.25/drivers-orms/go/pg.md | 338 ++ .../v2.25/drivers-orms/go/pgx-reference.md | 270 ++ docs/content/v2.25/drivers-orms/go/pgx.md | 235 ++ .../v2.25/drivers-orms/go/pq-reference.md | 258 ++ docs/content/v2.25/drivers-orms/go/pq.md | 237 ++ .../v2.25/drivers-orms/go/yb-pgx-reference.md | 292 ++ docs/content/v2.25/drivers-orms/go/yb-pgx.md | 692 ++++ docs/content/v2.25/drivers-orms/go/ycql.md | 145 + .../drivers-orms/include-drivers-orms-list.md | 162 + .../content/v2.25/drivers-orms/java/_index.md | 90 + .../v2.25/drivers-orms/java/compatibility.md | 30 + docs/content/v2.25/drivers-orms/java/ebean.md | 250 ++ .../v2.25/drivers-orms/java/hibernate.md | 240 ++ .../v2.25/drivers-orms/java/mybatis.md | 303 ++ .../java/postgres-jdbc-reference.md | 378 +++ .../v2.25/drivers-orms/java/postgres-jdbc.md | 227 ++ .../v2.25/drivers-orms/java/yb-r2dbc.md | 237 ++ .../v2.25/drivers-orms/java/ycql-4.x.md | 222 ++ .../v2.25/drivers-orms/java/ycql-ssl.md | 316 ++ docs/content/v2.25/drivers-orms/java/ycql.md | 228 ++ .../drivers-orms/java/ysql-vertx-pg-client.md | 213 ++ .../java/yugabyte-jdbc-reference.md | 309 ++ .../v2.25/drivers-orms/java/yugabyte-jdbc.md | 266 ++ .../v2.25/drivers-orms/nodejs/_index.md | 62 + .../drivers-orms/nodejs/compatibility.md | 41 + .../nodejs/postgres-node-driver.md | 278 ++ .../nodejs/postgres-pg-reference.md | 275 ++ .../v2.25/drivers-orms/nodejs/prisma.md | 292 ++ .../v2.25/drivers-orms/nodejs/sequelize.md | 226 ++ .../v2.25/drivers-orms/nodejs/typeorm.md | 242 ++ .../content/v2.25/drivers-orms/nodejs/ycql.md | 148 + .../nodejs/yugabyte-node-driver.md | 225 ++ .../nodejs/yugabyte-pg-reference.md | 275 ++ 
.../content/v2.25/drivers-orms/orms/_index.md | 80 + .../drivers-orms/orms/csharp/ysql-dapper.md | 260 ++ .../orms/csharp/ysql-entity-framework.md | 268 ++ .../v2.25/drivers-orms/orms/go/ysql-gorm.md | 250 ++ .../drivers-orms/orms/java/ysql-ebean.md | 365 +++ .../drivers-orms/orms/java/ysql-hibernate.md | 292 ++ .../drivers-orms/orms/java/ysql-mybatis.md | 299 ++ .../orms/java/ysql-spring-data.md | 292 ++ .../drivers-orms/orms/nodejs/ysql-prisma.md | 319 ++ .../orms/nodejs/ysql-sequelize.md | 260 ++ .../drivers-orms/orms/php/ysql-laravel.md | 284 ++ .../drivers-orms/orms/python/ysql-django.md | 270 ++ .../orms/python/ysql-sqlalchemy.md | 262 ++ .../drivers-orms/orms/rust/ysql-diesel.md | 317 ++ docs/content/v2.25/drivers-orms/php/_index.md | 39 + .../content/v2.25/drivers-orms/php/laravel.md | 251 ++ docs/content/v2.25/drivers-orms/php/ysql.md | 117 + .../v2.25/drivers-orms/python/_index.md | 54 + .../v2.25/drivers-orms/python/aiopg.md | 130 + .../drivers-orms/python/compatibility.md | 29 + .../v2.25/drivers-orms/python/django.md | 220 ++ .../python/postgres-psycopg2-reference.md | 218 ++ .../drivers-orms/python/postgres-psycopg2.md | 225 ++ .../python/postgres-psycopg3-reference.md | 199 ++ .../drivers-orms/python/postgres-psycopg3.md | 225 ++ .../v2.25/drivers-orms/python/sqlalchemy.md | 153 + .../content/v2.25/drivers-orms/python/ycql.md | 144 + .../python/yugabyte-psycopg2-reference.md | 229 ++ .../drivers-orms/python/yugabyte-psycopg2.md | 285 ++ .../content/v2.25/drivers-orms/ruby/_index.md | 44 + .../v2.25/drivers-orms/ruby/activerecord.md | 251 ++ .../v2.25/drivers-orms/ruby/ruby-pg.md | 112 + .../v2.25/drivers-orms/ruby/yb-ruby-pg.md | 220 ++ docs/content/v2.25/drivers-orms/ruby/ycql.md | 110 + .../content/v2.25/drivers-orms/rust/_index.md | 61 + .../content/v2.25/drivers-orms/rust/diesel.md | 338 ++ .../rust/rust-postgres-reference.md | 383 +++ .../drivers-orms/rust/yb-rust-postgres.md | 165 + .../v2.25/drivers-orms/scala/_index.md | 31 + docs/content/v2.25/drivers-orms/scala/ycql.md | 161 + .../v2.25/drivers-orms/smart-drivers-ycql.md | 105 + .../v2.25/drivers-orms/smart-drivers.md | 324 ++ .../v2.25/drivers-orms/ycql-client-drivers.md | 147 + .../v2.25/drivers-orms/ysql-client-drivers.md | 174 + docs/content/v2.25/explore/_index.md | 137 + .../v2.25/explore/change-data-capture.md | 307 ++ .../explore/cluster-management/_index.md | 26 + .../point-in-time-recovery-ycql.md | 421 +++ .../point-in-time-recovery-ysql.md | 769 +++++ .../v2.25/explore/cluster-setup-aeon.md | 135 + .../v2.25/explore/cluster-setup-anywhere.md | 157 + .../v2.25/explore/cluster-setup-local.md | 154 + .../v2.25/explore/fault-tolerance/_index.md | 102 + .../fault-tolerance/handling-node-upgrades.md | 122 + .../fault-tolerance/handling-rack-failures.md | 113 + .../handling-region-failures.md | 161 + .../fault-tolerance/handling-zone-failures.md | 94 + .../explore/fault-tolerance/macos-yba.md | 73 + .../v2.25/explore/fault-tolerance/macos.md | 63 + .../transaction-availability.md | 330 ++ .../v2.25/explore/going-beyond-sql/_index.md | 94 + .../asynchronous-replication-ysql.md | 42 + .../going-beyond-sql/cluster-aware-drivers.md | 40 + .../going-beyond-sql/cluster-topology.md | 58 + .../going-beyond-sql/connection-mgr-ysql.md | 125 + .../explore/going-beyond-sql/data-sharding.md | 275 ++ .../decoupling-compute-storage.md | 98 + .../going-beyond-sql/follower-reads-ycql.md | 141 + .../going-beyond-sql/follower-reads-ysql.md | 249 ++ .../explore/going-beyond-sql/gen-ai-apps.md | 69 + 
.../going-beyond-sql/tablespaces-platform.md | 5 + .../going-beyond-sql/tablespaces-yugabyted.md | 60 + .../explore/going-beyond-sql/tablespaces.md | 435 +++ .../topology-aware-drivers.md | 50 + .../explore/linear-scalability/_index.md | 143 + .../linear-scalability/data-distribution.md | 78 + .../horizontal-vs-vertical-scaling.md | 55 + .../linear-scalability/node-addition.md | 72 + .../scaling-large-datasets.md | 119 + .../linear-scalability/scaling-reads.md | 43 + .../scaling-transactions.md | 74 + .../scaling-universe-cloud.md | 100 + .../scaling-universe-yba.md | 94 + .../linear-scalability/scaling-universe.md | 76 + .../linear-scalability/scaling-writes.md | 65 + .../multi-region-deployments/_index.md | 61 + .../read-replicas-ycql.md | 285 ++ .../read-replicas-ysql.md | 53 + .../row-level-geo-partitioning.md | 465 +++ .../synchronous-replication-cloud.md | 137 + .../synchronous-replication-yba.md | 111 + .../synchronous-replication-ysql.md | 145 + .../v2.25/explore/observability/_index.md | 134 + .../observability/active-session-history.md | 311 ++ .../grafana-dashboard/grafana.md | 58 + .../v2.25/explore/observability/logging.md | 85 + .../v2.25/explore/observability/pg-locks.md | 150 + .../explore/observability/pg-stat-activity.md | 214 ++ .../observability/pg-stat-progress-copy.md | 175 + .../observability/prometheus-integration.md | 165 + .../explore/observability/yb-local-tablets.md | 101 + .../observability/yb-pg-stat-get-queries.md | 257 ++ .../explore/query-1-performance/_index.md | 84 + .../query-1-performance/auto-analyze.md | 89 + .../query-1-performance/explain-analyze.md | 277 ++ .../query-1-performance/pg-hint-plan.md | 1093 ++++++ .../query-1-performance/pg-stat-statements.md | 480 +++ .../explore/query-1-performance/pg-stats.md | 212 ++ .../query-1-performance/query-diagnostics.md | 243 ++ .../ycql-stat-statements.md | 163 + .../v2.25/explore/security/security.md | 232 ++ .../v2.25/explore/transactions/_index.md | 77 + .../distributed-transactions-ycql.md | 223 ++ .../distributed-transactions-ysql.md | 215 ++ .../explore/transactions/explicit-locking.md | 177 + .../explore/transactions/isolation-levels.md | 488 +++ .../v2.25/explore/ycql-language/_index.md | 51 + .../cassandra-feature-support.md | 164 + .../v2.25/explore/ycql-language/data-types.md | 329 ++ .../indexes-constraints/_index.md | 78 + .../covering-index-ycql.md | 120 + .../indexes-constraints/partial-index-ycql.md | 121 + .../indexes-constraints/primary-key-ycql.md | 105 + .../secondary-indexes-with-jsonb-ycql.md | 120 + .../secondary-indexes-ycql.md | 227 ++ .../indexes-constraints/unique-index-ycql.md | 74 + .../v2.25/explore/ycql-language/jsonb-ycql.md | 246 ++ .../explore/ycql-language/keyspaces-tables.md | 183 ++ .../explore/ysql-language-features/_index.md | 111 + .../advanced-features/_index.md | 92 + .../advanced-features/collations.md | 293 ++ .../advanced-features/cursor.md | 171 + .../foreign-data-wrappers.md | 62 + .../advanced-features/inheritance.md | 103 + .../advanced-features/parallel-query.md | 39 + .../advanced-features/partitions.md | 201 ++ .../advanced-features/savepoints.md | 94 + .../snapshot-synchronization.md | 115 + .../advanced-features/stored-procedures.md | 162 + .../advanced-features/triggers.md | 252 ++ .../advanced-features/views.md | 227 ++ .../data-manipulation.md | 358 ++ .../ysql-language-features/data-types.md | 482 +++ .../databases-schemas-tables.md | 401 +++ .../expressions-operators.md | 317 ++ .../indexes-constraints/_index.md | 95 + .../covering-index-ysql.md | 
119 + .../expression-index-ysql.md | 78 + .../indexes-constraints/gin.md | 311 ++ .../indexes-constraints/index-backfill.md | 140 + .../indexes-constraints/partial-index-ysql.md | 88 + .../indexes-constraints/primary-key-ysql.md | 162 + .../secondary-indexes-with-jsonb-ysql.md | 15 + .../secondary-indexes-ysql.md | 165 + .../indexes-constraints/unique-index-ysql.md | 99 + .../ysql-language-features/jsonb-ysql.md | 439 +++ .../pg-extensions/_index.md | 69 + .../pg-extensions/extension-auto-explain.md | 70 + .../pg-extensions/extension-file-fdw.md | 34 + .../pg-extensions/extension-fuzzystrmatch.md | 27 + .../pg-extensions/extension-hypopg.md | 127 + .../pg-extensions/extension-passwordcheck.md | 81 + .../pg-extensions/extension-pganon.md | 318 ++ .../pg-extensions/extension-pgcron.md | 143 + .../pg-extensions/extension-pgcrypto.md | 29 + .../pg-extensions/extension-pgpartman.md | 401 +++ .../extension-pgstatstatements.md | 24 + .../pg-extensions/extension-pgvector.md | 220 ++ .../pg-extensions/extension-postgres-fdw.md | 45 + .../pg-extensions/extension-postgresql-hll.md | 75 + .../pg-extensions/extension-spi.md | 98 + .../pg-extensions/extension-tablefunc.md | 66 + .../pg-extensions/extension-uuid-ossp.md | 33 + .../pg-extensions/install-extensions.md | 108 + .../explore/ysql-language-features/queries.md | 561 ++++ docs/content/v2.25/faq/comparisons/_index.md | 54 + .../v2.25/faq/comparisons/amazon-aurora.md | 38 + .../v2.25/faq/comparisons/amazon-dynamodb.md | 72 + .../v2.25/faq/comparisons/azure-cosmos.md | 19 + .../v2.25/faq/comparisons/cassandra.md | 99 + .../v2.25/faq/comparisons/cockroachdb.md | 44 + .../v2.25/faq/comparisons/foundationdb.md | 118 + .../v2.25/faq/comparisons/google-spanner.md | 36 + docs/content/v2.25/faq/comparisons/hbase.md | 34 + docs/content/v2.25/faq/comparisons/mongodb.md | 74 + .../v2.25/faq/comparisons/postgresql.md | 41 + docs/content/v2.25/faq/comparisons/redis.md | 33 + docs/content/v2.25/faq/comparisons/tidb.md | 23 + docs/content/v2.25/faq/comparisons/vitess.md | 24 + docs/content/v2.25/faq/compatibility.md | 235 ++ docs/content/v2.25/faq/general.md | 270 ++ docs/content/v2.25/faq/operations-faq.md | 139 + docs/content/v2.25/faq/smart-drivers-faq.md | 104 + docs/content/v2.25/faq/yugabyte-platform.md | 243 ++ .../v2.25/faq/yugabytedb-managed-faq.md | 300 ++ docs/content/v2.25/features.md | 98 + .../content/v2.25/google8916e0ed5f6556b0.html | 1 + docs/content/v2.25/integrations/_index.md | 410 +++ docs/content/v2.25/integrations/akka-ycql.md | 322 ++ docs/content/v2.25/integrations/akka-ysql.md | 317 ++ .../content/v2.25/integrations/apache-beam.md | 108 + .../v2.25/integrations/apache-flink.md | 89 + .../content/v2.25/integrations/apache-hudi.md | 421 +++ .../v2.25/integrations/apache-spark/_index.md | 29 + .../integrations/apache-spark/java-ycql.md | 270 ++ .../integrations/apache-spark/java-ysql.md | 272 ++ .../integrations/apache-spark/python-ycql.md | 47 + .../integrations/apache-spark/python-ysql.md | 295 ++ .../integrations/apache-spark/scala-ycql.md | 320 ++ .../integrations/apache-spark/scala-ysql.md | 314 ++ .../integrations/apache-spark/spark-sql.md | 203 ++ docs/content/v2.25/integrations/ataccama.md | 27 + docs/content/v2.25/integrations/atlas-ycql.md | 170 + docs/content/v2.25/integrations/atomicjar.md | 111 + docs/content/v2.25/integrations/budibase.md | 35 + docs/content/v2.25/integrations/camunda.md | 206 ++ docs/content/v2.25/integrations/caspio.md | 37 + .../v2.25/integrations/cdc/debezium.md | 175 + 
docs/content/v2.25/integrations/cohesity.md | 56 + docs/content/v2.25/integrations/commvault.md | 27 + docs/content/v2.25/integrations/dataedo.md | 27 + docs/content/v2.25/integrations/datahub.md | 101 + docs/content/v2.25/integrations/delphix.md | 27 + .../integrations/django-rest-framework.md | 202 ++ docs/content/v2.25/integrations/flyway.md | 133 + docs/content/v2.25/integrations/gorm.md | 143 + .../v2.25/integrations/hashicorp-vault.md | 234 ++ .../v2.25/integrations/hasura/_index.md | 35 + .../v2.25/integrations/hasura/graphql.md | 277 ++ .../v2.25/integrations/hasura/hasura-1.md | 162 + .../v2.25/integrations/hasura/hasura-cloud.md | 107 + .../integrations/hasura/hasura-sample-app.md | 173 + docs/content/v2.25/integrations/hevodata.md | 39 + docs/content/v2.25/integrations/jaeger.md | 114 + docs/content/v2.25/integrations/janusgraph.md | 280 ++ docs/content/v2.25/integrations/kairosdb.md | 219 ++ docs/content/v2.25/integrations/keycloak.md | 230 ++ docs/content/v2.25/integrations/kinesis.md | 124 + docs/content/v2.25/integrations/liquibase.md | 126 + docs/content/v2.25/integrations/metacat.md | 88 + docs/content/v2.25/integrations/mirantis.md | 223 ++ .../content/v2.25/integrations/nutanix-ahv.md | 44 + docs/content/v2.25/integrations/pgmigrate.md | 44 + docs/content/v2.25/integrations/presto.md | 238 ++ docs/content/v2.25/integrations/prisma.md | 346 ++ docs/content/v2.25/integrations/rabbitmq.md | 474 +++ docs/content/v2.25/integrations/retool.md | 33 + .../integrations/schema-evolution-mgr.md | 29 + docs/content/v2.25/integrations/sequelize.md | 111 + .../integrations/spring-framework/_index.md | 55 + .../integrations/spring-framework/sd-jpa.md | 246 ++ .../integrations/spring-framework/sdyb.md | 395 +++ .../spring-framework/spring-aeon.md | 185 ++ .../spring-framework/spring-cassandra.md | 483 +++ docs/content/v2.25/integrations/sqlalchemy.md | 162 + .../content/v2.25/integrations/superblocks.md | 33 + .../v2.25/integrations/tools/_index.md | 104 + .../v2.25/integrations/tools/arctype.md | 62 + .../v2.25/integrations/tools/dbeaver-ycql.md | 88 + .../v2.25/integrations/tools/dbeaver-ysql.md | 69 + .../v2.25/integrations/tools/dbschema.md | 51 + .../v2.25/integrations/tools/metabase.md | 113 + .../v2.25/integrations/tools/pgadmin.md | 60 + .../v2.25/integrations/tools/sql-workbench.md | 110 + .../v2.25/integrations/tools/superset.md | 82 + .../v2.25/integrations/tools/tableplus.md | 59 + .../tools/visualstudioworkbench.md | 92 + docs/content/v2.25/integrations/typeorm.md | 106 + docs/content/v2.25/integrations/wso2.md | 174 + .../content/v2.25/integrations/ysql-loader.md | 235 ++ .../content/v2.25/launch-and-manage/_index.md | 48 + .../monitor-and-alert/_index.md | 41 + .../active-session-history-monitor.md | 228 ++ .../monitor-and-alert/metrics/_index.md | 105 + .../metrics/cache-storage.md | 124 + .../monitor-and-alert/metrics/connections.md | 29 + .../monitor-and-alert/metrics/raft-dst.md | 65 + .../monitor-and-alert/metrics/replication.md | 27 + .../monitor-and-alert/metrics/throughput.md | 82 + .../monitor-and-alert/metrics/ybmaster.md | 39 + .../monitor-and-alert/xcluster-monitor.md | 211 ++ docs/content/v2.25/legal/_index.md | 35 + .../v2.25/legal/third-party-software.md | 28 + docs/content/v2.25/manage/_index.md | 47 + .../v2.25/manage/backup-restore/_index.md | 76 + .../backup-restore/export-import-data-ycql.md | 116 + .../backup-restore/export-import-data.md | 106 + .../backup-restore/instant-db-cloning.md | 280 ++ .../backup-restore/point-in-time-recovery.md | 280 ++ 
.../manage/backup-restore/snapshot-ysql.md | 228 ++ .../manage/backup-restore/snapshots-ycql.md | 185 ++ .../backup-restore/time-travel-query.md | 190 ++ .../v2.25/manage/change-cluster-config.md | 213 ++ .../v2.25/manage/data-migration/_index.md | 55 + .../manage/data-migration/bulk-export-ycql.md | 153 + .../manage/data-migration/bulk-export-ysql.md | 103 + .../manage/data-migration/bulk-import-ycql.md | 146 + .../manage/data-migration/bulk-import-ysql.md | 167 + .../data-migration/migrate-from-postgres.md | 335 ++ .../data-migration/verify-migration-ycql.md | 107 + .../data-migration/verify-migration-ysql.md | 146 + .../v2.25/manage/diagnostics-reporting.md | 122 + .../v2.25/manage/upgrade-deployment.md | 373 +++ .../v2.25/manage/ysql-major-upgrade-local.md | 258 ++ .../manage/ysql-major-upgrade-yugabyted.md | 232 ++ .../quick-start-yugabytedb-managed/_index.md | 185 ++ .../managed-quick-start-include.md | 98 + .../quick-start-buildapps-include.md | 739 +++++ docs/content/v2.25/quick-start/docker.md | 478 +++ .../explore/binary/explore-ycql.md | 29 + .../explore/binary/explore-ysql.md | 19 + .../explore/docker/explore-ycql.md | 31 + .../explore/docker/explore-ysql.md | 19 + .../explore/kubernetes/explore-ycql.md | 31 + .../explore/kubernetes/explore-ysql.md | 19 + .../content/v2.25/quick-start/explore/ycql.md | 155 + .../content/v2.25/quick-start/explore/ysql.md | 860 +++++ .../v2.25/quick-start/include-connect.md | 61 + .../include-prerequisites-linux.md | 38 + .../include-prerequisites-macos.md | 30 + docs/content/v2.25/quick-start/kubernetes.md | 547 +++ docs/content/v2.25/quick-start/linux.md | 150 + docs/content/v2.25/quick-start/macos.md | 254 ++ .../v2.25/reference/configuration/_index.md | 61 + .../configuration/all-flags-yb-master.md | 17 + .../configuration/all-flags-yb-tserver.md | 17 + .../reference/configuration/default-ports.md | 139 + .../configuration/operating-systems.md | 85 + .../configuration/postgresql-compatibility.md | 204 ++ .../reference/configuration/smart-defaults.md | 53 + .../reference/configuration/yb-master.md | 1133 +++++++ .../reference/configuration/yb-tserver.md | 2882 ++++++++++++++++ .../reference/configuration/yugabyted.md | 2183 ++++++++++++ .../v2.25/reference/get-started-guide.md | 200 ++ docs/content/v2.25/releases/_index.md | 62 + .../v2.25/releases/techadvisories/_index.md | 154 + .../releases/techadvisories/_template.md | 22 + .../v2.25/releases/techadvisories/ta-14696.md | 100 + .../v2.25/releases/techadvisories/ta-20398.md | 35 + .../v2.25/releases/techadvisories/ta-20648.md | 31 + .../v2.25/releases/techadvisories/ta-20827.md | 40 + .../v2.25/releases/techadvisories/ta-20864.md | 62 + .../v2.25/releases/techadvisories/ta-21218.md | 33 + .../v2.25/releases/techadvisories/ta-21297.md | 97 + .../v2.25/releases/techadvisories/ta-21491.md | 49 + .../v2.25/releases/techadvisories/ta-22057.md | 48 + .../v2.25/releases/techadvisories/ta-22802.md | 156 + .../v2.25/releases/techadvisories/ta-22935.md | 178 + .../v2.25/releases/techadvisories/ta-23476.md | 123 + .../v2.25/releases/techadvisories/ta-24992.md | 41 + .../v2.25/releases/techadvisories/ta-25106.md | 84 + .../v2.25/releases/techadvisories/ta-25193.md | 43 + .../v2.25/releases/techadvisories/ta-26440.md | 68 + .../v2.25/releases/techadvisories/ta-26666.md | 94 + .../v2.25/releases/techadvisories/ta-27380.md | 98 + .../v2.25/releases/techadvisories/ta-28222.md | 132 + .../v2.25/releases/techadvisories/ta-2968.md | 65 + .../releases/techadvisories/ta-cl-23623.md | 34 + 
.../releases/techadvisories/ta-reol-24.md | 110 + docs/content/v2.25/releases/versioning.md | 173 + .../v2.25/releases/yba-releases/_index.md | 73 + .../v2.25/releases/yba-releases/v2.20.md | 1266 +++++++ .../v2.25/releases/yba-releases/v2.25.md | 511 +++ .../v2.25/releases/yba-releases/v2024.1.md | 664 ++++ .../v2.25/releases/yba-releases/v2024.2.md | 979 ++++++ .../v2.25/releases/yba-releases/v2025.1.md | 588 ++++ .../v2.25/releases/ybdb-releases/_index.md | 62 + .../ybdb-releases/end-of-life/v1.2.md | 608 ++++ .../ybdb-releases/end-of-life/v1.3.md | 194 ++ .../ybdb-releases/end-of-life/v2.0.md | 850 +++++ .../ybdb-releases/end-of-life/v2.1.md | 665 ++++ .../ybdb-releases/end-of-life/v2.11.md | 756 +++++ .../ybdb-releases/end-of-life/v2.12.md | 1373 ++++++++ .../ybdb-releases/end-of-life/v2.13.md | 922 ++++++ .../ybdb-releases/end-of-life/v2.14.md | 2270 +++++++++++++ .../ybdb-releases/end-of-life/v2.15.md | 1433 ++++++++ .../end-of-life/v2.16-anywhere.md | 756 +++++ .../ybdb-releases/end-of-life/v2.16.md | 1032 ++++++ .../end-of-life/v2.17-anywhere.md | 1199 +++++++ .../ybdb-releases/end-of-life/v2.17.md | 909 +++++ .../end-of-life/v2.18-anywhere.md | 1304 ++++++++ .../ybdb-releases/end-of-life/v2.18.md | 1315 ++++++++ .../end-of-life/v2.19-anywhere.md | 721 ++++ .../ybdb-releases/end-of-life/v2.19.md | 686 ++++ .../ybdb-releases/end-of-life/v2.2.md | 555 ++++ .../end-of-life/v2.21-anywhere.md | 749 +++++ .../ybdb-releases/end-of-life/v2.21.md | 937 ++++++ .../end-of-life/v2.23-anywhere.md | 608 ++++ .../ybdb-releases/end-of-life/v2.23.md | 662 ++++ .../ybdb-releases/end-of-life/v2.3.md | 428 +++ .../ybdb-releases/end-of-life/v2.4.md | 717 ++++ .../ybdb-releases/end-of-life/v2.5.md | 614 ++++ .../ybdb-releases/end-of-life/v2.6.md | 1511 +++++++++ .../ybdb-releases/end-of-life/v2.7.md | 709 ++++ .../ybdb-releases/end-of-life/v2.8.md | 1403 ++++++++ .../ybdb-releases/end-of-life/v2.9.md | 486 +++ .../v2.25/releases/ybdb-releases/v2.20.md | 1860 +++++++++++ .../v2.25/releases/ybdb-releases/v2.25.md | 978 ++++++ .../v2.25/releases/ybdb-releases/v2024.1.md | 1127 +++++++ .../v2.25/releases/ybdb-releases/v2024.2.md | 1685 ++++++++++ .../v2.25/releases/ybdb-releases/v2025.1.md | 973 ++++++ .../v2.25/releases/yugabyte-clients.md | 111 + docs/content/v2.25/sample-data/_index.md | 58 + docs/content/v2.25/sample-data/chinook.md | 140 + docs/content/v2.25/sample-data/northwind.md | 138 + docs/content/v2.25/sample-data/pgexercises.md | 119 + .../v2.25/sample-data/retail-analytics.md | 389 +++ docs/content/v2.25/sample-data/sportsdb.md | 125 + docs/content/v2.25/secure/_index.md | 66 + .../v2.25/secure/audit-logging/_index.md | 43 + .../audit-logging/audit-logging-ycql.md | 297 ++ .../audit-logging/audit-logging-ysql.md | 229 ++ .../object-audit-logging-ysql.md | 122 + .../session-audit-logging-ysql.md | 93 + .../audit-logging/trace-statements-ysql.md | 212 ++ .../v2.25/secure/authentication/_index.md | 49 + .../host-based-authentication.md | 273 ++ .../ldap-authentication-ycql.md | 139 + .../ldap-authentication-ysql.md | 223 ++ .../authentication/password-authentication.md | 200 ++ .../authentication/trust-authentication.md | 35 + .../v2.25/secure/authorization/_index.md | 49 + .../authorization/column-level-security.md | 203 ++ .../secure/authorization/create-roles-ycql.md | 170 + .../secure/authorization/create-roles.md | 187 ++ .../secure/authorization/rbac-model-ycql.md | 82 + .../v2.25/secure/authorization/rbac-model.md | 143 + .../authorization/row-level-security.md | 340 ++ 
.../authorization/ycql-grant-permissions.md | 291 ++ .../authorization/ysql-grant-permissions.md | 265 ++ .../v2.25/secure/column-level-encryption.md | 230 ++ .../secure/enable-authentication/_index.md | 43 + .../authentication-ycql.md | 378 +++ .../authentication-ysql.md | 378 +++ .../ysql-login-profiles.md | 215 ++ .../ysql_hba_conf-configuration.md | 56 + .../v2.25/secure/encryption-at-rest.md | 150 + .../v2.25/secure/security-checklist.md | 77 + .../v2.25/secure/tls-encryption/_index.md | 56 + .../tls-encryption/connect-to-cluster.md | 137 + .../tls-encryption/server-certificates.md | 376 +++ .../secure/tls-encryption/server-to-server.md | 63 + .../tls-encryption/tls-authentication.md | 226 ++ .../secure/vulnerability-disclosure-policy.md | 130 + docs/content/v2.25/troubleshoot/_index.md | 58 + .../v2.25/troubleshoot/cluster/_index.md | 60 + .../troubleshoot/cluster/connect-ycql.md | 70 + .../troubleshoot/cluster/failed_tablets.md | 31 + .../cluster/performance-troubleshooting.md | 197 ++ .../troubleshoot/cluster/recover_server.md | 63 + .../cluster/replace_failed_peers.md | 96 + .../troubleshoot/cluster/replace_master.md | 56 + .../troubleshoot/cluster/replace_tserver.md | 44 + .../v2.25/troubleshoot/nodes/_index.md | 47 + .../troubleshoot/nodes/check-processes.md | 91 + .../v2.25/troubleshoot/nodes/check-stats.md | 66 + .../v2.25/troubleshoot/nodes/disk-full.md | 149 + .../v2.25/troubleshoot/nodes/recover-disk.md | 32 + .../troubleshoot/nodes/trouble-common.md | 83 + .../v2.25/troubleshoot/other-issues.md | 42 + .../content/v2.25/troubleshoot/ysql-issues.md | 106 + docs/content/v2.25/tutorials/AI/_index.md | 68 + .../v2.25/tutorials/AI/ai-langchain-openai.md | 290 ++ .../tutorials/AI/ai-llamaindex-openai.md | 286 ++ docs/content/v2.25/tutorials/AI/ai-localai.md | 386 +++ docs/content/v2.25/tutorials/AI/ai-ollama.md | 327 ++ .../v2.25/tutorials/AI/azure-openai.md | 326 ++ .../v2.25/tutorials/AI/google-vertex-ai.md | 225 ++ docs/content/v2.25/tutorials/AI/hello-rag.md | 283 ++ docs/content/v2.25/tutorials/_index.md | 76 + docs/content/v2.25/tutorials/azure/_index.md | 59 + .../tutorials/azure/azure-api-management.md | 127 + .../tutorials/azure/azure-app-service.md | 139 + .../v2.25/tutorials/azure/azure-event-hubs.md | 327 ++ .../v2.25/tutorials/azure/azure-functions.md | 214 ++ .../v2.25/tutorials/azure/azure-key-vault.md | 212 ++ .../tutorials/azure/azure-private-link.md | 111 + .../v2.25/tutorials/build-and-learn/_index.md | 15 + .../chapter1-debuting-with-postgres.md | 172 + .../chapter2-scaling-with-yugabytedb.md | 277 ++ .../chapter3-tolerating-outages.md | 400 +++ .../build-and-learn/chapter4-going-global.md | 350 ++ .../chapter5-going-cloud-native.md | 172 + .../includes/chapter1-full-text-search.md | 21 + .../includes/chapter1-similarity-search.md | 23 + .../includes/chapter2-full-text-search.md | 7 + .../includes/chapter2-similarity-search.md | 7 + .../includes/chapter3-full-text-search.md | 7 + .../chapter3-second-full-text-search.md | 7 + .../chapter3-second-similarity-search.md | 7 + .../includes/chapter3-similarity-search.md | 7 + ...ter4-us-east-add-movie-full-text-search.md | 1 + ...er4-us-east-add-movie-similarity-search.md | 1 + .../chapter4-us-east-full-text-search.md | 7 + .../chapter4-us-east-similarity-search.md | 7 + .../includes/chapter5-full-text-search.md | 5 + .../includes/chapter5-similarity-search.md | 5 + .../tutorials/build-and-learn/overview.md | 32 + .../v2.25/tutorials/build-apps/_index.md | 111 + .../tutorials/build-apps/c/cloud-ysql-c.md | 183 ++ 
.../tutorials/build-apps/cloud-add-ip.md | 62 + .../build-apps/cpp/cloud-ysql-cpp.md | 184 ++ .../build-apps/csharp/cloud-ysql-csharp.md | 176 + .../build-apps/elixir/cloud-ysql-elixir.md | 152 + .../tutorials/build-apps/go/cloud-ysql-go.md | 184 ++ .../build-apps/java/cloud-ysql-yb-jdbc.md | 170 + .../build-apps/nodejs/cloud-ysql-node.md | 182 + .../build-apps/php/cloud-ysql-php.md | 149 + .../build-apps/python/cloud-ysql-python.md | 168 + .../build-apps/ruby/cloud-ysql-ruby.md | 171 + .../build-apps/rust/cloud-ysql-rust.md | 150 + .../v2.25/tutorials/cdc-tutorials/_index.md | 55 + .../tutorials/cdc-tutorials/cdc-aws-msk.md | 243 ++ .../cdc-tutorials/cdc-azure-event-hub.md | 208 ++ .../cdc-tutorials/cdc-confluent-cloud.md | 162 + .../tutorials/cdc-tutorials/cdc-redpanda.md | 106 + docs/content/v2.25/tutorials/google/_index.md | 23 + docs/content/v2.25/yedis/_index.md | 85 + docs/content/v2.25/yedis/api/_index.md | 115 + docs/content/v2.25/yedis/api/append.md | 57 + docs/content/v2.25/yedis/api/auth.md | 91 + docs/content/v2.25/yedis/api/config.md | 84 + docs/content/v2.25/yedis/api/createdb.md | 77 + docs/content/v2.25/yedis/api/del.md | 73 + docs/content/v2.25/yedis/api/deletedb.md | 84 + docs/content/v2.25/yedis/api/echo.md | 33 + docs/content/v2.25/yedis/api/exists.md | 62 + docs/content/v2.25/yedis/api/expire.md | 53 + docs/content/v2.25/yedis/api/expireat.md | 53 + docs/content/v2.25/yedis/api/flushall.md | 87 + docs/content/v2.25/yedis/api/flushdb.md | 87 + docs/content/v2.25/yedis/api/get.md | 56 + docs/content/v2.25/yedis/api/getrange.md | 56 + docs/content/v2.25/yedis/api/getset.md | 48 + docs/content/v2.25/yedis/api/hdel.md | 87 + docs/content/v2.25/yedis/api/hexists.md | 57 + docs/content/v2.25/yedis/api/hget.md | 48 + docs/content/v2.25/yedis/api/hgetall.md | 61 + docs/content/v2.25/yedis/api/hincrby.md | 64 + docs/content/v2.25/yedis/api/hkeys.md | 57 + docs/content/v2.25/yedis/api/hlen.md | 56 + docs/content/v2.25/yedis/api/hmget.md | 50 + docs/content/v2.25/yedis/api/hmset.md | 49 + docs/content/v2.25/yedis/api/hset.md | 86 + docs/content/v2.25/yedis/api/hstrlen.md | 48 + docs/content/v2.25/yedis/api/hvals.md | 49 + docs/content/v2.25/yedis/api/incr.md | 48 + docs/content/v2.25/yedis/api/incrby.md | 48 + docs/content/v2.25/yedis/api/keys.md | 70 + docs/content/v2.25/yedis/api/listdb.md | 93 + docs/content/v2.25/yedis/api/monitor.md | 45 + docs/content/v2.25/yedis/api/pexpire.md | 53 + docs/content/v2.25/yedis/api/pexpireat.md | 53 + docs/content/v2.25/yedis/api/psetex.md | 53 + docs/content/v2.25/yedis/api/psubscribe.md | 29 + docs/content/v2.25/yedis/api/pttl.md | 53 + docs/content/v2.25/yedis/api/publish.md | 41 + docs/content/v2.25/yedis/api/pubsub.md | 38 + docs/content/v2.25/yedis/api/punsubscribe.md | 29 + docs/content/v2.25/yedis/api/rename.md | 34 + docs/content/v2.25/yedis/api/role.md | 61 + docs/content/v2.25/yedis/api/sadd.md | 89 + docs/content/v2.25/yedis/api/scard.md | 58 + docs/content/v2.25/yedis/api/select.md | 103 + docs/content/v2.25/yedis/api/set.md | 48 + docs/content/v2.25/yedis/api/setex.md | 52 + docs/content/v2.25/yedis/api/setrange.md | 58 + docs/content/v2.25/yedis/api/sismember.md | 57 + docs/content/v2.25/yedis/api/smembers.md | 57 + docs/content/v2.25/yedis/api/srem.md | 65 + docs/content/v2.25/yedis/api/strlen.md | 56 + docs/content/v2.25/yedis/api/subscribe.md | 31 + docs/content/v2.25/yedis/api/tsadd.md | 118 + docs/content/v2.25/yedis/api/tscard.md | 75 + docs/content/v2.25/yedis/api/tsget.md | 114 + docs/content/v2.25/yedis/api/tslastn.md 
| 91 + docs/content/v2.25/yedis/api/tsrangebytime.md | 113 + docs/content/v2.25/yedis/api/tsrem.md | 110 + .../v2.25/yedis/api/tsrevrangebytime.md | 141 + docs/content/v2.25/yedis/api/ttl.md | 55 + docs/content/v2.25/yedis/api/unsubscribe.md | 30 + docs/content/v2.25/yedis/api/zadd.md | 141 + docs/content/v2.25/yedis/api/zcard.md | 65 + docs/content/v2.25/yedis/api/zrange.md | 91 + docs/content/v2.25/yedis/api/zrangebyscore.md | 99 + docs/content/v2.25/yedis/api/zrem.md | 66 + docs/content/v2.25/yedis/api/zrevrange.md | 100 + docs/content/v2.25/yedis/api/zscore.md | 55 + docs/content/v2.25/yedis/develop/_index.md | 31 + .../yedis/develop/client-drivers/_index.md | 105 + .../yedis/develop/client-drivers/yedis/cpp.md | 133 + .../develop/client-drivers/yedis/csharp.md | 115 + .../yedis/develop/client-drivers/yedis/go.md | 84 + .../develop/client-drivers/yedis/java.md | 158 + .../develop/client-drivers/yedis/nodejs.md | 71 + .../develop/client-drivers/yedis/python.md | 64 + .../content/v2.25/yedis/quick-start/_index.md | 188 ++ .../yedis/quick-start/binary/test-yedis.md | 34 + .../yedis/quick-start/docker/test-yedis.md | 28 + .../quick-start/kubernetes/test-yedis.md | 40 + docs/content/v2.25/yugabyte-cloud/_index.md | 154 + .../yugabyte-cloud/cloud-admin/_index.md | 35 + .../cloud-billing-costs-classic.md | 275 ++ .../cloud-admin/cloud-billing-costs.md | 218 ++ .../cloud-admin/cloud-billing-profile.md | 157 + .../yugabyte-cloud/cloud-basics/_index.md | 77 + .../cloud-basics/cloud-vpcs/_index.md | 80 + .../cloud-vpcs/cloud-add-endpoint.md | 77 + .../cloud-vpcs/cloud-add-peering.md | 43 + .../cloud-vpcs/cloud-add-vpc-aws.md | 193 ++ .../cloud-vpcs/cloud-add-vpc-gcp.md | 162 + .../cloud-basics/cloud-vpcs/cloud-add-vpc.md | 97 + .../cloud-vpcs/cloud-vpc-intro.md | 177 + .../cloud-vpcs/managed-endpoint-aws.md | 265 ++ .../cloud-vpcs/managed-endpoint-azure.md | 350 ++ .../cloud-basics/create-clusters-overview.md | 347 ++ .../cloud-basics/create-clusters-topology.md | 267 ++ .../cloud-basics/create-clusters/_index.md | 40 + .../create-clusters/create-clusters-free.md | 112 + .../create-clusters-geopartition.md | 142 + .../create-clusters-multisync.md | 136 + .../create-clusters/create-single-region.md | 128 + .../include-general-settings.md | 19 + .../include-security-settings.md | 33 + .../create-clusters/network-access.md | 39 + .../yugabyte-cloud/cloud-clusters/_index.md | 120 + .../cloud-clusters/add-extensions.md | 46 + .../yugabyte-cloud/cloud-clusters/aeon-cdc.md | 292 ++ .../cloud-clusters/aeon-pitr.md | 128 + .../cloud-clusters/backup-clusters.md | 189 ++ .../cloud-clusters/cloud-maintenance.md | 91 + .../cloud-clusters/configure-clusters.md | 134 + .../disaster-recovery/_index.md | 94 + .../disaster-recovery-failover.md | 48 + .../disaster-recovery-setup.md | 228 ++ .../disaster-recovery-switchover.md | 46 + .../disaster-recovery-tables.md | 90 + .../cloud-clusters/managed-read-replica.md | 87 + .../yugabyte-cloud/cloud-connect/_index.md | 46 + .../cloud-connect/connect-applications.md | 135 + .../cloud-connect/connect-client-shell.md | 111 + .../cloud-connect/connect-cloud-shell.md | 117 + .../cloud-connect/connect/ycql.md | 45 + .../cloud-connect/connect/ysql.md | 49 + .../yugabyte-cloud/cloud-monitor/_index.md | 64 + .../cloud-monitor/cloud-advisor.md | 130 + .../cloud-monitor/cloud-alerts.md | 301 ++ .../cloud-monitor/cloud-queries-live.md | 41 + .../cloud-monitor/cloud-queries-slow.md | 61 + .../cloud-monitor/logging-export.md | 173 + .../cloud-monitor/managed-integrations.md | 243 ++ 
.../cloud-monitor/metrics-export.md | 38 + .../cloud-monitor/monitor-activity.md | 35 + .../cloud-monitor/monitor-nodes.md | 23 + .../cloud-monitor/monitor-tables.md | 27 + .../yugabyte-cloud/cloud-monitor/overview.md | 219 ++ .../yugabyte-cloud/cloud-quickstart/_index.md | 131 + .../cloud-quickstart/qs-explore.md | 85 + .../cloud-secure-clusters/_index.md | 73 + .../cloud-secure-clusters/add-connections.md | 103 + .../cloud-secure-clusters/add-users.md | 107 + .../cloud-secure-clusters/cloud-activity.md | 51 + .../cloud-authentication.md | 66 + .../cloud-secure-clusters/cloud-users.md | 91 + .../cloud-secure-clusters/managed-ear.md | 188 ++ .../yugabyte-cloud/cloud-security/_index.md | 43 + .../cloud-security/cloud-security-features.md | 48 + .../cloud-security/shared-responsibility.md | 23 + .../yugabyte-cloud/cloud-troubleshoot.md | 171 + .../managed-automation/_index.md | 44 + .../managed-automation/managed-api.md | 34 + .../managed-automation/managed-apikeys.md | 63 + .../managed-automation/managed-cli/_index.md | 60 + .../managed-cli-examples/_index.md | 41 + .../managed-cli-example-create.md | 479 +++ .../managed-cli-examples/managed-guide-api.md | 268 ++ .../managed-cli/managed-cli-overview.md | 178 + .../managed-cli-reference/_index.md | 138 + .../managed-cli-api-key.md | 71 + .../managed-cli-reference/managed-cli-auth.md | 39 + .../managed-cli-backup-policy.md | 77 + .../managed-cli-backup.md | 72 + .../managed-cli-cluster.md | 185 ++ .../managed-cli-db-audit-logging.md | 108 + .../managed-cli-db-audit-logs-exporter.md | 76 + .../managed-cli-db-query-logging.md | 123 + .../managed-cli-integration.md | 61 + .../managed-cli-metrics-exporter.md | 136 + .../managed-cli-network-allow-list.md | 63 + .../managed-cli-network.md | 106 + .../managed-cli-peering.md | 72 + .../managed-cli-permission.md | 35 + .../managed-cli-read-replica.md | 77 + .../managed-cli-region.md | 208 ++ .../managed-cli-reference/managed-cli-role.md | 93 + .../managed-cli-usage.md | 47 + .../managed-cli-reference/managed-cli-user.md | 71 + .../managed-cli-reference/managed-cli-vpc.md | 62 + .../managed-automation/managed-terraform.md | 34 + .../v2.25/yugabyte-cloud/managed-freetrial.md | 44 + .../v2.25/yugabyte-cloud/managed-labs.md | 38 + .../yugabyte-cloud/managed-security/_index.md | 55 + .../managed-security/manage-access.md | 78 + .../managed-authentication/_index.md | 39 + .../federated-custom.md | 102 + .../managed-authentication/federated-entra.md | 91 + .../managed-authentication/federated-jump.md | 110 + .../managed-authentication/federated-okta.md | 101 + .../managed-authentication/federated-ping.md | 108 + .../managed-authentication/social-login.md | 28 + .../managed-security/managed-roles.md | 97 + .../v2.25/yugabyte-cloud/release-notes.md | 700 ++++ .../content/v2.25/yugabyte-platform/_index.md | 98 + .../administer-yugabyte-platform/_index.md | 57 + .../anywhere-rbac.md | 146 + .../back-up-restore-installer.md | 120 + .../back-up-restore-k8s.md | 125 + .../back-up-restore-yp.md | 102 + .../high-availability.md | 321 ++ .../ldap-authentication.md | 278 ++ .../manage-runtime-config.md | 50 + .../oidc-authentication.md | 120 + .../administer-yugabyte-platform/shutdown.md | 44 + .../uninstall-software.md | 264 ++ .../alerts-monitoring/_index.md | 84 + .../alert-policy-templates.md | 768 +++++ .../alerts-monitoring/alert.md | 40 + .../anywhere-export-configuration.md | 141 + .../alerts-monitoring/anywhere-metrics.md | 351 ++ .../alerts-monitoring/latency-histogram.md | 47 + .../live-queries-dashboard.md 
| 70 + .../alerts-monitoring/performance-advisor.md | 166 + .../prometheus-custom/_index.md | 38 + .../prometheus-custom/prometheus-federate.md | 40 + .../prometheus-custom/prometheus-scrape.md | 95 + .../set-up-alerts-health-check.md | 198 ++ .../slow-queries-dashboard.md | 78 + .../alerts-monitoring/universe-logging.md | 84 + .../anywhere-automation/_index.md | 51 + .../anywhere-automation/anywhere-api.md | 34 + .../anywhere-automation/anywhere-cli.md | 259 ++ .../anywhere-automation/anywhere-terraform.md | 34 + .../yb-kubernetes-operator.md | 631 ++++ .../back-up-restore-universes/_index.md | 90 + .../back-up-universe-data.md | 172 + .../configure-backup-storage.md | 204 ++ .../disaster-recovery/_index.md | 161 + .../disaster-recovery-failover.md | 81 + .../disaster-recovery-setup.md | 343 ++ .../disaster-recovery-switchover.md | 66 + .../disaster-recovery-tables.md | 180 + .../back-up-restore-universes/pitr.md | 89 + .../restore-universe-data.md | 309 ++ .../restore-ysql-single-table.md | 132 + .../schedule-data-backups.md | 94 + .../configure-yugabyte-platform/_index.md | 34 + .../configure-yugabyte-platform/aws.md | 241 ++ .../configure-yugabyte-platform/azure.md | 284 ++ .../configure-yugabyte-platform/gcp.md | 228 ++ .../configure-yugabyte-platform/kubernetes.md | 538 +++ .../on-premises-nodes.md | 93 + .../on-premises-provider.md | 109 + .../on-premises-script.md | 81 + .../on-premises.md | 101 + .../configure-yugabyte-platform/openshift.md | 219 ++ .../vmware-tanzu.md | 229 ++ .../create-deployments/_index.md | 65 + .../create-deployments/connect-to-universe.md | 361 ++ .../create-universe-multi-cloud.md | 152 + .../create-universe-multi-region.md | 130 + .../create-universe-multi-zone-kubernetes.md | 449 +++ .../create-universe-multi-zone.md | 184 ++ .../create-deployments/dedicated-master.md | 74 + .../create-deployments/read-replicas.md | 85 + .../install-yugabyte-platform/_index.md | 55 + .../create-admin-user.md | 51 + .../install-software/installer.md | 548 ++++ .../install-software/kubernetes.md | 541 +++ .../install-software/openshift.md | 325 ++ .../migrate-replicated.md | 138 + .../manage-deployments/_index.md | 59 + .../manage-deployments/delete-universe.md | 38 + .../manage-deployments/edit-config-flags.md | 91 + .../manage-deployments/edit-helm-overrides.md | 48 + .../manage-deployments/edit-universe.md | 93 + .../manage-deployments/instance-tags.md | 55 + .../manage-deployments/remove-nodes.md | 278 ++ .../manage-deployments/retry-failed-task.md | 39 + .../manage-deployments/upgrade-nodes-csp.md | 67 + .../manage-deployments/upgrade-nodes.md | 104 + .../upgrade-software-install.md | 147 + .../upgrade-software-prepare.md | 90 + .../manage-deployments/upgrade-software.md | 72 + .../xcluster-replication/_index.md | 102 + .../bidirectional-replication.md | 100 + .../xcluster-replication-ddl.md | 130 + .../xcluster-replication-setup.md | 302 ++ .../manage-deployments/ybdb-releases.md | 100 + .../v2.25/yugabyte-platform/prepare/_index.md | 52 + .../prepare/cloud-permissions/_index.md | 41 + .../cloud-permissions-ear.md | 128 + .../cloud-permissions-nodes-aws.md | 183 ++ .../cloud-permissions-nodes-azure.md | 110 + .../cloud-permissions-nodes-gcp.md | 116 + .../cloud-permissions-nodes-k8s.md | 171 + .../cloud-permissions-nodes.md | 59 + .../cloud-permissions-storage.md | 129 + .../cloud-permissions-yba.md | 103 + .../prepare/networking-kubernetes.md | 79 + .../yugabyte-platform/prepare/networking.md | 175 + .../prepare/server-nodes-hardware.md | 154 + 
.../prepare/server-nodes-software/_index.md | 201 ++ .../software-cloud-provider.md | 65 + .../software-kubernetes.md | 226 ++ .../software-on-prem-assist.md | 90 + .../software-on-prem-auto.md | 83 + .../software-on-prem-legacy.md | 60 + .../software-on-prem-manual.md | 850 +++++ .../server-nodes-software/software-on-prem.md | 211 ++ .../yugabyte-platform/prepare/server-nodes.md | 37 + .../prepare/server-yba-kubernetes.md | 73 + .../yugabyte-platform/prepare/server-yba.md | 124 + .../yugabyte-platform/security/_index.md | 66 + .../security/authentication/_index.md | 38 + .../ldap-authentication-platform.md | 142 + .../authentication/oidc-authentication-aad.md | 261 ++ .../oidc-authentication-jumpcloud.md | 198 ++ .../oidc-manage-users-include.md | 30 + .../security/authorization-platform.md | 96 + .../security/create-kms-config/aws-kms.md | 126 + .../security/create-kms-config/azure-kms.md | 113 + .../security/create-kms-config/google-kms.md | 109 + .../create-kms-config/hashicorp-kms.md | 223 ++ .../security/enable-encryption-at-rest.md | 145 + .../enable-encryption-in-transit/_index.md | 94 + .../add-certificate-ca.md | 113 + .../add-certificate-hashicorp.md | 189 ++ .../add-certificate-kubernetes.md | 82 + .../add-certificate-self.md | 107 + .../auto-certificate.md | 97 + .../rotate-certificates.md | 114 + .../trust-store.md | 54 + .../yugabyte-platform/troubleshoot/_index.md | 51 + .../cloud-provider-config-issues.md | 32 + .../install-upgrade-issues/installer.md | 105 + .../install-upgrade-issues/kubernetes.md | 429 +++ .../troubleshoot/install-upgrade-issues/vm.md | 92 + .../troubleshoot/ldap-issues.md | 33 + .../troubleshoot/node-alerts.md | 94 + .../troubleshoot/universe-issues.md | 506 +++ .../v2.25/yugabyte-platform/upgrade/_index.md | 38 + .../upgrade/prepare-to-upgrade.md | 65 + .../upgrade/upgrade-yp-installer.md | 76 + .../upgrade/upgrade-yp-kubernetes.md | 158 + .../upgrade/upgrade-yp-replicated.md | 103 + .../upgrade/upgrade-yp-xcluster-ybadmin.md | 67 + .../yugabyte-platform/yba-overview-install.md | 90 + .../v2.25/yugabyte-platform/yba-overview.md | 119 + docs/content/v2.25/yugabyte-voyager/_index.md | 55 + docs/content/v2.25/yugabyte-voyager/docker.md | 29 + docs/content/v2.25/yugabyte-voyager/github.md | 45 + .../yugabyte-voyager/install-yb-voyager.md | 396 +++ .../v2.25/yugabyte-voyager/introduction.md | 76 + .../yugabyte-voyager/known-issues/_index.md | 128 + .../yugabyte-voyager/known-issues/mysql.md | 959 ++++++ .../yugabyte-voyager/known-issues/oracle.md | 661 ++++ .../known-issues/postgresql.md | 1984 +++++++++++ docs/content/v2.25/yugabyte-voyager/macos.md | 49 + .../v2.25/yugabyte-voyager/migrate/_index.md | 55 + .../migrate/assess-migration.md | 332 ++ .../migrate/bulk-data-load.md | 524 +++ .../migrate/live-fall-back.md | 1681 ++++++++++ .../migrate/live-fall-forward.md | 1608 +++++++++ .../yugabyte-voyager/migrate/live-migrate.md | 1330 ++++++++ .../yugabyte-voyager/migrate/migrate-steps.md | 793 +++++ .../v2.25/yugabyte-voyager/migrate/mysql.md | 60 + .../v2.25/yugabyte-voyager/migrate/oracle.md | 106 + .../yugabyte-voyager/migrate/postgresql.md | 30 + .../yugabyte-voyager/reference/_index.md | 48 + .../reference/assess-migration.md | 277 ++ .../bulk-data-load/import-data-file.md | 581 ++++ .../reference/configuration-file.md | 248 ++ .../cutover-archive/archive-changes.md | 98 + .../reference/cutover-archive/cutover.md | 231 ++ .../reference/data-migration/export-data.md | 586 ++++ .../reference/data-migration/import-data.md | 758 +++++ 
.../reference/datatype-mapping-mysql.md | 72 + .../reference/datatype-mapping-oracle.md | 94 + .../reference/diagnostics-report.md | 233 + .../reference/end-migration.md | 105 + .../reference/non-superuser.md | 197 + .../yugabyte-voyager/reference/performance.md | 68 + .../schema-migration/analyze-schema.md | 79 + .../schema-migration/export-schema.md | 253 + .../finalize-schema-post-data-import.md | 196 + .../schema-migration/import-schema.md | 251 + .../reference/yb-voyager-cli.md | 109 + ...-voyager-pg-grant-migration-permissions.md | 163 + .../v2.25/yugabyte-voyager/release-notes.md | 804 + docs/content/v2.25/yugabyte-voyager/rhel.md | 187 + docs/content/v2.25/yugabyte-voyager/ubuntu.md | 82 + .../yugabyte-voyager/voyager-troubleshoot.md | 38 +

 1649 files changed, 313062 insertions(+)

create mode 100644 docs/content/v2.25/_index.md create mode 100644 docs/content/v2.25/additional-features/_index.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/_index.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/_index.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/advanced-configuration.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/advanced-topic.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/best-practices.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/get-started.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/key-concepts.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/monitor.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/transformers.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/yugabytedb-connector-properties.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/yugabytedb-connector.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/_index.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/cdc-get-started.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/cdc-monitor.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/debezium-connector-yugabytedb.md create mode 100644 docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/yugabytedb-grpc-transformers.md create mode 100644 docs/content/v2.25/additional-features/colocation.md create mode 100644 docs/content/v2.25/additional-features/connection-manager-ysql/_index.md create mode 100644 docs/content/v2.25/additional-features/connection-manager-ysql/ycm-best-practices.md create mode 100644 docs/content/v2.25/additional-features/connection-manager-ysql/ycm-migrate.md create mode 100644 docs/content/v2.25/additional-features/connection-manager-ysql/ycm-monitor.md create mode 100644 docs/content/v2.25/additional-features/connection-manager-ysql/ycm-setup.md create mode 100644
docs/content/v2.25/additional-features/connection-manager-ysql/ycm-troubleshoot.md create mode 100644 docs/content/v2.25/admin/_index.md create mode 100644 docs/content/v2.25/admin/yb-admin.md create mode 100644 docs/content/v2.25/admin/yb-ctl.md create mode 100644 docs/content/v2.25/admin/yb-docker-ctl.md create mode 100644 docs/content/v2.25/admin/yb-ts-cli.md create mode 100644 docs/content/v2.25/admin/ysql-dump.md create mode 100644 docs/content/v2.25/admin/ysql-dumpall.md create mode 100644 docs/content/v2.25/api/_index.md create mode 100644 docs/content/v2.25/api/ycql/_index.md create mode 100644 docs/content/v2.25/api/ycql/batch.md create mode 100644 docs/content/v2.25/api/ycql/ddl_alter_keyspace.md create mode 100644 docs/content/v2.25/api/ycql/ddl_alter_role.md create mode 100644 docs/content/v2.25/api/ycql/ddl_alter_table.md create mode 100644 docs/content/v2.25/api/ycql/ddl_create_index.md create mode 100644 docs/content/v2.25/api/ycql/ddl_create_keyspace.md create mode 100644 docs/content/v2.25/api/ycql/ddl_create_role.md create mode 100644 docs/content/v2.25/api/ycql/ddl_create_table.md create mode 100644 docs/content/v2.25/api/ycql/ddl_create_type.md create mode 100644 docs/content/v2.25/api/ycql/ddl_drop_index.md create mode 100644 docs/content/v2.25/api/ycql/ddl_drop_keyspace.md create mode 100644 docs/content/v2.25/api/ycql/ddl_drop_role.md create mode 100644 docs/content/v2.25/api/ycql/ddl_drop_table.md create mode 100644 docs/content/v2.25/api/ycql/ddl_drop_type.md create mode 100644 docs/content/v2.25/api/ycql/ddl_grant_permission.md create mode 100644 docs/content/v2.25/api/ycql/ddl_grant_role.md create mode 100644 docs/content/v2.25/api/ycql/ddl_revoke_permission.md create mode 100644 docs/content/v2.25/api/ycql/ddl_revoke_role.md create mode 100644 docs/content/v2.25/api/ycql/ddl_use.md create mode 100644 docs/content/v2.25/api/ycql/dml_delete.md create mode 100644 docs/content/v2.25/api/ycql/dml_insert.md create mode 100644 docs/content/v2.25/api/ycql/dml_select.md create mode 100644 docs/content/v2.25/api/ycql/dml_transaction.md create mode 100644 docs/content/v2.25/api/ycql/dml_truncate.md create mode 100644 docs/content/v2.25/api/ycql/dml_update.md create mode 100644 docs/content/v2.25/api/ycql/explain.md create mode 100644 docs/content/v2.25/api/ycql/expr_fcall.md create mode 100644 docs/content/v2.25/api/ycql/expr_ocall.md create mode 100644 docs/content/v2.25/api/ycql/expr_simple.md create mode 100644 docs/content/v2.25/api/ycql/expr_subscript.md create mode 100644 docs/content/v2.25/api/ycql/function_datetime.md create mode 100644 docs/content/v2.25/api/ycql/grammar_diagrams.md create mode 100644 docs/content/v2.25/api/ycql/syntax_resources/ycql_grammar.ebnf create mode 100644 docs/content/v2.25/api/ycql/type_blob.md create mode 100644 docs/content/v2.25/api/ycql/type_bool.md create mode 100644 docs/content/v2.25/api/ycql/type_collection.md create mode 100644 docs/content/v2.25/api/ycql/type_datetime.md create mode 100644 docs/content/v2.25/api/ycql/type_frozen.md create mode 100644 docs/content/v2.25/api/ycql/type_inet.md create mode 100644 docs/content/v2.25/api/ycql/type_int.md create mode 100644 docs/content/v2.25/api/ycql/type_jsonb.md create mode 100644 docs/content/v2.25/api/ycql/type_number.md create mode 100644 docs/content/v2.25/api/ycql/type_text.md create mode 100644 docs/content/v2.25/api/ycql/type_uuid.md create mode 100644 docs/content/v2.25/api/ycqlsh.md create mode 100644 docs/content/v2.25/api/ysql/_index.md create mode 100644 
docs/content/v2.25/api/ysql/cursors.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/array-constructor.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/array-of-domains.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/any-all.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-agg-unnest.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-fill.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-position.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-remove.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-to-string.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/comparison.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/concatenation.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/properties.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/replace-a-value.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/slice-operator.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/string-to-array.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/literals/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/literals/array-of-primitive-values.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/literals/array-of-rows.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/literals/row.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/literals/text-typecasting-and-literals.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_array/looping-through-arrays.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_binary.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_bool.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_character.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/conceptual-background.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-date.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/custom-interval-domains.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/declaring-intervals.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-arithmetic/_index.md create mode 100644 
docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-arithmetic/interval-interval-addition.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-arithmetic/interval-interval-comparison.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-arithmetic/interval-number-multiplication.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-arithmetic/moment-interval-overloads-of-plus-and-minus.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-arithmetic/moment-moment-overloads-of-minus.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-limits.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-representation/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-representation/ad-hoc-examples.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-representation/internal-representation-model.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-utilities.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/justfy-and-extract-epoch.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-time.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-timestamp.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/download-date-time-utilities.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/formatting-functions.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/functions/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/functions/creating-date-time-values.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/functions/current-date-time-moment.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/functions/delaying-execution.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/functions/manipulating-date-time-values.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/functions/miscellaneous/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/functions/miscellaneous/age.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/functions/miscellaneous/extract.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/functions/miscellaneous/overlaps.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/operators/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/operators/test-date-time-addition-overloads.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/operators/test-date-time-comparison-overloads.md create mode 100644 
docs/content/v2.25/api/ysql/datatypes/type_datetime/operators/test-date-time-division-overloads.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/operators/test-date-time-multiplication-overloads.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/operators/test-date-time-subtraction-overloads.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/stopwatch.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/catalog-views.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/extended-timezone-names/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/extended-timezone-names/canonical-no-country-no-dst.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/extended-timezone-names/canonical-real-country-no-dst.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/extended-timezone-names/canonical-real-country-with-dst.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/extended-timezone-names/unrestricted-full-projection.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/recommendation.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/syntax-contexts-to-spec-offset.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/timezone-sensitive-operations/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/timezone-sensitive-operations/timestamptz-interval-day-arithmetic.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/timezone-sensitive-operations/timestamptz-plain-timestamp-conversion.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/ways-to-spec-offset/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/ways-to-spec-offset/name-res-rules/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/ways-to-spec-offset/name-res-rules/helper-functions.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/ways-to-spec-offset/name-res-rules/rule-1.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/ways-to-spec-offset/name-res-rules/rule-2.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/ways-to-spec-offset/name-res-rules/rule-3.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/timezones/ways-to-spec-offset/name-res-rules/rule-4.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/toc.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/typecasting-between-date-time-and-text.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_datetime/typecasting-between-date-time-values.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/_index.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/code-example-conventions.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/create-indexes-check-constraints.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/_index.md create mode 100644 
docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/array-to-json.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/concatenation-operator.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/containment-operators.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/equality-operator.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-agg.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-array-elements-text.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-array-elements.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-array-length.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-build-array.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-build-object.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-each-text.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-each.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-extract-path-text.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-extract-path.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-object-agg.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-object-keys.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-object.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-populate-record.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-populate-recordset.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-pretty.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-set-jsonb-insert.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-strip-nulls.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-to-record.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-to-recordset.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/jsonb-typeof.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/key-or-value-existence-operators.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/remove-operators.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/row-to-json.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/subvalue-operators.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/to-jsonb.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/functions-operators/typecast-operators.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/json-literals.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_json/primitive-and-compound-data-types.md create mode 100644 
docs/content/v2.25/api/ysql/datatypes/type_money.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_numeric.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_range.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_serial.md create mode 100644 docs/content/v2.25/api/ysql/datatypes/type_uuid.md create mode 100644 docs/content/v2.25/api/ysql/exprs/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/case-study-the-68-95-997-rule.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/analyze-the-covidcast-data/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/analyze-the-covidcast-data/analysis-scripts/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/analyze-the-covidcast-data/analysis-scripts/analysis-queries-sql.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/analyze-the-covidcast-data/analysis-scripts/synthetic-data-sql.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/analyze-the-covidcast-data/daily-regression-analysis.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/analyze-the-covidcast-data/scatter-plot-for-2020-10-21.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/analyze-the-covidcast-data/symptoms-vs-mask-wearing-by-state.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/download-the-covidcast-data.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/check-data-conforms-to-the-rules.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/ingest-scripts/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/ingest-scripts/cr-assert-assumptions-ok-sql.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/ingest-scripts/cr-cr-copy-from-csv-scripts-sql.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/ingest-scripts/cr-cr-staging-tables-sql.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/ingest-scripts/cr-xform-to-joined-table-sql.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/ingest-scripts/ingest-the-data-sql.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/inspect-the-csv-files.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/join-the-staged-data.md create mode 100644 
docs/content/v2.25/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/stage-the-csv-files.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/function-syntax-semantics/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/function-syntax-semantics/array-string-jsonb-jsonb-object-agg.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/function-syntax-semantics/avg-count-max-min-sum.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/function-syntax-semantics/bit-and-or-bool-and-or.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/function-syntax-semantics/linear-regression/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/function-syntax-semantics/linear-regression/covar-corr.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/function-syntax-semantics/linear-regression/regr.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/function-syntax-semantics/mode-percentile-disc-percentile-cont.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/function-syntax-semantics/variance-stddev.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/functionality-overview.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/grouping-sets-rollup-cube.md create mode 100644 docs/content/v2.25/api/ysql/exprs/aggregate_functions/invocation-syntax-semantics.md create mode 100644 docs/content/v2.25/api/ysql/exprs/func_gen_random_uuid.md create mode 100644 docs/content/v2.25/api/ysql/exprs/func_yb_hash_code.md create mode 100644 docs/content/v2.25/api/ysql/exprs/func_yb_index_check.md create mode 100644 docs/content/v2.25/api/ysql/exprs/geo_partitioning_helper_functions/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/geo_partitioning_helper_functions/func_yb_is_local_table.md create mode 100644 docs/content/v2.25/api/ysql/exprs/geo_partitioning_helper_functions/func_yb_server_cloud.md create mode 100644 docs/content/v2.25/api/ysql/exprs/geo_partitioning_helper_functions/func_yb_server_region.md create mode 100644 docs/content/v2.25/api/ysql/exprs/geo_partitioning_helper_functions/func_yb_server_zone.md create mode 100644 docs/content/v2.25/api/ysql/exprs/sequence_functions/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/sequence_functions/func_currval.md create mode 100644 docs/content/v2.25/api/ysql/exprs/sequence_functions/func_lastval.md create mode 100644 docs/content/v2.25/api/ysql/exprs/sequence_functions/func_nextval.md create mode 100644 docs/content/v2.25/api/ysql/exprs/sequence_functions/func_setval.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/bucket-allocation.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/cr-bucket-dedicated-code.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/cr-bucket-using-width-bucket.md create mode 100644 
docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/cr-do-cume-dist.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/cr-do-ntile.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/cr-do-percent-rank.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/cr-dp-views.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/cr-histogram.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/cr-int-views.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/cr-pr-cd-equality-report.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/cr-show-t4.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/do-assert-bucket-ok.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/do-clean-start.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/do-compare-dp-results.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/do-demo.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/do-populate-results.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/do-report-results.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/reports/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/reports/compare-dp-results.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/reports/dp-results.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/reports/histogram-report.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/reports/int-results.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/function-syntax-semantics/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/function-syntax-semantics/data-sets/_index.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/function-syntax-semantics/data-sets/table-t1.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/function-syntax-semantics/data-sets/table-t2.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/function-syntax-semantics/data-sets/table-t3.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/function-syntax-semantics/data-sets/table-t4.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/function-syntax-semantics/first-value-nth-value-last-value.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/function-syntax-semantics/lag-lead.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/function-syntax-semantics/percent-rank-cume-dist-ntile.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/function-syntax-semantics/row-number-rank-dense-rank.md create mode 100644 
docs/content/v2.25/api/ysql/exprs/window_functions/functionality-overview.md create mode 100644 docs/content/v2.25/api/ysql/exprs/window_functions/invocation-syntax-semantics.md create mode 100644 docs/content/v2.25/api/ysql/keywords.md create mode 100644 docs/content/v2.25/api/ysql/name-resolution-in-top-level-sql.md create mode 100644 docs/content/v2.25/api/ysql/names-and-identifiers.md create mode 100644 docs/content/v2.25/api/ysql/pg15-features.md create mode 100644 docs/content/v2.25/api/ysql/reserved_names.md create mode 100644 docs/content/v2.25/api/ysql/sql-feature-support.md create mode 100644 docs/content/v2.25/api/ysql/syntax_resources/grammar_diagrams.md create mode 100644 docs/content/v2.25/api/ysql/syntax_resources/ysql_grammar.ebnf create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/_index.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/creating-and-using-temporary-schema-objects/_index.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/creating-and-using-temporary-schema-objects/creating-temporary-schema-objects-of-all-kinds.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/creating-and-using-temporary-schema-objects/globality-of-metadata-and-privacy-of-use-of-temp-objects.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/creating-and-using-temporary-schema-objects/on-demand-paradigm-for-creating-temporary-objects.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/creating-and-using-temporary-schema-objects/temporary-tables-views-sequences-and-indexes.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/_index.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/cmd_analyze.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/cmd_call.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/cmd_copy.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/cmd_do.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/cmd_reset.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/cmd_set.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/cmd_show.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_alter_default_privileges.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_alter_group.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_alter_policy.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_alter_role.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_alter_user.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_create_group.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_create_policy.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_create_role.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_create_user.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_drop_group.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_drop_owned.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_drop_policy.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_drop_role.md create mode 100644 
docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_drop_user.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_grant.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_reassign_owned.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_revoke.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_set_role.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dcl_set_session_authorization.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_alter_db.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_alter_domain.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_alter_foreign_data_wrapper.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_alter_foreign_table.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_alter_function.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_alter_index.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_alter_matview.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_alter_procedure.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_alter_publication.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_alter_schema.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_alter_sequence.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_alter_server.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_alter_table.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_comment.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_aggregate.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_cast.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_database.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_domain.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_extension.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_foreign_data_wrapper.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_foreign_table.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_function.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_index.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_matview.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_operator.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_operator_class.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_procedure.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_publication.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_rule.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_schema.md create mode 100644 
docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_sequence.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_server.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_table.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_table_as.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_tablespace.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_trigger.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_type.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_user_mapping.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_create_view.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_aggregate.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_cast.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_database.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_domain.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_extension.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_foreign_data_wrapper.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_foreign_table.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_function.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_index.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_matview.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_operator.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_operator_class.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_procedure.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_publication.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_rule.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_schema.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_sequence.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_server.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_table.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_tablespace.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_trigger.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_type.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_drop_view.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_import_foreign_schema.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_refresh_matview.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/ddl_truncate.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dml_close.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dml_declare.md create mode 100644 
docs/content/v2.25/api/ysql/the-sql-language/statements/dml_delete.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dml_fetch.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dml_insert.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dml_move.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dml_select.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dml_update.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/dml_values.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/perf_deallocate.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/perf_execute.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/perf_explain.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/perf_prepare.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/savepoint_create.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/savepoint_release.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/savepoint_rollback.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/streaming_create_repl_slot.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/streaming_drop_repl_slot.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/streaming_start_replication.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/txn_abort.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/txn_begin.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/txn_commit.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/txn_end.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/txn_lock.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/txn_rollback.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/txn_set.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/txn_set_constraints.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/txn_show.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/statements/txn_start.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/with-clause/_index.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/with-clause/bacon-numbers/_index.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/with-clause/bacon-numbers/imdb-data.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/with-clause/bacon-numbers/synthetic-data.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/with-clause/emps-hierarchy.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/with-clause/recursive-cte.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/with-clause/traversing-general-graphs/_index.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/with-clause/traversing-general-graphs/common-code.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/with-clause/traversing-general-graphs/directed-acyclic-graph.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/with-clause/traversing-general-graphs/directed-cyclic-graph.md create mode 100644 
docs/content/v2.25/api/ysql/the-sql-language/with-clause/traversing-general-graphs/graph-representation.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/with-clause/traversing-general-graphs/rooted-tree.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/with-clause/traversing-general-graphs/stress-test.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/with-clause/traversing-general-graphs/undirected-cyclic-graph.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/with-clause/traversing-general-graphs/unq-containing-paths.md create mode 100644 docs/content/v2.25/api/ysql/the-sql-language/with-clause/with-clause-syntax-semantics.md create mode 100644 docs/content/v2.25/api/ysql/txn-model-for-top-level-sql.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/_index.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/commit-in-user-defined-subprograms.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/_index.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-execution-model.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/_index.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/declaration-section.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/exception-section.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/_index.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/basic-statements/_index.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/basic-statements/assert.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/basic-statements/cursor-manipulation.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/basic-statements/doing-sql-from-plpgsql.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/basic-statements/get-diagnostics.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/basic-statements/raise.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/basic-statements/return-statement.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/_index.md create mode 100644 
docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/case-statement.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/if-statement.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/loop-exit-continue/_index.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/loop-exit-continue/array-foreach-loop.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/loop-exit-continue/exit-from-block-statememt.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/loop-exit-continue/infinite-and-while-loops.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/loop-exit-continue/integer-for-loop.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/loop-exit-continue/query-for-loop.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/loop-exit-continue/two-case-studies.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/provisioning-roles-for-current-database.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/language-sql-subprograms.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/name-resolution-in-subprograms.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/pg-proc-catalog-table.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/subprogram-attributes/_index.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/subprogram-attributes/alterable-function-only-attributes/_index.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/subprogram-attributes/alterable-function-only-attributes/immutable-function-examples.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/subprogram-attributes/alterable-subprogram-attributes.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/subprogram-attributes/depends-on-extension-semantics.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/subprogram-overloading.md create mode 100644 docs/content/v2.25/api/ysql/user-defined-subprograms-and-anon-blocks/variadic-and-polymorphic-subprograms.md create mode 100644 docs/content/v2.25/api/ysqlsh-meta-commands.md create mode 100644 
docs/content/v2.25/api/ysqlsh-meta-examples.md create mode 100644 docs/content/v2.25/api/ysqlsh-pset-options.md create mode 100644 docs/content/v2.25/api/ysqlsh.md create mode 100644 docs/content/v2.25/architecture/_index.md create mode 100644 docs/content/v2.25/architecture/design-goals.md create mode 100644 docs/content/v2.25/architecture/docdb-replication/_index.md create mode 100644 docs/content/v2.25/architecture/docdb-replication/async-replication.md create mode 100644 docs/content/v2.25/architecture/docdb-replication/cdc-logical-replication.md create mode 100644 docs/content/v2.25/architecture/docdb-replication/change-data-capture.md create mode 100644 docs/content/v2.25/architecture/docdb-replication/raft.md create mode 100644 docs/content/v2.25/architecture/docdb-replication/read-replicas.md create mode 100644 docs/content/v2.25/architecture/docdb-replication/replication.md create mode 100644 docs/content/v2.25/architecture/docdb-sharding/_index.md create mode 100644 docs/content/v2.25/architecture/docdb-sharding/sharding.md create mode 100644 docs/content/v2.25/architecture/docdb-sharding/tablet-splitting.md create mode 100644 docs/content/v2.25/architecture/docdb/_index.md create mode 100644 docs/content/v2.25/architecture/docdb/data-model.md create mode 100644 docs/content/v2.25/architecture/docdb/lsm-sst.md create mode 100644 docs/content/v2.25/architecture/docdb/packed-rows.md create mode 100644 docs/content/v2.25/architecture/docdb/performance.md create mode 100644 docs/content/v2.25/architecture/key-concepts.md create mode 100644 docs/content/v2.25/architecture/query-layer/_index.md create mode 100644 docs/content/v2.25/architecture/query-layer/join-strategies.md create mode 100644 docs/content/v2.25/architecture/query-layer/planner-optimizer.md create mode 100644 docs/content/v2.25/architecture/system-catalog.md create mode 100644 docs/content/v2.25/architecture/transactions/_index.md create mode 100644 docs/content/v2.25/architecture/transactions/concurrency-control.md create mode 100644 docs/content/v2.25/architecture/transactions/distributed-txns.md create mode 100644 docs/content/v2.25/architecture/transactions/isolation-levels.md create mode 100644 docs/content/v2.25/architecture/transactions/read-committed.md create mode 100644 docs/content/v2.25/architecture/transactions/read-restart-error.md create mode 100644 docs/content/v2.25/architecture/transactions/single-row-transactions.md create mode 100644 docs/content/v2.25/architecture/transactions/transaction-priorities.md create mode 100644 docs/content/v2.25/architecture/transactions/transactional-io-path.md create mode 100644 docs/content/v2.25/architecture/transactions/transactions-overview.md create mode 100644 docs/content/v2.25/architecture/yb-master.md create mode 100644 docs/content/v2.25/architecture/yb-tserver.md create mode 100644 docs/content/v2.25/benchmark/_index.md create mode 100644 docs/content/v2.25/benchmark/key-value-workload-ycql.md create mode 100644 docs/content/v2.25/benchmark/key-value-workload-ysql.md create mode 100644 docs/content/v2.25/benchmark/large-datasets-ycql.md create mode 100644 docs/content/v2.25/benchmark/large-datasets-ysql.md create mode 100644 docs/content/v2.25/benchmark/resilience/_index.md create mode 100644 docs/content/v2.25/benchmark/resilience/jepsen-testing.md create mode 100644 docs/content/v2.25/benchmark/scalability/_index.md create mode 100644 docs/content/v2.25/benchmark/scalability/scaling-queries-ycql.md create mode 100644 
docs/content/v2.25/benchmark/scalability/scaling-queries-ysql.md create mode 100644 docs/content/v2.25/benchmark/sysbench-ysql.md create mode 100644 docs/content/v2.25/benchmark/tpcc/_index.md create mode 100644 docs/content/v2.25/benchmark/tpcc/high-scale-workloads.md create mode 100644 docs/content/v2.25/benchmark/tpcc/horizontal-scaling.md create mode 100644 docs/content/v2.25/benchmark/tpcc/running-tpcc.md create mode 100644 docs/content/v2.25/benchmark/ycsb-jdbc.md create mode 100644 docs/content/v2.25/benchmark/ycsb-ycql.md create mode 100644 docs/content/v2.25/benchmark/ycsb-ysql.md create mode 100644 docs/content/v2.25/best-practices-operations/_index.md create mode 100644 docs/content/v2.25/best-practices-operations/administration.md create mode 100644 docs/content/v2.25/best-practices-operations/ysql-catalog-cache-tuning-guide.md create mode 100644 docs/content/v2.25/best-practices-operations/ysql-yb-enable-cbo.md create mode 100644 docs/content/v2.25/contribute/_index.md create mode 100644 docs/content/v2.25/contribute/core-database/_index.md create mode 100644 docs/content/v2.25/contribute/core-database/build-and-test.md create mode 100644 docs/content/v2.25/contribute/core-database/build-from-src-almalinux.md create mode 100644 docs/content/v2.25/contribute/core-database/build-from-src-macos.md create mode 100644 docs/content/v2.25/contribute/core-database/build-from-src-ubuntu.md create mode 100644 docs/content/v2.25/contribute/core-database/checklist.md create mode 100644 docs/content/v2.25/contribute/core-database/clion-setup.md create mode 100644 docs/content/v2.25/contribute/core-database/coding-style.md create mode 100644 docs/content/v2.25/contribute/core-database/includes/build-the-code.md create mode 100644 docs/content/v2.25/contribute/core-database/includes/ccache.md create mode 100644 docs/content/v2.25/contribute/core-database/includes/cmake.md create mode 100644 docs/content/v2.25/contribute/core-database/includes/java.md create mode 100644 docs/content/v2.25/contribute/core-database/includes/ninja.md create mode 100644 docs/content/v2.25/contribute/core-database/includes/opt-yb-build.md create mode 100644 docs/content/v2.25/contribute/core-database/includes/python.md create mode 100644 docs/content/v2.25/contribute/core-database/includes/tldr.md create mode 100644 docs/content/v2.25/contribute/core-database/includes/ulimit.md create mode 100644 docs/content/v2.25/contribute/core-database/includes/yugabyted-ui.md create mode 100644 docs/content/v2.25/contribute/core-database/merge-with-upstream-repositories.md create mode 100644 docs/content/v2.25/contribute/docs/_index.md create mode 100644 docs/content/v2.25/contribute/docs/all-page-elements.md create mode 100644 docs/content/v2.25/contribute/docs/docs-build.md create mode 100644 docs/content/v2.25/contribute/docs/docs-checklist.md create mode 100644 docs/content/v2.25/contribute/docs/docs-edit.md create mode 100644 docs/content/v2.25/contribute/docs/docs-editor-setup.md create mode 100644 docs/content/v2.25/contribute/docs/docs-layout.md create mode 100644 docs/content/v2.25/contribute/docs/docs-page-structure.md create mode 100644 docs/content/v2.25/contribute/docs/docs-style.md create mode 100644 docs/content/v2.25/contribute/docs/include-file.md create mode 100644 docs/content/v2.25/contribute/docs/include-markdown.md create mode 100644 docs/content/v2.25/contribute/docs/macos.md create mode 100644 docs/content/v2.25/contribute/docs/syntax-diagrams.md create mode 100644 
docs/content/v2.25/contribute/docs/ubuntu.md create mode 100644 docs/content/v2.25/contribute/docs/widgets-and-shortcodes.md create mode 100644 docs/content/v2.25/deploy/_index.md create mode 100644 docs/content/v2.25/deploy/checklist.md create mode 100644 docs/content/v2.25/deploy/kubernetes/_index.md create mode 100644 docs/content/v2.25/deploy/kubernetes/best-practices.md create mode 100644 docs/content/v2.25/deploy/kubernetes/clients.md create mode 100644 docs/content/v2.25/deploy/kubernetes/multi-cluster/_index.md create mode 100644 docs/content/v2.25/deploy/kubernetes/multi-cluster/gke/helm-chart.md create mode 100644 docs/content/v2.25/deploy/kubernetes/multi-zone/_index.md create mode 100644 docs/content/v2.25/deploy/kubernetes/multi-zone/eks/helm-chart.md create mode 100644 docs/content/v2.25/deploy/kubernetes/multi-zone/gke/helm-chart.md create mode 100644 docs/content/v2.25/deploy/kubernetes/single-zone/_index.md create mode 100644 docs/content/v2.25/deploy/kubernetes/single-zone/aks/helm-chart.md create mode 100644 docs/content/v2.25/deploy/kubernetes/single-zone/aks/statefulset-yaml.md create mode 100644 docs/content/v2.25/deploy/kubernetes/single-zone/eks/helm-chart.md create mode 100644 docs/content/v2.25/deploy/kubernetes/single-zone/gke/helm-chart.md create mode 100644 docs/content/v2.25/deploy/kubernetes/single-zone/gke/statefulset-yaml-local-ssd.md create mode 100644 docs/content/v2.25/deploy/kubernetes/single-zone/gke/statefulset-yaml.md create mode 100644 docs/content/v2.25/deploy/kubernetes/single-zone/oss/helm-chart.md create mode 100644 docs/content/v2.25/deploy/kubernetes/single-zone/oss/yugabyte-operator.md create mode 100644 docs/content/v2.25/deploy/manual-deployment/_index.md create mode 100644 docs/content/v2.25/deploy/manual-deployment/install-software.md create mode 100644 docs/content/v2.25/deploy/manual-deployment/start-masters.md create mode 100644 docs/content/v2.25/deploy/manual-deployment/start-yugabyted.md create mode 100644 docs/content/v2.25/deploy/manual-deployment/system-config.md create mode 100644 docs/content/v2.25/deploy/manual-deployment/verify-deployment-yugabyted.md create mode 100644 docs/content/v2.25/deploy/manual-deployment/verify-deployment.md create mode 100644 docs/content/v2.25/deploy/multi-dc/3dc-deployment.md create mode 100644 docs/content/v2.25/deploy/multi-dc/_index.md create mode 100644 docs/content/v2.25/deploy/multi-dc/async-replication/_index.md create mode 100644 docs/content/v2.25/deploy/multi-dc/async-replication/async-deployment.md create mode 100644 docs/content/v2.25/deploy/multi-dc/async-replication/async-replication-transactional.md create mode 100644 docs/content/v2.25/deploy/multi-dc/async-replication/async-transactional-failover.md create mode 100644 docs/content/v2.25/deploy/multi-dc/async-replication/async-transactional-setup-automatic.md create mode 100644 docs/content/v2.25/deploy/multi-dc/async-replication/async-transactional-setup-manual.md create mode 100644 docs/content/v2.25/deploy/multi-dc/async-replication/async-transactional-setup-semi-automatic.md create mode 100644 docs/content/v2.25/deploy/multi-dc/async-replication/async-transactional-switchover.md create mode 100644 docs/content/v2.25/deploy/multi-dc/async-replication/async-transactional-tables.md create mode 100644 docs/content/v2.25/deploy/multi-dc/async-replication/includes/automatic-setup.md create mode 100644 docs/content/v2.25/deploy/multi-dc/async-replication/includes/semi-automatic-setup.md create mode 100644 
docs/content/v2.25/deploy/multi-dc/async-replication/includes/transactional-add-db.md create mode 100644 docs/content/v2.25/deploy/multi-dc/async-replication/includes/transactional-drop.md create mode 100644 docs/content/v2.25/deploy/multi-dc/async-replication/includes/transactional-remove-db.md create mode 100644 docs/content/v2.25/deploy/multi-dc/read-replica-clusters.md create mode 100644 docs/content/v2.25/deploy/public-clouds/_index.md create mode 100644 docs/content/v2.25/deploy/public-clouds/aws/cloudformation.md create mode 100644 docs/content/v2.25/deploy/public-clouds/aws/manual-deployment.md create mode 100644 docs/content/v2.25/deploy/public-clouds/aws/terraform.md create mode 100644 docs/content/v2.25/deploy/public-clouds/azure/aks.md create mode 100644 docs/content/v2.25/deploy/public-clouds/azure/azure-arm.md create mode 100644 docs/content/v2.25/deploy/public-clouds/azure/terraform.md create mode 100644 docs/content/v2.25/deploy/public-clouds/gcp/gcp-deployment-manager.md create mode 100644 docs/content/v2.25/deploy/public-clouds/gcp/gke.md create mode 100644 docs/content/v2.25/deploy/public-clouds/gcp/terraform.md create mode 100644 docs/content/v2.25/develop/_index.md create mode 100644 docs/content/v2.25/develop/best-practices-develop/_index.md create mode 100644 docs/content/v2.25/develop/best-practices-develop/best-practices-ycql.md create mode 100644 docs/content/v2.25/develop/best-practices-develop/clients.md create mode 100644 docs/content/v2.25/develop/best-practices-develop/data-modeling-perf.md create mode 100644 docs/content/v2.25/develop/build-global-apps/_index.md create mode 100644 docs/content/v2.25/develop/build-global-apps/active-active-multi-master.md create mode 100644 docs/content/v2.25/develop/build-global-apps/active-active-single-master.md create mode 100644 docs/content/v2.25/develop/build-global-apps/duplicate-indexes.md create mode 100644 docs/content/v2.25/develop/build-global-apps/follower-reads.md create mode 100644 docs/content/v2.25/develop/build-global-apps/global-database.md create mode 100644 docs/content/v2.25/develop/build-global-apps/latency-optimized-geo-partition.md create mode 100644 docs/content/v2.25/develop/build-global-apps/locality-optimized-geo-partition.md create mode 100644 docs/content/v2.25/develop/build-global-apps/read-replicas.md create mode 100644 docs/content/v2.25/develop/build-global-apps/real-world-scenarios/_index.md create mode 100644 docs/content/v2.25/develop/build-global-apps/real-world-scenarios/globally-local.md create mode 100644 docs/content/v2.25/develop/data-modeling/_index.md create mode 100644 docs/content/v2.25/develop/data-modeling/common-patterns/_index.md create mode 100644 docs/content/v2.25/develop/data-modeling/common-patterns/jobqueue.md create mode 100644 docs/content/v2.25/develop/data-modeling/common-patterns/keyvalue.md create mode 100644 docs/content/v2.25/develop/data-modeling/common-patterns/timeseries/_index.md create mode 100644 docs/content/v2.25/develop/data-modeling/common-patterns/timeseries/data-expiry.md create mode 100644 docs/content/v2.25/develop/data-modeling/common-patterns/timeseries/global-ordering.md create mode 100644 docs/content/v2.25/develop/data-modeling/common-patterns/timeseries/ordering-by-entity.md create mode 100644 docs/content/v2.25/develop/data-modeling/common-patterns/timeseries/partitioning-by-time.md create mode 100644 docs/content/v2.25/develop/data-modeling/hot-shards-ysql.md create mode 100644 docs/content/v2.25/develop/data-modeling/partitioning.md 
create mode 100644 docs/content/v2.25/develop/data-modeling/primary-keys-ycql.md create mode 100644 docs/content/v2.25/develop/data-modeling/primary-keys-ysql.md create mode 100644 docs/content/v2.25/develop/data-modeling/secondary-indexes-ycql.md create mode 100644 docs/content/v2.25/develop/data-modeling/secondary-indexes-ysql.md create mode 100644 docs/content/v2.25/develop/gitdev/_index.md create mode 100644 docs/content/v2.25/develop/gitdev/codespaces.md create mode 100644 docs/content/v2.25/develop/gitdev/gitpod.md create mode 100644 docs/content/v2.25/develop/learn/_index.md create mode 100644 docs/content/v2.25/develop/learn/aggregations-ycql.md create mode 100644 docs/content/v2.25/develop/learn/aggregations-ysql.md create mode 100644 docs/content/v2.25/develop/learn/batch-operations-ycql.md create mode 100644 docs/content/v2.25/develop/learn/batch-operations-ysql.md create mode 100644 docs/content/v2.25/develop/learn/date-and-time-ycql.md create mode 100644 docs/content/v2.25/develop/learn/date-and-time-ysql.md create mode 100644 docs/content/v2.25/develop/learn/strings-and-text-ycql.md create mode 100644 docs/content/v2.25/develop/learn/strings-and-text-ysql.md create mode 100644 docs/content/v2.25/develop/learn/text-search/_index.md create mode 100644 docs/content/v2.25/develop/learn/text-search/full-text-search.md create mode 100644 docs/content/v2.25/develop/learn/text-search/pattern-matching.md create mode 100644 docs/content/v2.25/develop/learn/text-search/phonetic-matching.md create mode 100644 docs/content/v2.25/develop/learn/text-search/similarity-matching.md create mode 100644 docs/content/v2.25/develop/learn/transactions/acid-transactions-ycql.md create mode 100644 docs/content/v2.25/develop/learn/transactions/acid-transactions-ysql.md create mode 100644 docs/content/v2.25/develop/learn/transactions/transactions-errorcodes-ysql.md create mode 100644 docs/content/v2.25/develop/learn/transactions/transactions-global-apps.md create mode 100644 docs/content/v2.25/develop/learn/transactions/transactions-performance-ysql.md create mode 100644 docs/content/v2.25/develop/learn/transactions/transactions-retries-ysql.md create mode 100644 docs/content/v2.25/develop/learn/ttl-data-expiration-ycql.md create mode 100644 docs/content/v2.25/develop/learn/ttl-data-expiration-ysql.md create mode 100644 docs/content/v2.25/develop/multi-cloud/_index.md create mode 100644 docs/content/v2.25/develop/multi-cloud/hybrid-cloud.md create mode 100644 docs/content/v2.25/develop/multi-cloud/multicloud-migration.md create mode 100644 docs/content/v2.25/develop/multi-cloud/multicloud-setup.md create mode 100644 docs/content/v2.25/develop/quality-of-service/_index.md create mode 100644 docs/content/v2.25/develop/quality-of-service/limiting-connections.md create mode 100644 docs/content/v2.25/develop/quality-of-service/transaction-priority.md create mode 100644 docs/content/v2.25/develop/quality-of-service/write-heavy-workloads.md create mode 100644 docs/content/v2.25/drivers-orms/_index.md create mode 100644 docs/content/v2.25/drivers-orms/c/_index.md create mode 100644 docs/content/v2.25/drivers-orms/c/ysql.md create mode 100644 docs/content/v2.25/drivers-orms/cpp/_index.md create mode 100644 docs/content/v2.25/drivers-orms/cpp/ycql.md create mode 100644 docs/content/v2.25/drivers-orms/cpp/ysql.md create mode 100644 docs/content/v2.25/drivers-orms/csharp/_index.md create mode 100644 docs/content/v2.25/drivers-orms/csharp/compatibility.md create mode 100644 
docs/content/v2.25/drivers-orms/csharp/entityframework.md create mode 100644 docs/content/v2.25/drivers-orms/csharp/postgres-npgsql-reference.md create mode 100644 docs/content/v2.25/drivers-orms/csharp/postgres-npgsql.md create mode 100644 docs/content/v2.25/drivers-orms/csharp/yb-npgsql-reference.md create mode 100644 docs/content/v2.25/drivers-orms/csharp/ycql.md create mode 100644 docs/content/v2.25/drivers-orms/csharp/ysql.md create mode 100644 docs/content/v2.25/drivers-orms/elixir/_index.md create mode 100644 docs/content/v2.25/drivers-orms/elixir/phoenix.md create mode 100644 docs/content/v2.25/drivers-orms/go/_index.md create mode 100644 docs/content/v2.25/drivers-orms/go/compatibility.md create mode 100644 docs/content/v2.25/drivers-orms/go/gorm.md create mode 100644 docs/content/v2.25/drivers-orms/go/pg.md create mode 100644 docs/content/v2.25/drivers-orms/go/pgx-reference.md create mode 100644 docs/content/v2.25/drivers-orms/go/pgx.md create mode 100644 docs/content/v2.25/drivers-orms/go/pq-reference.md create mode 100644 docs/content/v2.25/drivers-orms/go/pq.md create mode 100644 docs/content/v2.25/drivers-orms/go/yb-pgx-reference.md create mode 100644 docs/content/v2.25/drivers-orms/go/yb-pgx.md create mode 100644 docs/content/v2.25/drivers-orms/go/ycql.md create mode 100644 docs/content/v2.25/drivers-orms/include-drivers-orms-list.md create mode 100644 docs/content/v2.25/drivers-orms/java/_index.md create mode 100644 docs/content/v2.25/drivers-orms/java/compatibility.md create mode 100644 docs/content/v2.25/drivers-orms/java/ebean.md create mode 100644 docs/content/v2.25/drivers-orms/java/hibernate.md create mode 100644 docs/content/v2.25/drivers-orms/java/mybatis.md create mode 100644 docs/content/v2.25/drivers-orms/java/postgres-jdbc-reference.md create mode 100644 docs/content/v2.25/drivers-orms/java/postgres-jdbc.md create mode 100644 docs/content/v2.25/drivers-orms/java/yb-r2dbc.md create mode 100644 docs/content/v2.25/drivers-orms/java/ycql-4.x.md create mode 100644 docs/content/v2.25/drivers-orms/java/ycql-ssl.md create mode 100644 docs/content/v2.25/drivers-orms/java/ycql.md create mode 100644 docs/content/v2.25/drivers-orms/java/ysql-vertx-pg-client.md create mode 100644 docs/content/v2.25/drivers-orms/java/yugabyte-jdbc-reference.md create mode 100644 docs/content/v2.25/drivers-orms/java/yugabyte-jdbc.md create mode 100644 docs/content/v2.25/drivers-orms/nodejs/_index.md create mode 100644 docs/content/v2.25/drivers-orms/nodejs/compatibility.md create mode 100644 docs/content/v2.25/drivers-orms/nodejs/postgres-node-driver.md create mode 100644 docs/content/v2.25/drivers-orms/nodejs/postgres-pg-reference.md create mode 100644 docs/content/v2.25/drivers-orms/nodejs/prisma.md create mode 100644 docs/content/v2.25/drivers-orms/nodejs/sequelize.md create mode 100644 docs/content/v2.25/drivers-orms/nodejs/typeorm.md create mode 100644 docs/content/v2.25/drivers-orms/nodejs/ycql.md create mode 100644 docs/content/v2.25/drivers-orms/nodejs/yugabyte-node-driver.md create mode 100644 docs/content/v2.25/drivers-orms/nodejs/yugabyte-pg-reference.md create mode 100644 docs/content/v2.25/drivers-orms/orms/_index.md create mode 100644 docs/content/v2.25/drivers-orms/orms/csharp/ysql-dapper.md create mode 100644 docs/content/v2.25/drivers-orms/orms/csharp/ysql-entity-framework.md create mode 100644 docs/content/v2.25/drivers-orms/orms/go/ysql-gorm.md create mode 100644 docs/content/v2.25/drivers-orms/orms/java/ysql-ebean.md create mode 100644 
docs/content/v2.25/drivers-orms/orms/java/ysql-hibernate.md create mode 100644 docs/content/v2.25/drivers-orms/orms/java/ysql-mybatis.md create mode 100644 docs/content/v2.25/drivers-orms/orms/java/ysql-spring-data.md create mode 100644 docs/content/v2.25/drivers-orms/orms/nodejs/ysql-prisma.md create mode 100644 docs/content/v2.25/drivers-orms/orms/nodejs/ysql-sequelize.md create mode 100644 docs/content/v2.25/drivers-orms/orms/php/ysql-laravel.md create mode 100644 docs/content/v2.25/drivers-orms/orms/python/ysql-django.md create mode 100644 docs/content/v2.25/drivers-orms/orms/python/ysql-sqlalchemy.md create mode 100644 docs/content/v2.25/drivers-orms/orms/rust/ysql-diesel.md create mode 100644 docs/content/v2.25/drivers-orms/php/_index.md create mode 100644 docs/content/v2.25/drivers-orms/php/laravel.md create mode 100644 docs/content/v2.25/drivers-orms/php/ysql.md create mode 100644 docs/content/v2.25/drivers-orms/python/_index.md create mode 100644 docs/content/v2.25/drivers-orms/python/aiopg.md create mode 100644 docs/content/v2.25/drivers-orms/python/compatibility.md create mode 100644 docs/content/v2.25/drivers-orms/python/django.md create mode 100644 docs/content/v2.25/drivers-orms/python/postgres-psycopg2-reference.md create mode 100644 docs/content/v2.25/drivers-orms/python/postgres-psycopg2.md create mode 100644 docs/content/v2.25/drivers-orms/python/postgres-psycopg3-reference.md create mode 100644 docs/content/v2.25/drivers-orms/python/postgres-psycopg3.md create mode 100644 docs/content/v2.25/drivers-orms/python/sqlalchemy.md create mode 100644 docs/content/v2.25/drivers-orms/python/ycql.md create mode 100644 docs/content/v2.25/drivers-orms/python/yugabyte-psycopg2-reference.md create mode 100644 docs/content/v2.25/drivers-orms/python/yugabyte-psycopg2.md create mode 100644 docs/content/v2.25/drivers-orms/ruby/_index.md create mode 100644 docs/content/v2.25/drivers-orms/ruby/activerecord.md create mode 100644 docs/content/v2.25/drivers-orms/ruby/ruby-pg.md create mode 100644 docs/content/v2.25/drivers-orms/ruby/yb-ruby-pg.md create mode 100644 docs/content/v2.25/drivers-orms/ruby/ycql.md create mode 100644 docs/content/v2.25/drivers-orms/rust/_index.md create mode 100644 docs/content/v2.25/drivers-orms/rust/diesel.md create mode 100644 docs/content/v2.25/drivers-orms/rust/rust-postgres-reference.md create mode 100644 docs/content/v2.25/drivers-orms/rust/yb-rust-postgres.md create mode 100644 docs/content/v2.25/drivers-orms/scala/_index.md create mode 100644 docs/content/v2.25/drivers-orms/scala/ycql.md create mode 100644 docs/content/v2.25/drivers-orms/smart-drivers-ycql.md create mode 100644 docs/content/v2.25/drivers-orms/smart-drivers.md create mode 100644 docs/content/v2.25/drivers-orms/ycql-client-drivers.md create mode 100644 docs/content/v2.25/drivers-orms/ysql-client-drivers.md create mode 100644 docs/content/v2.25/explore/_index.md create mode 100644 docs/content/v2.25/explore/change-data-capture.md create mode 100644 docs/content/v2.25/explore/cluster-management/_index.md create mode 100644 docs/content/v2.25/explore/cluster-management/point-in-time-recovery-ycql.md create mode 100644 docs/content/v2.25/explore/cluster-management/point-in-time-recovery-ysql.md create mode 100644 docs/content/v2.25/explore/cluster-setup-aeon.md create mode 100644 docs/content/v2.25/explore/cluster-setup-anywhere.md create mode 100644 docs/content/v2.25/explore/cluster-setup-local.md create mode 100644 docs/content/v2.25/explore/fault-tolerance/_index.md create mode 100644 
docs/content/v2.25/explore/fault-tolerance/handling-node-upgrades.md create mode 100644 docs/content/v2.25/explore/fault-tolerance/handling-rack-failures.md create mode 100644 docs/content/v2.25/explore/fault-tolerance/handling-region-failures.md create mode 100644 docs/content/v2.25/explore/fault-tolerance/handling-zone-failures.md create mode 100644 docs/content/v2.25/explore/fault-tolerance/macos-yba.md create mode 100644 docs/content/v2.25/explore/fault-tolerance/macos.md create mode 100644 docs/content/v2.25/explore/fault-tolerance/transaction-availability.md create mode 100644 docs/content/v2.25/explore/going-beyond-sql/_index.md create mode 100644 docs/content/v2.25/explore/going-beyond-sql/asynchronous-replication-ysql.md create mode 100644 docs/content/v2.25/explore/going-beyond-sql/cluster-aware-drivers.md create mode 100644 docs/content/v2.25/explore/going-beyond-sql/cluster-topology.md create mode 100644 docs/content/v2.25/explore/going-beyond-sql/connection-mgr-ysql.md create mode 100644 docs/content/v2.25/explore/going-beyond-sql/data-sharding.md create mode 100644 docs/content/v2.25/explore/going-beyond-sql/decoupling-compute-storage.md create mode 100644 docs/content/v2.25/explore/going-beyond-sql/follower-reads-ycql.md create mode 100644 docs/content/v2.25/explore/going-beyond-sql/follower-reads-ysql.md create mode 100644 docs/content/v2.25/explore/going-beyond-sql/gen-ai-apps.md create mode 100644 docs/content/v2.25/explore/going-beyond-sql/tablespaces-platform.md create mode 100644 docs/content/v2.25/explore/going-beyond-sql/tablespaces-yugabyted.md create mode 100644 docs/content/v2.25/explore/going-beyond-sql/tablespaces.md create mode 100644 docs/content/v2.25/explore/going-beyond-sql/topology-aware-drivers.md create mode 100644 docs/content/v2.25/explore/linear-scalability/_index.md create mode 100644 docs/content/v2.25/explore/linear-scalability/data-distribution.md create mode 100644 docs/content/v2.25/explore/linear-scalability/horizontal-vs-vertical-scaling.md create mode 100644 docs/content/v2.25/explore/linear-scalability/node-addition.md create mode 100644 docs/content/v2.25/explore/linear-scalability/scaling-large-datasets.md create mode 100644 docs/content/v2.25/explore/linear-scalability/scaling-reads.md create mode 100644 docs/content/v2.25/explore/linear-scalability/scaling-transactions.md create mode 100644 docs/content/v2.25/explore/linear-scalability/scaling-universe-cloud.md create mode 100644 docs/content/v2.25/explore/linear-scalability/scaling-universe-yba.md create mode 100644 docs/content/v2.25/explore/linear-scalability/scaling-universe.md create mode 100644 docs/content/v2.25/explore/linear-scalability/scaling-writes.md create mode 100644 docs/content/v2.25/explore/multi-region-deployments/_index.md create mode 100644 docs/content/v2.25/explore/multi-region-deployments/read-replicas-ycql.md create mode 100644 docs/content/v2.25/explore/multi-region-deployments/read-replicas-ysql.md create mode 100644 docs/content/v2.25/explore/multi-region-deployments/row-level-geo-partitioning.md create mode 100644 docs/content/v2.25/explore/multi-region-deployments/synchronous-replication-cloud.md create mode 100644 docs/content/v2.25/explore/multi-region-deployments/synchronous-replication-yba.md create mode 100644 docs/content/v2.25/explore/multi-region-deployments/synchronous-replication-ysql.md create mode 100644 docs/content/v2.25/explore/observability/_index.md create mode 100644 docs/content/v2.25/explore/observability/active-session-history.md create 
mode 100644 docs/content/v2.25/explore/observability/grafana-dashboard/grafana.md create mode 100644 docs/content/v2.25/explore/observability/logging.md create mode 100644 docs/content/v2.25/explore/observability/pg-locks.md create mode 100644 docs/content/v2.25/explore/observability/pg-stat-activity.md create mode 100644 docs/content/v2.25/explore/observability/pg-stat-progress-copy.md create mode 100644 docs/content/v2.25/explore/observability/prometheus-integration.md create mode 100644 docs/content/v2.25/explore/observability/yb-local-tablets.md create mode 100644 docs/content/v2.25/explore/observability/yb-pg-stat-get-queries.md create mode 100644 docs/content/v2.25/explore/query-1-performance/_index.md create mode 100644 docs/content/v2.25/explore/query-1-performance/auto-analyze.md create mode 100644 docs/content/v2.25/explore/query-1-performance/explain-analyze.md create mode 100644 docs/content/v2.25/explore/query-1-performance/pg-hint-plan.md create mode 100644 docs/content/v2.25/explore/query-1-performance/pg-stat-statements.md create mode 100644 docs/content/v2.25/explore/query-1-performance/pg-stats.md create mode 100644 docs/content/v2.25/explore/query-1-performance/query-diagnostics.md create mode 100644 docs/content/v2.25/explore/query-1-performance/ycql-stat-statements.md create mode 100644 docs/content/v2.25/explore/security/security.md create mode 100644 docs/content/v2.25/explore/transactions/_index.md create mode 100644 docs/content/v2.25/explore/transactions/distributed-transactions-ycql.md create mode 100644 docs/content/v2.25/explore/transactions/distributed-transactions-ysql.md create mode 100644 docs/content/v2.25/explore/transactions/explicit-locking.md create mode 100644 docs/content/v2.25/explore/transactions/isolation-levels.md create mode 100644 docs/content/v2.25/explore/ycql-language/_index.md create mode 100644 docs/content/v2.25/explore/ycql-language/cassandra-feature-support.md create mode 100644 docs/content/v2.25/explore/ycql-language/data-types.md create mode 100644 docs/content/v2.25/explore/ycql-language/indexes-constraints/_index.md create mode 100644 docs/content/v2.25/explore/ycql-language/indexes-constraints/covering-index-ycql.md create mode 100644 docs/content/v2.25/explore/ycql-language/indexes-constraints/partial-index-ycql.md create mode 100644 docs/content/v2.25/explore/ycql-language/indexes-constraints/primary-key-ycql.md create mode 100644 docs/content/v2.25/explore/ycql-language/indexes-constraints/secondary-indexes-with-jsonb-ycql.md create mode 100644 docs/content/v2.25/explore/ycql-language/indexes-constraints/secondary-indexes-ycql.md create mode 100644 docs/content/v2.25/explore/ycql-language/indexes-constraints/unique-index-ycql.md create mode 100644 docs/content/v2.25/explore/ycql-language/jsonb-ycql.md create mode 100644 docs/content/v2.25/explore/ycql-language/keyspaces-tables.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/_index.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/advanced-features/_index.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/advanced-features/collations.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/advanced-features/cursor.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/advanced-features/foreign-data-wrappers.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/advanced-features/inheritance.md create mode 100644 
docs/content/v2.25/explore/ysql-language-features/advanced-features/parallel-query.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/advanced-features/partitions.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/advanced-features/savepoints.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/advanced-features/snapshot-synchronization.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/advanced-features/stored-procedures.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/advanced-features/triggers.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/advanced-features/views.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/data-manipulation.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/data-types.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/databases-schemas-tables.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/expressions-operators.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/indexes-constraints/_index.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/indexes-constraints/covering-index-ysql.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/indexes-constraints/expression-index-ysql.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/indexes-constraints/gin.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/indexes-constraints/index-backfill.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/indexes-constraints/partial-index-ysql.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/indexes-constraints/primary-key-ysql.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/indexes-constraints/secondary-indexes-with-jsonb-ysql.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/indexes-constraints/secondary-indexes-ysql.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/indexes-constraints/unique-index-ysql.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/jsonb-ysql.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/_index.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-auto-explain.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-file-fdw.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-fuzzystrmatch.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-hypopg.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-passwordcheck.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-pganon.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-pgcron.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-pgcrypto.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-pgpartman.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-pgstatstatements.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-pgvector.md create mode 100644 
docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-postgres-fdw.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-postgresql-hll.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-spi.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-tablefunc.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/extension-uuid-ossp.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/pg-extensions/install-extensions.md create mode 100644 docs/content/v2.25/explore/ysql-language-features/queries.md create mode 100644 docs/content/v2.25/faq/comparisons/_index.md create mode 100644 docs/content/v2.25/faq/comparisons/amazon-aurora.md create mode 100644 docs/content/v2.25/faq/comparisons/amazon-dynamodb.md create mode 100644 docs/content/v2.25/faq/comparisons/azure-cosmos.md create mode 100644 docs/content/v2.25/faq/comparisons/cassandra.md create mode 100644 docs/content/v2.25/faq/comparisons/cockroachdb.md create mode 100644 docs/content/v2.25/faq/comparisons/foundationdb.md create mode 100644 docs/content/v2.25/faq/comparisons/google-spanner.md create mode 100644 docs/content/v2.25/faq/comparisons/hbase.md create mode 100644 docs/content/v2.25/faq/comparisons/mongodb.md create mode 100644 docs/content/v2.25/faq/comparisons/postgresql.md create mode 100644 docs/content/v2.25/faq/comparisons/redis.md create mode 100644 docs/content/v2.25/faq/comparisons/tidb.md create mode 100644 docs/content/v2.25/faq/comparisons/vitess.md create mode 100644 docs/content/v2.25/faq/compatibility.md create mode 100644 docs/content/v2.25/faq/general.md create mode 100644 docs/content/v2.25/faq/operations-faq.md create mode 100644 docs/content/v2.25/faq/smart-drivers-faq.md create mode 100644 docs/content/v2.25/faq/yugabyte-platform.md create mode 100644 docs/content/v2.25/faq/yugabytedb-managed-faq.md create mode 100644 docs/content/v2.25/features.md create mode 100644 docs/content/v2.25/google8916e0ed5f6556b0.html create mode 100644 docs/content/v2.25/integrations/_index.md create mode 100644 docs/content/v2.25/integrations/akka-ycql.md create mode 100644 docs/content/v2.25/integrations/akka-ysql.md create mode 100644 docs/content/v2.25/integrations/apache-beam.md create mode 100644 docs/content/v2.25/integrations/apache-flink.md create mode 100644 docs/content/v2.25/integrations/apache-hudi.md create mode 100644 docs/content/v2.25/integrations/apache-spark/_index.md create mode 100644 docs/content/v2.25/integrations/apache-spark/java-ycql.md create mode 100644 docs/content/v2.25/integrations/apache-spark/java-ysql.md create mode 100644 docs/content/v2.25/integrations/apache-spark/python-ycql.md create mode 100644 docs/content/v2.25/integrations/apache-spark/python-ysql.md create mode 100644 docs/content/v2.25/integrations/apache-spark/scala-ycql.md create mode 100644 docs/content/v2.25/integrations/apache-spark/scala-ysql.md create mode 100644 docs/content/v2.25/integrations/apache-spark/spark-sql.md create mode 100644 docs/content/v2.25/integrations/ataccama.md create mode 100644 docs/content/v2.25/integrations/atlas-ycql.md create mode 100644 docs/content/v2.25/integrations/atomicjar.md create mode 100644 docs/content/v2.25/integrations/budibase.md create mode 100644 docs/content/v2.25/integrations/camunda.md create mode 100644 docs/content/v2.25/integrations/caspio.md create mode 100644 
docs/content/v2.25/integrations/cdc/debezium.md create mode 100644 docs/content/v2.25/integrations/cohesity.md create mode 100644 docs/content/v2.25/integrations/commvault.md create mode 100644 docs/content/v2.25/integrations/dataedo.md create mode 100644 docs/content/v2.25/integrations/datahub.md create mode 100644 docs/content/v2.25/integrations/delphix.md create mode 100644 docs/content/v2.25/integrations/django-rest-framework.md create mode 100644 docs/content/v2.25/integrations/flyway.md create mode 100644 docs/content/v2.25/integrations/gorm.md create mode 100644 docs/content/v2.25/integrations/hashicorp-vault.md create mode 100644 docs/content/v2.25/integrations/hasura/_index.md create mode 100644 docs/content/v2.25/integrations/hasura/graphql.md create mode 100644 docs/content/v2.25/integrations/hasura/hasura-1.md create mode 100644 docs/content/v2.25/integrations/hasura/hasura-cloud.md create mode 100644 docs/content/v2.25/integrations/hasura/hasura-sample-app.md create mode 100644 docs/content/v2.25/integrations/hevodata.md create mode 100644 docs/content/v2.25/integrations/jaeger.md create mode 100644 docs/content/v2.25/integrations/janusgraph.md create mode 100644 docs/content/v2.25/integrations/kairosdb.md create mode 100644 docs/content/v2.25/integrations/keycloak.md create mode 100644 docs/content/v2.25/integrations/kinesis.md create mode 100644 docs/content/v2.25/integrations/liquibase.md create mode 100644 docs/content/v2.25/integrations/metacat.md create mode 100644 docs/content/v2.25/integrations/mirantis.md create mode 100644 docs/content/v2.25/integrations/nutanix-ahv.md create mode 100644 docs/content/v2.25/integrations/pgmigrate.md create mode 100644 docs/content/v2.25/integrations/presto.md create mode 100644 docs/content/v2.25/integrations/prisma.md create mode 100644 docs/content/v2.25/integrations/rabbitmq.md create mode 100644 docs/content/v2.25/integrations/retool.md create mode 100644 docs/content/v2.25/integrations/schema-evolution-mgr.md create mode 100644 docs/content/v2.25/integrations/sequelize.md create mode 100644 docs/content/v2.25/integrations/spring-framework/_index.md create mode 100644 docs/content/v2.25/integrations/spring-framework/sd-jpa.md create mode 100644 docs/content/v2.25/integrations/spring-framework/sdyb.md create mode 100644 docs/content/v2.25/integrations/spring-framework/spring-aeon.md create mode 100644 docs/content/v2.25/integrations/spring-framework/spring-cassandra.md create mode 100644 docs/content/v2.25/integrations/sqlalchemy.md create mode 100644 docs/content/v2.25/integrations/superblocks.md create mode 100644 docs/content/v2.25/integrations/tools/_index.md create mode 100644 docs/content/v2.25/integrations/tools/arctype.md create mode 100644 docs/content/v2.25/integrations/tools/dbeaver-ycql.md create mode 100644 docs/content/v2.25/integrations/tools/dbeaver-ysql.md create mode 100644 docs/content/v2.25/integrations/tools/dbschema.md create mode 100644 docs/content/v2.25/integrations/tools/metabase.md create mode 100644 docs/content/v2.25/integrations/tools/pgadmin.md create mode 100644 docs/content/v2.25/integrations/tools/sql-workbench.md create mode 100644 docs/content/v2.25/integrations/tools/superset.md create mode 100644 docs/content/v2.25/integrations/tools/tableplus.md create mode 100644 docs/content/v2.25/integrations/tools/visualstudioworkbench.md create mode 100644 docs/content/v2.25/integrations/typeorm.md create mode 100644 docs/content/v2.25/integrations/wso2.md create mode 100644 
docs/content/v2.25/integrations/ysql-loader.md create mode 100644 docs/content/v2.25/launch-and-manage/_index.md create mode 100644 docs/content/v2.25/launch-and-manage/monitor-and-alert/_index.md create mode 100644 docs/content/v2.25/launch-and-manage/monitor-and-alert/active-session-history-monitor.md create mode 100644 docs/content/v2.25/launch-and-manage/monitor-and-alert/metrics/_index.md create mode 100644 docs/content/v2.25/launch-and-manage/monitor-and-alert/metrics/cache-storage.md create mode 100644 docs/content/v2.25/launch-and-manage/monitor-and-alert/metrics/connections.md create mode 100644 docs/content/v2.25/launch-and-manage/monitor-and-alert/metrics/raft-dst.md create mode 100644 docs/content/v2.25/launch-and-manage/monitor-and-alert/metrics/replication.md create mode 100644 docs/content/v2.25/launch-and-manage/monitor-and-alert/metrics/throughput.md create mode 100644 docs/content/v2.25/launch-and-manage/monitor-and-alert/metrics/ybmaster.md create mode 100644 docs/content/v2.25/launch-and-manage/monitor-and-alert/xcluster-monitor.md create mode 100644 docs/content/v2.25/legal/_index.md create mode 100644 docs/content/v2.25/legal/third-party-software.md create mode 100644 docs/content/v2.25/manage/_index.md create mode 100644 docs/content/v2.25/manage/backup-restore/_index.md create mode 100644 docs/content/v2.25/manage/backup-restore/export-import-data-ycql.md create mode 100644 docs/content/v2.25/manage/backup-restore/export-import-data.md create mode 100644 docs/content/v2.25/manage/backup-restore/instant-db-cloning.md create mode 100644 docs/content/v2.25/manage/backup-restore/point-in-time-recovery.md create mode 100644 docs/content/v2.25/manage/backup-restore/snapshot-ysql.md create mode 100644 docs/content/v2.25/manage/backup-restore/snapshots-ycql.md create mode 100644 docs/content/v2.25/manage/backup-restore/time-travel-query.md create mode 100644 docs/content/v2.25/manage/change-cluster-config.md create mode 100644 docs/content/v2.25/manage/data-migration/_index.md create mode 100644 docs/content/v2.25/manage/data-migration/bulk-export-ycql.md create mode 100644 docs/content/v2.25/manage/data-migration/bulk-export-ysql.md create mode 100644 docs/content/v2.25/manage/data-migration/bulk-import-ycql.md create mode 100644 docs/content/v2.25/manage/data-migration/bulk-import-ysql.md create mode 100644 docs/content/v2.25/manage/data-migration/migrate-from-postgres.md create mode 100644 docs/content/v2.25/manage/data-migration/verify-migration-ycql.md create mode 100644 docs/content/v2.25/manage/data-migration/verify-migration-ysql.md create mode 100644 docs/content/v2.25/manage/diagnostics-reporting.md create mode 100644 docs/content/v2.25/manage/upgrade-deployment.md create mode 100644 docs/content/v2.25/manage/ysql-major-upgrade-local.md create mode 100644 docs/content/v2.25/manage/ysql-major-upgrade-yugabyted.md create mode 100644 docs/content/v2.25/quick-start-yugabytedb-managed/_index.md create mode 100644 docs/content/v2.25/quick-start-yugabytedb-managed/managed-quick-start-include.md create mode 100644 docs/content/v2.25/quick-start-yugabytedb-managed/quick-start-buildapps-include.md create mode 100644 docs/content/v2.25/quick-start/docker.md create mode 100644 docs/content/v2.25/quick-start/explore/binary/explore-ycql.md create mode 100644 docs/content/v2.25/quick-start/explore/binary/explore-ysql.md create mode 100644 docs/content/v2.25/quick-start/explore/docker/explore-ycql.md create mode 100644 docs/content/v2.25/quick-start/explore/docker/explore-ysql.md 
create mode 100644 docs/content/v2.25/quick-start/explore/kubernetes/explore-ycql.md create mode 100644 docs/content/v2.25/quick-start/explore/kubernetes/explore-ysql.md create mode 100644 docs/content/v2.25/quick-start/explore/ycql.md create mode 100644 docs/content/v2.25/quick-start/explore/ysql.md create mode 100644 docs/content/v2.25/quick-start/include-connect.md create mode 100644 docs/content/v2.25/quick-start/include-prerequisites-linux.md create mode 100644 docs/content/v2.25/quick-start/include-prerequisites-macos.md create mode 100644 docs/content/v2.25/quick-start/kubernetes.md create mode 100644 docs/content/v2.25/quick-start/linux.md create mode 100644 docs/content/v2.25/quick-start/macos.md create mode 100644 docs/content/v2.25/reference/configuration/_index.md create mode 100644 docs/content/v2.25/reference/configuration/all-flags-yb-master.md create mode 100644 docs/content/v2.25/reference/configuration/all-flags-yb-tserver.md create mode 100644 docs/content/v2.25/reference/configuration/default-ports.md create mode 100644 docs/content/v2.25/reference/configuration/operating-systems.md create mode 100644 docs/content/v2.25/reference/configuration/postgresql-compatibility.md create mode 100644 docs/content/v2.25/reference/configuration/smart-defaults.md create mode 100644 docs/content/v2.25/reference/configuration/yb-master.md create mode 100644 docs/content/v2.25/reference/configuration/yb-tserver.md create mode 100644 docs/content/v2.25/reference/configuration/yugabyted.md create mode 100644 docs/content/v2.25/reference/get-started-guide.md create mode 100644 docs/content/v2.25/releases/_index.md create mode 100644 docs/content/v2.25/releases/techadvisories/_index.md create mode 100644 docs/content/v2.25/releases/techadvisories/_template.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-14696.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-20398.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-20648.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-20827.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-20864.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-21218.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-21297.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-21491.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-22057.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-22802.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-22935.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-23476.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-24992.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-25106.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-25193.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-26440.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-26666.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-27380.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-28222.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-2968.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-cl-23623.md create mode 100644 docs/content/v2.25/releases/techadvisories/ta-reol-24.md create mode 100644 docs/content/v2.25/releases/versioning.md create mode 100644 docs/content/v2.25/releases/yba-releases/_index.md 
create mode 100644 docs/content/v2.25/releases/yba-releases/v2.20.md create mode 100644 docs/content/v2.25/releases/yba-releases/v2.25.md create mode 100644 docs/content/v2.25/releases/yba-releases/v2024.1.md create mode 100644 docs/content/v2.25/releases/yba-releases/v2024.2.md create mode 100644 docs/content/v2.25/releases/yba-releases/v2025.1.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/_index.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v1.2.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v1.3.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.0.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.1.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.11.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.12.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.13.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.14.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.15.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.16-anywhere.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.16.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.17-anywhere.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.17.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.18-anywhere.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.18.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.19-anywhere.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.19.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.2.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.21-anywhere.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.21.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.23-anywhere.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.23.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.3.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.4.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.5.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.6.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.7.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.8.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/end-of-life/v2.9.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/v2.20.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/v2.25.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/v2024.1.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/v2024.2.md create mode 100644 docs/content/v2.25/releases/ybdb-releases/v2025.1.md create mode 100644 docs/content/v2.25/releases/yugabyte-clients.md create mode 100644 docs/content/v2.25/sample-data/_index.md create mode 100644 docs/content/v2.25/sample-data/chinook.md create mode 100644 docs/content/v2.25/sample-data/northwind.md create mode 100644 docs/content/v2.25/sample-data/pgexercises.md create mode 100644 
docs/content/v2.25/sample-data/retail-analytics.md create mode 100644 docs/content/v2.25/sample-data/sportsdb.md create mode 100644 docs/content/v2.25/secure/_index.md create mode 100644 docs/content/v2.25/secure/audit-logging/_index.md create mode 100644 docs/content/v2.25/secure/audit-logging/audit-logging-ycql.md create mode 100644 docs/content/v2.25/secure/audit-logging/audit-logging-ysql.md create mode 100644 docs/content/v2.25/secure/audit-logging/object-audit-logging-ysql.md create mode 100644 docs/content/v2.25/secure/audit-logging/session-audit-logging-ysql.md create mode 100644 docs/content/v2.25/secure/audit-logging/trace-statements-ysql.md create mode 100644 docs/content/v2.25/secure/authentication/_index.md create mode 100644 docs/content/v2.25/secure/authentication/host-based-authentication.md create mode 100644 docs/content/v2.25/secure/authentication/ldap-authentication-ycql.md create mode 100644 docs/content/v2.25/secure/authentication/ldap-authentication-ysql.md create mode 100644 docs/content/v2.25/secure/authentication/password-authentication.md create mode 100644 docs/content/v2.25/secure/authentication/trust-authentication.md create mode 100644 docs/content/v2.25/secure/authorization/_index.md create mode 100644 docs/content/v2.25/secure/authorization/column-level-security.md create mode 100644 docs/content/v2.25/secure/authorization/create-roles-ycql.md create mode 100644 docs/content/v2.25/secure/authorization/create-roles.md create mode 100644 docs/content/v2.25/secure/authorization/rbac-model-ycql.md create mode 100644 docs/content/v2.25/secure/authorization/rbac-model.md create mode 100644 docs/content/v2.25/secure/authorization/row-level-security.md create mode 100644 docs/content/v2.25/secure/authorization/ycql-grant-permissions.md create mode 100644 docs/content/v2.25/secure/authorization/ysql-grant-permissions.md create mode 100644 docs/content/v2.25/secure/column-level-encryption.md create mode 100644 docs/content/v2.25/secure/enable-authentication/_index.md create mode 100644 docs/content/v2.25/secure/enable-authentication/authentication-ycql.md create mode 100644 docs/content/v2.25/secure/enable-authentication/authentication-ysql.md create mode 100644 docs/content/v2.25/secure/enable-authentication/ysql-login-profiles.md create mode 100644 docs/content/v2.25/secure/enable-authentication/ysql_hba_conf-configuration.md create mode 100644 docs/content/v2.25/secure/encryption-at-rest.md create mode 100644 docs/content/v2.25/secure/security-checklist.md create mode 100644 docs/content/v2.25/secure/tls-encryption/_index.md create mode 100644 docs/content/v2.25/secure/tls-encryption/connect-to-cluster.md create mode 100644 docs/content/v2.25/secure/tls-encryption/server-certificates.md create mode 100644 docs/content/v2.25/secure/tls-encryption/server-to-server.md create mode 100644 docs/content/v2.25/secure/tls-encryption/tls-authentication.md create mode 100644 docs/content/v2.25/secure/vulnerability-disclosure-policy.md create mode 100644 docs/content/v2.25/troubleshoot/_index.md create mode 100644 docs/content/v2.25/troubleshoot/cluster/_index.md create mode 100644 docs/content/v2.25/troubleshoot/cluster/connect-ycql.md create mode 100644 docs/content/v2.25/troubleshoot/cluster/failed_tablets.md create mode 100644 docs/content/v2.25/troubleshoot/cluster/performance-troubleshooting.md create mode 100644 docs/content/v2.25/troubleshoot/cluster/recover_server.md create mode 100644 docs/content/v2.25/troubleshoot/cluster/replace_failed_peers.md create mode 100644 
docs/content/v2.25/troubleshoot/cluster/replace_master.md create mode 100644 docs/content/v2.25/troubleshoot/cluster/replace_tserver.md create mode 100644 docs/content/v2.25/troubleshoot/nodes/_index.md create mode 100644 docs/content/v2.25/troubleshoot/nodes/check-processes.md create mode 100644 docs/content/v2.25/troubleshoot/nodes/check-stats.md create mode 100644 docs/content/v2.25/troubleshoot/nodes/disk-full.md create mode 100644 docs/content/v2.25/troubleshoot/nodes/recover-disk.md create mode 100644 docs/content/v2.25/troubleshoot/nodes/trouble-common.md create mode 100644 docs/content/v2.25/troubleshoot/other-issues.md create mode 100644 docs/content/v2.25/troubleshoot/ysql-issues.md create mode 100644 docs/content/v2.25/tutorials/AI/_index.md create mode 100644 docs/content/v2.25/tutorials/AI/ai-langchain-openai.md create mode 100644 docs/content/v2.25/tutorials/AI/ai-llamaindex-openai.md create mode 100644 docs/content/v2.25/tutorials/AI/ai-localai.md create mode 100644 docs/content/v2.25/tutorials/AI/ai-ollama.md create mode 100644 docs/content/v2.25/tutorials/AI/azure-openai.md create mode 100644 docs/content/v2.25/tutorials/AI/google-vertex-ai.md create mode 100644 docs/content/v2.25/tutorials/AI/hello-rag.md create mode 100644 docs/content/v2.25/tutorials/_index.md create mode 100644 docs/content/v2.25/tutorials/azure/_index.md create mode 100644 docs/content/v2.25/tutorials/azure/azure-api-management.md create mode 100644 docs/content/v2.25/tutorials/azure/azure-app-service.md create mode 100644 docs/content/v2.25/tutorials/azure/azure-event-hubs.md create mode 100644 docs/content/v2.25/tutorials/azure/azure-functions.md create mode 100644 docs/content/v2.25/tutorials/azure/azure-key-vault.md create mode 100644 docs/content/v2.25/tutorials/azure/azure-private-link.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/_index.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/chapter1-debuting-with-postgres.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/chapter2-scaling-with-yugabytedb.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/chapter3-tolerating-outages.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/chapter4-going-global.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/chapter5-going-cloud-native.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/includes/chapter1-full-text-search.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/includes/chapter1-similarity-search.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/includes/chapter2-full-text-search.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/includes/chapter2-similarity-search.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/includes/chapter3-full-text-search.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/includes/chapter3-second-full-text-search.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/includes/chapter3-second-similarity-search.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/includes/chapter3-similarity-search.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/includes/chapter4-us-east-add-movie-full-text-search.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/includes/chapter4-us-east-add-movie-similarity-search.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/includes/chapter4-us-east-full-text-search.md 
create mode 100644 docs/content/v2.25/tutorials/build-and-learn/includes/chapter4-us-east-similarity-search.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/includes/chapter5-full-text-search.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/includes/chapter5-similarity-search.md create mode 100644 docs/content/v2.25/tutorials/build-and-learn/overview.md create mode 100644 docs/content/v2.25/tutorials/build-apps/_index.md create mode 100644 docs/content/v2.25/tutorials/build-apps/c/cloud-ysql-c.md create mode 100644 docs/content/v2.25/tutorials/build-apps/cloud-add-ip.md create mode 100644 docs/content/v2.25/tutorials/build-apps/cpp/cloud-ysql-cpp.md create mode 100644 docs/content/v2.25/tutorials/build-apps/csharp/cloud-ysql-csharp.md create mode 100644 docs/content/v2.25/tutorials/build-apps/elixir/cloud-ysql-elixir.md create mode 100644 docs/content/v2.25/tutorials/build-apps/go/cloud-ysql-go.md create mode 100644 docs/content/v2.25/tutorials/build-apps/java/cloud-ysql-yb-jdbc.md create mode 100644 docs/content/v2.25/tutorials/build-apps/nodejs/cloud-ysql-node.md create mode 100644 docs/content/v2.25/tutorials/build-apps/php/cloud-ysql-php.md create mode 100644 docs/content/v2.25/tutorials/build-apps/python/cloud-ysql-python.md create mode 100644 docs/content/v2.25/tutorials/build-apps/ruby/cloud-ysql-ruby.md create mode 100644 docs/content/v2.25/tutorials/build-apps/rust/cloud-ysql-rust.md create mode 100644 docs/content/v2.25/tutorials/cdc-tutorials/_index.md create mode 100644 docs/content/v2.25/tutorials/cdc-tutorials/cdc-aws-msk.md create mode 100644 docs/content/v2.25/tutorials/cdc-tutorials/cdc-azure-event-hub.md create mode 100644 docs/content/v2.25/tutorials/cdc-tutorials/cdc-confluent-cloud.md create mode 100644 docs/content/v2.25/tutorials/cdc-tutorials/cdc-redpanda.md create mode 100644 docs/content/v2.25/tutorials/google/_index.md create mode 100644 docs/content/v2.25/yedis/_index.md create mode 100644 docs/content/v2.25/yedis/api/_index.md create mode 100644 docs/content/v2.25/yedis/api/append.md create mode 100644 docs/content/v2.25/yedis/api/auth.md create mode 100644 docs/content/v2.25/yedis/api/config.md create mode 100644 docs/content/v2.25/yedis/api/createdb.md create mode 100644 docs/content/v2.25/yedis/api/del.md create mode 100644 docs/content/v2.25/yedis/api/deletedb.md create mode 100644 docs/content/v2.25/yedis/api/echo.md create mode 100644 docs/content/v2.25/yedis/api/exists.md create mode 100644 docs/content/v2.25/yedis/api/expire.md create mode 100644 docs/content/v2.25/yedis/api/expireat.md create mode 100644 docs/content/v2.25/yedis/api/flushall.md create mode 100644 docs/content/v2.25/yedis/api/flushdb.md create mode 100644 docs/content/v2.25/yedis/api/get.md create mode 100644 docs/content/v2.25/yedis/api/getrange.md create mode 100644 docs/content/v2.25/yedis/api/getset.md create mode 100644 docs/content/v2.25/yedis/api/hdel.md create mode 100644 docs/content/v2.25/yedis/api/hexists.md create mode 100644 docs/content/v2.25/yedis/api/hget.md create mode 100644 docs/content/v2.25/yedis/api/hgetall.md create mode 100644 docs/content/v2.25/yedis/api/hincrby.md create mode 100644 docs/content/v2.25/yedis/api/hkeys.md create mode 100644 docs/content/v2.25/yedis/api/hlen.md create mode 100644 docs/content/v2.25/yedis/api/hmget.md create mode 100644 docs/content/v2.25/yedis/api/hmset.md create mode 100644 docs/content/v2.25/yedis/api/hset.md create mode 100644 docs/content/v2.25/yedis/api/hstrlen.md create mode 100644 
docs/content/v2.25/yedis/api/hvals.md create mode 100644 docs/content/v2.25/yedis/api/incr.md create mode 100644 docs/content/v2.25/yedis/api/incrby.md create mode 100644 docs/content/v2.25/yedis/api/keys.md create mode 100644 docs/content/v2.25/yedis/api/listdb.md create mode 100644 docs/content/v2.25/yedis/api/monitor.md create mode 100644 docs/content/v2.25/yedis/api/pexpire.md create mode 100644 docs/content/v2.25/yedis/api/pexpireat.md create mode 100644 docs/content/v2.25/yedis/api/psetex.md create mode 100644 docs/content/v2.25/yedis/api/psubscribe.md create mode 100644 docs/content/v2.25/yedis/api/pttl.md create mode 100644 docs/content/v2.25/yedis/api/publish.md create mode 100644 docs/content/v2.25/yedis/api/pubsub.md create mode 100644 docs/content/v2.25/yedis/api/punsubscribe.md create mode 100644 docs/content/v2.25/yedis/api/rename.md create mode 100644 docs/content/v2.25/yedis/api/role.md create mode 100644 docs/content/v2.25/yedis/api/sadd.md create mode 100644 docs/content/v2.25/yedis/api/scard.md create mode 100644 docs/content/v2.25/yedis/api/select.md create mode 100644 docs/content/v2.25/yedis/api/set.md create mode 100644 docs/content/v2.25/yedis/api/setex.md create mode 100644 docs/content/v2.25/yedis/api/setrange.md create mode 100644 docs/content/v2.25/yedis/api/sismember.md create mode 100644 docs/content/v2.25/yedis/api/smembers.md create mode 100644 docs/content/v2.25/yedis/api/srem.md create mode 100644 docs/content/v2.25/yedis/api/strlen.md create mode 100644 docs/content/v2.25/yedis/api/subscribe.md create mode 100644 docs/content/v2.25/yedis/api/tsadd.md create mode 100644 docs/content/v2.25/yedis/api/tscard.md create mode 100644 docs/content/v2.25/yedis/api/tsget.md create mode 100644 docs/content/v2.25/yedis/api/tslastn.md create mode 100644 docs/content/v2.25/yedis/api/tsrangebytime.md create mode 100644 docs/content/v2.25/yedis/api/tsrem.md create mode 100644 docs/content/v2.25/yedis/api/tsrevrangebytime.md create mode 100644 docs/content/v2.25/yedis/api/ttl.md create mode 100644 docs/content/v2.25/yedis/api/unsubscribe.md create mode 100644 docs/content/v2.25/yedis/api/zadd.md create mode 100644 docs/content/v2.25/yedis/api/zcard.md create mode 100644 docs/content/v2.25/yedis/api/zrange.md create mode 100644 docs/content/v2.25/yedis/api/zrangebyscore.md create mode 100644 docs/content/v2.25/yedis/api/zrem.md create mode 100644 docs/content/v2.25/yedis/api/zrevrange.md create mode 100644 docs/content/v2.25/yedis/api/zscore.md create mode 100644 docs/content/v2.25/yedis/develop/_index.md create mode 100644 docs/content/v2.25/yedis/develop/client-drivers/_index.md create mode 100644 docs/content/v2.25/yedis/develop/client-drivers/yedis/cpp.md create mode 100644 docs/content/v2.25/yedis/develop/client-drivers/yedis/csharp.md create mode 100644 docs/content/v2.25/yedis/develop/client-drivers/yedis/go.md create mode 100644 docs/content/v2.25/yedis/develop/client-drivers/yedis/java.md create mode 100644 docs/content/v2.25/yedis/develop/client-drivers/yedis/nodejs.md create mode 100644 docs/content/v2.25/yedis/develop/client-drivers/yedis/python.md create mode 100644 docs/content/v2.25/yedis/quick-start/_index.md create mode 100644 docs/content/v2.25/yedis/quick-start/binary/test-yedis.md create mode 100644 docs/content/v2.25/yedis/quick-start/docker/test-yedis.md create mode 100644 docs/content/v2.25/yedis/quick-start/kubernetes/test-yedis.md create mode 100644 docs/content/v2.25/yugabyte-cloud/_index.md create mode 100644 
docs/content/v2.25/yugabyte-cloud/cloud-admin/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-admin/cloud-billing-costs-classic.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-admin/cloud-billing-costs.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-admin/cloud-billing-profile.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/cloud-vpcs/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/cloud-vpcs/cloud-add-endpoint.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/cloud-vpcs/cloud-add-peering.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/cloud-vpcs/cloud-add-vpc-aws.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/cloud-vpcs/cloud-add-vpc-gcp.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/cloud-vpcs/cloud-add-vpc.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/cloud-vpcs/cloud-vpc-intro.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/cloud-vpcs/managed-endpoint-aws.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/cloud-vpcs/managed-endpoint-azure.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/create-clusters-overview.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/create-clusters-topology.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/create-clusters/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/create-clusters/create-clusters-free.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/create-clusters/create-clusters-geopartition.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/create-clusters/create-clusters-multisync.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/create-clusters/create-single-region.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/create-clusters/include-general-settings.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/create-clusters/include-security-settings.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-basics/create-clusters/network-access.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-clusters/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-clusters/add-extensions.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-clusters/aeon-cdc.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-clusters/aeon-pitr.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-clusters/backup-clusters.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-clusters/cloud-maintenance.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-clusters/configure-clusters.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-clusters/disaster-recovery/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-clusters/disaster-recovery/disaster-recovery-failover.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-clusters/disaster-recovery/disaster-recovery-setup.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-clusters/disaster-recovery/disaster-recovery-switchover.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-clusters/disaster-recovery/disaster-recovery-tables.md create mode 100644 
docs/content/v2.25/yugabyte-cloud/cloud-clusters/managed-read-replica.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-connect/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-connect/connect-applications.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-connect/connect-client-shell.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-connect/connect-cloud-shell.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-connect/connect/ycql.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-connect/connect/ysql.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-monitor/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-monitor/cloud-advisor.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-monitor/cloud-alerts.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-monitor/cloud-queries-live.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-monitor/cloud-queries-slow.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-monitor/logging-export.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-monitor/managed-integrations.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-monitor/metrics-export.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-monitor/monitor-activity.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-monitor/monitor-nodes.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-monitor/monitor-tables.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-monitor/overview.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-quickstart/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-quickstart/qs-explore.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-secure-clusters/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-secure-clusters/add-connections.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-secure-clusters/add-users.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-secure-clusters/cloud-activity.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-secure-clusters/cloud-authentication.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-secure-clusters/cloud-users.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-secure-clusters/managed-ear.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-security/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-security/cloud-security-features.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-security/shared-responsibility.md create mode 100644 docs/content/v2.25/yugabyte-cloud/cloud-troubleshoot.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-api.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-apikeys.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-examples/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-examples/managed-cli-example-create.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-examples/managed-guide-api.md create mode 100644 
docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-overview.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-api-key.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-auth.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-backup-policy.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-backup.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-cluster.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-db-audit-logging.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-db-audit-logs-exporter.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-db-query-logging.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-integration.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-metrics-exporter.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-network-allow-list.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-network.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-peering.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-permission.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-read-replica.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-region.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-role.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-usage.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-user.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-cli/managed-cli-reference/managed-cli-vpc.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-automation/managed-terraform.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-freetrial.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-labs.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-security/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-security/manage-access.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-security/managed-authentication/_index.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-security/managed-authentication/federated-custom.md create mode 100644 
docs/content/v2.25/yugabyte-cloud/managed-security/managed-authentication/federated-entra.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-security/managed-authentication/federated-jump.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-security/managed-authentication/federated-okta.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-security/managed-authentication/federated-ping.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-security/managed-authentication/social-login.md create mode 100644 docs/content/v2.25/yugabyte-cloud/managed-security/managed-roles.md create mode 100644 docs/content/v2.25/yugabyte-cloud/release-notes.md create mode 100644 docs/content/v2.25/yugabyte-platform/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/administer-yugabyte-platform/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/administer-yugabyte-platform/anywhere-rbac.md create mode 100644 docs/content/v2.25/yugabyte-platform/administer-yugabyte-platform/back-up-restore-installer.md create mode 100644 docs/content/v2.25/yugabyte-platform/administer-yugabyte-platform/back-up-restore-k8s.md create mode 100644 docs/content/v2.25/yugabyte-platform/administer-yugabyte-platform/back-up-restore-yp.md create mode 100644 docs/content/v2.25/yugabyte-platform/administer-yugabyte-platform/high-availability.md create mode 100644 docs/content/v2.25/yugabyte-platform/administer-yugabyte-platform/ldap-authentication.md create mode 100644 docs/content/v2.25/yugabyte-platform/administer-yugabyte-platform/manage-runtime-config.md create mode 100644 docs/content/v2.25/yugabyte-platform/administer-yugabyte-platform/oidc-authentication.md create mode 100644 docs/content/v2.25/yugabyte-platform/administer-yugabyte-platform/shutdown.md create mode 100644 docs/content/v2.25/yugabyte-platform/administer-yugabyte-platform/uninstall-software.md create mode 100644 docs/content/v2.25/yugabyte-platform/alerts-monitoring/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/alerts-monitoring/alert-policy-templates.md create mode 100644 docs/content/v2.25/yugabyte-platform/alerts-monitoring/alert.md create mode 100644 docs/content/v2.25/yugabyte-platform/alerts-monitoring/anywhere-export-configuration.md create mode 100644 docs/content/v2.25/yugabyte-platform/alerts-monitoring/anywhere-metrics.md create mode 100644 docs/content/v2.25/yugabyte-platform/alerts-monitoring/latency-histogram.md create mode 100644 docs/content/v2.25/yugabyte-platform/alerts-monitoring/live-queries-dashboard.md create mode 100644 docs/content/v2.25/yugabyte-platform/alerts-monitoring/performance-advisor.md create mode 100644 docs/content/v2.25/yugabyte-platform/alerts-monitoring/prometheus-custom/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/alerts-monitoring/prometheus-custom/prometheus-federate.md create mode 100644 docs/content/v2.25/yugabyte-platform/alerts-monitoring/prometheus-custom/prometheus-scrape.md create mode 100644 docs/content/v2.25/yugabyte-platform/alerts-monitoring/set-up-alerts-health-check.md create mode 100644 docs/content/v2.25/yugabyte-platform/alerts-monitoring/slow-queries-dashboard.md create mode 100644 docs/content/v2.25/yugabyte-platform/alerts-monitoring/universe-logging.md create mode 100644 docs/content/v2.25/yugabyte-platform/anywhere-automation/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/anywhere-automation/anywhere-api.md create mode 100644 
docs/content/v2.25/yugabyte-platform/anywhere-automation/anywhere-cli.md create mode 100644 docs/content/v2.25/yugabyte-platform/anywhere-automation/anywhere-terraform.md create mode 100644 docs/content/v2.25/yugabyte-platform/anywhere-automation/yb-kubernetes-operator.md create mode 100644 docs/content/v2.25/yugabyte-platform/back-up-restore-universes/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/back-up-restore-universes/back-up-universe-data.md create mode 100644 docs/content/v2.25/yugabyte-platform/back-up-restore-universes/configure-backup-storage.md create mode 100644 docs/content/v2.25/yugabyte-platform/back-up-restore-universes/disaster-recovery/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/back-up-restore-universes/disaster-recovery/disaster-recovery-failover.md create mode 100644 docs/content/v2.25/yugabyte-platform/back-up-restore-universes/disaster-recovery/disaster-recovery-setup.md create mode 100644 docs/content/v2.25/yugabyte-platform/back-up-restore-universes/disaster-recovery/disaster-recovery-switchover.md create mode 100644 docs/content/v2.25/yugabyte-platform/back-up-restore-universes/disaster-recovery/disaster-recovery-tables.md create mode 100644 docs/content/v2.25/yugabyte-platform/back-up-restore-universes/pitr.md create mode 100644 docs/content/v2.25/yugabyte-platform/back-up-restore-universes/restore-universe-data.md create mode 100644 docs/content/v2.25/yugabyte-platform/back-up-restore-universes/restore-ysql-single-table.md create mode 100644 docs/content/v2.25/yugabyte-platform/back-up-restore-universes/schedule-data-backups.md create mode 100644 docs/content/v2.25/yugabyte-platform/configure-yugabyte-platform/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/configure-yugabyte-platform/aws.md create mode 100644 docs/content/v2.25/yugabyte-platform/configure-yugabyte-platform/azure.md create mode 100644 docs/content/v2.25/yugabyte-platform/configure-yugabyte-platform/gcp.md create mode 100644 docs/content/v2.25/yugabyte-platform/configure-yugabyte-platform/kubernetes.md create mode 100644 docs/content/v2.25/yugabyte-platform/configure-yugabyte-platform/on-premises-nodes.md create mode 100644 docs/content/v2.25/yugabyte-platform/configure-yugabyte-platform/on-premises-provider.md create mode 100644 docs/content/v2.25/yugabyte-platform/configure-yugabyte-platform/on-premises-script.md create mode 100644 docs/content/v2.25/yugabyte-platform/configure-yugabyte-platform/on-premises.md create mode 100644 docs/content/v2.25/yugabyte-platform/configure-yugabyte-platform/openshift.md create mode 100644 docs/content/v2.25/yugabyte-platform/configure-yugabyte-platform/vmware-tanzu.md create mode 100644 docs/content/v2.25/yugabyte-platform/create-deployments/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/create-deployments/connect-to-universe.md create mode 100644 docs/content/v2.25/yugabyte-platform/create-deployments/create-universe-multi-cloud.md create mode 100644 docs/content/v2.25/yugabyte-platform/create-deployments/create-universe-multi-region.md create mode 100644 docs/content/v2.25/yugabyte-platform/create-deployments/create-universe-multi-zone-kubernetes.md create mode 100644 docs/content/v2.25/yugabyte-platform/create-deployments/create-universe-multi-zone.md create mode 100644 docs/content/v2.25/yugabyte-platform/create-deployments/dedicated-master.md create mode 100644 docs/content/v2.25/yugabyte-platform/create-deployments/read-replicas.md create mode 100644 
docs/content/v2.25/yugabyte-platform/install-yugabyte-platform/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/install-yugabyte-platform/create-admin-user.md create mode 100644 docs/content/v2.25/yugabyte-platform/install-yugabyte-platform/install-software/installer.md create mode 100644 docs/content/v2.25/yugabyte-platform/install-yugabyte-platform/install-software/kubernetes.md create mode 100644 docs/content/v2.25/yugabyte-platform/install-yugabyte-platform/install-software/openshift.md create mode 100644 docs/content/v2.25/yugabyte-platform/install-yugabyte-platform/migrate-replicated.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/delete-universe.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/edit-config-flags.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/edit-helm-overrides.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/edit-universe.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/instance-tags.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/remove-nodes.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/retry-failed-task.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/upgrade-nodes-csp.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/upgrade-nodes.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/upgrade-software-install.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/upgrade-software-prepare.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/upgrade-software.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/xcluster-replication/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/xcluster-replication/bidirectional-replication.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/xcluster-replication/xcluster-replication-ddl.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/xcluster-replication/xcluster-replication-setup.md create mode 100644 docs/content/v2.25/yugabyte-platform/manage-deployments/ybdb-releases.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/cloud-permissions/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/cloud-permissions/cloud-permissions-ear.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/cloud-permissions/cloud-permissions-nodes-aws.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/cloud-permissions/cloud-permissions-nodes-azure.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/cloud-permissions/cloud-permissions-nodes-gcp.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/cloud-permissions/cloud-permissions-nodes-k8s.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/cloud-permissions/cloud-permissions-nodes.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/cloud-permissions/cloud-permissions-storage.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/cloud-permissions/cloud-permissions-yba.md create mode 100644 
docs/content/v2.25/yugabyte-platform/prepare/networking-kubernetes.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/networking.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/server-nodes-hardware.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/server-nodes-software/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/server-nodes-software/software-cloud-provider.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/server-nodes-software/software-kubernetes.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/server-nodes-software/software-on-prem-assist.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/server-nodes-software/software-on-prem-auto.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/server-nodes-software/software-on-prem-legacy.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/server-nodes-software/software-on-prem-manual.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/server-nodes-software/software-on-prem.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/server-nodes.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/server-yba-kubernetes.md create mode 100644 docs/content/v2.25/yugabyte-platform/prepare/server-yba.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/authentication/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/authentication/ldap-authentication-platform.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/authentication/oidc-authentication-aad.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/authentication/oidc-authentication-jumpcloud.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/authentication/oidc-manage-users-include.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/authorization-platform.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/create-kms-config/aws-kms.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/create-kms-config/azure-kms.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/create-kms-config/google-kms.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/create-kms-config/hashicorp-kms.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/enable-encryption-at-rest.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/enable-encryption-in-transit/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/enable-encryption-in-transit/add-certificate-ca.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/enable-encryption-in-transit/add-certificate-hashicorp.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/enable-encryption-in-transit/add-certificate-kubernetes.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/enable-encryption-in-transit/add-certificate-self.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/enable-encryption-in-transit/auto-certificate.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/enable-encryption-in-transit/rotate-certificates.md create mode 100644 docs/content/v2.25/yugabyte-platform/security/enable-encryption-in-transit/trust-store.md create mode 100644 docs/content/v2.25/yugabyte-platform/troubleshoot/_index.md create mode 
100644 docs/content/v2.25/yugabyte-platform/troubleshoot/cloud-provider-config-issues.md create mode 100644 docs/content/v2.25/yugabyte-platform/troubleshoot/install-upgrade-issues/installer.md create mode 100644 docs/content/v2.25/yugabyte-platform/troubleshoot/install-upgrade-issues/kubernetes.md create mode 100644 docs/content/v2.25/yugabyte-platform/troubleshoot/install-upgrade-issues/vm.md create mode 100644 docs/content/v2.25/yugabyte-platform/troubleshoot/ldap-issues.md create mode 100644 docs/content/v2.25/yugabyte-platform/troubleshoot/node-alerts.md create mode 100644 docs/content/v2.25/yugabyte-platform/troubleshoot/universe-issues.md create mode 100644 docs/content/v2.25/yugabyte-platform/upgrade/_index.md create mode 100644 docs/content/v2.25/yugabyte-platform/upgrade/prepare-to-upgrade.md create mode 100644 docs/content/v2.25/yugabyte-platform/upgrade/upgrade-yp-installer.md create mode 100644 docs/content/v2.25/yugabyte-platform/upgrade/upgrade-yp-kubernetes.md create mode 100644 docs/content/v2.25/yugabyte-platform/upgrade/upgrade-yp-replicated.md create mode 100644 docs/content/v2.25/yugabyte-platform/upgrade/upgrade-yp-xcluster-ybadmin.md create mode 100644 docs/content/v2.25/yugabyte-platform/yba-overview-install.md create mode 100644 docs/content/v2.25/yugabyte-platform/yba-overview.md create mode 100644 docs/content/v2.25/yugabyte-voyager/_index.md create mode 100644 docs/content/v2.25/yugabyte-voyager/docker.md create mode 100644 docs/content/v2.25/yugabyte-voyager/github.md create mode 100644 docs/content/v2.25/yugabyte-voyager/install-yb-voyager.md create mode 100644 docs/content/v2.25/yugabyte-voyager/introduction.md create mode 100644 docs/content/v2.25/yugabyte-voyager/known-issues/_index.md create mode 100644 docs/content/v2.25/yugabyte-voyager/known-issues/mysql.md create mode 100644 docs/content/v2.25/yugabyte-voyager/known-issues/oracle.md create mode 100644 docs/content/v2.25/yugabyte-voyager/known-issues/postgresql.md create mode 100644 docs/content/v2.25/yugabyte-voyager/macos.md create mode 100644 docs/content/v2.25/yugabyte-voyager/migrate/_index.md create mode 100644 docs/content/v2.25/yugabyte-voyager/migrate/assess-migration.md create mode 100644 docs/content/v2.25/yugabyte-voyager/migrate/bulk-data-load.md create mode 100644 docs/content/v2.25/yugabyte-voyager/migrate/live-fall-back.md create mode 100644 docs/content/v2.25/yugabyte-voyager/migrate/live-fall-forward.md create mode 100644 docs/content/v2.25/yugabyte-voyager/migrate/live-migrate.md create mode 100644 docs/content/v2.25/yugabyte-voyager/migrate/migrate-steps.md create mode 100644 docs/content/v2.25/yugabyte-voyager/migrate/mysql.md create mode 100644 docs/content/v2.25/yugabyte-voyager/migrate/oracle.md create mode 100644 docs/content/v2.25/yugabyte-voyager/migrate/postgresql.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/_index.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/assess-migration.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/bulk-data-load/import-data-file.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/configuration-file.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/cutover-archive/archive-changes.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/cutover-archive/cutover.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/data-migration/export-data.md create mode 100644 
docs/content/v2.25/yugabyte-voyager/reference/data-migration/import-data.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/datatype-mapping-mysql.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/datatype-mapping-oracle.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/diagnostics-report.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/end-migration.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/non-superuser.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/performance.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/schema-migration/analyze-schema.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/schema-migration/export-schema.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/schema-migration/finalize-schema-post-data-import.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/schema-migration/import-schema.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/yb-voyager-cli.md create mode 100644 docs/content/v2.25/yugabyte-voyager/reference/yb-voyager-pg-grant-migration-permissions.md create mode 100644 docs/content/v2.25/yugabyte-voyager/release-notes.md create mode 100644 docs/content/v2.25/yugabyte-voyager/rhel.md create mode 100644 docs/content/v2.25/yugabyte-voyager/ubuntu.md create mode 100644 docs/content/v2.25/yugabyte-voyager/voyager-troubleshoot.md
diff --git a/docs/content/v2.25/_index.md b/docs/content/v2.25/_index.md
new file mode 100644
index 000000000000..9b857e905401
--- /dev/null
+++ b/docs/content/v2.25/_index.md
@@ -0,0 +1,80 @@
+---
+title: YugabyteDB
+description: YugabyteDB documentation is the best source to learn the most in-depth information about the YugabyteDB database, YugabyteDB Aeon, and YugabyteDB Anywhere.
+headcontent: Open source cloud-native distributed SQL database
+weight: 1
+type: indexpage
+breadcrumbDisable: true
+---
+
+YugabyteDB is an open source PostgreSQL-compatible distributed database for cloud native apps. Resilient, scalable, and flexible, it can be deployed across public and private clouds as well as in Kubernetes environments.
+
+{{< sections/2-boxes >}}
+  {{< sections/bottom-image-box
+    title="Get started locally on your laptop"
+    description="Download and install YugabyteDB on your laptop, create a cluster, and build a sample application."
+    buttonText="Quick Start"
+    buttonUrl="/preview/quick-start/macos/"
+    imageAlt="Locally Laptop" imageUrl="/images/homepage/locally-laptop.svg"
+  >}}
+
+  {{< sections/bottom-image-box
+    title="Explore distributed SQL"
+    description="Explore the features of distributed SQL, with examples."
+    buttonText="Explore"
+    buttonUrl="/preview/explore/"
+    imageAlt="Yugabyte cloud" imageUrl="/images/homepage/yugabyte-in-cloud.svg"
+  >}}
+{{< /sections/2-boxes >}}
+
+## Develop for YugabyteDB
+
+{{< sections/3-boxes>}}
+  {{< sections/3-box-card
+    title="Build a Hello World application"
+    description="Use your favorite programming language to build an application that connects to a YugabyteDB cluster."
+    buttonText="Build"
+    buttonUrl="/preview/tutorials/build-apps/"
+  >}}
+
+  {{< sections/3-box-card
+    title="Connect using drivers and ORMs"
+    description="Connect applications to your database using familiar third-party drivers and ORMs, as well as YugabyteDB Smart Drivers."
+ buttonText="Connect" + buttonUrl="/preview/drivers-orms/" + >}} + + {{< sections/3-box-card + title="Use familiar APIs" + description="Get up to speed quickly using YugabyteDB's PostgreSQL-compatible YSQL and Cassandra-based YCQL APIs." + buttonText="Develop" + buttonUrl="/preview/api/" + >}} + +{{< /sections/3-boxes >}} + +## Get under the hood + +{{< sections/3-boxes>}} + {{< sections/3-box-card + title="Architecture" + description="Learn how YugabyteDB achieves consistency and high availability." + buttonText="Learn More" + buttonUrl="/preview/architecture/" + >}} + + {{< sections/3-box-card + title="Secure" + description="Secure YugabyteDB with authentication, authorization, and encryption." + buttonText="Secure" + buttonUrl="/preview/secure/" + >}} + + {{< sections/3-box-card + title="Configure" + description="Configure core database services." + buttonText="Configure" + buttonUrl="/preview/reference/configuration/" + >}} + +{{< /sections/3-boxes >}} diff --git a/docs/content/v2.25/additional-features/_index.md b/docs/content/v2.25/additional-features/_index.md new file mode 100644 index 000000000000..8488da6f8372 --- /dev/null +++ b/docs/content/v2.25/additional-features/_index.md @@ -0,0 +1,35 @@ +--- +title: Advanced capabilities +headerTitle: Advanced capabilities +linkTitle: Advanced capabilities +description: How to deploy advanced capabilities for your YugabyteDB deployment. +headcontent: Deploy and manage advanced capabilities for your YugabyteDB universe +menu: + preview: + identifier: additional-features + parent: launch-and-manage + weight: 40 +type: indexpage +--- + +{{}} + + {{}} + + {{}} + + {{}} + +{{}} diff --git a/docs/content/v2.25/additional-features/change-data-capture/_index.md b/docs/content/v2.25/additional-features/change-data-capture/_index.md new file mode 100644 index 000000000000..c1aad68d43a8 --- /dev/null +++ b/docs/content/v2.25/additional-features/change-data-capture/_index.md @@ -0,0 +1,57 @@ +--- +title: Change data capture (CDC) +headerTitle: Change data capture (CDC) +linkTitle: Change data capture +description: CDC or Change data capture is a process to capture changes made to data in the database. +headcontent: Capture changes made to data in the database +tags: + feature: early-access +aliases: + - /preview/develop/change-data-capture/ +menu: + preview: + identifier: explore-change-data-capture + parent: additional-features + weight: 30 +type: indexpage +--- + +Change data capture (CDC) is used to determine and track the data that has changed so that action can be taken using the changed data. CDC is used in a number of scenarios: + +- **Microservice-oriented architectures**: Some microservices require a stream of changes to the data, and using CDC in YugabyteDB can provide consumable data changes to CDC subscribers. + +- **Asynchronous replication to remote systems**: Remote systems may subscribe to a stream of data changes and then transform and consume the changes. Maintaining separate database instances for transactional and reporting purposes can be used to manage workload performance. + +- **Multiple data center strategies**: Maintaining multiple data centers enables enterprises to provide high availability (HA). + +- **Compliance and auditing**: Auditing and compliance requirements can require you to use CDC to maintain records of data changes. + +YugabyteDB supports the following methods for reading change events. 
+
+## YugabyteDB gRPC Replication Protocol
+
+This method involves setting up a change stream in YugabyteDB that uses the native gRPC replication protocol to publish change events.
+
+It works as follows:
+
+1. Establish a change stream in the YugabyteDB cluster using the yb-admin CLI (see the example below).
+1. Deploy the YugabyteDB gRPC Connector in your preferred Kafka Connect environment.
+1. The connector captures change events using YugabyteDB's native gRPC replication and directly publishes them to a Kafka topic.
+
+{{}}
+Learn about CDC in YugabyteDB using the [gRPC Replication Protocol](./using-yugabytedb-grpc-replication/).
+{{}}
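+
+For example, a change stream for a database can be created along the following lines. The master addresses and database name are illustrative; the command prints a stream ID that you then supply in the connector configuration.
+
+```sh
+# Create a CDC change stream for the YSQL database "yugabyte".
+# The returned stream ID is used in the YugabyteDB gRPC Connector configuration.
+./bin/yb-admin \
+    --master_addresses 127.0.0.1:7100,127.0.0.2:7100,127.0.0.3:7100 \
+    create_change_data_stream ysql.yugabyte
+```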
diff --git a/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/_index.md b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/_index.md
new file mode 100644
index 000000000000..2e9e4eefa457
--- /dev/null
+++ b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/_index.md
@@ -0,0 +1,126 @@
+---
+title: CDC using PostgreSQL replication protocol
+headerTitle: CDC using PostgreSQL replication protocol
+linkTitle: PostgreSQL protocol
+description: CDC using YugabyteDB PostgreSQL replication protocol.
+headcontent: Capture changes made to data in the database
+tags:
+  feature: early-access
+aliases:
+  - /preview/explore/change-data-capture/using-logical-replication/
+menu:
+  preview:
+    identifier: explore-change-data-capture-logical-replication
+    parent: explore-change-data-capture
+    weight: 240
+type: indexpage
+showRightNav: true
+---
+
+## Overview
+
+YugabyteDB CDC captures changes made to data in the database and streams those changes to external processes, applications, or other databases. CDC allows you to track and propagate changes in a YugabyteDB database to downstream consumers based on its Write-Ahead Log (WAL). YugabyteDB CDC captures row-level changes resulting from INSERT, UPDATE, and DELETE operations in the configured database and publishes them to be consumed by downstream applications.
+
+### Highlights
+
+#### Resilience
+
+YugabyteDB CDC with PostgreSQL Logical Replication provides resilience as follows:
+
+1. Following a failure of the application, server, or network, the replication can continue from any of the available server nodes.
+
+2. Replication continues from the transaction immediately after the transaction that was last acknowledged by the application. No transactions are missed by the application.
+
+#### Security
+
+Because YugabyteDB uses the PostgreSQL Logical Replication model, the following applies:
+
+- The CDC user persona is a PostgreSQL replication client.
+
+- A standard replication connection is used for consumption, and all the server-side configurations for authentication, authorizations, SSL modes, and connection load balancing can be leveraged automatically.
+
+#### Guarantees
+
+CDC in YugabyteDB provides the following guarantees.
+
+| GUARANTEE | DESCRIPTION |
+| :----- | :----- |
+| Per-slot ordered delivery guarantee | Changes from transactions from all the tables that are part of the replication slot's publication are received in the order they were committed. This also implies ordered delivery across all the tablets that are part of the publication's table list. |
+| At least once delivery | Changes from transactions are streamed at least once. Changes from transactions may be streamed again in case of restart after failure. For example, this can happen in the case of a Kafka Connect node failure. If the Kafka Connect node pushes the records to Kafka and crashes before committing the offset, it will again get the same set of records upon restart. |
+| No gaps in change stream | Receiving changes that are part of a transaction with commit time *t* implies that you have already received changes from all transactions with commit time lower than *t*. Thus, receiving any change for a row with commit timestamp *t* implies that you have received all older changes for that row. |
+
+## Key concepts
+
+The YugabyteDB logical replication feature makes use of PostgreSQL concepts like replication slot, publication, replica identity, and so on. Understanding these key concepts is crucial for setting up and managing a logical replication environment effectively.
+
+{{}}
+Review [key concepts](./key-concepts) of YugabyteDB CDC with logical replication.
+{{}}
+
+## Getting started
+
+Get started with YugabyteDB logical replication using the YugabyteDB Connector.
+
+{{}}
+[Get started](./get-started) using the connector.
+{{}}
+
+## Monitoring
+
+You can monitor the activities and status of the deployed connectors using the HTTP endpoints provided by YugabyteDB.
+
+{{}}
+Learn how to [monitor](./monitor/) your CDC setup.
+{{}}
+
+## YugabyteDB Connector
+
+To capture and stream your changes in YugabyteDB to an external system, you need a connector that can read the changes in YugabyteDB and stream them out. For this, you can use the YugabyteDB Connector, which is based on the Debezium platform. The connector is deployed as a set of Kafka Connect-compatible connectors, so you first need to define a YugabyteDB connector configuration and then start the connector by adding it to Kafka Connect (see the sketch below).
+
+{{}}
+For reference documentation, see [YugabyteDB Connector](./yugabytedb-connector/).
+{{}}
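+
+As a sketch of what that deployment typically looks like, a connector definition can be registered with a Kafka Connect worker over its REST API. The endpoint, credentials, and values below are illustrative, and the property names follow the Debezium PostgreSQL-style conventions the connector is based on; take the exact connector class and property names from the connector reference.
+
+```sh
+# Register a connector with a Kafka Connect worker (illustrative values only).
+curl -X POST -H "Content-Type: application/json" http://localhost:8083/connectors \
+  -d '{
+    "name": "orders-connector",
+    "config": {
+      "connector.class": "<YugabyteDB connector class from the connector reference>",
+      "database.hostname": "127.0.0.1",
+      "database.port": "5433",
+      "database.user": "yugabyte",
+      "database.password": "yugabyte",
+      "database.dbname": "yugabyte",
+      "topic.prefix": "dbserver1",
+      "slot.name": "orders_slot",
+      "publication.name": "orders_pub"
+    }
+  }'
+```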
They are described in [Replication Functions](https://www.postgresql.org/docs/15/functions-admin.html#FUNCTIONS-REPLICATION) in the PostgreSQL documentation. + +- Restriction on DDLs + + DDL operations should not be performed from the time of replication slot creation till the start of snapshot consumption of the last table. + +- There should be a primary key on the table you want to stream the changes from. + +- CDC is not supported on tables that are also the target of xCluster replication (see issue {{}}). However, both CDC and xCluster can work simultaneously on the same source tables. + + When performing [switchover](../../../deploy/multi-dc/async-replication/async-transactional-switchover/) or [failover](../../../deploy/multi-dc/async-replication/async-transactional-failover/) on xCluster, if you are using CDC, remember to also reconfigure CDC to use the new primary universe. + +- Currently, CDC doesn't support schema evolution for changes that require table rewrites (for example, [ALTER TYPE](../../../api/ysql/the-sql-language/statements/ddl_alter_table/#alter-type-with-table-rewrite)), or DROP TABLE and TRUNCATE TABLE operations after the replication slot is created. However, you can perform these operations before creating the replication slot without any issues. + +- YCQL tables aren't currently supported. Issue {{}}. + +- Support for point-in-time recovery (PITR) is tracked in issue {{}}. + +- Support for transaction savepoints is tracked in issue {{}}. + +- Support for enabling CDC on Read Replicas is tracked in issue {{}}. + +- Support for tablet splitting with logical replication is disabled from v2024.1.4 and v2024.2.1. Tracked in issue {{}}. + +- A replication slot should be consumed by at most one consumer at a time. However, there is currently no locking mechanism to enforce this. As a result, you should ensure that multiple consumers do not consume from a slot simultaneously. Tracked in issue {{}}. + +- If a row is updated or deleted in the same transaction in which it was inserted, CDC cannot retrieve the before-image values for the UPDATE / DELETE event. If the replica identity is not CHANGE, then CDC will throw an error while processing such events. + + To handle updates/deletes with a non-CHANGE replica identity, set the YB-TServer flag `cdc_send_null_before_image_if_not_exists` to true. With this flag enabled, CDC will send a null before-image instead of failing with an error. diff --git a/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/advanced-configuration.md b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/advanced-configuration.md new file mode 100644 index 000000000000..aaa990334f13 --- /dev/null +++ b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/advanced-configuration.md @@ -0,0 +1,43 @@ +--- +title: Advanced configurations for CDC using Logical Replication +headerTitle: Advanced configuration +linkTitle: Advanced configuration +description: Advanced Configurations for Logical Replication. 
+headcontent: Tune your CDC configuration +aliases: + - /preview/explore/change-data-capture/using-logical-replication/advanced-configuration/ +menu: + preview: + parent: explore-change-data-capture-logical-replication + identifier: advanced-configurations + weight: 40 +type: docs +--- + +## YB-TServer flags + +You can use the following [YB-TServer flags](../../../../reference/configuration/yb-tserver/) to tune logical replication deployment configuration: + +- [ysql_yb_default_replica_identity](../../../../reference/configuration/yb-tserver/#ysql-yb-default-replica-identity) +- [cdcsdk_enable_dynamic_table_support](../../../../reference/configuration/yb-tserver/#cdcsdk-enable-dynamic-table-support) +- [cdcsdk_publication_list_refresh_interval_secs](../../../../reference/configuration/yb-tserver/#cdcsdk-publication-list-refresh-interval-secs) +- [cdcsdk_max_consistent_records](../../../../reference/configuration/yb-tserver/#cdcsdk-max-consistent-records) +- [cdcsdk_vwal_getchanges_resp_max_size_bytes](../../../../reference/configuration/yb-tserver/#cdcsdk-vwal-getchanges-resp-max-size-bytes) + +## Retention of resources + +CDC retains resources (such as WAL segments) that contain information related to the changes involved in the transactions. These resources are typically retained until the consuming client acknowledges the receipt of all the transactions contained in that resource. + +Retaining resources has an impact on the system. Clients are expected to consume these transactions within configurable duration limits. Resources will be released if the duration exceeds these configured limits. + +Use the [cdc_intent_retention_ms](../../../../reference/configuration/yb-tserver/#cdc-intent-retention-ms) and [cdc_wal_retention_time_secs](../../../../reference/configuration/yb-tserver/#cdc-wal-retention-time-secs) flags to control the duration for which resources are retained. + +Resources are retained for each tablet of a table that is part of a database whose changes are being consumed using a replication slot. This includes those tables that may not be currently part of the publication specification. + +Starting from v2024.2.1, the default data retention for CDC is 8 hours, with support for maximum retention up to 24 hours. Prior to v2024.2.1, the default retention for CDC is 4 hours. + +{{< warning title="Important" >}} +When using FULL or DEFAULT replica identities, CDC preserves previous row values for UPDATE and DELETE operations. This is done by retaining history for each row in the database through a suspension of the compaction process. Compaction process is halted by setting retention barriers to prevent cleanup of history for those rows that are yet to be streamed to the CDC client. These retention barriers are dynamically managed and advanced only after the CDC events are streamed and explicitly acknowledged by the client, thus allowing compaction of history for streamed rows. + +The [cdc_intent_retention_ms](../../../../reference/configuration/yb-tserver/#cdc-intent-retention-ms) flag governs the maximum retention period (default 8 hours). Be aware that any interruption in CDC consumption for extended periods using these replica identities may degrade read performance. This happens because compaction activities are halted in the database when these replica identities are used, leading to inefficient key lookups as reads must traverse multiple SST files. 
+{{< /warning >}}
diff --git a/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/advanced-topic.md b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/advanced-topic.md
new file mode 100644
index 000000000000..49095ecb6d31
--- /dev/null
+++ b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/advanced-topic.md
@@ -0,0 +1,244 @@
+---
+title: Advanced topics
+headerTitle: Advanced topics
+linkTitle: Advanced topics
+description: Advanced topics for Change Data Capture in YugabyteDB.
+aliases:
+  - /preview/explore/change-data-capture/using-logical-replication/advanced-topic/
+menu:
+  preview:
+    parent: explore-change-data-capture-logical-replication
+    identifier: advanced-topics
+    weight: 50
+type: docs
+---
+
+This section explores a range of topics designed to provide deeper insights and enhance your understanding of advanced functionalities.
+
+## Schema evolution
+
+A change to the schema (ALTER TABLE) of the tables being streamed is handled transparently by the database, without manual intervention.
+
+This is illustrated in the following example. The client used for the example is [pg_recvlogical](../../../../explore/change-data-capture/#try-it-out).
+
+1. Create a table and create the replication slot. pg_recvlogical uses the test_decoding output plugin by default.
+
+   ```sql
+   CREATE TABLE demo_table (id INT PRIMARY KEY);
+   ```
+
+   ```sh
+   pg_recvlogical -d yugabyte --slot=demo_slot --create-slot
+   ```
+
+1. Start streaming the changes from the replication slot.
+
+   ```sh
+   pg_recvlogical -d yugabyte --slot=demo_slot --start -f -
+   ```
+
+1. In a new shell, start ysqlsh and insert some data.
+
+   ```sh
+   bin/ysqlsh
+   ```
+
+   ```sql
+   INSERT INTO demo_table VALUES (1);
+   INSERT INTO demo_table VALUES (2);
+   ```
+
+   pg_recvlogical receives the inserts and prints them on the console.
+
+   ```output
+   BEGIN 2
+   TABLE public.demo_table: INSERT: id[integer]:1
+   COMMIT 2
+   BEGIN 3
+   TABLE public.demo_table: INSERT: id[integer]:2
+   COMMIT 3
+   ```
+
+1. Add a new column to `demo_table` and insert some more rows.
+
+   ```sql
+   ALTER TABLE demo_table ADD COLUMN address TEXT;
+   INSERT INTO demo_table VALUES (3, 'address1');
+   INSERT INTO demo_table VALUES (4, 'address2');
+   ```
+
+   Without any manual intervention, pg_recvlogical receives the inserts with the new schema and prints them on the console.
+
+   ```output
+   BEGIN 4
+   TABLE public.demo_table: INSERT: id[integer]:3 address[text]:'address1'
+   COMMIT 4
+   BEGIN 5
+   TABLE public.demo_table: INSERT: id[integer]:4 address[text]:'address2'
+   COMMIT 5
+   ```
+
+## Adding tables to publication
+
+The publication's tables list can change in two ways. The first way is by explicitly adding a table to the publication using ALTER PUBLICATION.
+
+```sql
+CREATE TABLE test_table_1(id INT PRIMARY KEY, aa INT, bb INT);
+CREATE TABLE test_table_2(id INT PRIMARY KEY, aa INT, bb INT);
+
+CREATE PUBLICATION PUB FOR TABLE test_table_1;
+
+-- Start consumption through a replication slot.
+
+ALTER PUBLICATION PUB ADD TABLE test_table_2;
+
+CREATE TABLE test_table_3(id INT PRIMARY KEY, aa INT, bb INT);
+
+ALTER PUBLICATION PUB ADD TABLE test_table_3;
+```
+
+The second way is when a newly created table is added automatically to a publication that was created FOR ALL TABLES.
+
+```sql
+CREATE TABLE test_table_1(id INT PRIMARY KEY, aa INT, bb INT);
+
+CREATE PUBLICATION PUB FOR ALL TABLES;
+
+-- Start consumption through a replication slot.
+
+CREATE TABLE test_table_2(id INT PRIMARY KEY, aa INT, bb INT);
+-- Because the publication was created FOR ALL TABLES, ALTER PUBLICATION is not required.
+```
+
+### YugabyteDB semantics
+
+Unlike PostgreSQL, any changes made to the publication's tables list are not applied immediately in YugabyteDB. Instead, the publication's tables list is periodically refreshed, and changes (if any) are applied. The refresh interval is managed using the [cdcsdk_publication_list_refresh_interval_secs](../../../../reference/configuration/yb-tserver/#cdcsdk-publication-list-refresh-interval-secs) flag. The default is 15 minutes (900 seconds). This means that, in the worst case, a change to the publication's tables list takes effect only after `cdcsdk_publication_list_refresh_interval_secs` seconds.
+
+Consider an example where the `cdcsdk_publication_list_refresh_interval_secs` flag is set to 900 seconds (15 minutes) and the publication's tables list is being refreshed every 15 minutes at 8:00 am, 8:15 am, 8:30 am, and so on.
+
+A change made to the publication's tables list at 8:01 am will be applied at 8:15 am. Similarly, a change made to the publication's tables list at 8:14 am will also be applied at 8:15 am.
+
+You can change the value of this flag at runtime, but the change becomes effective only after some time. For example, suppose you change the `cdcsdk_publication_list_refresh_interval_secs` flag from 900 seconds (15 minutes) to 300 seconds (5 minutes) at 8:01 am.
+
+This change will only be applied after 8:15 am. That is, the publication's tables list will next be refreshed at 8:15 am. Then the next refresh will happen at 8:20 am, and subsequent refreshes will take place every 5 minutes.
+
+### Required settings
+
+To enable dynamic table addition, perform the following steps:
+
+1. Set the [cdcsdk_publication_list_refresh_interval_secs](../../../../reference/configuration/yb-tserver/#cdcsdk-publication-list-refresh-interval-secs) flag to a lower value, such as 60 or 120 seconds. Note that this setting takes effect only after the next publication refresh is performed.
+
+   ```sh
+   ./yb-ts-cli --server_address=<tserver-address> set_flag cdcsdk_publication_list_refresh_interval_secs 120
+   ```
+
+1. After you start receiving records from the newly added table in the publication, reset the `cdcsdk_publication_list_refresh_interval_secs` flag back to its original value (that is, 900 seconds).
+
+   ```sh
+   ./yb-ts-cli --server_address=<tserver-address> set_flag cdcsdk_publication_list_refresh_interval_secs 900
+   ```
+
+{{< note title="Important" >}}
+If you lower the value of `cdcsdk_publication_list_refresh_interval_secs`, you should set the flag back to its original value after you start receiving changes from the new table, as every refresh incurs overhead.
+{{< /note >}}
+
+## Initial snapshot
+
+The [initial snapshot](../../../../architecture/docdb-replication/cdc-logical-replication/#initial-snapshot) data for a table is consumed by executing a snapshot query (SELECT statement). To ensure that the streaming phase continues exactly from where the snapshot left off, this snapshot query is executed as of a specific database state. In YugabyteDB, this database state is represented by a value of `HybridTime`. Changes due to transactions with commit time strictly greater than this snapshot `HybridTime` will be consumed during the streaming phase.
+
+The consistent database state on which the snapshot query is to be executed is specified using the following command:
+
+```sql
+SET LOCAL yb_read_time TO '<hybrid_time_value> ht';
+```
+
+This command should first be executed on the connection (session). The SELECT statement corresponding to the snapshot query should then be executed as part of the same transaction. The HybridTime value to use in the `SET LOCAL yb_read_time` command is the value of the `snapshot_name` field that is returned by the [CREATE REPLICATION SLOT](../../../../api/ysql/the-sql-language/statements/#streaming-replication-protocol-statements) command.
+
+You can also obtain this value by executing the following query:
+
+```sql
+select yb_restart_commit_ht
+from pg_replication_slots where slot_name = '<slot_name>';
+```
+
+For more information on the `pg_replication_slots` catalog view, refer to [pg_replication_slots](../monitor/#pg-replication-slots).
+
+### Using the HYBRID_TIME LSN
+
+YugabyteDB currently supports two types of [LSN](../key-concepts/#lsn-type), SEQUENCE and HYBRID_TIME. In HYBRID_TIME mode, you can specify a hybrid time value `t` in the `pg_lsn` format and the replication stream will begin streaming transactions committed after `t`.
+
+To obtain the current hybrid time value, use the `yb_get_current_hybrid_time_lsn()` function:
+
+```sql
+SELECT * FROM yb_get_current_hybrid_time_lsn();
+```
+
+This returns a 64-bit integer value. You can convert it to the `pg_lsn` format by defining the following function:
+
+```sql
+CREATE OR REPLACE FUNCTION get_current_lsn_format()
+RETURNS text AS $$
+DECLARE
+  ht_lsn bigint;
+  formatted_lsn text;
+BEGIN
+  SELECT yb_get_current_hybrid_time_lsn() INTO ht_lsn;
+  SELECT UPPER(format('%s/%s', to_hex(ht_lsn >> 32), to_hex(ht_lsn & 4294967295)))
+  INTO formatted_lsn;
+  RETURN formatted_lsn;
+END;
+$$ LANGUAGE plpgsql;
+```
+
+Using the value returned by `get_current_lsn_format()`, you can then start your replication stream. For example:
+
+```sh
+START_REPLICATION SLOT rs LOGICAL 62D63025/5462E000;
+```
+
+{{< note title="Important" >}}
+
+The replication slot being used must be created with LSN type `HYBRID_TIME`.
+
+The `yb_get_current_hybrid_time_lsn()` function only works with LSN type `HYBRID_TIME`, and will not work with `SEQUENCE`.
+
+{{< /note >}}
+
+### Permissions
+
+Only a superuser can execute the command to set the value of `yb_read_time`.
+
+For a non-superuser to be able to perform an initial snapshot, perform the following additional setup as a superuser (in addition to granting the required SELECT and USAGE privileges):
+
+```sql
+CREATE ROLE appuser WITH LOGIN REPLICATION;
+CREATE SCHEMA appuser AUTHORIZATION appuser;
+
+-- SECURITY DEFINER procedures let the non-superuser set these session variables.
+CREATE OR REPLACE PROCEDURE appuser.set_yb_read_time(value TEXT)
+LANGUAGE plpgsql
+AS $$
+BEGIN
+  EXECUTE 'SET LOCAL yb_read_time = ' || quote_literal(value);
+END;
+$$
+SECURITY DEFINER;
+
+CREATE OR REPLACE PROCEDURE appuser.disable_catalog_version_check()
+LANGUAGE plpgsql
+AS $$
+BEGIN
+  EXECUTE 'SET yb_disable_catalog_version_check = true';
+END;
+$$
+SECURITY DEFINER;
+
+REVOKE EXECUTE ON PROCEDURE appuser.disable_catalog_version_check FROM PUBLIC;
+GRANT EXECUTE ON PROCEDURE appuser.disable_catalog_version_check TO appuser;
+```
+
+With this setup, the application user executes the following commands as part of the transaction, prior to executing the snapshot SELECT query:
+
+```sql
+CALL set_yb_read_time('<hybrid_time_value> ht');
+CALL disable_catalog_version_check();
+```
diff --git a/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/best-practices.md b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/best-practices.md
new file mode 100644
index 000000000000..90edd1382272
--- /dev/null
+++ b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/best-practices.md
@@ -0,0 +1,30 @@
+---
+title: Best Practices for logical replication
+headerTitle: Best practices
+linkTitle: Best practices
+description: Best Practices for logical replication with Change Data Capture in YugabyteDB.
+aliases:
+  - /preview/explore/change-data-capture/using-logical-replication/best-practices/
+menu:
+  preview:
+    parent: explore-change-data-capture-logical-replication
+    identifier: best-practices-cdc
+    weight: 60
+type: docs
+---
+
+This section describes best practices to achieve scalability and performance while using CDC with logical replication.
+
+## Parallel consumption
+
+To consume changes from different tables in parallel, the recommended approach is to use multiple replication slots, for example, one replication slot per table. Each replication slot is independent of the others, and the changes from the tables can be consumed in parallel (a sketch of this pattern appears at the end of this page).
+
+## Fan out
+
+Consider the case where multiple applications all need to consume changes from the same table. The recommended approach is to use one replication slot to consume the changes from the table and write the changes to a system like Kafka. The fan out is then implemented by having the multiple applications consume from Kafka.
+
+## Load balancing consumption
+
+An application can connect to any of the YB-TServer nodes to consume from a replication slot. Furthermore, after an interruption, a fresh connection can be made to a different node (that is, a node other than the one from which consumption was previously happening) to continue consumption from the same replication slot.
+
+When there are multiple consuming applications, each consuming from a different replication slot, it is best that the applications connect to different YB-TServer nodes in the cluster. This ensures better load balancing. The [YugabyteDB smart driver](../../../../drivers-orms/smart-drivers/) does this automatically, so using the smart driver is recommended.
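+
+As a concrete sketch of the one-slot-per-table pattern described under Parallel consumption, you might create a separate publication and replication slot for each table. The table, publication, and slot names below are illustrative:
+
+```sql
+-- One publication and one replication slot per table, so each table's
+-- changes can be consumed by an independent, parallel consumer.
+CREATE PUBLICATION pub_orders FOR TABLE orders;
+SELECT * FROM pg_create_logical_replication_slot('slot_orders', 'yboutput');
+
+CREATE PUBLICATION pub_users FOR TABLE users;
+SELECT * FROM pg_create_logical_replication_slot('slot_users', 'yboutput');
+```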
diff --git a/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/get-started.md b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/get-started.md new file mode 100644 index 000000000000..d9dd0ce79eb5 --- /dev/null +++ b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/get-started.md @@ -0,0 +1,461 @@ +--- +title: Get started with CDC logical replication in YugabyteDB +headerTitle: Get started +linkTitle: Get started +description: Get started with Change Data Capture in YugabyteDB. +headcontent: Start using CDC with logical replication +aliases: + - /preview/explore/change-data-capture/using-logical-replication/get-started/ +menu: + preview: + parent: explore-change-data-capture-logical-replication + identifier: get-started + weight: 20 +type: docs +--- + +Use the following steps to get started streaming data change events from a YugabyteDB database using a replication slot and the YugabyteDB connector. + +For an example of logical replication using the pg_recvlogical utility, see [Change data capture](../../../../explore/change-data-capture/). + +{{< note title="Note" >}} + +CDC via logical replication is supported in YugabyteDB starting from version 2024.1.1. + +{{< /note >}} + +## Get started with YugabyteDB connector + +This tutorial demonstrates how to use the YugabyteDB connector to monitor a YugabyteDB database. As the data in the database changes, you will see the resulting event streams. + +In this tutorial you will start the Debezium services, run a YugabyteDB instance with a basic example database, and use the connector to monitor the database for changes. + +**Prerequisites** + +- Docker is installed and running. + + This tutorial uses Docker and the Debezium container images to run the required services. You should use the latest version of Docker. For more information, see the [Docker Engine installation](https://docs.docker.com/engine/installation/) documentation. + +### Start the services + +Using the connector requires three separate services: [Zookeeper](http://zookeeper.apache.org/), [Kafka](https://kafka.apache.org), and the YugabyteDB connector service. + +In this tutorial, you will set up a single instance of each service using Docker and the Debezium container images. + +To start the services needed for this tutorial, you must: + +- [Start Zookeeper](#start-zookeeper) +- [Start Kafka](#start-kafka) +- [Start YugabyteDB](#start-yugabytedb) +- [Start Kafka Connect](#start-kafka-connect) + +#### Start Zookeeper + +Zookeeper is the first service you must start. + +Open a terminal and use it to start Zookeeper in a container. This command runs a new container using version `2.5.2.Final` of the `debezium/zookeeper` image: + +```sh +docker run -d --rm --name zookeeper -p 2181:2181 -p 2888:2888 -p 3888:3888 debezium/zookeeper:2.5.2.Final +``` + +#### Start Kafka + +After starting Zookeeper, you can start Kafka in a new container. + +Open a new terminal and use it to start Kafka in a container. This command runs a new container using version `2.5.2.Final` of the `debezium/kafka` image: + +```sh +docker run -d --rm --name kafka -p 9092:9092 --link zookeeper:zookeeper debezium/kafka:2.5.2.Final +``` + +{{< note title="Note" >}} + +In this tutorial, you will always connect to Kafka from in a Docker container. Any of these containers can communicate with the `kafka` container by linking to it. 
If you need to connect to Kafka from outside of a Docker container, you have to set the `-e` option to advertise the Kafka address through the Docker host (`-e ADVERTISED_HOST_NAME=` followed by either the IP address or resolvable host name of the Docker host). + +{{< /note >}} + +#### Start YugabyteDB + +At this point, you have started Zookeeper and Kafka, but you still need a database server from which the connector can capture changes. In this procedure, you start a YugabyteDB instance with an example database. The example uses sample data in SQL scripts that are included with your YugabyteDB installation in the `share` directory. + +Follow the [Quick Start](/preview/quick-start/macos/) to start an instance using yugabyted. + +{{< note title="Note" >}} + +You need to start the database on an IP that is resolvable by the docker containers. If you use the localhost address (that is, `127.0.0.1`) then if you deploy the connectors in the docker containers, they won't be able to talk to the database and will keep trying to connect to `127.0.0.1` inside the container. Use the [--advertise_address option for yugabyted](../../../../reference/configuration/yugabyted#flags-8) to specify the IP you want to start your database instance. + +For example, Linux users can use the following: + +```sh +./bin/yugabyted start --advertise_address $(hostname -i) +``` + +{{< /note >}} + +##### Use the YSQL command line client + +After starting YugabyteDB, use ysqlsh to create your database: + +1. Connect the client to the database process running on the IP you specified when you started up the database instance. + + ```sh + ./bin/ysqlsh -h + ``` + + You should see output similar to the following: + + ```output + ysqlsh (15.2-YB-{{}}-b0) + Type "help" for help. + + yugabyte=# + ``` + +1. Load the schema of the sample tables. + + ```sql + yugabyte=# \i share/schema.sql + CREATE TABLE + CREATE TABLE + CREATE TABLE + CREATE TABLE + ``` + +1. List the tables + + ```sql + yugabyte=# \d + ``` + + ```output + List of relations + Schema | Name | Type | Owner + --------+-----------------+----------+---------- + public | orders | table | yugabyte + public | orders_id_seq | sequence | yugabyte + public | products | table | yugabyte + public | products_id_seq | sequence | yugabyte + public | reviews | table | yugabyte + public | reviews_id_seq | sequence | yugabyte + public | users | table | yugabyte + public | users_id_seq | sequence | yugabyte + (8 rows) + ``` + +1. Load data in one of the tables and verify the count. + + ```sql + yugabyte=# \i share/products.sql + ``` + + ```output + yugabyte=# select count(*) from products; + count + ------- + 200 + (1 row) + ``` + +#### Start Kafka Connect + +After starting YugabyteDB, you start the Kafka Connect service. This service exposes a REST API to manage the YugabyteDB connector. + +1. Open a new terminal, and use it to start the Kafka Connect service in a container. + + The following command runs a new container using the `dz.2.5.2.yb.2024.1` version of the `quay.io/yugabyte/ybdb-debezium` image: + + ```sh + docker run -it --rm --name connect -p 8083:8083 -p 1976:1976 -e GROUP_ID=1 -e CONFIG_STORAGE_TOPIC=my_connect_configs -e OFFSET_STORAGE_TOPIC=my_connect_offsets -e STATUS_STORAGE_TOPIC=my_connect_statuses -e CLASSPATH=/kafka/connect/ --link zookeeper:zookeeper --link kafka:kafka quay.io/yugabyte/ybdb-debezium:dz.2.5.2.yb.2024.1 + ``` + +1. Verify that Kafka Connect started and is ready to accept connections. 
You should see output similar to the following: + + ```output + ... + 2024-07-19 12:04:33,044 INFO || Kafka version: 3.6.1 [org.apache.kafka.common.utils.AppInfoParser] + ... + 2024-07-19 12:04:33,661 INFO || [Worker clientId=connect-1, groupId=1] Starting connectors and tasks using config offset -1 [org.apache.kafka.connect.runtime.distributed.DistributedHerder] + 2024-07-19 12:04:33,661 INFO || [Worker clientId=connect-1, groupId=1] Finished starting connectors and tasks [org.apache.kafka.connect.runtime.distributed.DistributedHerder] + ``` + +1. Use the Kafka Connect REST API to check the status of the Kafka Connect service. + + Kafka Connect exposes a REST API to manage Debezium connectors. To communicate with the Kafka Connect service, you can use the `curl` command to send API requests to port 8083 of the Docker host (which you mapped to port 8083 in the `connect` container when you started Kafka Connect). + + Open a new terminal and check the status of the Kafka Connect service: + + ```sh + $ curl -H "Accept:application/json" localhost:8083/ + + {"version":"3.6.1","commit":"5e3c2b738d253ff5","kafka_cluster_id":"kafka-cluster-id"} + ``` + +{{< note title="Note" >}} + +These commands use `localhost`. If you are using a non-native Docker platform (such as Docker Toolbox), replace `localhost` with the IP address of your Docker host. + +{{< /note >}} + +### Deploy the YugabyteDB connector + +After starting the Debezium and YugabyteDB service, you are ready to deploy the YugabyteDB connector. To deploy the connector, do the following: + +- [Register the YugabyteDB connector to monitor the `yugabyte` database](#register-a-connector-to-monitor-yugabyte-database). +- [Watch the connector start](#watch-the-connector-start). + +#### Register a connector to monitor yugabyte database + +By registering the YugabyteDB connector, the connector will start monitoring the YugabyteDB database's table `products`. When a row in the table changes, Debezium generates a change event. + +{{< note title="Note" >}} + +In a production environment, you would typically either use the Kafka tools to manually create the necessary topics, including specifying the number of replicas, or you would use the Kafka Connect mechanism for customizing the settings of [auto-created](https://debezium.io/documentation/reference/2.5/configuration/topic-auto-create-config.html) topics. However, for this tutorial, Kafka is configured to automatically create the topics with just one replica. + +{{< /note >}} + +1. Review the configuration of the YugabyteDB connector that you will register. Before registering the connector, you should be familiar with its configuration. In the next step, you will register the following connector: + + ```json + { + "name": "ybconnector", + "config": { + "tasks.max":"1", + "connector.class": "io.debezium.connector.postgresql.YugabyteDBConnector", + "database.hostname":"'$(hostname -i)'", + "database.port":"5433", + "database.user": "yugabyte", + "database.password":"yugabyte", + "database.dbname":"yugabyte", + "topic.prefix":"dbserver1", + "snapshot.mode":"initial", + "table.include.list":"public.products", + "plugin.name":"yboutput", + "slot.name":"yb_replication_slot" + } + } + ``` + + - `name` - The name of the connector. + - `config` - The connector's configuration. + - `database.hostname` - The database host, which is the IP of the machine running YugabyteDB. If YugabyteDB were running on a normal network, you would specify the IP address or resolvable host name for this value. 
+ - `topic.prefix` - A unique topic prefix. This name will be used as the prefix for all Kafka topics. + - `table.include.list` - Only changes in the table `products` of the schema `public` will be detected. + - `plugin.name` - [Plugin](../key-concepts/#output-plugin) to be used for replication. + - `slot.name` - Name of the [replication slot](../key-concepts/#replication-slot). + + For more information, see [YugabyteDB connector configuration properties](../yugabytedb-connector-properties). + +1. Open a new terminal and use the `curl` command to register the YugabyteDB connector. + + This command uses the Kafka Connect service API to submit a `POST` request against the `/connectors` resource with a `JSON` document that describes the new connector (called `ybconnector`). + + ```sh + curl -i -X POST -H "Accept:application/json" -H "Content-Type:application/json" localhost:8083/connectors/ -d '{ + "name": "ybconnector", + "config": { + "tasks.max":"1", + "connector.class": "io.debezium.connector.postgresql.YugabyteDBConnector", + "database.hostname":"'$(hostname -i)'", + "database.port":"5433", + "database.user": "yugabyte", + "database.password":"yugabyte", + "database.dbname":"yugabyte", + "topic.prefix":"dbserver1", + "snapshot.mode":"initial", + "table.include.list":"public.products", + "plugin.name":"yboutput", + "slot.name":"yb_replication_slot" + } + }' + ``` + + {{< note title="Note" >}} +Windows users may need to escape the double-quotes. + {{< /note >}} + +1. Verify that `ybconnector` is included in the list of connectors: + + ```sh + $ curl -H "Accept:application/json" localhost:8083/connectors/ + + ["ybconnector"] + ``` + +#### Watch the connector start + +When you register a connector, it generates a large amount of log output in the Kafka Connect container. By reviewing this output, you can better understand the process that the connector goes through from the time it is created until it begins reading the change events. + +After registering the `ybconnector` connector, you can review the log output in the Kafka Connect container (`connect`) to track the connector's status. + +Kafka Connect reports some "errors". 
However, you can safely ignore these warnings: these messages just mean that new Kafka topics were created and that Kafka had to assign a new leader for each one: + +```output +2021-11-30 01:38:45,555 WARN || [Producer clientId=connector-producer-inventory-connector-0] Error while fetching metadata with correlation id 3 : {dbserver1=LEADER_NOT_AVAILABLE} [org.apache.kafka.clients.NetworkClient] +2021-11-30 01:38:45,691 WARN || [Producer clientId=connector-producer-inventory-connector-0] Error while fetching metadata with correlation id 9 : {dbserver1.public.orders=LEADER_NOT_AVAILABLE} [org.apache.kafka.clients.NetworkClient] +2021-11-30 01:38:45,813 WARN || [Producer clientId=connector-producer-inventory-connector-0] Error while fetching metadata with correlation id 13 : {dbserver1.public.users=LEADER_NOT_AVAILABLE} [org.apache.kafka.clients.NetworkClient] +2021-11-30 01:38:45,927 WARN || [Producer clientId=connector-producer-inventory-connector-0] Error while fetching metadata with correlation id 18 : {dbserver1.public.products=LEADER_NOT_AVAILABLE} [org.apache.kafka.clients.NetworkClient] +2021-11-30 01:38:46,043 WARN || [Producer clientId=connector-producer-inventory-connector-0] Error while fetching metadata with correlation id 22 : {dbserver1.public.reviews=LEADER_NOT_AVAILABLE} [org.apache.kafka.clients.NetworkClient] +``` + +### View change events + +After deploying the YugabyteDB connector, it starts monitoring the `yugabyte` database for data change events. + +For this tutorial, you will explore the `dbserver1.public.products` topic. + +#### View a change event + +Open a new terminal, and use it to start the watch-topic utility to watch the `dbserver1.public.products` topic from the beginning of the topic. + +The following command runs the `watch-topic` utility in a new container using the `2.5.2.Final` version of the `debezium/kafka` image: + +```sh +docker run -it --rm --name consumer --link zookeeper:zookeeper --link kafka:kafka debezium/kafka:2.5.2.Final watch-topic -a dbserver1.public.products +``` + +The `watch-topic` utility returns the event records from the `products` table. There will be 200 events, one for each row in the table which was snapshotted. Each event is formatted in JSON, because that is how you configured the Kafka Connect service. There are two JSON documents for each event: one for the key, and one for the value. + +You should see output similar to the following: + +```output.json +Using ZOOKEEPER_CONNECT=172.17.0.2:2181 +Using KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://172.17.0.7:9092 +Using KAFKA_BROKER=172.17.0.3:9092 +Contents of topic dbserver1.public.products: +... 
+{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int64","optional":false,"default":0,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":false,"name":"id","field":"id"},{"type":"struct","fields":[{"type":"int64","optional":true,"name":"io.debezium.time.MicroTimestamp","version":1,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"created_at","field":"created_at"},{"type":"struct","fields":[{"type":"string","optional":true,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"category","field":"category"},{"type":"struct","fields":[{"type":"string","optional":true,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"ean","field":"ean"},{"type":"struct","fields":[{"type":"double","optional":true,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"price","field":"price"},{"type":"struct","fields":[{"type":"int32","optional":true,"default":5000,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"quantity","field":"quantity"},{"type":"struct","fields":[{"type":"double","optional":true,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"rating","field":"rating"},{"type":"struct","fields":[{"type":"string","optional":true,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"title","field":"title"},{"type":"struct","fields":[{"type":"string","optional":true,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"vendor","field":"vendor"}],"optional":true,"name":"dbserver1.public.products.Value","field":"before"},{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int64","optional":false,"default":0,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":false,"name":"id","field":"id"},{"type":"struct","fields":[{"type":"int64","optional":true,"name":"io.debezium.time.MicroTimestamp","version":1,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"created_at","field":"created_at"},{"type":"struct","fields":[{"type":"string","optional":true,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"category","field":"category"},{"type":"struct","fields":[{"type":"string","optional":true,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"ean","field":"ean"},{"type":"struct","fields":[{"type":"double","optional":true,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"price","field":"price"},{"type":"struct","fields":[{"type":"int32","optional":true,"default":5000,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"quantity","field":"quantity"},{"type":"struct","fields":[{"type":"double","optional":true,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"rating","field":"rating"},{"type":"struct","fields":[{"type":"string","optional":true,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"title","field":"title"},{"type":"struct","fields":[{"type":"string","optional":true,"field":"value"},{"type":"boolean","optional":false,"field":"set"}],"optional":true,"name":"vendor","field":"vendor"}],"optional":true,"name":"dbse
rver1.public.products.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false,incremental"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":false,"field":"schema"},{"type":"string","optional":false,"field":"table"},{"type":"int64","optional":true,"field":"txId"},{"type":"int64","optional":true,"field":"lsn"},{"type":"int64","optional":true,"field":"xmin"}],"optional":false,"name":"io.debezium.connector.postgresql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"name":"event.block","version":1,"field":"transaction"}],"optional":false,"name":"dbserver1.public.products.Envelope","version":1},"payload":{"before":null,"after":{"id":{"value":147,"set":true},"created_at":{"value":1500306107286000,"set":true},"category":{"value":"Doohickey","set":true},"ean":{"value":"6590063715","set":true},"price":{"value":44.4315141414441,"set":true},"quantity":{"value":5000,"set":true},"rating":{"value":4.6,"set":true},"title":{"value":"Mediocre Wool Toucan","set":true},"vendor":{"value":"Bradtke, Wilkinson and Reilly","set":true}},"source":{"version":"dz.2.5.2.yb.2024.1","connector":"postgresql","name":"dbserver1","ts_ms":1721400304248,"snapshot":"true","db":"yugabyte","sequence":"[null,\"2\"]","schema":"public","table":"products","txId":2,"lsn":2,"xmin":null},"op":"r","ts_ms":1721400309609,"transaction":null}} +... +``` + +{{< note title="Note" >}} + +This utility keeps watching the topic, so any new events will automatically appear as long as the utility is running. + +{{< /note >}} + +#### Update the database and view the update event + +Now that you have seen how the YugabyteDB connector captured the create events in the `yugabyte` database, change one of the records and see how the connector captures it. + +By completing this procedure, you will learn how to find details about what changed in a database commit, and how you can compare change events to determine when the change occurred in relation to other changes. + +1. In the terminal that is running ysqlsh, run the following statement: + + ```sql + update products set title = 'Enormous Granite Shiny Shoes' where id = 22; + ``` + +1. View the updated `products` table: + + ```sql + yugabyte=# select * from products where id = 22; + ``` + + ```output + id | created_at | category | ean | price | quantity | rating | title | vendor + ----+-------------------------+----------+---------------+------------------+----------+--------+------------------------------+--------------------------- + 22 | 2017-11-24 20:14:28.415 | Gizmo | 7595223735110 | 21.4245199604423 | 5000 | 4.2 | Enormous Granite Shiny Shoes | Mayer, Kiehn and Turcotte + (1 row) + ``` + +1. Switch to the terminal running `watch-topic` to see a new event. + + By changing a record in the `products` table, the YugabyteDB connector generated a new event. 
+ + The details for the payload of the *update* event will look similar to the following (formatted for readability): + + ```json + { + "before": null, + "after": { + "id": { + "value": 22, + "set": true + }, + "created_at": null, + "category": null, + "ean": null, + "price": null, + "quantity": null, + "rating": null, + "title": { + "value": "Enormous Granite Shiny Shoes", + "set": true + }, + "vendor": null + } + } + ``` + +Note that the fields which were not updated are coming out as `null`. This is because the [REPLICA IDENTITY](../key-concepts/#replica-identity) of the table is `CHANGE` by default, where you only send the values of the updated columns in the change event. + +#### Delete a row and view the delete event + +1. In the terminal that is running ysqlsh, run the following statement: + + ```sql + delete from products where id = 22; + ``` + +1. Switch to the terminal running `watch-topic` to see two new events. By deleting a row in the `products` table, the YugabyteDB connector generated 2 new events. + + The details for the payload of the first event will look similar to the following (formatted for readability): + + ```json + { + "before": { + "id": { + "value": 22, + "set": true + }, + "created_at": { + "value": null, + "set": true + }, + "category": { + "value": null, + "set": true + }, + "ean": { + "value": null, + "set": true + }, + "price": { + "value": null, + "set": true + }, + "quantity": { + "value": 5000, + "set": true + }, + "rating": { + "value": null, + "set": true + }, + "title": { + "value": null, + "set": true + }, + "vendor": { + "value": null, + "set": true + } + }, + "after": null + } + ``` + +The second event will have a *key* but the *value* will be `null`; that is a [tombstone event](../yugabytedb-connector/#tombstone-events) generated by the YugabyteDB connector. + +### Clean up + +After you are finished with the tutorial, you can use Docker to stop all of the running containers. + +Run the following command: + +```sh +docker stop zookeeper kafka connect consumer +``` + +Docker stops each container. Because you used the `--rm` option when you started them, Docker also removes them. diff --git a/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/key-concepts.md b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/key-concepts.md new file mode 100644 index 000000000000..d94ec8bb66e2 --- /dev/null +++ b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/key-concepts.md @@ -0,0 +1,136 @@ +--- +title: Key concepts - logical replication +headerTitle: Key concepts +linkTitle: Key concepts +description: Change Data Capture in YugabyteDB. +headcontent: PostgreSQL logical replication concepts +aliases: + - /preview/explore/change-data-capture/using-logical-replication/key-concepts/ +menu: + preview: + parent: explore-change-data-capture-logical-replication + identifier: key-concepts + weight: 10 +type: docs +--- + +The YugabyteDB logical replication feature uses [PostgreSQL Logical Replication](https://www.postgresql.org/docs/15/logical-replication.html), which operates using a publish-subscribe model. Understanding the following key concepts will help you set up and manage a logical replication environment effectively. + +## Concepts + +### Replication slot + +A replication slot represents a stream of changes that can be replayed to a client in the order they were made on the origin server. Each slot streams a sequence of changes from a single database. 
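+
+For example, a replication slot can be created and inspected directly from SQL. The following is a minimal sketch; the slot name is illustrative, and `yboutput` is one of the supported output plugins:
+
+```sql
+-- Create a logical replication slot that uses the yboutput plugin.
+SELECT * FROM pg_create_logical_replication_slot('my_slot', 'yboutput');
+
+-- List existing slots and check whether they are active.
+SELECT slot_name, plugin, active FROM pg_replication_slots;
+```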
+ +In logical replication, the fundamental unit of data transmission is a transaction. A logical slot emits each change just once in normal operation. The current position of each slot is persisted only at checkpoint, so if a replication process is interrupted and restarts, even if the checkpoint or the starting Log Sequence Number ([LSN](#lsn-type)) falls in the middle of a transaction, **the entire transaction is retransmitted**. This behavior guarantees that clients receive complete transactions without missing any intermediate changes, maintaining data integrity across the replication stream​. Logical decoding clients are responsible for avoiding ill effects from handling the same message more than once. Clients may wish to record the last LSN they saw when decoding and skip over any repeated data or (when using the replication protocol) request that decoding start from that LSN rather than letting the server determine the start point. + +For more information, refer to [Replication slots](https://www.postgresql.org/docs/15/logicaldecoding-explanation.html#LOGICALDECODING-REPLICATION-SLOTS) in the PostgreSQL documentation. + +#### LSN type + +A Log Sequence Number (LSN) in YugabyteDB differs from what you may be accustomed to in PostgreSQL. In PostgreSQL, an LSN represents a specific 'location' in the WAL, and has significance that spans databases and replication slots. In YugabyteDB, an LSN uniquely identifies a change event, and the LSN is valid only in the context of a specific replication slot. Due to these differences, there are inherent limitations in how LSNs can be used. + +You can specify the type of LSN to use when you create a replication slot. YugabyteDB currently supports the following types: + +- SEQUENCE - (Default) PostgreSQL-style LSN that is valid in the context of a slot. It is a monotonic increasing number that determines the record in global order in the context of a slot. It can't be compared across two different slots. +- HYBRID_TIME - A hybrid time value which can be used natively with YugabyteDB. HYBRID_TIME is denoted by the HybridTime of the transaction commit record. All the records of the transaction that is streamed will have the same LSN as that of the commit record. You need to ensure that the changes of a transaction are applied in totality and the acknowledgement is sent only if the commit record of a transaction is processed. + +### Publication + +A publication is a set of changes generated from a table or a group of tables, and might also be described as a change set or replication set. Each publication exists in only one database. + +Publications are different from schemas and do not affect how the table is accessed. Each table can be added to multiple publications if needed. Publications may currently only contain tables. Objects must be added explicitly, except when a publication is created for ALL TABLES. + +For more information, refer to [Publication](https://www.postgresql.org/docs/15/logical-replication-publication.html#LOGICAL-REPLICATION-PUBLICATION) in the PostgreSQL documentation. + +### Output plugin + +Output plugins transform the data from the write-ahead log's internal representation into the format that can be consumed by replication clients. These plugins are notified about the change events that need to be processed and sent via various callbacks. These callbacks are only invoked when the transaction actually commits. 
+ +YugabyteDB supports the following four output plugins: + +- `yboutput` +- `pgoutput` +- `test_decoding` +- `wal2json` + +All these plugins are pre-packaged with YugabyteDB and do not require any external installation. + +{{< note title="Note" >}} + +The plugin `yboutput` is YugabyteDB specific. It is similar to `pgoutput` in most aspects. The only difference being that replica identity `CHANGE` is not supported in `pgoutput`. All other plugins support replica identity `CHANGE`. + +{{}} + +For more information, refer to [Logical Decoding Output Plugins](https://www.postgresql.org/docs/15/logicaldecoding-output-plugin.html) in the PostgreSQL documentation. + +### LSN + +LSN (Log Sequence Number) in YugabyteDB is an unsigned 64-bit integer that uniquely identifies a change record or a transaction boundary record that is consumed from a given replication slot. + +In YugabyteDB, LSN values from different slots are considered unrelated and should not be compared. In YugabyteDB, LSN no longer represents the byte offset of a WAL record. + +LSN values for a single replication slot satisfy the following properties: + +- **Uniqueness** + + LSN values for the change and `COMMIT` records for a given replication slot are unique. In particular, changes from different tablets of the same or different tables will have unique LSN values for a replication slot. + +- **Ordering** + + LSN values can be compared ( `<`, `>`, `=` ). + + The LSN of the change records in a transaction will be strictly lower than the LSN of the COMMIT record of the same transaction. + + The LSNs of change records in a transaction will be in increasing order and will correspond to the order in which those changes were made in that transaction. That is, the LSN of an earlier change will have a strictly lower value than the LSN of a later change in the same transaction. This is the case even if the changes correspond to rows in different tablets of the same or different tables. + + For a given replication slot, the LSN of a `COMMIT` record of an earlier transaction will be strictly lower than the LSN of the `COMMIT` record of a later transaction. + +- **Determinism** + + For a given replication slot, the LSN value of a change record (or a transaction boundary record) remains the same for the lifetime of that replication slot. In particular, this is true across server and client restarts and client re-connections. Thus, LSN values for a single replication slot may be used to uniquely identify records that are consumed from that replication slot. The values can be compared for determining duplicates at the client side. + +### Replica identity + +Replica identity is a table-level parameter that controls the amount of information being written to the change records. YugabyteDB supports the following four replica identities: + +- CHANGE (default) +- DEFAULT +- FULL +- NOTHING + +The PostgreSQL replica identity `INDEX` is not supported in YugabyteDB. + +Replica identity `CHANGE` is the best performant and the default replica identity. The replica identity of a table can be changed by performing an ALTER TABLE. However, for a given slot, any ALTER TABLE performed to change the replica identity after the creation of the slot will have no effect. This means that the effective replica identity for any table for a slot, is the replica identity of the table that existed at the time of slot creation. A dynamically created table (a table created after slot creation) will have the default replica identity. 
For a replica identity modified after slot creation to take effect, a new slot will have to be created after performing the ALTER TABLE. + +The [ysql_yb_default_replica_identity](../../../../reference/configuration/yb-tserver/#ysql-yb-default-replica-identity) flag determines the default replica identity for user tables at the time of table creation. The default value is `CHANGE`. The purpose of this flag is to set the replica identities for dynamically created tables. In order to create a dynamic table with desired replica identity, the flag must be set accordingly and then the table must be created. + +{{< note title="Advisory" >}} +You should refrain from altering the replica identity of a dynamically created table for at least 5 minutes after its creation. +{{< /note >}} + +For more information, refer to [Replica Identity](../yugabytedb-connector/#replica-identity). + +For information on replica identity in PostgreSQL, refer to [REPLICA IDENTITY](https://www.postgresql.org/docs/15/sql-altertable.html#SQL-ALTERTABLE-REPLICA-IDENTITY) in the PostgreSQL documentation. + +### Replication protocols + +PostgreSQL has defined protocols for replication that need to be followed by clients to establish replication connection as well as message structures for streaming data. This includes the [Streaming Replication protocol](https://www.postgresql.org/docs/15/protocol-replication.html) and the [Logical Streaming Replication protocol](https://www.postgresql.org/docs/15/protocol-logical-replication.html). + +The logical streaming replication protocol sends individual transactions one-by-one. This means that all messages between a pair of `BEGIN` and `COMMIT` messages belong to the same transaction. + +YugabyteDB supports both the streaming replication protocols used in PostgreSQL to support logical replication, maintaining the same semantics described in PostgreSQL: + +- Streaming Replication Protocol - This protocol is followed by all output plugins. + +- Logical Streaming Replication Protocol - This protocol is followed by `pgoutput` and `yboutput`, in addition to the Streaming replication protocol. + +{{< note title="Note" >}} + +YugabyteDB does not support Physical Replication. + +{{< /note >}} + +## Learn more + +[CDC using Logical Replication architecture](../../../../architecture/docdb-replication/cdc-logical-replication/) diff --git a/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/monitor.md b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/monitor.md new file mode 100644 index 000000000000..4702b6bc4b6e --- /dev/null +++ b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/monitor.md @@ -0,0 +1,119 @@ +--- +title: CDC monitoring in YugabyteDB +headerTitle: Monitor +linkTitle: Monitor +description: Monitor Change Data Capture in YugabyteDB. +aliases: + - /preview/explore/change-data-capture/using-logical-replication/monitor/ +menu: + preview: + parent: explore-change-data-capture-logical-replication + identifier: monitor + weight: 30 +type: docs +--- + +## Catalog objects and views + +### pg_publication + +Contains all publication objects contained in the database. + +| Column name | Data type | Description | +| :----- | :----- | :----- | +| oid | oid | Row identifier | +| pubname | name | Name of the publication | +| pubowner | oid | OID of the owner. | +| puballtables | bool | If true, this publication includes all tables in the database including those added in the future. 
| +| pubinsert | bool | If true, INSERT operations are replicated for tables in the publication. | +| pubupdate | bool | If true, UPDATE operations are replicated for tables in the publication. | +| pubdelete | bool | If true, DELETE operations are replicated for tables in the publication. | +| pubtruncate | bool | If true, TRUNCATE operations are replicated for tables in the publication. | + +### pg_publication_rel + +Contains mapping between publications and tables. This is a many-to-many mapping. + +| Column name | Data type | Description | +| :----- | :----- | :----- | +| oid | oid | Row identifier. | +| prpubid | oid | OID of the publication. References pg_publication.oid. | +| prrelid| oid | OID of the relation. References pg_class.oid. | + +### pg_publication_tables + +Contains mapping between publications and tables. It is a wrapper over `pg_publication_rel` as it expands the publications defined as FOR ALL TABLES, so for such publications there will be a row for each eligible table. + +| Column name | Data type | Description | +| :----- | :----- | :----- | +| pubname | name | Name of publication. | +| schemaname | name | Name of schema containing table. | +| tablename | name | Name of table. | + +### pg_replication_slots + +Provides a list of all replication slots that currently exist on the database cluster, along with their metadata. + +| Column name | Data type | Description | +| :----- | :----- | :----- | +| slot_name | name | Name of the replication slot. | +| plugin | name | Output plugin name. | +| slot_type | text | Always logical. | +| datoid | oid | The OID of the database this slot is associated with. | +| database | text | The name of the database this slot is associated with. | +| temporary | boolean | True if this is a temporary replication slot. Temporary slots are automatically dropped on error or when the session has finished. | +| active | boolean | True if this slot is currently actively being used. In YSQL, an "active" replication slot means a slot which has been consumed at least once in a certain time frame. The time is defined using the `ysql_cdc_active_replication_slot_window_ms` flag, which has a default of 5 minutes. | +| active_pid | integer | The process ID of the session using this slot if the slot is currently actively being used. `NULL` if no replication process is ongoing. | +| xmin | xid | The oldest transaction that this slot needs the database to retain. | +| catalog_xmin | xid | Not applicable for YSQL. Always set to xmin. | +| restart_lsn | pg_lsn | The Log Sequence Number ([LSN](../key-concepts/#lsn-type)) of the oldest change record which still might be required by the consumer of this slot and thus won't be automatically removed during checkpoints. | +| confirmed_flush_lsn | pg_lsn | The LSN up to which the logical slot's consumer has confirmed receiving data. Data older than this is not available anymore. Transactions with commit LSN lower than the `confirmed_flush_lsn` are not available anymore. | +| yb_stream_id | text | UUID of the CDC stream | +| yb_restart_commit_ht | int8 | A uint64 representation of the commit Hybrid Time corresponding to the `restart_lsn`. This can be used by the client (like YugabyteDB connector) to perform a consistent snapshot (as of the `consistent_point`) in the case when a replication slot already exists. | + +### pg_stat_replication + +Displays information about active WAL senders, providing insights into the state of replication for each connected standby or logical replication client. 
+ +| Column name | Data type | Description | +| :----- | :----- | :----- | +| pid | integer | Process ID of WAL sender process. | +| usesysid | oid | OID of the user logged into this WAL sender process. | +| usename | name | Name of the user logged into this WAL sender process. | +| application_name | text | Name of the application that is connected to this WAL sender. | +| client_addr | inet | IP address of the client connected to this WAL sender. If this field is null, it indicates that the client is connected via a Unix socket on the server machine. | +| client_hostname | text | Host name of the connected client, as reported by a reverse DNS lookup of client_addr. This field will only be non-null for IP connections, and only when the [log_hostname](https://www.postgresql.org/docs/15/runtime-config-logging.html#GUC-LOG-HOSTNAME) configuration parameter is enabled. | +| client_port | integer | TCP port number that the client is using for communication with this WAL sender, or -1 if a Unix socket is used. | +| backend_start | timestamp with time zone | Time when this process was started (that is, when the client connected to this WAL sender). | +| backend_xmin | xid | The oldest transaction the client is interested in. | +| state | text | Current WAL sender state. Always `streaming`. | +| sent_lsn | pg_lsn | Last write-ahead log location sent on this connection. | +| write_lsn | pg_lsn | The last LSN acknowledged by the logical replication client. | +| flush_lsn | pg_lsn | Same as `write_lsn`. | +| replay_lsn | pg_lsn | Same as `write_lsn`. | +| write_lag | interval | The difference between the timestamp of the latest record in WAL and the timestamp of the last acknowledged record. Since YugabyteDB does not differentiate between write, flush, or replay, this value is the same for all three lag metrics. | +| flush_lag | interval | Same as `write_lag`. | +| replay_lag | interval | Same as `write_lag`. | +| sync_priority | integer | Synchronous state of this standby server. Always 0, as logical replication only supports asynchronous replication. | +| sync_state | text | Synchronous state of this standby server. Always `async`. | +| reply_time | timestamp with time zone | Timestamp of the last reply message received from the client. | + +## CDC service metrics + +Provide information about the CDC service in YugabyteDB. + +| Metric name | Type | Description | +| :---- | :---- | :---- | +| cdcsdk_change_event_count | `long` | The number of records sent by the CDC Service. | +| cdcsdk_traffic_sent | `long` | Total traffic sent, in bytes. | +| cdcsdk_sent_lag_micros | `long` | This lag metric is calculated by subtracting the timestamp of the latest record in the WAL of a tablet from the last record sent to the CDC connector. | +| cdcsdk_expiry_time_ms | `long` | The time left to read records from WAL is tracked by the Stream Expiry Time (ms). | +| cdcsdk_flush_lag | `long` | This lag metric shows the difference between the timestamp of the latest record in the WAL and the replication slot's restart time.| + +CDC service metrics are only calculated for tablets that are of interest for a replication slot. By default, tablets are considered to be of interest if they are polled at least once in 4 hours. You can configure the frequency using the [cdcsdk_tablet_not_of_interest_timeout_secs](../../../../reference/configuration/yb-tserver/#cdcsdk-tablet-not-of-interest-timeout-secs) YB-TServer flag. Metrics are calculated considering unpolled tablets until this timeout elapses. 
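+
+To cross-check these service-level metrics against the catalog views described earlier, you can query the views directly from any YSQL client. The following statements use only columns documented above; they report slot status and WAL sender lag for connected logical replication clients:
+
+```sql
+-- Replication slot status and acknowledged positions.
+SELECT slot_name, plugin, active, restart_lsn, confirmed_flush_lsn
+FROM pg_replication_slots;
+
+-- WAL sender state and lag for connected logical replication clients.
+SELECT pid, application_name, state, sent_lsn, write_lsn, write_lag
+FROM pg_stat_replication;
+```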
+ +## Connector metrics + + + +Refer to [Monitoring](../yugabytedb-connector/#monitoring) for information on YugabyteDB connector metrics. diff --git a/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/transformers.md b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/transformers.md new file mode 100644 index 000000000000..162e2cde6e77 --- /dev/null +++ b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/transformers.md @@ -0,0 +1,190 @@ +--- +title: YugabyteDB connector transformers +headerTitle: YugabyteDB connector transformers +linkTitle: Connector transformers +description: YugabyteDB connector transformers for Change Data Capture. +menu: + preview: + parent: yugabytedb-connector + identifier: yugabytedb-connector-transformers + weight: 70 +type: docs +--- + +The YugabyteDB Connector comes bundled with Single Message Transformers (SMTs). SMTs are applied to messages as they flow through Kafka Connect so that sinks understand the format in which data is sent. SMTs transform inbound messages after a source connector has produced them, but before they are written to Kafka. SMTs transform outbound messages before they are sent to a sink connector. + +The following SMTs are bundled with the connector jar file available on [GitHub releases](https://github.com/yugabyte/debezium/releases): + +* YBExtractNewRecordState +* PGCompatible + +{{< note title="Important" >}} + +These SMTs are only compatible with the [yboutput plugin](../key-concepts#output-plugin). + +{{< /note >}} + +## Example + +For simplicity, only `before` and `after` fields of the `payload` of the message published by the connector are mentioned in the following examples. Any information pertaining to the record schema, if it is the same as the standard Debezium connector for PostgreSQL, is skipped. + +Consider a table created using the following statement: + +```sql +CREATE TABLE test (id INT PRIMARY KEY, name TEXT, aura INT); +``` + +The following DML statements will be used to demonstrate payload in case of individual replica identities: + +```sql +-- statement 1 +INSERT INTO test VALUES (1, 'Vaibhav', 9876); + +-- statement 2 +UPDATE test SET aura = 9999 WHERE id = 1; + +-- statement 3 +UPDATE test SET name = 'Vaibhav Kushwaha', aura = 10 WHERE id = 1; + +-- statement 4 +UPDATE test SET aura = NULL WHERE id = 1; + +-- statement 5 +DELETE FROM test WHERE id = 1; +``` + +By default, the YugabyteDB CDC service publishes events with a schema that only includes columns that have been modified. The source connector then sends the value as `null` for columns that are missing in the payload. Each column payload includes a `set` field that is used to signal if a column has been set to `null` because it wasn't present in the payload from YugabyteDB. + +## YBExtractNewRecordState + +**Transformer class:** `io.debezium.connector.postgresql.transforms.YBExtractNewRecordState` + +The SMT `YBExtractNewRecordState` is used to flatten the records published by the connector and just keep the payload field in a flattened format. The flattened format can then be consumed by downstream connectors that do not support consuming the complex record format published by the Debezium connector. + +The following examples show what the payload would look like for each [replica identity](../key-concepts/#replica-identity). 
Note that in this example, as you have set the property `delete.tombstone.handling.mode` to `none` for the transformer, it will not drop the delete records from the stream. `YBExtractNewRecordState` is applied to the after field of an event; because the after field for a `DELETE` event is `null`, the output after applying this transformer on a `DELETE` event is also `null`. + +### CHANGE + +```json{.nocopy} +-- statement 1 +{"id":1,"name":"Vaibhav","aura":9876} + +-- statement 2 +{"id":1,"aura":9999} + +-- statement 3 +{"id":1,"name":"Vaibhav Kushwaha","aura":10} + +-- statement 4 +{"id":1,"aura":null} + +-- statement 5 +null +``` + +### DEFAULT + +```json{.nocopy} +-- statement 1 +{"id":1,"name":"Vaibhav","aura":9876} + +-- statement 2 +{"id":1,"name":"Vaibhav","aura":9999} + +-- statement 3 +{"id":1,"name":"Vaibhav Kushwaha","aura":10} + +-- statement 4 +{"id":1,"name":"Vaibhav Kushwaha","aura":null} + +-- statement 5 +null +``` + +### FULL + +```json{.nocopy} +-- statement 1 +{"id":1,"name":"Vaibhav","aura":9876} + +-- statement 2 +{"id":1,"name":"Vaibhav","aura":9999} + +-- statement 3 +{"id":1,"name":"Vaibhav Kushwaha","aura":10} + +-- statement 4 +{"id":1,"name":"Vaibhav Kushwaha","aura":null} + +-- statement 5 +null +``` + +## PGCompatible + +**Transformer class:** `io.debezium.connector.postgresql.transforms.PGCompatible` + +Some sink connectors may not understand the payload format published by the connector. `PGCompatible` transforms the payload to a format that is compatible with the format of standard change data events. Specifically, it transforms column schema and value to remove the `set` field and collapse the payload such that it only contains the data type schema and value. + +`PGCompatible` differs from `YBExtractNewRecordState` by recursively modifying all the fields in a payload. + +The following examples show what the payload would look like for each [replica identity](../key-concepts/#replica-identity). + +### CHANGE + +```json{.nocopy} +-- statement 1 +"before":null,"after":{"id":1,"name":"Vaibhav","aura":9876} + +-- statement 2 +"before":null,"after":{"id":1,"name":null,"aura":9999} + +-- statement 3 +"before":null,"after":{"id":1,"name":"Vaibhav Kushwaha","aura":10} + +-- statement 4 +"before":null,"after":{"id":1,"name":null,"aura":null} + +-- statement 5 +"before":{"id":1,"name":null,"aura":null},"after":null +``` + +Note that for statement 2 and 4, the columns that were not updated as a part of the UPDATE statement are `null` in the output field. 
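+
+Both transformers are enabled through the standard Kafka Connect SMT configuration on the connector. The following sketch uses the arbitrary alias `unwrap` and shows `YBExtractNewRecordState` with the tombstone setting referenced in the examples above; `PGCompatible` is configured the same way using its own transformer class:
+
+```properties
+# Register the transformer under an alias and point it at the bundled class.
+transforms=unwrap
+transforms.unwrap.type=io.debezium.connector.postgresql.transforms.YBExtractNewRecordState
+# Keep delete events in the stream instead of dropping them.
+transforms.unwrap.delete.tombstone.handling.mode=none
+```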
+ +### DEFAULT + +```json{.nocopy} +-- statement 1 +"before":null,"after":{"id":1,"name":"Vaibhav","aura":9876} + +-- statement 2 +"before":null,"after":{"id":1,"name":"Vaibhav","aura":9999} + +-- statement 3 +"before":null,"after":{"id":1,"name":"Vaibhav Kushwaha","aura":10} + +-- statement 4 +"before":null,"after":{"id":1,"name":"Vaibhav Kushwaha","aura":null} + +-- statement 5 +"before":{"id":1,"name":null,"aura":null},"after":null +``` + +### FULL + +```json{.nocopy} +-- statement 1 +"before":null,"after":{"id":1,"name":"Vaibhav","aura":9876} + +-- statement 2 +"before":{"id":1,"name":"Vaibhav","aura":9876},"after":{"id":1,"name":"Vaibhav","aura":9999} + +-- statement 3 +"before":{"id":1,"name":"Vaibhav","aura":9999},"after":{"id":1,"name":"Vaibhav Kushwaha","aura":10} + +-- statement 4 +"before":{"id":1,"name":"Vaibhav Kushwaha","aura":10},"after":{"id":1,"name":"Vaibhav Kushwaha","aura":null} + +-- statement 5 +"before":{"id":1,"name":"Vaibhav Kushwaha","aura":null},"after":null +``` diff --git a/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/yugabytedb-connector-properties.md b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/yugabytedb-connector-properties.md new file mode 100644 index 000000000000..2c8789a9a317 --- /dev/null +++ b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/yugabytedb-connector-properties.md @@ -0,0 +1,783 @@ +--- +title: YugabyteDB connector properties +headerTitle: YugabyteDB connector properties +linkTitle: Connector properties +description: YugabyteDB connector properties for Change Data Capture in YugabyteDB. +aliases: + - /preview/explore/change-data-capture/using-logical-replication/yugabytedb-connector-properties/ +menu: + preview: + parent: yugabytedb-connector + identifier: yugabytedb-connector-properties + weight: 70 +type: docs +--- + +The connector has many configuration properties that you can use to achieve the right connector behavior for your application. Many properties have default values. + +## Required configuration properties + +The following configuration properties are _required_ unless a default value is available. + +##### name + +Unique name for the connector. Attempting to register again with the same name will fail. This property is required by all Kafka Connect connectors. + +No default. + +##### connector.class + +The name of the Java class for the connector. Always use a value of `io.debezium.connector.postgresql.YugabyteDBConnector` for the YugabyteDB connector. + +No default. + +##### tasks.max + +The maximum number of tasks that should be created for this connector. The YugabyteDB connector always uses a single task and therefore does not use this value, so the default is always acceptable. + +Default: 1 + +##### plugin.name + +The name of the YugabyteDB [logical decoding plugin](../key-concepts/#output-plugin) installed on the YugabyteDB server. + +Supported values are `yboutput`, and `pgoutput`. + +Default: `decoderbufs` + +##### slot.name + +The name of the YugabyteDB logical decoding slot that was created for streaming changes from a particular plugin for a particular database/schema. The server uses this slot to stream events to the Debezium connector that you are configuring. + +Slot names can contain lower-case letters, numbers, and the underscore character. 
+ +Default: `debezium` + +##### slot.drop.on.stop + +Whether or not to delete the logical replication slot when the connector stops in a graceful, expected way. The default behavior is that the replication slot remains configured for the connector when the connector stops. When the connector restarts, having the same replication slot enables the connector to start processing where it left off. + +Set to true in only testing or development environments. Dropping the slot allows the database to discard WAL segments. When the connector restarts it performs a new snapshot or it can continue from a persistent offset in the Kafka Connect offsets topic. + +Default: false + +##### publication.name + +The name of the YugabyteDB publication created for streaming changes when using `pgoutput`. + +This publication is created at start-up if it does not already exist and it includes all tables. Debezium then applies its own include/exclude list filtering, if configured, to limit the publication to change events for the specific tables of interest. The connector user must have superuser permissions to create this publication, so it is usually preferable to create the publication before starting the connector for the first time. + +If the publication already exists, either for all tables or configured with a subset of tables, Debezium uses the publication as it is defined. + +Default: `dbz_publication` + +##### database.hostname + +IP address or hostname of the YugabyteDB database server. This needs to be in the format `IP1:PORT1,IP2:PORT2,IP3:PORT3` + +No default. + +##### database.port + +Integer port number of the YugabyteDB database server. + +Default: 5433 + +##### database.user + +Name of the YugabyteDB database user for connecting to the YugabyteDB database server. + +No default. + +##### database.password + +Password to use when connecting to the YugabyteDB database server. + +No default. + +##### database.dbname + +The name of the YugabyteDB database from which to stream the changes. + +No default. + +##### topic.prefix + +Topic prefix that provides a namespace for the particular YugabyteDB database server or cluster in which Debezium is capturing changes. The prefix should be unique across all other connectors, as it is used as a topic name prefix for all Kafka topics that receive records from this connector. Only alphanumeric characters, hyphens, dots, and underscores must be used in the database server logical name. + +{{< warning title="Warning" >}} Do not change the value of this property. If you change the name value, after a restart, instead of continuing to emit events to the original topics, the connector emits subsequent events to topics whose names are based on the new value. {{< /warning >}} + +No default. + +##### schema.include.list + +An optional, comma-separated list of regular expressions that match names of schemas for which you **want** to capture changes. Any schema name not included in `schema.include.list` is excluded from having its changes captured. By default, all non-system schemas have their changes captured. + +To match the name of a schema, Debezium applies the regular expression that you specify as an _anchored_ regular expression. That is, the specified expression is matched against the entire identifier for the schema; it does not match substrings that might be present in a schema name. + +If you include this property in the configuration, do not also set the `schema.exclude.list` property. + +No default. 
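+
+As a sketch of how the anchored matching works, the following hypothetical setting captures changes only from the `public` schema and from schemas whose entire names match `inventory_.*`:
+
+```properties
+# Anchored regular expressions; each must match the entire schema name.
+schema.include.list=public,inventory_.*
+```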
+ +##### schema.exclude.list + +An optional, comma-separated list of regular expressions that match names of schemas for which you **do not** want to capture changes. Any schema whose name is not included in `schema.exclude.list` has its changes captured, with the exception of system schemas. + +To match the name of a schema, Debezium applies the regular expression that you specify as an _anchored_ regular expression. That is, the specified expression is matched against the entire identifier for the schema; it does not match substrings that might be present in a schema name. + +If you include this property in the configuration, do not set the `schema.include.list` property. + +No default. + +##### table.include.list + +An optional, comma-separated list of regular expressions that match fully-qualified table identifiers for tables whose changes you want to capture. When this property is set, the connector captures changes only from the specified tables. Each identifier is of the form `schemaName.tableName`. By default, the connector captures changes in every non-system table in each schema whose changes are being captured. + +To match the name of a table, Debezium applies the regular expression that you specify as an anchored regular expression. That is, the specified expression is matched against the entire identifier for the table; it does not match substrings that might be present in a table name. + +If you include this property in the configuration, do not also set the `table.exclude.list` property. + +No default. + +##### table.exclude.list + +An optional, comma-separated list of regular expressions that match fully-qualified table identifiers for tables whose changes you do not want to capture. Each identifier is of the form `schemaName.tableName`. When this property is set, the connector captures changes from every table that you do not specify. + +To match the name of a table, Debezium applies the regular expression that you specify as an _anchored_ regular expression. That is, the specified expression is matched against the entire identifier for the table; it does not match substrings that might be present in a table name. + +If you include this property in the configuration, do not set the `table.include.list` property. + +No default. + +##### column.include.list + +An optional, comma-separated list of regular expressions that match the fully-qualified names of columns that should be included in change event record values. Fully-qualified names for columns are of the form `schemaName.tableName.columnName`. + +To match the name of a column, Debezium applies the regular expression that you specify as an _anchored_ regular expression. That is, the expression is used to match the entire name string of the column; it does not match substrings that might be present in a column name. + +If you include this property in the configuration, do not also set the `column.exclude.list` property. + +No default. + +##### column.exclude.list + +An optional, comma-separated list of regular expressions that match the fully-qualified names of columns that should be excluded from change event record values. Fully-qualified names for columns are of the form `schemaName.tableName.columnName`. + +To match the name of a column, Debezium applies the regular expression that you specify as an _anchored_ regular expression. That is, the expression is used to match the entire name string of the column; it does not match substrings that might be present in a column name. 
+ +If you include this property in the configuration, do not set the `column.include.list` property. + +No default + +##### skip.messages.without.change + +Specifies whether to skip publishing messages when there is no change in included columns. This would essentially filter messages if there is no change in columns included as per `column.include.list` or `column.exclude.list` properties. + +Note: Only works when REPLICA IDENTITY of the table is set to FULL. + +Default: false + +##### time.precision.mode + +Time, date, and timestamps can be represented with different kinds of precision: + +* `adaptive` captures the time and timestamp values exactly as in the database using either millisecond, microsecond, or nanosecond precision values based on the database column's type. +* `adaptive_time_microseconds` captures the date, datetime and timestamp values exactly as in the database using either millisecond, microsecond, or nanosecond precision values based on the database column's type. An exception is `TIME` type fields, which are always captured as microseconds. +* `connect` always represents time and timestamp values by using Kafka Connect built-in representations for `Time`, `Date`, and `Timestamp`, which use millisecond precision regardless of the database columns' precision. + +For more information, see [Temporal types](#temporal-types). + +Default: adaptive + +##### decimal.handling.mode + +Specifies how the connector should handle values for `DECIMAL` and `NUMERIC` columns: + +* `double` represents values by using double values, which might result in a loss of precision but which is easier to use. +* `string` encodes values as formatted strings, which are easy to consume but semantic information about the real type is lost. + +For more information, see [Decimal types](#decimal-types). + +Default: precise + +##### interval.handling.mode + +Specifies how the connector should handle values for interval columns: + +* `numeric` represents intervals using approximate number of microseconds. +* `string` represents intervals exactly by using the string pattern representation `PYMDTHMS`. For example: `P1Y2M3DT4H5M6.78S`. + +For more information, see [Basic types](#basic-types). + +Default: numeric + +##### database.sslmode + +Whether to use an encrypted connection to the YugabyteDB server. Options include: + +* `disable` uses an unencrypted connection. +* `allow` attempts to use an unencrypted connection first and, failing that, a secure (encrypted) connection. +* `prefer` attempts to use a secure (encrypted) connection first and, failing that, an unencrypted connection. +* `require` uses a secure (encrypted) connection, and fails if one cannot be established. +* `verify-ca` behaves like require but also verifies the server TLS certificate against the configured Certificate Authority (CA) certificates, or fails if no valid matching CA certificates are found. +* `verify-full` behaves like verify-ca but also verifies that the server certificate matches the host to which the connector is trying to connect. + +For more information, see the [PostgreSQL documentation](https://www.postgresql.org/docs/15/static/libpq-connect.html). + +Default: prefer + +##### database.sslcert + +The path to the file that contains the SSL certificate for the client. For more information, see the [PostgreSQL documentation](https://www.postgresql.org/docs/15/static/libpq-connect.html). + +No default. + +##### database.sslkey + +The path to the file that contains the SSL private key of the client. 
For more information, see the [PostgreSQL documentation](https://www.postgresql.org/docs/15/static/libpq-connect.html). + +No default. + +##### database.sslpassword + +The password to access the client private key from the file specified by `database.sslkey`. For more information, see the [PostgreSQL documentation](https://www.postgresql.org/docs/15/static/libpq-connect.html). + +No default. + +##### database.sslrootcert + +The path to the file that contains the root certificate(s) against which the server is validated. For more information, see the [PostgreSQL documentation](https://www.postgresql.org/docs/15/static/libpq-connect.html). + +No default. + +##### database.tcpKeepAlive + +Enable TCP keep-alive probe to verify that the database connection is still alive. For more information, see the [PostgreSQL documentation](https://www.postgresql.org/docs/15/static/libpq-connect.html). + +Default: true + +##### tombstones.on.delete + +Controls whether a delete event is followed by a tombstone event. + +* `true` - a delete operation is represented by a delete event and a subsequent tombstone event. + +* `false` - only a delete event is emitted. + +After a source record is deleted, emitting a tombstone event (the default behavior) allows Kafka to completely delete all events that pertain to the key of the deleted row in case log compaction is enabled for the topic. + +Default: true + +##### column.truncate.to.length.chars + +An optional, comma-separated list of regular expressions that match the fully-qualified names of character-based columns. Set this property if you want to truncate the data in a set of columns when it exceeds the number of characters specified by the length in the property name. Set `length` to a positive integer value, for example, `column.truncate.to.20.chars`. + +The fully-qualified name of a column observes the following format: `schemaName.tableName.columnName`. To match the name of a column, Debezium applies the regular expression that you specify as an anchored regular expression. That is, the specified expression is matched against the entire name string of the column; the expression does not match substrings that might be present in a column name. + +You can specify multiple properties with different lengths in a single configuration. + +Default: n/a + +##### column.mask.with.length.chars + +An optional, comma-separated list of regular expressions that match the fully-qualified names of character-based columns. Set this property if you want the connector to mask the values for a set of columns, for example, if they contain sensitive data. Set `length` to a positive integer to replace data in the specified columns with the number of asterisk (`*`) characters specified by the length in the property name. Set length to `0` (zero) to replace data in the specified columns with an empty string. + +The fully-qualified name of a column observes the following format: `schemaName.tableName.columnName`. To match the name of a column, Debezium applies the regular expression that you specify as an anchored regular expression. That is, the specified expression is matched against the entire name string of the column; the expression does not match substrings that might be present in a column name. + +You can specify multiple properties with different lengths in a single configuration. + +Default: n/a + +##### column.mask.hash.hashAlgorithm.with.salt._salt_;
column.mask.hash.v2.hashAlgorithm.with.salt._salt_ + +An optional, comma-separated list of regular expressions that match the fully-qualified names of character-based columns. Fully-qualified names for columns are of the form `schemaName.tableName.columnName`. + +To match the name of a column, Debezium applies the regular expression that you specify as an anchored regular expression. That is, the specified expression is matched against the entire name string of the column; the expression does not match substrings that might be present in a column name. In the resulting change event record, the values for the specified columns are replaced with pseudonyms. + +A pseudonym consists of the hashed value that results from applying the specified hashAlgorithm and salt. Based on the hash function that is used, referential integrity is maintained, while column values are replaced with pseudonyms. Supported hash functions are described in the [MessageDigest](https://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#MessageDigest) section of the Java Cryptography Architecture Standard Algorithm Name Documentation. + +In the following example, `CzQMA0cB5K` is a randomly selected salt: + +```column.mask.hash.SHA-256.with.salt.CzQMA0cB5K = inventory.orders.customerName, inventory.shipment.customerName``` + +If necessary, the pseudonym is automatically shortened to the length of the column. The connector configuration can include multiple properties that specify different hash algorithms and salts. + +Depending on the `hashAlgorithm` used, the salt selected, and the actual data set, the resulting data set might not be completely masked. + +Hashing strategy version 2 should be used to ensure fidelity if the value is being hashed in different places or systems. + +Default: n/a + +##### column.propagate.source.type + +An optional, comma-separated list of regular expressions that match the fully-qualified names of columns for which you want the connector to emit extra parameters that represent column metadata. When this property is set, the connector adds the following fields to the schema of event records: + +* __debezium.source.column.type +* __debezium.source.column.length +* __debezium.source.column.scale + +These parameters propagate a column's original type name and length (for variable-width types), respectively. + +Enabling the connector to emit this extra data can assist in properly sizing specific numeric or character-based columns in sink databases. + +The fully-qualified name of a column observes one of the following formats: `databaseName.tableName.columnName`, or `databaseName.schemaName.tableName.columnName`. + +To match the name of a column, Debezium applies the regular expression that you specify as an _anchored_ regular expression. That is, the specified expression is matched against the entire name string of the column; the expression does not match substrings that might be present in a column name. + +Default: n/a + +##### datatype.propagate.source.type + +An optional, comma-separated list of regular expressions that specify the fully-qualified names of data types that are defined for columns in a database. When this property is set, for columns with matching data types, the connector emits event records that include the following extra fields in their schema: + +* __debezium.source.column.type +* __debezium.source.column.length +* __debezium.source.column.scale + +These parameters propagate a column's original type name and length (for variable-width types), respectively.
+ +Enabling the connector to emit this extra data can assist in properly sizing specific numeric or character-based columns in sink databases. + +The fully-qualified name of a column observes one of the following formats: `databaseName.tableName.typeName`, or `databaseName.schemaName.tableName.typeName`. + +To match the name of a data type, Debezium applies the regular expression that you specify as an _anchored_ regular expression. That is, the specified expression is matched against the entire name string of the data type; the expression does not match substrings that might be present in a type name. + +For the list of YugabyteDB-specific data type names, see [Data type mappings](#data-type-mappings). + +Default: n/a + +##### message.key.columns + +A list of expressions that specify the columns that the connector uses to form custom message keys for change event records that it publishes to the Kafka topics for specified tables. + +By default, Debezium uses the primary key column of a table as the message key for records that it emits. In place of the default, or to specify a key for tables that lack a primary key, you can configure custom message keys based on one or more columns. + +To establish a custom message key for a table, list the table, followed by the columns to use as the message key. Each list entry takes the following format: + +`schemaName.tableName:keyColumn1,keyColumn2` + +To base a table key on multiple column names, insert commas between the column names. + +Each fully-qualified table name is a regular expression in the format `schemaName.tableName`. + +The property can include entries for multiple tables. Use a semicolon to separate table entries in the list. + +The following example sets the message key for the tables `inventory.customers` and `purchase.orders`: + +`inventory.customers:pk1,pk2;(.*).purchaseorders:pk3,pk4` + +For the table `inventory.customers`, the columns `pk1` and `pk2` are specified as the message key. For the `purchaseorders` tables in any schema, the columns `pk3` and `pk4` serve as the message key. + +There is no limit to the number of columns that you use to create custom message keys. However, it's best to use the minimum number that are required to specify a unique key. + +Note that if this property is set and `REPLICA IDENTITY` is set to `DEFAULT` on the tables, the tombstone events will not be created properly if the key columns are not part of the primary key of the table. Setting `REPLICA IDENTITY` to `FULL` is the only solution. + +Default: _empty string_ + +##### publication.autocreate.mode + +Applies only when streaming changes by using the [pgoutput plugin](https://www.postgresql.org/docs/15/sql-createpublication.html). The setting determines how creation of a [publication](https://www.postgresql.org/docs/15/logical-replication-publication.html) should work. Specify one of the following values: + +* `all_tables` - If a publication exists, the connector uses it. If a publication does not exist, the connector creates a publication for all tables in the database for which the connector is capturing changes. For the connector to create a publication, it must access the database through a database user account that has permission to create publications and perform replications. You grant the required permission by using the following SQL command: `CREATE PUBLICATION <publication_name> FOR ALL TABLES;`. +* `disabled` - The connector does not attempt to create a publication. A database administrator or the user configured to perform replications must have created the publication before running the connector.
If the connector cannot find the publication, the connector throws an exception and stops. +* `filtered` - If a publication exists, the connector uses it. If no publication exists, the connector creates a new publication for tables that match the current filter configuration as specified by the `schema.include.list`, `schema.exclude.list`, `table.include.list`, and `table.exclude.list` connector configuration properties. + + For example: `CREATE PUBLICATION <publication_name> FOR TABLE <tbl1, tbl2, tbl3>`. If the publication exists, the connector updates the publication for tables that match the current filter configuration. For example: `ALTER PUBLICATION <publication_name> SET TABLE <tbl1, tbl2, tbl3>`. + +Default: `all_tables` + +##### replica.identity.autoset.values + +The setting determines the value for [replica identity](#replica-identity) at the table level. + +This option overwrites the existing value in the database. Specify a comma-separated list of regular expressions that match fully-qualified table names, together with the replica identity value to be used for each matching table. + +Each expression must match the pattern `<fully-qualified table name>:<replica identity>`, where the table name could be defined as (`SCHEMA_NAME.TABLE_NAME`), and the replica identity values are: + +* `DEFAULT` - Records the old values of the columns of the primary key, if any. This is the default for non-system tables. +* `FULL` - Records the old values of all columns in the row. +* `NOTHING` - Records no information about the old row. This is the default for system tables. + + For example: + + ```json + schema1.*:FULL,schema2.table2:NOTHING,schema2.table3:DEFAULT + ``` + +{{< warning title="Warning" >}} Tables in YugabyteDB will always have the replica identity present at the time of replication slot creation; it cannot be altered at runtime. If it needs to be altered, it will only be reflected on a new slot created after altering the replica identity. {{< /warning >}} + +Default: _empty string_ + +##### binary.handling.mode + +Specifies how binary (`bytea`) columns should be represented in change events: + +* `bytes` represents binary data as byte array. +* `base64` represents binary data as base64-encoded strings. +* `base64-url-safe` represents binary data as base64-url-safe-encoded strings. +* `hex` represents binary data as hex-encoded (base16) strings. + +Default: bytes + +##### schema.name.adjustment.mode + +Specifies how schema names should be adjusted for compatibility with the message converter used by the connector. Possible settings: + +* `none` does not apply any adjustment. +* `avro` replaces the characters that cannot be used in the Avro type name with underscore. +* `avro_unicode` replaces the underscore or characters that cannot be used in the Avro type name with corresponding unicode like _uxxxx. Note: `_` is an escape sequence like backslash in Java. + +No default. + +##### field.name.adjustment.mode + +Specifies how field names should be adjusted for compatibility with the message converter used by the connector. Possible settings: + +* `none` does not apply any adjustment. +* `avro` replaces the characters that cannot be used in the Avro type name with underscore. +* `avro_unicode` replaces the underscore or characters that cannot be used in the Avro type name with corresponding unicode like _uxxxx. Note: `_` is an escape sequence like backslash in Java. + +For more information, see [Avro naming](https://debezium.io/documentation/reference/2.5/configuration/avro.html#avro-naming). + +No default.
+ +##### money.fraction.digits + +Specifies how many decimal digits should be used when converting PostgreSQL `money` type to `java.math.BigDecimal`, which represents the values in change events. Applicable only when `decimal.handling.mode` is set to `precise`. + +Default: 2 + +## Advanced configuration properties + +The following advanced configuration properties have defaults that work in most situations and therefore rarely need to be specified in the connector configuration. + +##### converters + +Enumerates a comma-separated list of the symbolic names of the custom converter instances that the connector can use. For example, `isbn`. + +You must set the converters property to enable the connector to use a custom converter. + +For each converter that you configure for a connector, you must also add a `.type` property, which specifies the fully-qualified name of the class that implements the converter interface. The `.type` property uses the following format: + +```properties +<converterSymbolicName>.type +``` + +For example: + +```properties +isbn.type: io.debezium.test.IsbnConverter +``` + +If you want to further control the behavior of a configured converter, you can add one or more configuration parameters to pass values to the converter. To associate any additional configuration parameter with a converter, prefix the parameter names with the symbolic name of the converter. For example, + +```properties +isbn.schema.name: io.debezium.YugabyteDB.type.Isbn +``` + +No default. + +##### snapshot.mode + +Specifies the criteria for performing a snapshot when the connector starts: + +* `initial` - The connector performs a snapshot only when no offsets have been recorded for the logical server name. +* `never` - The connector never performs snapshots. When a connector is configured this way, its behavior when it starts is as follows. If there is a previously stored Log Sequence Number ([LSN](../key-concepts/#lsn-type)) in the Kafka offsets topic, the connector continues streaming changes from that position. If no LSN has been stored, the connector starts streaming changes from the point in time when the YugabyteDB logical replication slot was created on the server. The never snapshot mode is useful only when you know all data of interest is still reflected in the WAL. +* `initial_only` - The connector performs an initial snapshot and then stops, without processing any subsequent changes. + +Default: `initial` + +##### snapshot.include.collection.list + +An optional, comma-separated list of regular expressions that match the fully-qualified names (`schemaName.tableName`) of the tables to include in a snapshot. The specified items must be named in the connector's `table.include.list` property. This property takes effect only if the connector's `snapshot.mode` property is set to a value other than `never`. + +This property does not affect the behavior of incremental snapshots. + +To match the name of a table, Debezium applies the regular expression that you specify as an _anchored_ regular expression. That is, the specified expression is matched against the entire name string of the table; it does not match substrings that might be present in a table name. + +Default: All tables included in `table.include.list` + +##### snapshot.select.statement.overrides + +Specifies the table rows to include in a snapshot. Use the property if you want a snapshot to include only a subset of the rows in a table. This property affects snapshots only. It does not apply to events that the connector reads from the log.
+ +The property contains a comma-separated list of fully-qualified table names in the form `schemaName.tableName`. For example: + +```properties +"snapshot.select.statement.overrides": "inventory.products,customers.orders" +``` + +For each table in the list, add a further configuration property that specifies the `SELECT` statement for the connector to run on the table when it takes a snapshot. The specified `SELECT` statement determines the subset of table rows to include in the snapshot. Use the following format to specify the name of this `SELECT` statement property: + +```properties +snapshot.select.statement.overrides.<schemaName>.<tableName> +``` + +For example: + +```properties +snapshot.select.statement.overrides.customers.orders +``` + +For example, from a `customers.orders` table that includes the soft-delete column `delete_flag`, add the following properties if you want a snapshot to include only those records that are not soft-deleted: + +```properties +"snapshot.select.statement.overrides": "customers.orders", +"snapshot.select.statement.overrides.customers.orders": "SELECT * FROM customers.orders WHERE delete_flag = 0 ORDER BY id DESC" +``` + +In the resulting snapshot, the connector includes only the records for which `delete_flag = 0`. + +No default. + +##### event.processing.failure.handling.mode + +Specifies how the connector should react to exceptions during processing of events: + +* `fail` propagates the exception, indicates the offset of the problematic event, and causes the connector to stop. +* `warn` logs the offset of the problematic event, skips that event, and continues processing. +* `skip` skips the problematic event and continues processing. + +Default: fail + +##### max.batch.size + +Positive integer value that specifies the maximum size of each batch of events that the connector processes. + +Default: 2048 + +##### max.queue.size + +Positive integer value that specifies the maximum number of records that the blocking queue can hold. When Debezium reads events streamed from the database, it places the events in the blocking queue before it writes them to Kafka. The blocking queue can provide backpressure for reading change events from the database in cases where the connector ingests messages faster than it can write them to Kafka, or when Kafka becomes unavailable. Events that are held in the queue are disregarded when the connector periodically records offsets. Always set the value of `max.queue.size` to be larger than the value of `max.batch.size`. + +Default: 8192 + +##### max.queue.size.in.bytes + +A long integer value that specifies the maximum volume of the blocking queue in bytes. By default, volume limits are not specified for the blocking queue. To specify the number of bytes that the queue can consume, set this property to a positive long value. + +If `max.queue.size` is also set, writing to the queue is blocked when the size of the queue reaches the limit specified by either property. For example, if you set `max.queue.size=1000`, and `max.queue.size.in.bytes=5000`, writing to the queue is blocked after the queue contains 1000 records, or after the volume of the records in the queue reaches 5000 bytes. + +Default: 0 + +##### poll.interval.ms + +Positive integer value that specifies the number of milliseconds the connector should wait for new change events to appear before it starts processing a batch of events. Defaults to 500 milliseconds.
+ +Default: 500 + +##### include.unknown.datatypes + +Specifies connector behavior when the connector encounters a field whose data type is unknown. The default behavior is that the connector omits the field from the change event and logs a warning. + +Set this property to `true` if you want the change event to contain an opaque binary representation of the field. This lets consumers decode the field. You can control the exact representation by setting the [binary handling mode](#connector-properties) property. + +{{< note title="Note" >}} Consumers risk backward compatibility issues when `include.unknown.datatypes` is set to `true`. Not only may the database-specific binary representation change between releases, but if the data type is eventually supported by Debezium, the data type will be sent downstream in a logical type, which would require adjustments by consumers. In general, when encountering unsupported data types, create a feature request so that support can be added. {{< /note >}} + +Default: false + +##### database.initial.statements + +A semicolon separated list of SQL statements that the connector executes when it establishes a JDBC connection to the database. To use a semicolon as a character and not as a delimiter, specify two consecutive semicolons, `;;`. + +The connector may establish JDBC connections at its own discretion. Consequently, this property is useful for configuring session parameters only, and not for executing DML statements. + +The connector does not execute these statements when it creates a connection for reading the transaction log. + +No default + +##### status.update.interval.ms + +Frequency for sending replication connection status updates to the server, given in milliseconds. The property also controls how frequently the database status is checked to detect a dead connection in case the database was shut down. + +Default: 10000 + +##### schema.refresh.mode + +Specify the conditions that trigger a refresh of the in-memory schema for a table. + +`columns_diff` is the safest mode. It ensures that the in-memory schema stays in sync with the database table's schema at all times. + +`columns_diff_exclude_unchanged_toast` instructs the connector to refresh the in-memory schema cache if there is a discrepancy with the schema derived from the incoming message, unless unchanged TOASTable data fully accounts for the discrepancy. + +This setting can significantly improve connector performance if there are frequently-updated tables that have TOASTed data that are rarely part of updates. However, it is possible for the in-memory schema to become outdated if TOASTable columns are dropped from the table. + +Default: `columns_diff` + +##### snapshot.delay.ms + +An interval in milliseconds that the connector should wait before performing a snapshot when the connector starts. If you are starting multiple connectors in a cluster, this property is useful for avoiding snapshot interruptions, which might cause re-balancing of connectors. + +No default + +##### snapshot.fetch.size + +During a snapshot, the connector reads table content in batches of rows. This property specifies the maximum number of rows in a batch. + +Default: 10240 + +##### slot.stream.params + +Semicolon separated list of parameters to pass to the configured logical decoding plugin. + +No default + +##### slot.max.retries + +If connecting to a replication slot fails, this is the maximum number of consecutive attempts to connect. 
+ +Default: 6 + +##### slot.retry.delay.ms + +The number of milliseconds to wait between retry attempts when the connector fails to connect to a replication slot. + +Default: 10000 (10 seconds) + +##### unavailable.value.placeholder + +Specifies the constant that the connector provides to indicate that the original value is a toasted value that is not provided by the database. If the setting of `unavailable.value.placeholder` starts with the `hex:` prefix, it is expected that the rest of the string represents hexadecimally encoded octets. + +Default: `__debezium_unavailable_value` + +##### provide.transaction.metadata + +Determines whether the connector generates events with transaction boundaries and enriches change event envelopes with transaction metadata. Specify true if you want the connector to do this. For more information, see [Transaction metadata](#transaction-metadata). + +Default: false + +##### flush.lsn.source + +Determines whether the connector should commit the LSN of the processed records in the source YugabyteDB database so that the WAL logs can be deleted. Specify `false` if you don't want the connector to do this. Note that if set to `false`, the LSN is not acknowledged by Debezium and, as a result, WAL logs are not cleared, which might result in disk space issues. The user is expected to handle the acknowledgement of the LSN outside Debezium. + +Default: true + +##### retriable.restart.connector.wait.ms + +The number of milliseconds to wait before restarting a connector after a retriable error occurs. + +Default: 30000 (30 seconds) + +##### skipped.operations + +A comma-separated list of operation types that will be skipped during streaming. The operations include: `c` for inserts/create, `u` for updates, `d` for deletes, `t` for truncates, and `none` to not skip any operations. By default, truncate operations are skipped. + +Default: t + +##### xmin.fetch.interval.ms + +How often, in milliseconds, the XMIN will be read from the replication slot. The XMIN value provides the lower bounds of where a new replication slot could start from. The default value of `0` disables XMIN tracking. + +Default: 0 + +##### topic.naming.strategy + +The name of the TopicNamingStrategy class that should be used to determine the topic name for data change, schema change, transaction, and heartbeat events. + +Default: `io.debezium.schema.SchemaTopicNamingStrategy` + +##### topic.delimiter + +Specify the delimiter for the topic name. + +Default: `.` + +##### topic.cache.size + +The size of the bounded concurrent hash map that is used to hold topic names. This cache helps to determine the topic name corresponding to a given data collection. + +Default: 10000 + +##### topic.heartbeat.prefix + +Controls the name of the topic to which the connector sends heartbeat messages. The topic name has this pattern: + +`<topic.heartbeat.prefix>.<topic.prefix>` + +For example, if the topic prefix is `fulfillment`, the default topic name is `__debezium-heartbeat.fulfillment`. + +Default: `__debezium-heartbeat` + +##### topic.transaction + +Controls the name of the topic to which the connector sends transaction metadata messages. The topic name has this pattern: + +`<topic.prefix>.transaction` + +For example, if the `topic.prefix` is `fulfillment`, the default topic name is `fulfillment.transaction`. + +Default: transaction + +##### snapshot.max.threads + +Specifies the number of threads that the connector uses when performing an initial snapshot.
To enable parallel initial snapshots, set the property to a value greater than 1. In a parallel initial snapshot, the connector processes multiple tables concurrently. This feature is incubating. + +Default: 1 + +##### custom.metric.tags + +Accepts key-value pairs that customize the MBean object name; the pairs are appended to the end of the regular name, each key represents a tag for the MBean object name, and the corresponding value is the value of that tag. For example: `k1=v1,k2=v2`. + +No default + +##### errors.max.retries + +The maximum number of retries on retriable errors (for example, connection errors) before failing (-1 = no limit, 0 = disabled, > 0 = num of retries). + +Default: 60 + +##### slot.lsn.type + +The type of LSN to use for the specified replication slot: + +* SEQUENCE - A monotonically increasing number that determines the record in global order in the context of a slot. +* HYBRID_TIME - A hybrid time value that can be used to compare transactions across slots. + +##### streaming.mode + +Specifies whether the connector should stream changes using a single slot or multiple slots in parallel. + +* `default` uses a single task to stream all changes. +* `parallel` uses multi-task mode and streams changes using the number of specified replication slots. + +{{< note title="Important" >}} + +When deploying the connector using `parallel` streaming mode, you need to ensure that the `table.include.list` only contains one table for which the streaming is supposed to happen in parallel. + +{{< /note >}} + +{{< note title="Usage with snapshot" >}} + +If `snapshot.mode` is set to `initial` or `initial_only`, you need to ensure that the configuration also contains a valid value for the configuration property `primary.key.hash.columns`. + +{{< /note >}} + +##### slot.names + +A list of slot names, provided as comma-separated values, to be used by each task when using `streaming.mode=parallel`. This property applies only when `streaming.mode` is set to `parallel`; otherwise it has no effect. + +No default. + +##### publication.names + +A list of publication names, provided as comma-separated values, to be used by each task when using `streaming.mode=parallel`. This property applies only when `streaming.mode` is set to `parallel`; otherwise it has no effect. + +No default. + +##### slot.ranges + +The tablet hash code ranges to be assigned to each task when using `streaming.mode=parallel`, provided as ranges separated by semicolons. This property applies only when `streaming.mode` is set to `parallel`; otherwise it has no effect. + +No default. + +For example, suppose you have a table with 3 tablets where the tablets have hash ranges of `[0,21845)`, `[21845,43690)`, and `[43690,65536)`. The value for this configuration would be `slot.ranges=0,21845;21845,43690;43690,65536`. + +##### primary.key.hash.columns + +The columns of the table which constitute the hash part of the primary key. This property is only valid when `streaming.mode` is set to `parallel`. + +## Pass-through configuration properties + +The connector also supports pass-through configuration properties that are used when creating the Kafka producer and consumer. + +Be sure to consult the [Kafka documentation](https://kafka.apache.org/documentation.html) for all of the configuration properties for Kafka producers and consumers. The YugabyteDB connector does use the [new consumer configuration properties](https://kafka.apache.org/documentation.html#consumerconfigs).
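+
+Putting a selection of the properties described on this page together, a minimal connector registration payload might look like the following sketch. All host names, credentials, and database, slot, publication, and table names are placeholders to adapt to your environment:
+
+```json
+{
+  "name": "yugabytedb-connector",
+  "config": {
+    "connector.class": "io.debezium.connector.postgresql.YugabyteDBConnector",
+    "database.hostname": "10.0.0.1:5433,10.0.0.2:5433,10.0.0.3:5433",
+    "database.port": "5433",
+    "database.user": "yugabyte",
+    "database.password": "yugabyte",
+    "database.dbname": "yugabyte",
+    "topic.prefix": "dbserver1",
+    "plugin.name": "yboutput",
+    "slot.name": "dbz_slot",
+    "publication.name": "dbz_publication",
+    "table.include.list": "public.orders",
+    "snapshot.mode": "initial"
+  }
+}
+```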
diff --git a/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/yugabytedb-connector.md b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/yugabytedb-connector.md new file mode 100644 index 000000000000..439f6ae3f9df --- /dev/null +++ b/docs/content/v2.25/additional-features/change-data-capture/using-logical-replication/yugabytedb-connector.md @@ -0,0 +1,1734 @@ +--- +title: YugabyteDB connector +headerTitle: YugabyteDB connector +linkTitle: YugabyteDB connector +description: YugabyteDB connector for Change Data Capture in YugabyteDB. +aliases: + - /preview/explore/change-data-capture/using-logical-replication/yugabytedb-connector/ +menu: + preview: + parent: explore-change-data-capture-logical-replication + identifier: yugabytedb-connector + weight: 70 +type: docs +rightNav: + hideH4: true +--- + +The YugabyteDB Connector is based on the Debezium Connector, and captures row-level changes in the schemas of a YugabyteDB database using the PostgreSQL replication protocol. + +The first time it connects to a YugabyteDB server, the connector takes a consistent snapshot of all schemas. After that snapshot is complete, the connector continuously captures row-level changes that insert, update, and delete database content, and that were committed to a YugabyteDB database. The connector generates data change event records and streams them to Kafka topics. For each table, the default behavior is that the connector streams all generated events to a separate Kafka topic for that table. Applications and services consume data change event records from that topic. + +## Overview + +YugabyteDB CDC using logical decoding is a mechanism that allows the extraction of changes that were committed to the transaction log and the processing of these changes in a user-friendly manner with the help of a [PostgreSQL output plugin](https://www.postgresql.org/docs/15/logicaldecoding-output-plugin.html). The output plugin enables clients to consume the changes. + +The YugabyteDB connector contains two main parts that work together to read and process database changes: + +* You must configure a replication slot that uses your chosen output plugin before running the YugabyteDB server. The plugin can be one of the following: + + + * `yboutput` is the plugin packaged with YugabyteDB. It is maintained by Yugabyte and is always present with the distribution. + + * `pgoutput` is the standard logical decoding output plugin in PostgreSQL 10+. It is maintained by the PostgreSQL community, and used by PostgreSQL itself for logical replication. YugabyteDB bundles this plugin with the standard distribution so it is always present and no additional libraries need to be installed. The YugabyteDB connector interprets the raw replication event stream directly into change events. + + +* Java code (the actual Kafka Connect connector) that reads the changes produced by the chosen logical decoding output plugin. It uses the [streaming replication protocol](https://www.postgresql.org/docs/15/protocol-replication.html), by means of the YugabyteDB JDBC driver. + +The connector produces a change event for every row-level insert, update, and delete operation that was captured, and sends change event records for each table in a separate Kafka topic. Client applications read the Kafka topics that correspond to the database tables of interest, and can react to every row-level event they receive from those topics. 
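+
+As noted above, a replication slot that uses the chosen output plugin must be available for the connector. Assuming logical replication is enabled on the cluster, such a slot can typically be created ahead of time from a YSQL session as follows (the slot name `dbz_slot` is illustrative):
+
+```sql
+-- Create a logical replication slot bound to the bundled yboutput plugin.
+SELECT * FROM pg_create_logical_replication_slot('dbz_slot', 'yboutput');
+```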
+ +YugabyteDB normally purges write-ahead log (WAL) segments after some period of time. This means that the connector does not have the complete history of all changes that have been made to the database. Therefore, when the YugabyteDB connector first connects to a particular YugabyteDB database, it starts by performing a consistent snapshot of each of the configured tables. After the connector completes the snapshot, it continues streaming changes from the exact point at which the snapshot was made. This way, the connector starts with a consistent view of all of the data, and does not omit any changes that were made while the snapshot was being taken. + +The connector is tolerant of failures. As the connector reads changes and produces events, it records the Log Sequence Number ([LSN](../key-concepts/#lsn-type)) for each event. If the connector stops for any reason (including communication failures, network problems, or crashes), upon restart the connector continues reading the WAL where it last left off. + +{{< tip title="Use UTF-8 encoding" >}} + +Debezium supports databases with UTF-8 character encoding only. With a single-byte character encoding, it's not possible to correctly process strings that contain extended ASCII code characters. + +{{< /tip >}} + +## How the connector works + +To optimally configure and run a Debezium connector, it is helpful to understand how the connector performs snapshots, streams change events, determines Kafka topic names, and uses metadata. + +### Security + +To use the Debezium connector to stream changes from a YugabyteDB database, the connector must operate with specific privileges in the database. Although one way to grant the necessary privileges is to provide the user with `superuser` privileges, doing so potentially exposes your YugabyteDB data to unauthorized access. Rather than granting excessive privileges to the Debezium user, it is best to create a dedicated Debezium replication user to which you grant specific privileges. + +For more information about configuring privileges for the Debezium replication user, see [Setting up permissions](#setting-up-permissions). + +### Snapshots + +Most YugabyteDB servers are configured to not retain the complete history of the database in the WAL segments. This means that the YugabyteDB connector would be unable to see the entire history of the database by reading only the WAL. Consequently, the first time that the connector starts, it performs an initial consistent snapshot of the database. + +#### Default workflow behavior of initial snapshots + +The default behavior for performing a snapshot consists of the following steps. You can change this behavior by setting the `snapshot.mode` [connector configuration property](../yugabytedb-connector-properties/#advanced-configuration-properties) to a value other than `initial`. + +1. Start a transaction. +2. Set the transaction read time to the [consistent point](../../../../architecture/docdb-replication/cdc-logical-replication/#initial-snapshot) associated with the replication slot. +3. Execute snapshot through the execution of a `SELECT` query. +4. Generate a `READ` event for each row and write to the appropriate table-specific Kafka topic. +5. Record successful completion of the snapshot in the connector offsets. + +If the connector fails, is rebalanced, or stops after Step 1 begins but before Step 5 completes, upon restart the connector begins a new snapshot. 
After the connector completes its initial snapshot, the YugabyteDB connector continues streaming from the position that it read in Step 2. This ensures that the connector does not miss any updates. If the connector stops again for any reason, upon restart, the connector continues streaming changes from where it previously left off. + +The following table describes the options for the `snapshot.mode` connector configuration property. + +| Option | Description | +| :--- | :--- | +| `never` | The connector never performs snapshots. When a connector is configured this way, its behavior when it starts is as follows. If there is a previously stored LSN in the Kafka offsets topic, the connector continues streaming changes from that position. If no LSN has been stored, the connector starts streaming changes from the point in time when the YugabyteDB logical replication slot was created on the server. The `never` snapshot mode is beneficial only when you know all data of interest is still reflected in the WAL. | +| `initial` (default) | The connector performs a database snapshot when no Kafka offsets topic exists. After the database snapshot completes the Kafka offsets topic is written. If there is a previously stored LSN in the Kafka offsets topic, the connector continues streaming changes from that position. | +| `initial_only` | The connector performs a database snapshot and stops before streaming any change event records. If the connector had started but did not complete a snapshot before stopping, the connector restarts the snapshot process and stops when the snapshot completes. | + +### Streaming changes + +The YugabyteDB connector typically spends the vast majority of its time streaming changes from the YugabyteDB server to which it is connected. This mechanism relies on [PostgreSQL's replication protocol](https://www.postgresql.org/docs/15/protocol-replication.html). This protocol enables clients to receive changes from the server as they are committed in the server's transaction logs. + +Whenever the server commits a transaction, a separate server process invokes a callback function from the [logical decoding plugin](../key-concepts/#output-plugin). This function processes the changes from the transaction, converts them to a specific format and writes them on an output stream, which can then be consumed by clients. + +The YugabyteDB connector acts as a YugabyteDB client. When the connector receives changes it transforms the events into Debezium create, update, or delete events that include the LSN of the event. The YugabyteDB connector forwards these change events in records to the Kafka Connect framework, which is running in the same process. The Kafka Connect process asynchronously writes the change event records in the same order in which they were generated to the appropriate Kafka topic. + +Periodically, Kafka Connect records the most recent offset in another Kafka topic. The offset indicates source-specific position information that Debezium includes with each event. For the YugabyteDB connector, the LSN recorded in each change event is the offset. + +When Kafka Connect gracefully shuts down, it stops the connectors, flushes all event records to Kafka, and records the last offset received from each connector. When Kafka Connect restarts, it reads the last recorded offset for each connector, and starts each connector at its last recorded offset. When the connector restarts, it sends a request to the YugabyteDB server to send the events starting just after that position. 
+ +### Logical decoding plugin support + +As of YugabyteDB v2024.1.1 and later, YugabyteDB supports the [yboutput plugin](../key-concepts/#output-plugin), a native output plugin for logical decoding. + +Additionally, YugabyteDB also supports the PostgreSQL `pgoutput` plugin natively. This means that the YugabyteDB connector can work with an existing setup configured using `pgoutput`. + +### Topic names + +By default, the YugabyteDB connector writes change events for all `INSERT`, `UPDATE`, and `DELETE` operations that occur in a table to a single Apache Kafka topic that is specific to that table. The connector names change event topics as _topicPrefix.schemaName.tableName_. + +The components of a topic name are as follows: + +* _topicPrefix_ - the topic prefix as specified by the `topic.prefix` configuration property. +* _schemaName_ - the name of the database schema in which the change event occurred. +* _tableName_ - the name of the database table in which the change event occurred. + +For example, suppose that `dbserver` is the topic prefix in the configuration for a connector that is capturing changes in a YugabyteDB installation that has a `yugabyte` database and an `inventory` schema that contains four tables: `products`, `products_on_hand`, `customers`, and `orders`. The connector would stream records to these four Kafka topics: + +* `dbserver.inventory.products` +* `dbserver.inventory.products_on_hand` +* `dbserver.inventory.customers` +* `dbserver.inventory.orders` + +Now suppose that the tables are not part of a specific schema but were created in the default public YugabyteDB schema. The names of the Kafka topics would be: + +* `dbserver.public.products` +* `dbserver.public.products_on_hand` +* `dbserver.public.customers` +* `dbserver.public.orders` + +The connector applies similar naming conventions to label its [transaction metadata topics](#transaction-metadata). + +If the default topic names don't meet your requirements, you can configure custom topic names. To configure custom topic names, you specify regular expressions in the logical topic routing SMT. For more information about using the logical topic routing SMT to customize topic naming, see the Debezium documentation on [Topic routing](https://debezium.io/documentation/reference/2.5/transformations/topic-routing.html). + +### Transaction metadata + +Debezium can generate events that represent transaction boundaries and that enrich data change event messages. + +{{< note title="Limits on when Debezium receives transaction metadata" >}} + +Debezium registers and receives metadata only for transactions that occur _after you deploy the connector_. Metadata for transactions that occur before you deploy the connector is not available. + +{{< /note >}} + +For every transaction `BEGIN` and `END`, Debezium generates an event containing the following fields: + +* `status` - `BEGIN` or `END`. +* `id` - String representation of the unique transaction identifier composed of YugabyteDB transaction ID itself and LSN of given operation separated by colon, that is, the format is `txID:LSN`. +* `ts_ms` - The time of a transaction boundary event (`BEGIN` or `END` event) at the data source. If the data source does not provide Debezium with the event time, then the field instead represents the time at which Debezium processes the event. +* `event_count` (for `END` events) - total number of events emitted by the transaction. 
+* `data_collections` (for `END` events) - an array of pairs of `data_collection` and `event_count` that provides the number of events emitted by changes originating from given data collection.
+
+For example:
+
+```output.json
+{
+  "status": "BEGIN",
+  "id": "571:53195829",
+  "ts_ms": 1486500577125,
+  "event_count": null,
+  "data_collections": null
+}
+
+{
+  "status": "END",
+  "id": "571:53195832",
+  "ts_ms": 1486500577691,
+  "event_count": 2,
+  "data_collections": [
+    {
+      "data_collection": "s1.a",
+      "event_count": 1
+    },
+    {
+      "data_collection": "s2.a",
+      "event_count": 1
+    }
+  ]
+}
+```
+
+Unless overridden via the `transaction.topic` option, transaction events are written to the topic named _topicPrefix_.transaction.
+
+#### Change data event enrichment
+
+When transaction metadata is enabled, the data message `Envelope` is enriched with a new `transaction` field. This field provides information about every event in the form of a composite of fields:
+
+* `id` - string representation of unique transaction identifier
+* `total_order` - absolute position of the event among all events generated by the transaction
+* `data_collection_order` - the per-data collection position of the event among all events emitted by the transaction
+
+Following is an example of a message:
+
+```output.json
+{
+  "before": null,
+  "after": {
+    "pk": "2",
+    "aa": "1"
+  },
+  "source": {
+    ...
+  },
+  "op": "c",
+  "ts_ms": "1580390884335",
+  "transaction": {
+    "id": "571:53195832",
+    "total_order": "1",
+    "data_collection_order": "1"
+  }
+}
+```
+
+## Data change events
+
+The YugabyteDB connector generates a data change event for each row-level `INSERT`, `UPDATE`, and `DELETE` operation. Each event contains a key and a value. The structure of the key and the value depends on the table that was changed.
+
+Debezium and Kafka Connect are designed around _continuous streams of event messages_. However, the structure of these events may change over time, which can be difficult for consumers to handle. To address this, each event contains the schema for its content or, if you are using a schema registry, a schema ID that a consumer can use to obtain the schema from the registry. This makes each event self-contained.
+
+The following skeleton JSON shows the basic four parts of a change event. However, how you configure the Kafka Connect converter that you choose to use in your application determines the representation of these four parts in change events. A `schema` field is in a change event only when you configure the converter to produce it. Likewise, the event key and event payload are in a change event only if you configure a converter to produce it. If you use the JSON converter and you configure it to produce all four basic change event parts, change events have this structure:
+
+```output.json
+{
+ "schema": { --> 1
+   ...
+  },
+ "payload": { --> 2
+   ...
+ },
+ "schema": { --> 3
+   ...
+ },
+ "payload": { --> 4
+   ...
+ }
+}
+```
+
+The following table describes the content of the change events.
+
+| Item | Field name | Description |
+| :--: | :--------- | :---------- |
+| 1 | `schema` | The first `schema` field is part of the event key. It specifies a Kafka Connect schema that describes what is in the event key's `payload` portion. In other words, the first `schema` field describes the structure of the primary key, or the unique key if the table does not have a primary key, for the table that was changed. |
+| 2 | `payload` | The first `payload` field is part of the event key.
It has the structure described by the previous `schema` field and it contains the key for the row that was changed. | +| 3 | `schema` | The second `schema` field is part of the event value. It specifies the Kafka Connect schema that describes what is in the event value's `payload` portion. In other words, the second `schema` describes the structure of the row that was changed. Typically, this schema contains nested schemas. | +| 4 | `payload` | The second `payload` field is part of the event value. It has the structure described by the previous `schema` field and it contains the actual data for the row that was changed. | + +By default, the connector streams change event records to [Kafka topics](#topic-names) with names that are the same as the event's originating table. + +{{< note title="Note" >}} + +Starting with Kafka 0.10, Kafka can optionally record the event key and value with the timestamp at which the message was created (recorded by the producer) or written to the log by Kafka. + +{{< /note >}} + +{{< warning title="Warning" >}} + +The YugabyteDB connector ensures that all Kafka Connect schema names adhere to the Avro schema name format. This means that the logical server name must start with a Latin letter or an underscore, that is, `a-z`, `A-Z`, or `_`. Each remaining character in the logical server name and each character in the schema and table names must be a Latin letter, a digit, or an underscore, that is, `a-z`, `A-Z`, `0-9`, or `_`. If there is an invalid character it is replaced with an underscore character. + +This can lead to unexpected conflicts if the topic prefix, a schema name, or a table name contains invalid characters, and the only characters that distinguish names from one another are invalid and thus replaced with underscores. + +{{< /warning >}} + +### Change event keys + +For a given table, the change event's key has a structure that contains a field for each column in the primary key of the table at the time the event was created. Alternatively, if the table has `REPLICA IDENTITY` set to `FULL` there is a field for each unique key constraint. + +Consider a `customers` table defined in the `public` database schema and the example of a change event key for that table. + +**Example table:** + +```sql +CREATE TABLE customers ( + id SERIAL, + first_name VARCHAR(255) NOT NULL, + last_name VARCHAR(255) NOT NULL, + email VARCHAR(255) NOT NULL, + PRIMARY KEY(id) +); +``` + +#### Example change event key + +If the `topic.prefix` connector configuration property has the value `YugabyteDB_server`, every change event for the `customers` table while it has this definition has the same key structure, which in JSON looks like this: + +```output.json +{ + "schema": { --> 1 + "type": "struct", + "name": "YugabyteDB_server.public.customers.Key", --> 2 + "optional": false, --> 3 + "fields": [ --> 4 + { + "name": "id", + "index": "0", + "schema": { + "type": "INT32", + "optional": "false" + } + } + ] + }, + "payload": { --> 5 + "id": "1" + }, +} +``` + +**Description of a change event key:** + +| Item | Field name | Description | +| :--- | :--------- | :---------- | +| 1 | schema | The schema portion of the key specifies a Kafka Connect schema that describes what is in the key's `payload` portion. | +| 2 | YugabyteDB_server.public.customers.Key | Name of the schema that defines the structure of the key's payload. This schema describes the structure of the primary key for the table that was changed. 
Key schema names have the format _connector-name.schema-name.table-name.Key_. In this example:
`YugabyteDB_server` is the name of the connector that generated this event.
`public` is the schema which contains the table that was changed.
`customers` is the table that was updated. | +| 3 | optional | Indicates whether the event key must contain a value in its `payload` field. In this example, a value in the key's payload is required. | +| 4 | fields | Specifies each field that is expected in the payload, including each field's name, index, and schema. | +| 5 | payload | Contains the key for the row for which this change event was generated. In this example, the key, contains a single `id` field whose value is `1`. | + +{{< note title="Note" >}} + +Although the `column.exclude.list` and `column.include.list` connector configuration properties allow you to capture only a subset of table columns, all columns in a primary or unique key are always included in the event's key. + +{{< /note >}} + +{{< warning title="Warning" >}} + +CDC is not supported for tables without primary keys. + +{{< /warning >}} + +### Change event values + +The value in a change event is a bit more complicated than the key. Like the key, the value has a `schema` section and a `payload` section. The `schema` section contains the schema that describes the `Envelope` structure of the `payload` section, including its nested fields. Change events for operations that create, update or delete data all have a value payload with an envelope structure. + +Consider the same sample table that was used to show an example of a change event key: + +```sql +CREATE TABLE customers ( + id SERIAL, + first_name VARCHAR(255) NOT NULL, + last_name VARCHAR(255) NOT NULL, + email VARCHAR(255) NOT NULL, + PRIMARY KEY(id) +); +``` + +The value portion of a change event for a change to this table varies according to the `REPLICA IDENTITY` setting and the operation that the event is for. + +### Replica Identity + +[REPLICA IDENTITY](https://www.postgresql.org/docs/15/sql-altertable.html#SQL-ALTERTABLE-REPLICA-IDENTITY) is a YugabyteDB-specific table-level setting that determines the amount of information that is available to the logical decoding plugin for `UPDATE` and `DELETE` events. More specifically, the `REPLICA IDENTITY` setting controls what (if any) information is available for the previous values of the table columns involved, whenever an `UPDATE` or `DELETE` event occurs. + +There are 4 possible values for `REPLICA IDENTITY`: + +* `CHANGE` - Emitted events for `UPDATE` operations will only contain the value of the changed column along with the primary key column with no previous values present. `DELETE` operations will only contain the previous value of the primary key column in the table. +* `DEFAULT` - The default behavior is that only `DELETE` events contain the previous values for the primary key columns of a table. For an `UPDATE` event, no previous values will be present and the new values will be present for all the columns in the table. +* `FULL` - Emitted events for `UPDATE` and `DELETE` operations contain the previous values of all columns in the table. +* `NOTHING` - Emitted events for `UPDATE` and `DELETE` operations do not contain any information about the previous value of any table column. + +{{< note title="Note">}} + +The pgoutput plugin does not support replica identity CHANGE. + +The PostgreSQL replica identity `INDEX` is not supported in YugabyteDB. + +{{< /note >}} + +For information on setting the replica identity of tables, refer to [Replica identity](../key-concepts/#replica-identity). 
+ +#### Message formats for replica identities + +Consider the following employee table into which a row is inserted, subsequently updated, and deleted: + +```sql +CREATE TABLE employee ( + employee_id INT PRIMARY KEY, + employee_name VARCHAR, + employee_dept TEXT); + +INSERT INTO employee VALUES (1001, 'Alice', 'Packaging'); + +UPDATE employee SET employee_name = 'Bob' WHERE employee_id = 1001; + +DELETE FROM employee WHERE employee_id = 1001; +``` + +{{< tabpane text=true >}} + + {{% tab header="CHANGE" lang="change" %}} + +**yboutput plugin** + + + + + + + + + + + + + + +
INSERT UPDATE DELETE
+
+{
+  "before": null,
+  "after": {
+    "employee_id": {
+        "value": 1001,
+        "set": true
+    },
+    "employee_name": {
+      "value": "Alice",
+      "set": true
+    },
+    "employee_dept": {
+        "value": "Packaging",
+        "set": true
+    }
+  },
+  "op": "c"
+}
+
+
+
+{
+  "before": null,
+  "after": {
+    "employee_id": {
+        "value": 1001,
+        "set": true
+    },
+    "employee_name": {
+      "value": "Bob",
+      "set": true
+    },
+    "employee_dept": null
+  },
+  "op": "u"
+}
+
+
+
+{
+  "before": {
+    "employee_id": {
+      "value": 1001,
+      "set": true
+    },
+    "employee_name": null,
+    "employee_dept": null
+  },
+  "after": null,
+  "op": "d"
+}
+
+
+ + {{% /tab %}} + + {{% tab header="DEFAULT" lang="default" %}} + +**yboutput plugin** + + + + + + + + + + + + + + +
INSERT UPDATE DELETE
+
+{
+  "before": null,
+  "after": {
+    "employee_id": {
+        "value": 1001,
+        "set": true
+    },
+    "employee_name": {
+      "value": "Alice",
+      "set": true
+    },
+    "employee_dept": {
+        "value": "Packaging",
+        "set": true
+    }
+  },
+  "op": "c"
+}
+
+
+
+{
+  "before": null,
+  "after": {
+    "employee_id": {
+        "value": 1001,
+        "set": true
+    },
+    "employee_name": {
+      "value": "Bob",
+      "set": true
+    },
+    "employee_dept": null
+  },
+  "op": "u"
+}
+
+
+
+{
+  "before": {
+    "employee_id": {
+      "value": 1001,
+      "set": true
+    },
+    "employee_name": null,
+    "employee_dept": null
+  },
+  "after": null,
+  "op": "d"
+}
+
+
+ +**pgoutput plugin** + + + + + + + + + + + + + + + +
INSERT UPDATE DELETE
+
+{
+  "before": null,
+  "after": {
+    "employee_id": 1001,
+    "employee_name": "Alice",
+    "employee_dept": "Packaging"
+  },
+  "op": "c"
+}
+
+
+
+{
+  "before": null,
+  "after": {
+    "employee_id": 1001,
+    "employee_name": "Bob",
+    "employee_dept": "Packaging"
+  },
+  "op": "u"
+}
+
+
+
+{
+  "before": {
+    "employee_id": 1001
+  },
+  "after": null,
+  "op": "d"
+}
+
+
+ + {{% /tab %}} + + {{% tab header="FULL" lang="full" %}} + +**yboutput plugin** + + + + + + + + + + + + + + +
INSERT UPDATE DELETE
+
+{
+  "before": null,
+  "after": {
+    "employee_id": {
+        "value": 1001,
+        "set": true
+    },
+    "employee_name": {
+      "value": "Alice",
+      "set": true
+    },
+    "employee_dept": {
+        "value": "Packaging",
+        "set": true
+    }
+  },
+  "op": "c"
+}
+
+
+
+{
+  "before": {
+    "employee_id": {
+        "value": 1001,
+        "set": true
+    },
+    "employee_name": {
+      "value": "Alice",
+      "set": true
+    },
+    "employee_dept": {
+      "value": "Packaging",
+      "set": true
+    }
+  },
+  "after": {
+    "employee_id": {
+        "value": 1001,
+        "set": true
+    },
+    "employee_name": {
+      "value": "Bob",
+      "set": true
+    },
+    "employee_dept": {
+      "value": "Packaging",
+      "set": true
+    }
+  },
+  "op": "u"
+}
+
+
+
+{
+  "before": {
+    "employee_id": {
+        "value": 1001,
+        "set": true
+    },
+    "employee_name": {
+      "value": "Bob",
+      "set": true
+    },
+    "employee_dept": {
+      "value": "Packaging",
+      "set": true
+    }
+  },
+  "after": null,
+  "op": "d"
+}
+
+
+ +**pgoutput plugin** + + + + + + + + + + + + + + +
INSERT UPDATE DELETE
+
+{
+  "before": null,
+  "after": {
+    "employee_id": 1001,
+    "employee_name": "Alice",
+    "employee_dept": "Packaging"
+  },
+  "op": "c"
+}
+
+
+
+{
+  "before": {
+    "employee_id": 1001,
+    "employee_name": "Alice",
+    "employee_dept": "Packaging"
+  },
+  "after": {
+    "employee_id": 1001,
+    "employee_name": "Bob",
+    "employee_dept": "Packaging"
+  },
+  "op": "u"
+}
+
+
+
+{
+  "before": {
+    "employee_id": 1001,
+    "employee_name": "Bob",
+    "employee_dept": "Packaging"
+  },
+  "after": null,
+  "op": "d"
+}
+
+
+ + {{% /tab %}} + + {{% tab header="NOTHING" lang="nothing" %}} + +**yboutput plugin** + + + + + + + + + + +
INSERT
+
+{
+  "before": null,
+  "after": {
+    "employee_id": {
+        "value": 1001,
+        "set": true
+    },
+    "employee_name": {
+      "value": "Alice",
+      "set": true
+    },
+    "employee_dept": {
+        "value": "Packaging",
+        "set": true
+    }
+  },
+  "op": "c"
+}
+
+
+ +**pgoutput plugin** + + + + + + + + + + +
INSERT
+
+{
+  "before": null,
+  "after": {
+    "employee_id": 1001,
+    "employee_name": "Alice",
+    "employee_dept": "Packaging"
+  },
+  "op": "c"
+}
+
+
+ + {{% /tab %}} + +{{< /tabpane >}} + +{{< note title="Note" >}} + +If `UPDATE` and `DELETE` operations will be performed on a table in publication without any replica identity (that is, `REPLICA IDENTITY` set to `NOTHING`), then the operations will cause an error on the publisher. For more details, see [Publication](https://www.postgresql.org/docs/15/logical-replication-publication.html). + +{{< /note >}} + +### *create* events + +The following example shows the value portion of a change event that the connector generates for an operation that creates data in the `customers` table: + +```output.json +{ + "schema": { --> 1 + "type": "struct", + "fields": [ + { + "type": "struct", + "fields": [ + { + "type": "int32", + "optional": false, + "field": "id" + }, + { + "type": "string", + "optional": false, + "field": "first_name" + }, + { + "type": "string", + "optional": false, + "field": "last_name" + }, + { + "type": "string", + "optional": false, + "field": "email" + } + ], + "optional": true, + "name": "YugabyteDB_server.inventory.customers.Value", --> 2 + "field": "before" + }, + { + "type": "struct", + "fields": [ + { + "type": "int32", + "optional": false, + "field": "id" + }, + { + "type": "string", + "optional": false, + "field": "first_name" + }, + { + "type": "string", + "optional": false, + "field": "last_name" + }, + { + "type": "string", + "optional": false, + "field": "email" + } + ], + "optional": true, + "name": "YugabyteDB_server.inventory.customers.Value", + "field": "after" + }, + { + "type": "struct", + "fields": [ + { + "type": "string", + "optional": false, + "field": "version" + }, + { + "type": "string", + "optional": false, + "field": "connector" + }, + { + "type": "string", + "optional": false, + "field": "name" + }, + { + "type": "int64", + "optional": false, + "field": "ts_ms" + }, + { + "type": "boolean", + "optional": true, + "default": false, + "field": "snapshot" + }, + { + "type": "string", + "optional": false, + "field": "db" + }, + { + "type": "string", + "optional": false, + "field": "schema" + }, + { + "type": "string", + "optional": false, + "field": "table" + }, + { + "type": "int64", + "optional": true, + "field": "txId" + }, + { + "type": "int64", + "optional": true, + "field": "lsn" + }, + { + "type": "int64", + "optional": true, + "field": "xmin" + } + ], + "optional": false, + "name": "io.debezium.connector.postgresql.Source", --> 3 + "field": "source" + }, + { + "type": "string", + "optional": false, + "field": "op" + }, + { + "type": "int64", + "optional": true, + "field": "ts_ms" + } + ], + "optional": false, + "name": "YugabyteDB_server.public.customers.Envelope" --> 4 + }, + "payload": { --> 5 + "before": null, --> 6 + "after": { --> 7 + "id": 1, + "first_name": "Anne", + "last_name": "Kretchmar", + "email": "annek@noanswer.org" + }, + "source": { --> 8 + "version": "2.5.2.Final", + "connector": "YugabyteDB", + "name": "YugabyteDB_server", + "ts_ms": 1559033904863, + "snapshot": true, + "db": "postgres", + "sequence": "[\"24023119\",\"24023128\"]", + "schema": "public", + "table": "customers", + "txId": 555, + "lsn": 24023128, + "xmin": null + }, + "op": "c", --> 9 + "ts_ms": 1559033904863 --> 10 + } +} +``` + +The following table describes the create event value fields. + +| Item | Field name | Description | +| :---- | :------ | :------------ | +| 1 | schema | The value's schema, which describes the structure of the value's payload. 
A change event's value schema is the same in every change event that the connector generates for a particular table. | +| 2 | name | In the schema section, each name field specifies the schema for a field in the value's payload.

`YugabyteDB_server.inventory.customers.Value` is the schema for the payload's _before_ and _after_ fields. This schema is specific to the customers table.

Names of schemas for _before_ and _after_ fields are of the form `logicalName.tableName.Value`, which ensures that the schema name is unique in the database. This means that when using the [Avro Converter](https://www.confluent.io/hub/confluentinc/kafka-connect-avro-converter), the resulting Avro schema for each table in each logical source has its own evolution and history. | +| 3 | name | `io.debezium.connector.postgresql.Source` is the schema for the payload's `source` field. This schema is specific to the YugabyteDB connector. The connector uses it for all events that it generates. | +| 4 | name | `YugabyteDB_server.inventory.customers.Envelope` is the schema for the overall structure of the payload, where `YugabyteDB_server` is the connector name, `public` is the schema, and `customers` is the table. | +| 5 | payload | The value's actual data. This is the information that the change event is providing.

It may appear that the JSON representations of the events are much larger than the rows they describe. This is because the JSON representation must include the schema and the payload portions of the message. However, by using the Avro converter, you can significantly decrease the size of the messages that the connector streams to Kafka topics. | +| 6 | before | An optional field that specifies the state of the row before the event occurred. When the op field is `c` for create, as it is in this example, the `before` field is `null` as this change event is for new content.
{{< note title="Note" >}}Whether or not this field is available is dependent on the [REPLICA IDENTITY](#replica-identity) setting for each table.{{< /note >}} | +| 7 | after | An optional field that specifies the state of the row after the event occurred. In this example, the `after` field contains the values of the new row's `id`, `first_name`, `last_name`, and `email` columns. | +| 8 | source | Mandatory field that describes the source metadata for the event. This field contains information that you can use to compare this event with other events, with regard to the origin of the events, the order in which the events occurred, and whether events were part of the same transaction. The source metadata includes:
  • Debezium version
  • Connector type and name
  • Database and table that contains the new row
  • Stringified JSON array of additional offset information. The first value is always the last committed LSN, the second value is always the current LSN. Either value may be null.
  • Schema name
  • If the event was part of a snapshot
  • ID of the transaction in which the operation was performed
  • Offset of the operation in the database log
  • Timestamp for when the change was made in the database
| +| 9 | op | Mandatory string that describes the type of operation that caused the connector to generate the event. In this example, `c` indicates that the operation created a row. Valid values are:
  • `c` = create
  • `r` = read (applies to only snapshots)
  • `u` = update
  • `d` = delete
| +| 10 | ts_ms | Optional field that displays the time at which the connector processed the event. The time is based on the system clock in the JVM running the Kafka Connect task.

In the `source` object, `ts_ms` indicates the time that the change was made in the database. By comparing the value for `payload.source.ts_ms` with the value for `payload.ts_ms`, you can determine the lag between the source database update and Debezium. |
+
+### *update* events
+
+The value of a change event for an update in the sample `customers` table has the same schema as a create event for that table. Likewise, the event value's payload has the same structure. However, the event value payload contains different values in an update event. The following is an example of a change event value in an event that the connector generates for an update in the `customers` table:
+
+
+
+```output.json
+{
+  "schema": { ... },
+  "payload": {
+    "before": null, --> 1
+    "after": { --> 2
+      "id": 1,
+      "first_name": "Anne Marie",
+      "last_name": "Kretchmar",
+      "email": "annek@noanswer.org"
+    },
+    "source": { --> 3
+      "version": "2.5.2.Final",
+      "connector": "YugabyteDB",
+      "name": "YugabyteDB_server",
+      "ts_ms": 1559033904863,
+      "snapshot": false,
+      "db": "postgres",
+      "schema": "public",
+      "table": "customers",
+      "txId": 556,
+      "lsn": 24023128,
+      "xmin": null
+    },
+    "op": "u", --> 4
+    "ts_ms": 1465584025523 --> 5
+  }
+}
+```
+
+The following table describes the update event value fields.
+
+| Item | Field name | Description |
+| :---- | :------ | :------------ |
+| 1 | before | An optional field that contains values that were in the row before the database commit. In this example, no previous value for any of the columns is present because the table's [REPLICA IDENTITY](#replica-identity) setting is `DEFAULT`. For an update event to contain the previous values of all columns in the row, you would have to change the `customers` table by running `ALTER TABLE customers REPLICA IDENTITY FULL`. |
+| 2 | after | An optional field that specifies the state of the row after the event occurred. In this example, the `first_name` value is now `Anne Marie`. |
+| 3 | source | Mandatory field that describes the source metadata for the event. The `source` field structure has the same fields as in a create event, but some values are different. The source metadata includes:
  • Debezium version
  • Connector type and name
  • Database and table that contains the new row
  • Schema name
  • If the event was part of a snapshot (always `false` for _update_ events)
  • ID of the transaction in which the operation was performed
  • Offset of the operation in the database log
  • Timestamp for when the change was made in the database
| +| 4 | op | Mandatory string that describes the type of operation. In an update event value, the `op` field value is `u`, signifying that this row changed because of an update. | +| 5 | ts_ms | Optional field that displays the time at which the connector processed the event. The time is based on the system clock in the JVM running the Kafka Connect task.

In the `source` object, `ts_ms` indicates the time that the change was made in the database. By comparing the value for `payload.source.ts_ms` with the value for `payload.ts_ms`, you can determine the lag between the source database update and Debezium. | + +{{< note title="Note" >}} + +Updating the columns for a row's primary/unique key changes the value of the row's key. When a key changes, Debezium outputs three events: a `DELETE` event and a [tombstone event](#tombstone-events) with the old key for the row, followed by an event with the new key for the row. Details are in the next section. + +{{< /note >}} + +### Primary key updates + +An `UPDATE` operation that changes a row's primary key field(s) is known as a primary key change. For a primary key change, in place of sending an `UPDATE` event record, the connector sends a `DELETE` event record for the old key and a `CREATE` event record for the new (updated) key. + +### _delete_ events + +The value in a _delete_ change event has the same `schema` portion as create and update events for the same table. The `payload` portion in a delete event for the sample `customers` table looks like this: + +```output.json +{ + "schema": { ... }, + "payload": { + "before": { --> 1 + "id": 1 + }, + "after": null, --> 2 + "source": { --> 3 + "version": "2.5.4.Final", + "connector": "YugabyteDB", + "name": "YugabyteDB_server", + "ts_ms": 1559033904863, + "snapshot": false, + "db": "postgres", + "schema": "public", + "table": "customers", + "txId": 556, + "lsn": 46523128, + "xmin": null + }, + "op": "d", --> 4 + "ts_ms": 1465581902461 --> 5 + } +} +``` + +The following table describes the delete event value fields. + +| Item | Field name | Description | +| :---- | :------ | :------------ | +| 1 | before | Optional field that specifies the state of the row before the event occurred. In a _delete_ event value, the `before` field contains the values that were in the row before it was deleted with the database commit.

In this example, the before field contains only the primary key column because the table's [REPLICA IDENTITY](#replica-identity) setting is `DEFAULT`. | +| 2 | after | Optional field that specifies the state of the row after the event occurred. In a delete event value, the `after` field is `null`, signifying that the row no longer exists. | +| 3 | source | Mandatory field that describes the source metadata for the event. In a delete event value, the source field structure is the same as for create and update events for the same table. Many source field values are also the same. In a delete event value, the `ts_ms` and `lsn` field values, as well as other values, might have changed. But the source field in a delete event value provides the same metadata:
  • Debezium version
  • Connector type and name
  • Database and table that contained the deleted row
  • Schema name
  • If the event was part of a snapshot (always false for delete events)
  • ID of the transaction in which the operation was performed
  • Offset of the operation in the database log
  • Timestamp for when the change was made in the database
| +| 4 | op | Mandatory string that describes the type of operation. The `op` field value is `d`, signifying that this row was deleted. | +| 5 | ts_ms | Optional field that displays the time at which the connector processed the event. The time is based on the system clock in the JVM running the Kafka Connect task.

In the `source` object, `ts_ms` indicates the time that the change was made in the database. By comparing the value for `payload.source.ts_ms` with the value for `payload.ts_ms`, you can determine the lag between the source database update and Debezium. | + +A _delete_ change event record provides a consumer with the information it needs to process the removal of this row. + +YugabyteDB connector events are designed to work with [Kafka log compaction](https://kafka.apache.org/documentation#compaction). Log compaction enables removal of some older messages as long as at least the most recent message for every key is kept. This lets Kafka reclaim storage space while ensuring that the topic contains a complete data set and can be used for reloading key-based state. + +#### Tombstone events + +When a row is deleted, the _delete_ event value still works with log compaction, because Kafka can remove all earlier messages that have that same key. However, for Kafka to remove all messages that have that same key, the message value must be `null`. To make this possible, the YugabyteDB connector follows a _delete_ event with a special tombstone event that has the same key but a `null` value. + +If the downstream consumer from the topic relies on tombstone events to process deletions and uses the [YBExtractNewRecordState transformer](../transformers/#ybextractnewrecordstate) (SMT), it is recommended to set the `delete.tombstone.handling.mode` SMT configuration property to `tombstone`. This ensures that the connector converts the delete records to tombstone events and drops the tombstone events. + +To set the property, follow the SMT configuration conventions. For example: + +```json +"transforms": "flatten", +"transforms.flatten.type": "io.debezium.connector.postgresql.transforms.yugabytedb.YBExtractNewRecordState", +"transforms.flatten.delete.tombstone.handling.mode": "tombstone" +``` + +### Updating or deleting a row inserted in the same transaction + +If a row is updated or deleted in the same transaction in which it was inserted, CDC cannot retrieve the before-image values for the UPDATE / DELETE event. If the replica identity is not CHANGE, then CDC will throw an error while processing such events. + +To handle such updates/deletes with a non-CHANGE replica identity, set the YB-TServer flag [cdc_send_null_before_image_if_not_exists](../../../../reference/configuration/yb-tserver/#cdc-send-null-before-image-if-not-exists) to true. With this flag enabled, CDC will send a null before-image instead of failing with an error. + + + +## Data type mappings + +The YugabyteDB connector represents changes to rows with events that are structured like the table in which the row exists. The event contains a field for each column value. How that value is represented in the event depends on the YugabyteDB data type of the column. The following sections describe how the connector maps YugabyteDB data types to a literal type and a semantic type in event fields. + +* `literal` type describes how the value is literally represented using Kafka Connect schema types: `INT8`, `INT16`, `INT32`, `INT64`, `FLOAT32`, `FLOAT64`, `BOOLEAN`, `STRING`, `BYTES`, `ARRAY`, `MAP`, and `STRUCT`. +* `semantic` type describes how the Kafka Connect schema captures the meaning of the field using the name of the Kafka Connect schema for the field. 
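+
+As an illustration of the distinction, consider a hypothetical `created_at` column of type `TIMESTAMP(6)`. Based on the temporal type mappings described later on this page, its field description in the event value's `schema` section would pair the literal type `int64` with the semantic type name `io.debezium.time.MicroTimestamp`, along the lines of the following sketch:
+
+```json
+{
+  "type": "int64",
+  "optional": false,
+  "name": "io.debezium.time.MicroTimestamp",
+  "field": "created_at"
+}
+```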
+ +If the default data type conversions do not meet your needs, you can [create a custom converter](https://debezium.io/documentation/reference/2.5/development/converters.html#custom-converters) for the connector. + +### Basic types + +| YugabyteDB data type| Literal type (schema type) | Semantic type (schema name) and Notes | +| :------------------ | :------------------------- | :-------------------------- | +| `BOOLEAN` | `BOOLEAN` | N/A | +| `BIT(1)` | `BOOLEAN` | N/A | +| `BIT( > 1)` | `BYTES` | `io.debezium.data.Bits`
The `length` schema parameter contains an integer that represents the number of bits. The resulting `byte[]` contains the bits in little-endian form and is sized to contain the specified number of bits. For example, `numBytes = n/8 + (n % 8 == 0 ? 0 : 1)` where `n` is the number of bits. | +| `BIT VARYING[(M)]` | `BYTES` | `io.debezium.data.Bits`
The `length` schema parameter contains an integer that represents the number of bits (2^31 - 1 in case no length is given for the column). The resulting `byte[]` contains the bits in little-endian form and is sized based on the content. The specified size (`M`) is stored in the length parameter of the `io.debezium.data.Bits` type. | +| `SMALLINT`, `SMALLSERIAL` | `INT16` | N/A | +| `INTEGER`, `SERIAL` | `INT32` | N/A | +| `BIGINT`, `BIGSERIAL`, `OID` | `INT64` | N/A | +| `REAL` | `FLOAT32` | N/A | +| `DOUBLE PRECISION` | `FLOAT64` | N/A | +| `CHAR [(M)]` | `STRING` | N/A | +| `VARCHAR [(M)]` | `STRING` | N/A | +| `CHARACTER [(M)]` | `STRING` | N/A | +| `CHARACTER VARYING [(M)]` | `STRING` | N/A | +| `TIMESTAMPTZ`, `TIMESTAMP WITH TIME ZONE` | `STRING` | `io.debezium.time.ZonedTimestamp`
A string representation of a timestamp with timezone information, where the timezone is GMT. | +| `TIMETZ`, `TIME WITH TIME ZONE` | `STRING` | `io.debezium.time.ZonedTime`
A string representation of a time value with timezone information, where the timezone is GMT. | +| `INTERVAL [P]` | `INT64` | `io.debezium.time.MicroDuration` (default)
The approximate number of microseconds for a time interval using the `365.25 / 12.0` formula for days per month average. | +| `INTERVAL [P]` | `STRING` | `io.debezium.time.Interval`
(when `interval.handling.mode` is `string`)
The string representation of the interval value that follows the pattern
P\Y\M\DT\H\M\S.
For example, `P1Y2M3DT4H5M6.78S`. | +| `BYTEA` | `BYTES` or `STRING` | n/a

Either the raw bytes (the default), a base64-encoded string, or a base64-url-safe-encoded String, or a hex-encoded string, based on the connector's `binary handling mode` setting.

Debezium only supports Yugabyte `bytea_output` configuration of value `hex`. For more information about PostgreSQL binary data types, see the [Binary data types](../../../../api/ysql/datatypes/type_binary/). | +| `JSON`, `JSONB` | `STRING` | `io.debezium.data.Json`
Contains the string representation of a JSON document, array, or scalar. | +| `UUID` | `STRING` | `io.debezium.data.Uuid`
Contains the string representation of a YugabyteDB UUID value. | +| `INT4RANGE` | `STRING` | Range of integer. | +| `INT8RANGE` | `STRING` | Range of `bigint`. | +| `NUMRANGE` | `STRING` | Range of `numeric`. | +| `TSRANGE` | `STRING` | n/a

The string representation of a timestamp range without a time zone. | +| `TSTZRANGE` | `STRING` | n/a

The string representation of a timestamp range with the local system time zone. | +| `DATERANGE` | `STRING` | n/a

The string representation of a date range. Always has an _exclusive_ upper bound. | +| `ENUM` | `STRING` | `io.debezium.data.Enum`

Contains the string representation of the YugabyteDB `ENUM` value. The set of allowed values is maintained in the allowed schema parameter. | + +### Temporal types + +Other than YugabyteDB's `TIMESTAMPTZ` and `TIMETZ` data types, which contain time zone information, how temporal types are mapped depends on the value of the `time.precision.mode` connector configuration property. The following sections describe these mappings: + +* `time.precision.mode=adaptive` +* `time.precision.mode=adaptive_time_microseconds` +* `time.precision.mode=connect` + +#### time.precision.mode=adaptive + +When the `time.precision.mode` property is set to `adaptive`, the default, the connector determines the literal type and semantic type based on the column's data type definition. This ensures that events _exactly_ represent the values in the database. + +| YugabyteDB data type | Literal type (schema type) | Semantic type (schema name) and Notes | +| :----- | :----- | :----- | +| `DATE` | `INT32` | `io.debezium.time.Date`
Represents the number of days since the epoch. | +| `TIME(1)`, `TIME(2)`, `TIME(3)` | `INT32` | `io.debezium.time.Time`
Represents the number of milliseconds past midnight, and does not include timezone information. | +| `TIME(4)`, `TIME(5)`, `TIME(6)` | `INT64` | `io.debezium.time.MicroTime`
Represents the number of microseconds past midnight, and does not include timezone information. | +| `TIMESTAMP(1)`, `TIMESTAMP(2)`, `TIMESTAMP(3)` | `INT64` | `io.debezium.time.Timestamp`
Represents the number of milliseconds since the epoch, and does not include timezone information. | +| `TIMESTAMP(4)`, `TIMESTAMP(5)`, `TIMESTAMP(6)`, `TIMESTAMP` | `INT64` | `io.debezium.time.MicroTimestamp`
Represents the number of microseconds since the epoch, and does not include timezone information. | + +#### time.precision.mode=adaptive_time_microseconds + +When the `time.precision.mode` configuration property is set to `adaptive_time_microseconds`, the connector determines the literal type and semantic type for temporal types based on the column's data type definition. This ensures that events _exactly_ represent the values in the database, except all `TIME` fields are captured as microseconds. + +| YugabyteDB data type | Literal type (schema type) | Semantic type (schema name) and Notes | +| :----- | :----- | :----- | +| `DATE` | `INT32` | `io.debezium.time.Date`
Represents the number of days since the epoch. | +| `TIME([P])` | `INT64` | `io.debezium.time.MicroTime`
Represents the time value in microseconds and does not include timezone information. YugabyteDB allows precision `P` to be in the range 0-6 to store up to microsecond precision. | +| `TIMESTAMP(1)` , `TIMESTAMP(2)`, `TIMESTAMP(3)` | `INT64` | `io.debezium.time.Timestamp`
Represents the number of milliseconds past the epoch, and does not include timezone information. | +| `TIMESTAMP(4)`, `TIMESTAMP(5)`, `TIMESTAMP(6)`, `TIMESTAMP` | `INT64` | `io.debezium.time.MicroTimestamp`
Represents the number of microseconds past the epoch, and does not include timezone information. | + +#### time.precision.mode=connect + +When the `time.precision.mode` configuration property is set to `connect`, the connector uses Kafka Connect logical types. This may be useful when consumers can handle only the built-in Kafka Connect logical types and are unable to handle variable-precision time values. However, because YugabyteDB supports microsecond precision, the events generated by a connector with the connect time precision mode results in a loss of precision when the database column has a fractional second precision value that is greater than 3. + +| YugabyteDB data type | Literal type (schema type) | Semantic type (schema name) and Notes | +| :----- | :----- | :----- | +| `DATE` | `INT32` | `org.apache.kafka.connect.data.Date`
Represents the number of days since the epoch. | +| `TIME([P])` | `INT64` | `org.apache.kafka.connect.data.Time`
Represents the number of milliseconds since midnight, and does not include timezone information. YugabyteDB allows `P` to be in the range 0-6 to store up to microsecond precision, though this mode results in a loss of precision when `P` is greater than 3. | +| `TIMESTAMP([P])` | `INT64` | `org.apache.kafka.connect.data.Timestamp`
Represents the number of milliseconds since the epoch, and does not include timezone information. YugabyteDB allows `P` to be in the range 0-6 to store up to microsecond precision, though this mode results in a loss of precision when `P` is greater than 3. | + +### TIMESTAMP type + +The `TIMESTAMP` type represents a timestamp without time zone information. Such columns are converted into an equivalent Kafka Connect value based on UTC. For example, the `TIMESTAMP` value "2018-06-20 15:13:16.945104" is represented by an `io.debezium.time.MicroTimestamp` with the value "1529507596945104" when `time.precision.mode` is not set to `connect`. + +The timezone of the JVM running Kafka Connect and Debezium does not affect this conversion. + +YugabyteDB supports using +/-infinite values in `TIMESTAMP` columns. These special values are converted to timestamps with value `9223372036825200000` in case of positive infinity or `-9223372036832400000` in case of negative infinity. This behavior mimics the standard behavior of the YugabyteDB JDBC driver. For reference, see the [`org.postgresql.PGStatement`](https://jdbc.postgresql.org/documentation/publicapi/org/postgresql/PGStatement.html) interface. + +### Decimal types + +The setting of the YugabyteDB connector configuration property `decimal.handling.mode` determines how the connector maps decimal types. + +#### decimal.handling.mode=double + +When the `decimal.handling.mode` property is set to `double`, the connector represents all `DECIMAL`, `NUMERIC` and `MONEY` values as Java double values and encodes them as shown in the following table. + +| YugabyteDB data type | Literal type (schema type) | Semantic type (schema name) and Notes | +| :----- | :----- | :----- | +| `NUMERIC[(M[,D])]` | `FLOAT64` | | +| `DECIMAL[(M[,D])]` | `FLOAT64` | | +| `MONEY[(M[,D])]` | `FLOAT64` | | + +#### decimal.handling.mode=string + +The last possible setting for the `decimal.handling.mode` configuration property is `string`. In this case, the connector represents `DECIMAL`, `NUMERIC` and `MONEY` values as their formatted string representation, and encodes them as shown in the following table. + +| YugabyteDB data type | Literal type (schema type) | Semantic type (schema name) and Notes | +| :----- | :----- | :----- | +| `NUMERIC[(M[,D])]` | `STRING` | | +| `DECIMAL[(M[,D])]` | `STRING` | | +| `MONEY[(M[,D])]` | `STRING` | | + +{{< note title="Note" >}} + +Decimal handling mode `precise` is not yet supported by `YugabyteDBConnector`. + +{{< /note >}} + +### HSTORE types + +The setting of the YugabyteDB connector configuration property `hstore.handling.mode` determines how the connector maps `HSTORE` values. + +When the `hstore.handling.mode` property is set to json (the default), the connector represents `HSTORE` values as string representations of `JSON` values and encodes them as shown in the following table. When the `hstore.handling.mode` property is set to map, the connector uses the `MAP` schema type for `HSTORE` values. + +| YugabyteDB data type | Literal type (schema type) | Semantic type (schema name) and Notes | +| :----- | :----- | :----- | +| `HSTORE` | `STRING` | `io.debezium.data.Json`

Example: output representation using the JSON converter is `{"key" : "val"}` | +| `HSTORE` | `MAP` | n/a

Example: output representation using the `JSON` converter is `{"key" : "val"}` | + +### Domain types + +YugabyteDB supports user-defined types that are based on other underlying types. When such column types are used, Debezium exposes the column's representation based on the full type hierarchy. + +{{< note title="Note" >}} + +Capturing changes in columns that use YugabyteDB domain types requires special consideration. When a column is defined to contain a domain type that extends one of the default database types and the domain type defines a custom length or scale, the generated schema inherits that defined length or scale. + +When a column is defined to contain a domain type that extends another domain type that defines a custom length or scale, the generated schema does not inherit the defined length or scale because that information is not available in the YugabyteDB driver's column metadata. + +{{< /note >}} + +### Network address types + +YugabyteDB has data types that can store IPv4, IPv6, and MAC addresses. It is better to use these types instead of plain text types to store network addresses. Network address types offer input error checking and specialized operators and functions. + +| YugabyteDB data type | Literal type (schema type) | Semantic type (schema name) and Notes | +| :----- | :----- | :----- | +| `INET` | `STRING` | n/a

IPv4 and IPv6 networks | +| `CIDR` | `STRING` | n/a

IPv4 and IPv6 hosts and networks | +| `MACADDR` | `STRING` | n/a

MAC addresses | +| `MACADDR8` | `STRING` | n/a

MAC addresses in EUI-64 format |
+
+
+
+## Setting up YugabyteDB
+
+### Setting up permissions
+
+Setting up a YugabyteDB server to run the connector requires a database user that can perform replications. Replication can be performed only by a database user that has appropriate permissions and only for a configured number of hosts.
+
+Although, by default, superusers have the necessary `REPLICATION` and `LOGIN` roles, as mentioned in [Security](#security), it is best not to provide the replication user with elevated privileges. Instead, create a Debezium user that has the minimum required privileges.
+
+**Prerequisites:**
+
+* YugabyteDB administrative permissions.
+
+**Procedure:**
+
+To provide a user with replication permissions, define a YugabyteDB role that has at least the `REPLICATION` and `LOGIN` permissions, and then grant that role to the user. For example:
+
+```sql
+CREATE ROLE <name> REPLICATION LOGIN;
+```
+
+### Setting privileges to enable the connector to create YugabyteDB publications when you use `pgoutput` or `yboutput`
+
+If you use `pgoutput` or `yboutput` as the logical decoding plugin, the connector must operate in the database as a user with specific privileges.
+
+The connector streams change events for YugabyteDB source tables from publications that are created for the tables. Publications contain a filtered set of change events that are generated from one or more tables. The data in each publication is filtered based on the publication specification. The specification can be created by the `YugabyteDB` database administrator or by the connector. To permit the connector to create publications and specify the data to replicate to them, the connector must operate with specific privileges in the database.
+
+There are several options for determining how publications are created. In general, it is best to manually create publications for the tables that you want to capture, before you set up the connector. However, you can configure your environment in a way that permits the connector to create publications automatically, and to specify the data that is added to them.
+
+Debezium uses include list and exclude list properties to specify how data is inserted in the publication. For more information about the options for enabling the connector to create publications, see `publication.autocreate.mode`.
+
+For the connector to create a YugabyteDB publication, it must run as a user that has the following privileges:
+
+* Replication privileges in the database to add the table to a publication.
+* `CREATE` privileges on the database to add publications.
+* `SELECT` privileges on the tables to copy the initial table data. Table owners automatically have `SELECT` permission for the table.
+
+To add tables to a publication, the user must be an owner of the table. But because the source table already exists, you need a mechanism to share ownership with the original owner. To enable shared ownership, create a YugabyteDB replication group, then add the existing table owner and the replication user to the group.
+
+Procedure
+
+1. Create a replication group.
+
+    ```sql
+    CREATE ROLE <replication_group>;
+    ```
+
+2. Add the original owner of the table to the group.
+
+    ```sql
+    GRANT REPLICATION_GROUP TO <original_owner>;
+    ```
+
+3. Add the Debezium replication user to the group.
+
+    ```sql
+    GRANT REPLICATION_GROUP TO <replication_user>;
+    ```
+
+4. Transfer ownership of the table to `<replication_group>`.
+ + ```sql + ALTER TABLE OWNER TO REPLICATION_GROUP; + ``` + +For Debezium to specify the capture configuration, the value of `publication.autocreate.mode` must be set to `filtered`. + +### Configuring YugabyteDB to allow replication with the connector host + +To enable Debezium to replicate YugabyteDB data, you must configure the database to permit replication with the host that runs the YugabyteDB connector. To specify the clients that are permitted to replicate with the database, add entries to the YugabyteDB host-based authentication file, `ysql_hba.conf`. For more information about the pg_hba.conf file, see the [YugabyteDB documentation](../../../../secure/authentication/host-based-authentication/#ysql-hba-conf-file). + +Procedure + +* Add entries to the `ysql_hba.conf` file to specify the connector hosts that can replicate with the database host. For example, + +```sh +--ysql_hba_conf_csv="local replication trust, local replication 127.0.0.1/32 trust, host replication ::1/128 trust" +``` + +### Supported YugabyteDB topologies + +As mentioned in the beginning, YugabyteDB (for all versions > 2024.1.1) supports logical replication slots. The YugabyteDB connector can communicate with the server by connecting to any node using the [YugabyteDB Java driver](../../../../drivers-orms/java/yugabyte-jdbc-reference/). Should any node fail, the connector receives an error and restarts. Upon restart, the connector connects to any available node and continues streaming from that node. + +### Setting up multiple connectors for same database server + +Debezium uses [replication slots](https://www.postgresql.org/docs/15/logicaldecoding-explanation.html#LOGICALDECODING-REPLICATION-SLOTS) to stream changes from a database. These replication slots maintain the current position in form of a LSN. This helps YugabyteDB keep the WAL available until it is processed by Debezium. A single replication slot can exist only for a single consumer or process - as different consumer might have different state and may need data from different position. + +Because a replication slot can only be used by a single connector, it is essential to create a unique replication slot for each connector. Although when a connector is not active, YugabyteDB may allow other connectors to consume the replication slot - which could be dangerous as it may lead to data loss as a slot will emit each change just once. + +In addition to replication slot, Debezium uses publication to stream events when using the `pgoutput`or `yboutput` plugin. Similar to replication slot, publication is at database level and is defined for a set of tables. Thus, you'll need a unique publication for each connector, unless the connectors work on same set of tables. For more information about the options for enabling Debezium to create publications, see `publication.autocreate.mode`. + +See `slot.name` and `publication.name` on how to set a unique replication slot name and publication name for each connector. + +## Deployment + +To deploy the connector, you install the connector archive, configure the connector, and start the connector by adding its configuration to Kafka Connect. + +**Prerequisites** + +* [Zookeeper](https://zookeeper.apache.org/), [Kafka](http://kafka.apache.org/), and [Kafka Connect](https://kafka.apache.org/documentation.html#connect) are installed. +* YugabyteDB is installed and is [set up to run the connector](#setting-up-yugabytedb). + +**Procedure** + +1. 
Download the latest [YugabyteDB connector plugin archive](https://github.com/yugabyte/debezium/releases/). +2. Extract the files into your Kafka Connect environment. +3. Add the directory with the JAR files to the [Kafka Connect `plugin.path`](https://kafka.apache.org/documentation/#connectconfigs). +4. Restart your Kafka Connect process to pick up the new JAR files. + +### Creating Kafka topics + +If [auto creation of topics](https://debezium.io/documentation/reference/2.5/configuration/topic-auto-create-config.html) is not enabled in the Kafka Connect cluster then you will need to create the following topics manually: + +* Topic for each table in the format `..` +* Heartbeat topic in the format `.`. The [topic.heartbeat.prefix](../yugabytedb-connector-properties/#topic-heartbeat-prefix) has a default value of `__debezium-heartbeat`. + +### Connector configuration example + +Following is an example of the configuration for a YugabyteDB connector that connects to a YugabyteDB server on port `5433` at `192.168.99.100`, whose topic prefix is `fulfillment`. Typically, you configure the YugabyteDB connector in a JSON file by setting the configuration properties available for the connector. + +You can choose to produce events for a subset of the schemas and tables in a database. Optionally, you can ignore, mask, or truncate columns that contain sensitive data, are larger than a specified size, or that you do not need. + +```output.json +{ + "name": "fulfillment-connector", --> 1 + "config": { + "connector.class": "io.debezium.connector.postgresql.YugabyteDBConnector", --> 2 + "database.hostname": "192.168.99.100:5433,192.168.1.10:5433,192.168.1.68:5433", --> 3 + "database.port": "5432", --> 4 + "database.user": "postgres", --> 5 + "database.password": "postgres", --> 6 + "database.dbname" : "postgres", --> 7 + "topic.prefix": "fulfillment", --> 8 + "table.include.list": "public.inventory" --> 9 + } +} +``` + +1. The name of the connector when registered with a Kafka Connect service. +2. The name of this YugabyteDB connector class. +3. The addresses of the YugabyteDB YB-TServer nodes. This can take a value of multiple addresses in the format `IP1:PORT1,IP2:PORT2,IP3:PORT3`. +4. The port number of the YugabyteDB server. +5. The name of the YugabyteDB user that has the [required privileges](#setting-up-yugabytedb). +6. The password for the YugabyteDB user that has the [required privileges](#setting-up-yugabytedb). +7. The name of the YugabyteDB database to connect to +8. The topic prefix for the YugabyteDB server/cluster, which forms a namespace and is used in all the names of the Kafka topics to which the connector writes, the Kafka Connect schema names, and the namespaces of the corresponding Avro schema when the Avro converter is used. +9. A list of all tables hosted by this server that this connector will monitor. This is optional, and there are other properties for listing the schemas and tables to include or exclude from monitoring. + +See the [complete list of YugabyteDB connector properties](../yugabytedb-connector-properties/) that can be specified in these configurations. + +You can send this configuration with a `POST` command to a running Kafka Connect service. The service records the configuration and starts one connector task that performs the following actions: + +* Connects to the YugabyteDB database. +* Reads the transaction log. +* Streams change event records to Kafka topics. 
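+
+For example, if Kafka Connect is listening on its default REST port (8083) on `localhost` and the preceding configuration is saved in a file named `fulfillment-connector.json` (the file name and address here are illustrative), you can register the connector with a request similar to the following:
+
+```sh
+curl -i -X POST \
+  -H "Accept: application/json" \
+  -H "Content-Type: application/json" \
+  http://localhost:8083/connectors/ \
+  -d @fulfillment-connector.json
+```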
+ +### Adding connector configuration + +To run the connector, create a connector configuration and add the configuration to your Kafka Connect cluster. + +**Prerequisites** + +* [YugabyteDB is configured to support logical replication.](#setting-up-yugabytedb) +* The YugabyteDB connector is installed. + +**Procedure** + +1. Create a configuration for the YugabyteDB connector. +2. Use the [Kafka Connect REST API](https://kafka.apache.org/documentation/#connect_rest) to add that connector configuration to your Kafka Connect cluster. + +#### Results + +After the connector starts, it performs a consistent snapshot of the YugabyteDB server databases that the connector is configured for. The connector then starts generating data change events for row-level operations and streaming change event records to Kafka topics. + +## Monitoring + +The YugabyteDB connector provides two metrics in addition to the built-in support for JMX metrics that Zookeeper, Kafka, and Kafka Connect provide: + +* [Snapshot metrics](#snapshot-metrics) provide information about connector operation while performing a snapshot. +* [Streaming metrics](#streaming-metrics) provide information about connector operation when the connector is capturing changes and streaming change event records. + +[Debezium monitoring documentation](https://debezium.io/documentation/reference/2.5/operations/monitoring.html#monitoring-debezium) provides details for how to expose these metrics by using JMX. + +### Snapshot metrics + +The **MBean** is `debezium.postgres:type=connector-metrics,context=snapshot,server=`. + +Snapshot metrics are not exposed unless a snapshot operation is active, or if a snapshot has occurred since the last connector start. + +The following table lists the snapshot metrics that are available. + +| Attributes | Type | Description | +| :--------- | :--- | :---------- | +| `LastEvent` | string | The last snapshot event that the connector has read. | +| `MilliSecondsSinceLastEvent` | long | The number of milliseconds since the connector has read and processed the most recent event. | +| `TotalNumberOfEventsSeen` | long | The total number of events that this connector has seen since last started or reset. | +| `NumberOfEventsFiltered` | long | The number of events that have been filtered by include/exclude list filtering rules configured on the connector. | +| `CapturedTables` | string[] | The list of tables that are captured by the connector. | +| `QueueTotalCapacity` | int | The length the queue used to pass events between the snapshotter and the main Kafka Connect loop. | +| `QueueRemainingCapacity` | int | The free capacity of the queue used to pass events between the snapshotter and the main Kafka Connect loop. | +| `TotalTableCount` | int | The total number of tables that are being included in the snapshot. | +| `RemainingTableCount` | int | The number of tables that the snapshot has yet to copy. | +| `SnapshotRunning` | boolean | Whether the snapshot was started. | +| `SnapshotPaused` | boolean | Whether the snapshot was paused. | +| `SnapshotAborted` | boolean | Whether the snapshot was aborted. | +| `SnapshotCompleted` | boolean | Whether the snapshot completed. | +| `SnapshotDurationInSeconds` | long | The total number of seconds that the snapshot has taken so far, even if not complete. Includes also time when snapshot was paused. | +| `SnapshotPausedDurationInSeconds` | long | The total number of seconds that the snapshot was paused. If the snapshot was paused several times, the paused time adds up. 
| +| `RowsScanned` | Map | Map containing the number of rows scanned for each table in the snapshot. Tables are incrementally added to the Map during processing. Updates every 10,000 rows scanned and upon completing a table. | +| `MaxQueueSizeInBytes` | long | The maximum buffer of the queue in bytes. This metric is available if `max.queue.size.in.bytes` is set to a positive long value. | +| `CurrentQueueSizeInBytes` | long | The current volume, in bytes, of records in the queue. | + +### Streaming metrics + +The **MBean** is `debezium.postgres:type=connector-metrics,context=streaming,server=`. + +The following table lists the streaming metrics that are available. + +| Attributes | Type | Description | +| :--------- | :--- | :---------- | +| `LastEvent` | string | The last streaming event that the connector has read. | +| `MilliSecondsSinceLastEvent` | long | The number of milliseconds since the connector has read and processed the most recent event. | +| `TotalNumberOfEventsSeen` | long | The total number of events that this connector has seen since the last start or metrics reset. | +| `TotalNumberOfCreateEventsSeen` | long | The total number of create events that this connector has seen since the last start or metrics reset. | +| `TotalNumberOfUpdateEventsSeen` | long | The total number of update events that this connector has seen since the last start or metrics reset. | +| `TotalNumberOfDeleteEventsSeen` | long | The total number of delete events that this connector has seen since the last start or metrics reset. | +| `NumberOfEventsFiltered` | long | The number of events that have been filtered by include/exclude list filtering rules configured on the connector. | +| `CapturedTables` | string[] | The list of tables that are captured by the connector. | +| `QueueTotalCapacity` | int | The length the queue used to pass events between the streamer and the main Kafka Connect loop. | +| `QueueRemainingCapacity` | int | The free capacity of the queue used to pass events between the streamer and the main Kafka Connect loop. | +| `Connected` | boolean | Flag that denotes whether the connector is currently connected to the database server. | +| `MilliSecondsBehindSource` | long | The number of milliseconds between the last change event's timestamp and the connector processing it. The values will incorporate any differences between the clocks on the machines where the database server and the connector are running. | +| `NumberOfCommittedTransactions` | long | The number of processed transactions that were committed. | +| `SourceEventPosition` | Map | The coordinates of the last received event. | +| `LastTransactionId` | string | Transaction identifier of the last processed transaction. | +| `MaxQueueSizeInBytes` | long | The maximum buffer of the queue in bytes. This metric is available if `max.queue.size.in.bytes` is set to a positive long value. | +| `CurrentQueueSizeInBytes` | long | The current volume, in bytes, of records in the queue. | + +## Advanced + +### Parallel streaming + +{{}}YugabyteDB also supports parallel streaming of a single table using logical replication. This means that you can start the replication for the table using parallel tasks, where each task polls on specific tablets. + +{{< note title="Important" >}} + +Parallel streaming is {{}}. To enable the feature, set the `ysql_enable_pg_export_snapshot` and `ysql_yb_enable_consistent_replication_from_hash_range` flags to true. 
+ +{{< /note >}} + +Use the following steps to configure parallel streaming using the YugabyteDB Connector. + +#### Step 1: Decide on the number of tasks + +This is important, as you need to create the same number of replication slots and publications. Note that the number of tasks cannot be greater than the number of tablets you have in the table to be streamed. + +For example, if you have a table `test` with 3 tablets, you will create 3 tasks. + +#### Step 2: Create publication and replication slots + +If you are creating a slot and publication yourself, ensure that a publication is created before you create the replication slot. + +If you do not want to create the publication and slots, decide on names so that the connector can create the publication and slots. + +```sql +CREATE PUBLICATION pb FOR TABLE test; +CREATE PUBLICATION pb2 FOR TABLE test; +CREATE PUBLICATION pb3 FOR TABLE test; + +CREATE_REPLICATION_SLOT rs LOGICAL yboutput; +CREATE_REPLICATION_SLOT rs2 LOGICAL yboutput; +CREATE_REPLICATION_SLOT rs3 LOGICAL yboutput; +``` + +#### Step 3: Get hash ranges + +Execute the following query in YSQL for a `table_name` and number of tasks to get the ranges. Replace `num_ranges` and `table_name` as appropriate. + +```sql +WITH params AS ( + SELECT + num_ranges::int AS num_ranges, + 'table_name'::text AS table_name +), +yb_local_tablets_cte AS ( + SELECT *, + COALESCE(('x' || encode(partition_key_start, 'hex'))::BIT(16)::INT, 0) AS partition_key_start_int, + COALESCE(('x' || encode(partition_key_end, 'hex'))::BIT(16)::INT, 65536) AS partition_key_end_int + FROM yb_local_tablets + WHERE table_name = (SELECT table_name FROM params) +), + +grouped AS ( + SELECT + yt.*, + NTILE((SELECT num_ranges FROM params)) OVER (ORDER BY partition_key_start_int) AS bucket_num + FROM yb_local_tablets_cte yt +), + +buckets AS ( + SELECT + bucket_num, + MIN(partition_key_start_int) AS bucket_start, + MAX(partition_key_end_int) AS bucket_end + FROM grouped + GROUP BY bucket_num +), +distinct_ranges AS ( + SELECT DISTINCT + b.bucket_start, + b.bucket_start || ',' || b.bucket_end AS partition_range + FROM grouped g + JOIN buckets b ON g.bucket_num = b.bucket_num +) +SELECT STRING_AGG(partition_range, ';' ORDER BY bucket_start) AS concatenated_ranges +FROM distinct_ranges; +``` + +The output is in a format that can be added as ranges in the connector configuration: + +```output + concatenated_ranges +--------------------------------- + 0,21845;21845,43690;43690,65536 +``` + +Copy the output as you will need it later on. + +#### Step 4: Build connector configuration + +Using the output from the preceding step, add the following additional configuration properties to the connector and deploy it: + +```json +{ + ... + "streaming.mode":"parallel", + "slot.names":"rs,rs2,rs3", + "publication.names":"pb,pb2,pb3", + "slot.ranges":"0,21845;21845,43690;43690,65536" + ... +} +``` + +If you have to take the snapshot, you'll need to add 2 other configuration properties: + +```json +{ + ... + "snapshot.mode":"initial", + "primary.key.hash.columns":"id" + ... +} +``` + +For information on parallel streaming configuration properties, refer to [Advanced connector properties](../yugabytedb-connector-properties/#streaming-mode). + +{{< warning title="Warning" >}} + +The order of slot names, publication names, and slot ranges is important as the assignment of ranges to slots is sequential, and you want the same range assigned to the same slot across restarts. + +The configuration for the connector shouldn't change on restart. 
+
+{{< /warning >}}
+
+{{< note title="Important" >}}
+
+Adding the configuration value for `primary.key.hash.columns` is important, as the connector needs the columns that form the hash part of the primary key. The connector relies on these column names to determine the appropriate range each task should poll.
+
+{{< /note >}}
+
+## Behavior when things go wrong
+
+Debezium is a distributed system that captures all changes in multiple upstream databases; it never misses or loses an event. When the system is operating normally or being managed carefully, Debezium provides _exactly once_ delivery of every change event record. If a fault does happen, the system does not lose any events. However, while it is recovering from the fault, the connector might emit some duplicate change events. In these abnormal situations, Debezium, like Kafka, provides _at least once_ delivery of change events.
+
+The rest of this section describes how Debezium handles various kinds of faults and problems.
+
+### Configuration and startup errors
+
+In the following situations, the connector fails when trying to start, reports an error or exception in the log, and stops running:
+
+* The connector's configuration is invalid.
+* The connector cannot successfully connect to YugabyteDB using the specified connection parameters.
+* The connector is restarting from a previously-recorded LSN and YugabyteDB no longer has that history available.
+
+In these cases, the error message has details about the problem and possibly a suggested workaround. After you correct the configuration or address the YugabyteDB problem, restart the connector.
+
+### YB-TServer becomes unavailable
+
+When the connector is running, the YB-TServer that it is connected to could become unavailable for any number of reasons. If this happens, the connector fails with an error and retries the connection to the YugabyteDB cluster. Because the connector uses the [YugabyteDB Java driver](../../../../drivers-orms/java/), the connection is handled internally and the connector restores the connection to another running node.
+
+The YugabyteDB connector externally stores the last processed offset in the form of a YugabyteDB LSN. After a connector restarts and connects to a server instance, the connector communicates with the server to continue streaming from that particular offset. This offset is available as long as the Debezium replication slot remains intact.
+
+{{< warning title="Warning" >}}
+
+Never drop a replication slot on the server or you will lose data.
+
+{{< /warning >}}
+
+### Cluster failures
+
+When the connector is running, the YugabyteDB cluster can become unavailable for any number of reasons. If that happens, the connector fails with an error and initiates retries, but because the entire cluster is unavailable, all retries fail.
+
+When the YugabyteDB cluster is back up, restart the connector to continue streaming where it left off.
+
+### Kafka Connect process stops gracefully
+
+Suppose that Kafka Connect is being run in distributed mode and a Kafka Connect process is stopped gracefully. Prior to shutting down that process, Kafka Connect migrates the process's connector tasks to another Kafka Connect process in that group. The new connector tasks start processing exactly where the prior tasks stopped. There is a short delay in processing while the connector tasks are stopped gracefully and restarted on the new processes.
+ +### Kafka Connect process crashes + +If the Kafka Connector process stops unexpectedly, any connector tasks it was running terminate without recording their most recently processed offsets. When Kafka Connect is being run in distributed mode, Kafka Connect restarts those connector tasks on other processes. However, YugabyteDB connectors resume from the last offset that was recorded by the earlier processes. This means that the new replacement tasks might generate some of the same change events that were processed just prior to the crash. The number of duplicate events depends on the offset flush period and the volume of data changes just before the crash. + +Because there is a chance that some events might be duplicated during a recovery from failure, consumers should always anticipate some duplicate events. Debezium changes are idempotent, so a sequence of events always results in the same state. + +In each change event record, Debezium connectors insert source-specific information about the origin of the event, including the YugabyteDB server's time of the event, the ID of the server transaction, and the position in the write-ahead log where the transaction changes were written. Consumers can keep track of this information, especially the LSN, to determine whether an event is a duplicate. + +### Kafka becomes unavailable + +As the connector generates change events, the Kafka Connect framework records those events in Kafka by using the Kafka producer API. Periodically, at a frequency that you specify in the Kafka Connect configuration, Kafka Connect records the latest offset that appears in those change events. If the Kafka brokers become unavailable, the Kafka Connect process that is running the connectors repeatedly tries to reconnect to the Kafka brokers. In other words, the connector tasks pause until a connection can be re-established, at which point the connectors resume exactly where they left off. + +### Connector is stopped for a duration + +If the connector is gracefully stopped, the database can continue to be used. Any changes are recorded in the YugabyteDB WAL. When the connector restarts, it resumes streaming changes where it left off. That is, it generates change event records for all database changes that were made while the connector was stopped. + +A properly configured Kafka cluster is able to handle massive throughput. Kafka Connect is written according to Kafka best practices, and given enough resources a Kafka Connect connector can also handle very large numbers of database change events. Because of this, after being stopped for a while, when a Debezium connector restarts, it is very likely to catch up with the database changes that were made while it was stopped. How quickly this happens depends on the capabilities and performance of Kafka and the volume of changes being made to the data in YugabyteDB. diff --git a/docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/_index.md b/docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/_index.md new file mode 100644 index 000000000000..f09ab349580f --- /dev/null +++ b/docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/_index.md @@ -0,0 +1,92 @@ +--- +title: CDC using YugabyteDB gRPC replication protocol +headerTitle: CDC using gRPC replication protocol +linkTitle: gRPC protocol +description: CDC using YugabyteDB gRPC replication protocol. 
+headcontent: Capture changes made to data in the database +tags: + feature: early-access +aliases: + - /preview/explore/change-data-capture/cdc-overview/ + - /preview/explore/change-data-capture/using-yugabytedb-grpc-replication/ +menu: + preview: + identifier: explore-change-data-capture-grpc-replication + parent: explore-change-data-capture + weight: 280 +type: indexpage +showRightNav: true +--- + +YugabyteDB CDC captures changes made to data in the database and streams those changes to external processes, applications, or other databases. CDC allows you to track and propagate changes in a YugabyteDB database to downstream consumers based on its Write-Ahead Log (WAL). YugabyteDB CDC uses Debezium to capture row-level changes resulting from INSERT, UPDATE, and DELETE operations in the upstream database, and publishes them as events to Kafka using Kafka Connect-compatible connectors. + +![What is CDC](/images/explore/cdc-overview-work.png) + + + +## Get started + +Get started with Yugabyte gRPC replication. + +For tutorials on streaming data to Kafka environments, including Amazon MSK, Azure Event Hubs, and Confluent Cloud, see [Kafka environments](/preview/tutorials/cdc-tutorials/). + +{{}} +[Get started](./cdc-get-started) using the connector. +{{}} + +## Monitoring + +You can monitor the activities and status of the deployed connectors using the http end points provided by YugabyteDB. + +{{}} +Learn how to [monitor](./cdc-monitor/) your CDC setup. +{{}} + +## YugabyteDB gRPC Connector + +To capture and stream your changes in YugabyteDB to an external system, you need a connector that can read the changes in YugabyteDB and stream it out. For this, you can use the YugabyteDB gRPC connector, which is based on the Debezium platform. The connector is deployed as a set of Kafka Connect-compatible connectors, so you first need to define a YugabyteDB connector configuration and then start the connector by adding it to Kafka Connect. + +{{}} +For reference documentation, see [YugabyteDB gRPC Connector](./debezium-connector-yugabytedb/). +{{}} + +## Known limitations + +* A single stream can only be used to stream data from one namespace only. +* There should be a primary key on the table you want to stream the changes from. +* CDC is not supported on tables that are also the target of xCluster replication (see issue {{}}). However, both CDC and xCluster can work simultaneously on the same source tables. + + When performing [switchover](../../../deploy/multi-dc/async-replication/async-transactional-switchover/) or [failover](../../../deploy/multi-dc/async-replication/async-transactional-failover/) on xCluster, if you are using CDC, remember to also reconfigure CDC to use the new primary universe. + +* Currently, CDC doesn't support schema evolution for changes that require table rewrites (for example, [ALTER TYPE](../../../api/ysql/the-sql-language/statements/ddl_alter_table/#alter-type-with-table-rewrite)), or DROP TABLE and TRUNCATE TABLE operations. +* YCQL tables aren't currently supported. Issue {{}}. +* [Composite types](../../../explore/ysql-language-features/data-types#composite-types) are currently not supported. Issue {{}}. + +* If a row is updated or deleted in the same transaction in which it was inserted, CDC cannot retrieve the before-image values for the UPDATE / DELETE event. If the replica identity is not CHANGE, then CDC will throw an error while processing such events. 
+ + To handle updates/deletes with a non-CHANGE replica identity, set the YB-TServer flag `cdc_send_null_before_image_if_not_exists` to true. With this flag enabled, CDC will send a null before-image instead of failing with an error. + +In addition, CDC support for the following features will be added in upcoming releases: + +* Support for point-in-time recovery (PITR) is tracked in issue {{}}. +* Support for transaction savepoints is tracked in issue {{}}. +* Support for enabling CDC on Read Replicas is tracked in issue {{}}. +* Support for schema evolution with before image is tracked in issue {{}}. + +## Learn more + +* [CDC architecture](../../../architecture/docdb-replication/change-data-capture/) +* [Examples of CDC usage and patterns](https://github.com/yugabyte/cdc-examples/tree/main) {{}} +* [Tutorials to deploy in different Kafka environments](/preview/tutorials/cdc-tutorials/) {{}} +* [Data Streaming Using YugabyteDB CDC, Kafka, and SnowflakeSinkConnector](https://www.yugabyte.com/blog/data-streaming-using-yugabytedb-cdc-kafka-and-snowflakesinkconnector/) {{}} +* [Unlock Azure Storage Options With YugabyteDB CDC](https://www.yugabyte.com/blog/unlocking-azure-storage-options-with-yugabytedb-cdc/) {{}} +* [Change Data Capture From YugabyteDB to Elasticsearch](https://www.yugabyte.com/blog/change-data-capture-cdc-yugabytedb-elasticsearch/) {{}} +* [Snowflake CDC: Publishing Data Using Amazon S3 and YugabyteDB](https://www.yugabyte.com/blog/snowflake-cdc-publish-data-using-amazon-s3-yugabytedb/) {{}} +* [Streaming Changes From YugabyteDB to Downstream Databases](https://www.yugabyte.com/blog/streaming-changes-yugabytedb-cdc-downstream-databases/) {{}} +* [Change Data Capture from YugabyteDB CDC to ClickHouse](https://www.yugabyte.com/blog/change-data-capture-cdc-yugabytedb-clickhouse/) {{}} +* [How to Run Debezium Server with Kafka as a Sink](https://www.yugabyte.com/blog/change-data-capture-cdc-run-debezium-server-kafka-sink/) {{}} +* [Change Data Capture Using a Spring Data Processing Pipeline](https://www.yugabyte.com/blog/change-data-capture-cdc-spring-data-processing-pipeline/) {{}} diff --git a/docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/cdc-get-started.md b/docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/cdc-get-started.md new file mode 100644 index 000000000000..98d6caa85c5e --- /dev/null +++ b/docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/cdc-get-started.md @@ -0,0 +1,597 @@ +--- +title: Get started with CDC in YugabyteDB +headerTitle: Get started +linkTitle: Get started +description: Get started with Change Data Capture in YugabyteDB. +headcontent: Get set up for using CDC in YugabyteDB +aliases: + - /preview/explore/change-data-capture/cdc-get-started/ + - /preview/explore/change-data-capture/using-yugabytedb-grpc-replication/cdc-get-started/ +menu: + preview: + parent: explore-change-data-capture-grpc-replication + identifier: cdc-get-started + weight: 10 +type: docs +--- + +## Set up YugabyteDB for CDC + +To set up YugabyteDB for use with the YugabyteDB gRPC connector, do the following: + +- Create a database stream ID. + + Before you use the YugabyteDB connector to retrieve data change events from YugabyteDB, create a stream ID using the `create_change_data_stream` yb-admin CLI command. Refer to the [yb-admin](../../../../admin/yb-admin/#change-data-capture-cdc-commands) CDC command reference for more details. 
+ + Note that CDC currently only supports YSQL tables. + +- Make sure the YB-Master and YB-TServer ports are open. + + The connector connects to the YB-Master and YB-TServer processes running on the YugabyteDB server. Make sure the ports on which these processes are running are open. The [default ports](../../../../reference/configuration/default-ports/) on which the processes run are `7100` and `9100` respectively. + +- Monitor available disk space. + + The change records for CDC are read from the WAL. YugabyteDB CDC maintains checkpoints internally for each DB stream ID and garbage collects the WAL entries if those have been streamed to the CDC clients. + + If CDC lags or is away for some time, the disk usage may grow and cause instability. To avoid this scenario, if a stream is inactive for a configured amount of time, the WAL is garbage collected. For more information, see [Important configuration settings](#important-configuration-settings). + +## Deploy the YugabyteDB gRPC Connector + +To stream data change events from YugabyteDB databases, follow these steps to deploy the YugabyteDB gRPC Connector: + +- Download the Connector. You can download the connector from the [GitHub releases](https://github.com/yugabyte/debezium-connector-yugabytedb/releases). +- Install the Connector: Extract and install the connector archive in your Kafka Connect environment. +- Configure the Connector: Modify the connector configuration to suit your specific requirements. +- Start the Connector: Add the connector's configuration to Kafka Connect and start the connector. + +For more details on connector configuration and deployment steps, refer to the [YugabyteDB gRPC Connector documentation](../debezium-connector-yugabytedb/). + +## Serialization + +{{< tabpane text=true >}} + + {{% tab header="Avro" lang="avro" %}} + +The YugabyteDB source connector also supports AVRO serialization with schema registry. To use AVRO serialization, add the following configuration to your connector: + +```json +{ + ... + "key.converter":"io.confluent.connect.avro.AvroConverter", + "key.converter.schema.registry.url":"http://host-url-for-schema-registry:8081", + "value.converter":"io.confluent.connect.avro.AvroConverter", + "value.converter.schema.registry.url":"http://host-url-for-schema-registry:8081" + ... +} +``` + + {{% /tab %}} + + {{% tab header="JSON" lang="json" %}} + +For JSON schema serialization, you can use the [Kafka JSON Serializer](https://mvnrepository.com/artifact/io.confluent/kafka-json-serializer) and equivalent de-serializer. After downloading and including the required `JAR` file in the Kafka-Connect environment, you can directly configure the CDC source and sink connectors to use this converter. + +For source connectors: + +```json +{ + ... + "value.serializer":"io.confluent.kafka.serializers.KafkaJsonSerializer", + ... +} +``` + +For sink connectors: + +```json +{ + ... + "value.deserializer":"io.confluent.kafka.serializers.KafkaJsonDeserializer", + ... +} +``` + + {{% /tab %}} + + {{% tab header="Protobuf" lang="protobuf" %}} + +To use the [protobuf](http://protobuf.dev) format for the serialization/de-serialization of the Kafka messages, you can use the [Protobuf Converter](https://www.confluent.io/hub/confluentinc/kafka-connect-protobuf-converter). After downloading and including the required `JAR` files in the Kafka-Connect environment, you can directly configure the CDC source and sink connectors to use this converter. 
+ +```json +{ + ..., + config: { + ..., + "key.converter": "io.confluent.connect.protobuf.ProtobufConverter", + "value.converter": "io.confluent.connect.protobuf.ProtobufConverter" + } +} +``` + + {{% /tab %}} + +{{< /tabpane >}} + +## Before image + +Before image refers to the state of the row _before_ the change event occurred. The YugabyteDB connector sends the before image of the row when it will be configured using a stream ID enabled with before image. It is populated for UPDATE and DELETE events. For INSERT events, before image doesn't make sense as the change record itself is in the context of new row insertion. + +Yugabyte uses multi-version concurrency control (MVCC) mechanism, and compacts data at regular intervals. The compaction or the history retention is controlled by the [history retention interval flag](../../../../reference/configuration/yb-tserver/#timestamp-history-retention-interval-sec). However, when before image is enabled for a database, YugabyteDB adjusts the history retention for that database based on the most lagging active CDC stream so that the previous row state is retained, and available. Consequently, in the case of a lagging CDC stream, the amount of space required for the database grows as more data is retained. On the other hand, older rows that are not needed for any of the active CDC streams are identified and garbage collected. + +Schema version that is currently being used by a CDC stream will be used to frame before and current row images. The before image functionality is disabled by default unless it is specifically turned on during the CDC stream creation. The [yb-admin](../../../../admin/yb-admin/#enabling-before-image) `create_change_data_stream` command can be used to create a CDC stream with before image enabled. + +{{< tip title="Use transformers" >}} + +Add a transformer in the source connector while using with before image; you can add the following property directly to your configuration: + +```properties +... +"transforms":"unwrap,extract", +"transforms.unwrap.type":"io.debezium.connector.yugabytedb.transforms.PGCompatible", +"transforms.unwrap.drop.tombstones":"false", +"transforms.extract.type":"io.debezium.transforms.ExtractNewRecordState", +"transforms.extract.drop.tombstones":"false", +... +``` + +{{< /tip >}} + +After you've enabled before image and are using the suggested transformers, the effect of an update statement with the record structure is as follows: + +```sql +UPDATE customers SET email = 'service@example.com' WHERE id = 1; +``` + +```output.json {hl_lines=[4,9,14,28]} +{ + "schema": {...}, + "payload": { + "before": { --> 1 + "id": 1, + "name": "Vaibhav Kushwaha", + "email": "vaibhav@example.com" + } + "after": { --> 2 + "id": 1, + "name": "Vaibhav Kushwaha", + "email": "service@example.com" + }, + "source": { --> 3 + "version": "1.9.5.y.11", + "connector": "yugabytedb", + "name": "dbserver1", + "ts_ms": -8881476960074, + "snapshot": "false", + "db": "yugabyte", + "sequence": "[null,\"1:5::0:0\"]", + "schema": "public", + "table": "customers", + "txId": "", + "lsn": "1:5::0:0", + "xmin": null + }, + "op": "u", --> 4 + "ts_ms": 1646149134341, + "transaction": null + } +} +``` + +The highlighted fields in the update event are: + +| Item | Field name | Description | +| :--- | :--------- | :---------- | +| 1 | before | The value of the row before the update operation. | +| 2 | after | Specifies the state of the row after the change event occurred. In this example, the value of `email` has changed to `service@example.com`. 
| +| 3 | source | Mandatory field that describes the source metadata for the event. This has the same fields as a create event, but some values are different. The source metadata includes:
  • Debezium version
  • Connector type and name
  • Database and table that contains the new row
  • Schema name
  • If the event was part of a snapshot (always `false` for update events)
  • ID of the transaction in which the operation was performed
  • Offset of the operation in the database log
  • Timestamp for when the change was made in the database
| +| 4 | op | In an update event, this field's value is `u`, signifying that this row changed because of an update. | + +### Before image modes + +YugabyteDB supports the following record types in the context of before image: + +- ALL +- FULL_ROW_NEW_IMAGE +- MODIFIED_COLUMNS_OLD_AND_NEW_IMAGES +- CHANGE + +Consider the following employee table into which a row is inserted, subsequently updated, and deleted: + +```sql +create table employee (employee_id int primary key, employee_name varchar, employee_dept text); + +insert into employee values(1001, 'Alice', 'Packaging'); + +update employee set employee_name='Bob' where employee_id=1001; + +delete from employee where employee_id=1001; +``` + +CDC records for update and delete statements without enabling before image (that is, the default record type `CHANGE`) would be as follows: + + + + + + + + + +
CDC record for UPDATE: CDC record for DELETE:
+ +
+{
+  "before": null,
+  "after": {
+    "employee_id": {
+        "value": 1001,
+        "set": true
+    },
+    "employee_name": {
+      "value": "Bob",
+      "set": true
+    },
+    "employee_dept": null
+  }
+  "op": "u"
+}
+
+
+ +
+{
+  "before": {
+    "employee_id": {
+      "value": 1001,
+      "set": true
+    },
+    "employee_name": null,
+    "employee_dept": null
+  },
+  "after": null,
+  "op": "d"
+}
+
+ +
+ +For record type `ALL`, the update and delete records look like the following: + + + + + + + + + +
CDC record for UPDATE: CDC record for DELETE:
+ +
+{
+  "before": {
+    "employee_id": {
+      "value": 1001,
+      "set": true
+    },
+    "employee_name": {
+      "value": "Alice",
+      "set": true
+    },
+    "employee_dept": {
+      "value": "Packaging",
+      "set": true
+    }
+  },
+  "after": {
+    "employee_id": {
+      "value": 1001,
+      "set": true
+    },
+    "employee_name": {
+      "value": "Bob",
+      "set": true
+    },
+    "employee_dept": {
+      "value": "Packaging",
+      "set": true
+    }
+  },
+  "op": "u"
+}
+
+
+ +
+{
+  "before": {
+    "employee_id": {
+      "value": 1001,
+      "set": true
+    },
+    "employee_name": {
+      "value": "Bob",
+      "set": true
+    },
+    "employee_dept": {
+      "value": "Packaging",
+      "set": true
+    }
+  },
+  "after": null,
+  "op": "d"
+}
+
+ +
+ +For record type `FULL_ROW_NEW_IMAGE`, the update and delete records look like the following: + + + + + + + + + +
CDC record for UPDATE: CDC record for DELETE:
+ +
+{
+  "before": null,
+  "after": {
+    "employee_id": {
+      "value": 1001,
+      "set": true
+    },
+    "employee_name": {
+      "value": "Bob",
+      "set": true
+    },
+    "employee_dept": {
+      "value": "Packaging",
+      "set": true
+    }
+  },
+  "op": "u"
+}
+
+
+ +
+{
+  "before": {
+    "employee_id": {
+      "value": 1001,
+      "set": true
+    },
+    "employee_name": {
+      "value": "Bob",
+      "set": true
+    },
+    "employee_dept": {
+      "value": "Packaging",
+      "set": true
+    }
+  },
+  "after": null,
+  "op": "d"
+}
+
+ +
+ +For record type `MODIFIED_COLUMNS_OLD_AND_NEW_IMAGES`, the update and delete records look like the following: + + + + + + + + + +
CDC record for UPDATE: CDC record for DELETE:
+ +
+{
+  "before": {
+    "employee_id": {
+      "value": 1001,
+      "set": true
+    },
+    "employee_name": {
+      "value": "Alice",
+      "set": true
+    }
+  },
+  "after": {
+    "employee_id": {
+      "value": 1001,
+      "set": true
+    },
+    "employee_name": {
+      "value": "Bob",
+      "set": true
+    }
+  },
+  "op": "u"
+}
+
+
+ +
+{
+  "before": {
+    "employee_id": {
+      "value": 1001,
+      "set": true
+    }
+  },
+  "after": null,
+  "op": "d"
+}
+
+ +
+ +### Updating or deleting a row inserted in the same transaction + +If a row is updated or deleted in the same transaction in which it was inserted, CDC cannot retrieve the before-image values for the UPDATE / DELETE event. If the before image record type is not CHANGE, then CDC will throw an error while processing such events. + +To handle such updates/deletes with a non-CHANGE before-image record type, set the YB-TServer flag [cdc_send_null_before_image_if_not_exists](../../../../reference/configuration/yb-tserver/#cdc-send-null-before-image-if-not-exists) to true. With this flag enabled, CDC will send a null before-image instead of failing with an error. + +## Schema evolution + +Table schema is needed for decoding and processing the changes and populating CDC records. Thus, older schemas are retained if CDC streams are lagging. Also, older schemas that are not needed for any of the existing active CDC streams are garbage collected. In addition, if before image is enabled, the schema needed for populating before image is also retained. The YugabyteDB source connector caches schema at the tablet level. This means that for every tablet the connector has a copy of the current schema for the tablet it is polling the changes for. As soon as a DDL command is executed on the source table, the CDC service emits a record with the new schema for all the tablets. The YugabyteDB source connector then reads those records and modifies its cached schema gracefully. + +{{< warning title="No backfill support" >}} + +If you alter the schema of the source table to add a default value for an existing column, the connector will NOT emit any event for the schema change. The default value will only be published in the records created after schema change is made. In such cases, it is recommended to alter the schema in your sinks to add the default value there as well. + +{{< /warning >}} + +Consider the following employee table (with schema version 0 at the time of table creation) into which a row is inserted, followed by a DDL resulting in schema version 1 and an update of the row inserted, and subsequently another DDL incrementing the schema version to 2. If a CDC stream created for the employee table lags and is in the process of streaming the update, corresponding schema version 1 is used for populating the update record. + +```sql +create table employee(employee_id int primary key, employee_name varchar); // schema version 0 + +insert into employee values(1001, 'Alice'); + +alter table employee add dept_id int; // schema version 1 + +update employee set dept_id=9 where employee_id=1001; // currently streaming record corresponding to this update + +alter table employee add dept_name varchar; // schema version 2 +``` + +Update CDC record would be as follows: + +```json +CDC record for UPDATE (using schema version 1): +{ + "before": { + "public.employee.Value":{ + "employee_id": { + "value": 1001 + }, + "employee_name": { + "employee_name": { + "value": { + "string": "Alice" + } + } + }, + "dept_id": null + } + }, + + "after": { "public.employee.Value":{ + "employee_id": { + "value": 1001 + }, + "employee_name": { + "employee_name": { + "value": { + "string": "Alice" + } + } + }, + "dept_id": { + "dept_id": { + "value": { + "int": 9 + } + } + } + } + }, + "op": "u" +} +``` + +## Colocated tables + +YugabyteDB supports streaming of changes from [colocated tables](../../../../additional-features/colocation/). The connector can be configured with regular configuration properties and deployed for streaming. 
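+
+For example, streaming from colocated tables only requires listing them in `table.include.list`, as with any other tables; no colocation-specific connector property is needed. In the following sketch the table names are illustrative and the remaining required connector properties are omitted:
+
+```json
+{
+  ...
+  "table.include.list": "public.orders,public.order_items"
+  ...
+}
+```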
+ +{{< note title="Note" >}} + +If a connector is already streaming a set of colocated tables from a database and if a new table is created in the same database, you cannot deploy a new connector for this newly created table. + +To stream the changes for the new table, delete the existing connector and deploy it again with the updated configuration property after adding the new table to `table.include.list`. + +{{< /note >}} + +## Important configuration settings + +You can use several flags to fine-tune YugabyteDB's CDC behavior. These flags are documented in the [Change data capture flags](../../../../reference/configuration/yb-tserver/#change-data-capture-cdc-flags) section of the YB-TServer reference and [Change data capture flags](../../../../reference/configuration/yb-master/#change-data-capture-cdc-flags) section of the YB-Master reference. The following flags are particularly important for configuring CDC: + +- [cdc_intent_retention_ms](../../../../reference/configuration/yb-tserver/#cdc-intent-retention-ms) - Controls retention of intents, in ms. If a request for change records is not received for this interval, un-streamed intents are garbage collected and the CDC stream is considered expired. This expiry is not reversible, and the only course of action would be to create a new CDC stream. The default value of this flag is 4 hours (4 x 3600 x 1000 ms). + +- [cdc_wal_retention_time_secs](../../../../reference/configuration/yb-master/#cdc-wal-retention-time-secs) - Controls how long WAL is retained, in seconds. This is irrespective of whether a request for change records is received or not. The default value of this flag is 4 hours (14400 seconds). + +- [cdc_snapshot_batch_size](../../../../reference/configuration/yb-tserver/#cdc-snapshot-batch-size) - This flag's default value is 250 records included per batch in response to an internal call to get the snapshot. If the table contains a very large amount of data, you may need to increase this value to reduce the amount of time it takes to stream the complete snapshot. You can also choose not to take a snapshot by modifying the [Debezium](../debezium-connector-yugabytedb/) configuration. + +- [cdc_max_stream_intent_records](../../../../reference/configuration/yb-tserver/#cdc-max-stream-intent-records) - Controls how many intent records can be streamed in a single `GetChanges` call. Essentially, intents of large transactions are broken down into batches of size equal to this flag, hence this controls how many batches of `GetChanges` calls are needed to stream the entire large transaction. The default value of this flag is 1680, and transactions with intents less than this value are streamed in a single batch. The value of this flag can be increased, if the workload has larger transactions and CDC throughput needs to be increased. Note that high values of this flag can increase the latency of each `GetChanges` call. + +## Retain data for longer durations + +The following flags control the retention of data required by CDC: + +- `cdc_wal_retention_time_secs` (default: 28800s) +- `cdc_intent_retention_ms` (default: 28800000ms) + +Starting from v2024.2.1, the default data retention for CDC is 8 hours, with support for maximum retention up to 24 hours. Prior to v2024.2.1, the default retention for CDC is 4 hours. + +{{< warning title="Important" >}} +When using ALL, FULL_ROW_NEW_IMAGE, or MODIFIED_COLUMNS_OLD_AND_NEW_IMAGES before image modes, CDC preserves previous row values for UPDATE and DELETE operations. 
This is done by retaining history for each row in the database through a suspension of the compaction process. Compaction is halted by setting retention barriers to prevent cleanup of history for those rows that are yet to be streamed to the CDC client. These retention barriers are dynamically managed and advanced only after the CDC events are streamed and explicitly acknowledged by the client, thus allowing compaction of streamed rows. + +The [cdc_intent_retention_ms](../../../../reference/configuration/yb-tserver/#cdc-intent-retention-ms) flag governs the maximum retention period (default 8 hours). Be aware that any interruption in CDC consumption for extended periods using these before image modes may degrade read performance. This happens because compaction activities are halted in the database when these before image modes are used, leading to inefficient key lookups as reads must traverse multiple SST files. +{{< /warning >}} + +## Content-based routing + +By default, the connector streams all of the change events that it reads from a table to a single static topic. However, you may want to re-route the events into different Kafka topics based on the event's content. You can do this using the Debezium `ContentBasedRouter`. But first, two additional dependencies need to be placed in the Kafka-Connect environment. These are not included in the official _yugabyte-debezium-connector_ for security reasons. These dependencies are: + +- Debezium routing SMT (Single Message Transform) +- Groovy JSR223 implementation (or other scripting languages that integrate with [JSR 223](https://jcp.org/en/jsr/detail?id=223)) + +To get started, you can rebuild the _yugabyte-debezium-connector_ image including these dependencies. The following shows what the Dockerfile would look like: + +```Dockerfile +FROM quay.io/yugabyte/debezium-connector:latest +# Add the required jar files for content based routing +RUN cd $KAFKA_CONNECT_YB_DIR && curl -so debezium-scripting-2.1.2.Final.jar https://repo1.maven.org/maven2/io/debezium/debezium-scripting/2.1.2.Final/debezium-scripting-2.1.2.Final.jar +RUN cd $KAFKA_CONNECT_YB_DIR && curl -so groovy-4.0.9.jar https://repo1.maven.org/maven2/org/apache/groovy/groovy/4.0.9/groovy-4.0.9.jar +RUN cd $KAFKA_CONNECT_YB_DIR && curl -so groovy-jsr223-4.0.9.jar https://repo1.maven.org/maven2/org/apache/groovy/groovy-jsr223/4.0.9/groovy-jsr223-4.0.9.jar +``` + +To configure a content-based router, you need to add the following lines to your connector configuration: + +```json +{ + ..., + config: { + ..., + "transforms": "router", + "transforms.router.type": "io.debezium.transforms.ContentBasedRouter", + "transforms.router.language": "jsr223.groovy", + "transforms.router.topic.expression": "", + } +} +``` + +The `` contains the logic for routing of the events. For example, if you want to re-route the events based on the `country` column in user's table, you may use a expression similar to the following: + +```regexp +value.after != null ? (value.after?.country?.value == '\''UK'\'' ? '\''uk_users'\'' : null) : (value.before?.country?.value == '\''UK'\'' ? '\''uk_users'\'' : null)" +``` + +This expression checks if the value of the row after the operation has the country set to `UK`. If _yes_, then the expression returns `uk_users`. If _no_, it returns _null_, and in case the row after the operation is _null_ (for example, in a "delete" operation), the expression also checks for the same condition on row values before the operation. 
The value that is returned determines which new Kafka Topic will receive the re-routed event. If it returns _null_, the event is sent to the default topic. + +For more advanced routing configuration, refer to the [Debezium documentation](https://debezium.io/documentation/reference/stable/transformations/content-based-routing.html) on content-based routing. + +## CDC with point-in-time recovery + +[Point-in-time recovery](../../../../manage/backup-restore/point-in-time-recovery/) (PITR) provides the ability to restore the data to a specific point in time, reflecting the state of the database at an earlier time. For databases and tables with CDC configured, you need to create new streams after the restore is complete, and start streaming from that point. Creating new streams ensures that you start streaming from the correct checkpoints. diff --git a/docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/cdc-monitor.md b/docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/cdc-monitor.md new file mode 100644 index 000000000000..aa555c991ac4 --- /dev/null +++ b/docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/cdc-monitor.md @@ -0,0 +1,132 @@ +--- +title: CDC monitoring in YugabyteDB +headerTitle: Monitor +linkTitle: Monitor +description: Monitor Change Data Capture in YugabyteDB. +headcontent: Monitor deployed CDC connectors +aliases: + - /preview/explore/change-data-capture/cdc-monitor/ + - /preview/explore/change-data-capture/using-yugabytedb-grpc-replication/cdc-monitor/ +menu: + preview: + parent: explore-change-data-capture-grpc-replication + identifier: cdc-monitor + weight: 20 +type: docs +--- + +## Status of the deployed connector + +You can use the rest APIs to monitor your deployed connectors. The following operations are available: + +* List all connectors + + ```sh + curl -X GET localhost:8083/connectors/ + ``` + +* Get a connector's configuration + + ```sh + curl -X GET localhost:8083/connectors/ + ``` + +* Get the status of all tasks with their configuration + + ```sh + curl -X GET localhost:8083/connectors//tasks + ``` + +* Get the status of the specified task + + ```sh + curl -X GET localhost:8083/connectors//tasks/ + ``` + +* Get the connector's status, and the status of its tasks + + ```sh + curl -X GET localhost:8083/connectors//status + ``` + +{{< note title= "How to check task status">}} + +It is possible that upon retrieving the status of the connector, it can show a RUNNING state while no data is being ingested to Kafka. As a connector consists of one or more tasks, the tasks have likely failed, independently from the connector. To verify this, you need to check for the status of the tasks. + +To show the status, use the following command: + +```sh +curl -X GET localhost:8083/connectors//status +``` + +You can also get the status of a specific task by passing in the task ID as follows: + +```sh +curl -X GET localhost:8083/connectors//tasks//status +``` + +{{< /note >}} + +## Metrics + +In addition to the built-in support for JMX metrics that Zookeeper, Kafka, and Kafka Connect provide, the YugabyteDB source connector provides the following types of metrics. + +### CDC service metrics + +Provide information about the CDC service in YugabyteDB. + +| Metric name | Type | Description | +| :---- | :---- | :---- | +| cdcsdk_change_event_count | `long` | The Change Event Count metric shows the number of records sent by the CDC Service. 
| +| cdcsdk_traffic_sent | `long` | Total traffic sent, in bytes. | +| cdcsdk_sent_lag_micros | `long` | The LAG metric is calculated by subtracting the timestamp of the latest record in the WAL of a tablet from the last record sent to the CDC connector. | +| cdcsdk_expiry_time_ms | `long` | The time left to read records from WAL is tracked by the Stream Expiry Time (ms). | + +CDC service metrics are calculated for every tablet that is of interest for a replication slot. In the scenario where you aren't interested in polling all the tables (and consequently all the tablets) in a database, the metrics are calculated considering the unpolled tablets until [cdcsdk_tablet_not_of_interest_timeout_secs](../../../../reference/configuration/yb-tserver/#cdcsdk-tablet-not-of-interest-timeout-secs) interval. + +### Snapshot metrics + +The **MBean** is `debezium.yugabytedb:type=connector-metrics,server=,task=,context=snapshot`. + +Snapshot metrics are only available when a snapshot operation is active, or if a snapshot has occurred since the last connector start. The following snapshot metrics are available: + +| Metric name | Type | Description | +| :---- | :---- | :---- | +| LastEvent | `string` | The last snapshot event that the connector has read. | +| MilliSecondsSinceLastEvent | `long` | The number of milliseconds since the connector has read and processed the most recent event. | +| TotalNumberOfEventsSeen | `long` | The total number of events that this connector has seen since the last start or metrics reset. | +| NumberOfEventsFiltered | `long` | The number of events that have been filtered by include/exclude list filtering rules configured on the connector. | +| QueueTotalCapacity | `int` | The length the queue used to pass events between the snapshotter and the main Kafka Connect loop. | +| QueueRemainingCapacity | `int` | The free capacity of the queue used to pass events between the snapshotter and the main Kafka Connect loop. | +| SnapshotRunning | `boolean` | Whether the snapshot is currently running. | +| SnapshotPaused | `boolean` | Whether the snapshot was paused one or more times. | +| SnapshotAborted | `boolean` | Whether the snapshot has been aborted. | +| SnapshotCompleted | `boolean` | Whether the snapshot has been completed. | +| SnapshotDurationInSeconds | `long` | The total number of seconds that the snapshot has taken so far, even if not complete. Includes also time when snapshot was paused.| +| SnapshotPausedDurationInSeconds | `long` | The total number of seconds that the snapshot was paused. If the snapshot was paused more than once, this is the cumulative pause time. | +| MaxQueueSizeInBytes | `long` | The maximum buffer of the queue, in bytes. This metric is available if `max.queue.size.in.bytes` is set to a positive long value. | +| CurrentQueueSizeInBytes | `long` | The current volume, in bytes, of records in the queue. | + +### Streaming metrics + +The **MBean** is `debezium.yugabytedb:type=connector-metrics,server=,task=,context=streaming`. + +The following streaming metrics are available: + +| Metric name | Type | Description | +| :---- | :---- | :---- | +| LastEvent | `string` | The last streaming event that the connector has read. | +| MilliSecondsSinceLastEvent | `long` | The number of milliseconds since the connector has read and processed the most recent event. | +| TotalNumberOfEventsSeen | `long` | The total number of events that this connector has seen since the last start or metrics reset. 
| +| TotalNumberOfCreateEventsSeen | `long` | The total number of create events that this connector has seen since the last start or metrics reset. | +| TotalNumberOfUpdateEventsSeen | `long` |The total number of update events that this connector has seen since the last start or metrics reset. | +| TotalNumberOfDeleteEventsSeen | `long` | The total number of delete events that this connector has seen since the last start or metrics reset. | +| NumberOfEventsFiltered | `long` | The total number of events (since the last start or metrics reset) that have been filtered by include/exclude list filtering rules configured on the connector. | +| QueueTotalCapacity | `int` | The length the queue used to pass events between the streamer and the main Kafka Connect loop. | +| QueueRemainingCapacity | `int` | The free capacity of the queue used to pass events between the streamer and the main Kafka Connect loop. | +| Connected | `boolean` | Indicates whether the connector is currently connected to the database server. | +| MilliSecondsBehindSource | `long` | The number of milliseconds between the last change event's timestamp and when the connector processed it. The value incorporates any differences between the clocks on the machines where the database server and the connector are running. | +| SourceEventPosition | `Map` | The coordinates of the last received event. | +| LastTransactionId | `string` | Transaction identifier of the last processed transaction. | +| MaxQueueSizeInBytes | `long` | The maximum buffer of the queue in bytes. This metric is available if `max.queue.size.in.bytes` is set to a positive long value. | +| CurrentQueueSizeInBytes | `long` | The current volume, in bytes, of records in the queue. | diff --git a/docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/debezium-connector-yugabytedb.md b/docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/debezium-connector-yugabytedb.md new file mode 100644 index 000000000000..483df571f179 --- /dev/null +++ b/docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/debezium-connector-yugabytedb.md @@ -0,0 +1,1293 @@ +--- +title: YugabyteDB gRPC Connector (Debezium) +headerTitle: YugabyteDB gRPC Connector +linkTitle: YugabyteDB gRPC Connector +description: YugabyteDB gRPC Connector is an open source distributed platform used to capture the changes in a database. +aliases: + - /preview/explore/change-data-capture/debezium-connector-yugabytedb-ysql + - /preview/explore/change-data-capture/debezium-connector + - /preview/explore/change-data-capture/debezium + - /preview/explore/change-data-capture/debezium-connector-postgresql + - /preview/explore/change-data-capture/debezium-connector-yugabytedb + - /preview/explore/change-data-capture/using-yugabytedb-grpc-replication/debezium-connector-yugabytedb/ +menu: + preview: + parent: explore-change-data-capture-grpc-replication + identifier: debezium-connector-yugabytedb + weight: 50 +type: docs +rightNav: + hideH4: true +--- + +The YugabyteDB gRPC Connector is based on the Debezium Connector, and captures row-level changes in a YugabyteDB database's schemas using the YugabyteDB gRPC replication protocol. + +## Connector compatibility + +The connector is compatible with the following versions of YugabyteDB. 
+ +| YugabyteDB | Connector | +| :--- | :--- | +| 2.14 | 1.9.5.y.3 | +| 2.16 | 1.9.5.y.24 | +| 2.18.2 | 1.9.5.y.33.2 | +| 2.20 | 1.9.5.y.220.4 | +| 2024.1 | dz.1.9.5.yb.grpc.2024.1 | +| 2024.2 | dz.1.9.5.yb.grpc.2024.2.2 | + +Starting with YugabyteDB v2024.1, the connector uses the following naming convention: + +```output +dz..yb.grpc.. +``` + +* Debezium Release - Debezium release the connector is based on +* YugabyteDB Version - version of YugabyteDB the connector works with +* Patch - patch release version, if applicable + +The connector is backward compatible with previous releases of YugabyteDB unless stated otherwise. For the latest YugabyteDB preview version, use the latest available connector. + +In addition, the connector supports Kafka Connect v2.x and later. + +{{< note title="Connector Class Name" >}} + +Starting with YugabyteDB v2024.1, the name of the connector class changed from + +`io.debezium.connector.yugabytedb.YugabyteDBConnector` + +to + +`io.debezium.connector.yugabytedb.YugabyteDBgRPCConnector` + +New deployments using connector version dz.1.9.5.yb.grpc.2024.1 and later need to use the new connector class. + +{{< /note >}} + +## Initial Snapshot and Continuous Streaming + +* Initial Snapshot: Upon its first connection to a YugabyteDB cluster, the connector takes a consistent snapshot of the configured tables. +* Continuous Streaming: After the snapshot, it continuously captures row-level changes (insertions, updates, and deletions) from the database. It then generates data change event records and streams them to Kafka topics. + +![What is CDC](/images/explore/cdc-overview-work.png) + +## Kafka integration + +For each table, the connector streams all generated events to a separate Kafka topic. Client applications and services can consume these data change event records from their respective topics. + +* CDC (Change Data Capture) Service: The Debezium connector leverages the CDC service APIs to read the changes from YugabyteDB. +* Event Production: For every row-level insert, update, and delete operation captured, the connector produces a corresponding change event, and sends it to separate Kafka topics dedicated to each table. +* Client Consumption: Applications read the Kafka topics corresponding to the database tables they are interested in and react to the row-level events received. + +## Failure tolerance + +The connector records the WAL position for each event as it reads changes and produces events. If the connector stops (due to communication failures, network problems, or crashes), it resumes reading the WAL from the last recorded position upon restart. This uses checkpoints managed on both the Kafka side and the YugabyteDB cluster. + +{{< tip title="Use UTF-8 encoding" >}} + +Debezium supports databases with UTF-8 character encoding only. With a single-byte character encoding, it's not possible to correctly process strings that contain extended ASCII code characters. + +{{< /tip >}} + +## How the connector works + +To optimally configure and run a Debezium connector, it is helpful to understand how the connector performs snapshots, streams change events, determines Kafka topic names, and uses metadata. + +### Security + +Currently, for any user that has the access to the cluster, authentication is done via that user. SSL support-based verification is provided for all the required keys and certificates are passed to the connector. + +{{< note title="Note" >}} + +Per-user CDC privileges are planned for a future release. 
+ +{{< /note >}} + +### Snapshots + +Most YugabyteDB servers are configured to not retain the complete history of the database in the WAL segments. This means that the YugabyteDB connector would be unable to see the entire history of the database by reading only the WAL. Consequently, the first time that the connector starts, it performs an initial consistent snapshot of the database. You can change this behavior by setting the `snapshot.mode` connector configuration property to a value other than initial. + +After the connector completes its initial snapshot, it continues streaming the changes. This ensures that the connector does not miss any updates. If the connector stops again for any reason, upon restart, the connector continues streaming changes from where it previously left off. + +Options for the `snapshot.mode` connector configuration property are as follows: + +| Option | Description | +| :--- | :--- | +| `never` | The connector never performs a snapshot. When a connector is configured in this way, the behaviour is as follows. If an offset is stored on the server, the connector will resume the streaming from that position. If no offset is stored on the server, the connector will bootstrap the tablets, meaning that it will stream data from that point onward only, and then start streaming. The `never` snapshot mode is useful when you know that your data of interest will be coming after the point you have deployed your connector. | +| `initial` | The connector performs a snapshot every time it starts. When a connector is configured this way, the behaviour is as follows. If the snapshot was stopped midway, the connector continues to take the snapshot from that position. If the snapshot was completed previously for the given stream ID, then the connector resumes streaming from the point checkpoints are stored on the server. | +| `initial_only` | The connector performs a database snapshot and stops before streaming any change event records. If the connector had started but did not complete a snapshot before stopping, the connector resumes the snapshot process from the point it stopped and stops when the snapshot completes. | + +### Streaming changes + +The YugabyteDB gRPC Connector typically spends the vast majority of its time streaming changes from the YugabyteDB server to which it is connected. + +The connector keeps polling for changes and whenever there is a change, the connector processes them, converts them to a specific format (Protobuf or JSON in the case of the Debezium plugin) and writes them on an output stream, which can then be consumed by clients. + +The connector acts as a YugabyteDB client. When the connector receives changes it transforms the events into Debezium create, update, or delete events that include the Log Sequence Number ([LSN](../../using-logical-replication/key-concepts/#lsn-type)) of the event. The connector forwards these change events in records to the Kafka Connect framework, which is running in the same process. The Kafka Connect process asynchronously writes the change event records in the same order in which they were generated to the appropriate Kafka topic. + +Periodically, Kafka Connect records the most recent offset in another Kafka topic. The offset indicates source-specific position information that Debezium includes with each event. + +When Kafka Connect gracefully shuts down, it stops the connectors, and flushes all event records to Kafka. 
Upon restart, the connector reads the last recorded offset from YugabyteDB server and then it sends a request to the YugabyteDB server to send the events starting just after that position. + +{{< note title="Schema changes" >}} + +The connector retrieves schema information as part of the change events which consist of the schema metadata for the table. When there is any schema change on the configured table, the connector will automatically receive an event pertaining to the change and it will update its internal schema. + +{{< /note >}} + +### Topic names + +By default, the YugabyteDB gRPC connector writes change events for all INSERT, UPDATE, and DELETE operations that occur in a table to a single Apache Kafka topic that is specific to that table. The connector names change event topics as _serverName.schemaName.tableName_. + +The components of a topic name are as follows: + +* _serverName_ - the logical name of the connector, as specified by the `database.server.name` configuration property. +* _schemaName_ - the name of the database schema in which the change event occurred. +* _tableName_ - the name of the database table in which the change event occurred. + +For example, suppose that `dbserver` is the logical server name in the configuration for a connector that is capturing changes in a YugabyteDB installation that has a `yugabyte` database and an `inventory` schema that contains four tables: `products`, `products_on_hand`, `customers`, and `orders`. The connector would stream records to these four Kafka topics: + +* `dbserver.inventory.products` +* `dbserver.inventory.products_on_hand` +* `dbserver.inventory.customers` +* `dbserver.inventory.orders` + +Now suppose that the tables are not part of a specific schema but were created in the default public YugabyteDB schema. The names of the Kafka topics would be: + +* `dbserver.public.products` +* `dbserver.public.products_on_hand` +* `dbserver.public.customers` +* `dbserver.public.orders` + +The connector applies similar naming conventions to label its [transaction metadata topics](#transaction-metadata). + +If the default topic names don't meet your requirements, you can configure custom topic names. To configure custom topic names, you specify regular expressions in the logical topic routing SMT. For more information about using the logical topic routing SMT to customize topic naming, see the Debezium documentation on [Topic routing](https://debezium.io/documentation/reference/stable/transformations/topic-routing.html#topic-routing). + +### Meta information + +In addition to the data change event, each record produced by the connector contains some metadata. Metadata includes information about which tablet caused the change event to occur, the commit time, table, database, offset of the event, for example: + +```output.json +"source": { + "version": "1.9.5.y.21", + "connector": "yugabytedb", + "name": "dbserver1", + "snapshot": "false", + "db": "yugabyte", + "sequence": "[null,\"1:4::0:0\"]", + "schema": "public", + "table": "customers", + "txId": "", + "lsn": "1:4::0:0" +} +``` + +* `version` is the version number of the connector which is being used. +* `name` always defaults to the `database.server.name` connector configuration property. +* `db` is the database name on which the connector is configured. +* `sequence` and `lsn` indicate the offset to which the change event belongs. +* `schema` is the schema name to which the table belongs. +* `table` is the name of the table to which the change event belongs. 
+* `txId` contains the transaction ID if the change event is a part of any transaction; otherwise it is empty. + +### Transaction metadata + +Debezium can generate events that represent transaction boundaries and that enrich data change event messages. + +{{< note title="Note" >}} + +Debezium registers and receives metadata only for transactions that occur _after you deploy the connector_. Metadata for transactions that occur before you deploy the connector is not available. + +{{< /note >}} + +For every transaction BEGIN and END, Debezium generates an event containing the following fields: + +* `status` - BEGIN or END +* `id` - string representation of unique transaction identifier +* `event_count` (for END events) - total number of events emitted by the transaction +* `data_collections` (for END events) - an array of pairs of `data_collection` and `event_count` that provides the number of events emitted by changes originating from given data collection + +For example: + +```output.json +{ + "status": "BEGIN", + "id": "571", + "event_count": null, + "data_collections": null +} + +{ + "status": "END", + "id": "571", + "event_count": 2, + "data_collections": [ + { + "data_collection": "s1.a", + "event_count": 1 + }, + { + "data_collection": "s2.a", + "event_count": 1 + } + ] +} +``` + +Unless overridden via the `transaction.topic` option, transaction events are written to the topic and named _database.server.name_.transaction. + +#### Change data event enrichment + +When transaction metadata is enabled, the data message envelope is enriched with a new transaction field. This field provides information about every event in the form of a composite of fields: + +* `id` - string representation of unique transaction identifier +* `total_order` - absolute position of the event among all events generated by the transaction +* `data_collection_order` - the per-data collection position of the event among all events emitted by the transaction + +For example: + +```output.json +{ + "before": null, + "after": { + "pk": "2", + "aa": "1" + }, + "source": { + ... + }, + "op": "c", + "ts_ms": "1580390884335", + "transaction": { + "id": "571", + "total_order": "1", + "data_collection_order": "1" + } +} +``` + +## Data change events + +The connector generates a data change event for each row-level INSERT, UPDATE, and DELETE operation. Each event contains a key and a value. The structure of the key and the value depends on the table that was changed. + +Debezium and Kafka Connect are designed around continuous streams of event messages. However, the structure of these events may change over time, which can be difficult for consumers to handle. To address this, each event contains the schema for its content. This makes each event self-contained. + +The following skeleton JSON shows the basic four parts of a change event. However, how you configure the Kafka Connect converter that you choose to use in your application determines the representation of these four parts in change events. A schema field is in a change event only when you configure the converter to produce it. Likewise, the event key and event payload are in a change event only if you configure a converter to produce it. + +If you use the JSON converter and you configure it to produce all four basic change event parts, change events have the following structure: + +```output.json +{ + "schema": { --> 1 + ... + }, + "payload": { --> 2 + ... + }, + "schema": { --> 3 + ... + }, + "payload": { --> 4 + ... 
+ } +} +``` + +| Item | Field name | Description | +| :--: | :--------- | :---------- | +| 1 | schema | The first `schema` field is part of the event key. It specifies a Kafka Connect schema that describes what is in the event key's `payload` portion. In other words, the first `schema` field describes the structure of the primary key, or the unique key if the table does not have a primary key, for the table that was changed. | +| 2 | payload | The first `payload` field is part of the event key. It has the structure described by the previous `schema` field and it contains the key for the row that was changed. | +| 3 | schema | The second `schema` field is part of the event value. It specifies the Kafka Connect schema that describes what is in the event value's `payload` portion. In other words, the second `schema` describes the structure of the row that was changed. Typically, this schema contains nested schemas. | +| 4 | payload | The second `payload` field is part of the event value. It has the structure described by the previous `schema` field and it contains the actual data for the row that was changed. | + +{{< warning title="Naming conflicts due to invalid characters" >}} + +The YugabyteDB gRPC connector ensures that all Kafka Connect schema names adhere to the [Avro schema name format](http://avro.apache.org/docs/current/spec.html#names). This means that the logical server name must start with a Latin letter or an underscore (a-z, A-Z, or \_). Each remaining character in the logical server name and each character in the schema and table names must be a Latin letter, a digit, or an underscore (a-z, A-Z, 0-9, or \_). Invalid characters are replaced with an underscore character. + +This can lead to unexpected conflicts if the logical server name, a schema name, or a table name contains invalid characters, in the event that the only characters that distinguish names from one another are invalid, and thus replaced with underscores. + +{{< /warning >}} + +### Change event keys + +For a given table, the change event's key has a structure that contains a field for each column in the primary key of the table at the time the event was created. + +Consider a `customers` table defined in the `public` database schema and the example of a change event key for that table: + +```sql +CREATE TABLE customers ( + id SERIAL, + name VARCHAR(255), + email TEXT, + PRIMARY KEY(id) +); +``` + +#### Example change event key + +If the `database.server.name` connector configuration property has the value `dbserver1`, every change event for the `customers` table while it has this definition has the same key structure, which in JSON looks like the following: + +```output.json +{ + "schema": { --> 1 + "type": "struct", + "name": "dbserver1.public.customers.Key", --> 2 + "optional": false, --> 3 + "fields": [ --> 4 + { + "name": "id", + "index": "0", + "schema": { + "type": "INT32", + "optional": "false" + } + } + ] + }, + "payload": { --> 5 + "id": { + "value":"1" + } + }, +} +``` + +This change event key has the following structure: + +| Item | Field name | Description | +| :--- | :--------- | :---------- | +| 1 | schema | The schema portion of the key specifies a Kafka Connect schema that describes what is in the key's `payload` portion. | +| 2 | dbserver1.public.
customers.Key | Name of the schema that defines the structure of the key's payload. This schema describes the structure of the primary key for the table that was changed. Key schema names have the format _connector-name.database-name.table-name.Key_. In this example:
`dbserver1` is the logical name of the server that generated this event.
`public` is the schema that contains the table that was changed.
`customers` is the table which was updated. | +| 3 | optional | Indicates whether the event key must contain a value in its `payload` field. In this example, a value in the key's payload is required. | +| 4 | fields | Specifies each field that is expected in the payload, including each field's name, index, and schema. | +| 5 | payload | Contains the key for the row for which this change event was generated. In this example, the key, contains a single `id` field whose value is `1`. | + +{{< note title="Note" >}} + +Although the `column.exclude.list` and `column.include.list` connector configuration properties allow you to capture only a subset of table columns, all columns in a primary or unique key are always included in the event's key. + +{{< /note >}} + +### Change event values + +The value in a change event is a bit more complicated than the key. Like the key, the value has a `schema` section and a `payload` section. The `schema` section contains the schema that describes the `Envelope` structure of the `payload` section, including its nested fields. Change events for operations that create, update or delete data all have a value payload with an envelope structure. + +### Create events + +For a given table, the change event has a structure that contains a field for each column of the table at the time the event was created. + +Now suppose a row is inserted to the table: + +```sql +INSERT INTO customers (name, email) VALUES ('Vaibhav Kushwaha', 'vaibhav@example.com'); +``` + +The following example shows the value portion of a change event that the connector generates for an operation that creates data in the `customers` table: + +
+ +
+ Click to expand the JSON event. + +```output.json +{ + "schema": { --> 1 + "type": "struct", + "fields": [ + { + "type": "struct", + "fields": [ --> 2 + { + "type": "int32", + "optional": false, + "field": "id" + }, + { + "type": "string", + "optional": true, + "field": "name" + }, + { + "type": "string", + "optional": true, + "field": "email" + } + ], + "optional": true, + "name": "dbserver1.public.customers.Value", + "field": "before" + }, + { + "type": "struct", + "fields": [ + { + "type": "int32", + "optional": false, + "field": "id" + }, + { + "type": "string", + "optional": true, + "field": "name" + }, + { + "type": "string", + "optional": true, + "field": "email" + } + ], + "optional": true, + "name": "dbserver1.public.customers.Value", + "field": "after" + }, + { + "type": "struct", + "fields": [ + { + "type": "string", + "optional": false, + "field": "version" + }, + { + "type": "string", + "optional": false, + "field": "connector" + }, + { + "type": "string", + "optional": false, + "field": "name" + }, + { + "type": "int64", + "optional": false, + "field": "ts_ms" + }, + { + "type": "string", + "optional": true, + "name": "io.debezium.data.Enum", + "version": 1, + "parameters": { + "allowed": "true,last,false" + }, + "default": "false", + "field": "snapshot" + }, + { + "type": "string", + "optional": false, + "field": "db" + }, + { + "type": "string", + "optional": true, + "field": "sequence" + }, + { + "type": "string", + "optional": false, + "field": "schema" + }, + { + "type": "string", + "optional": false, + "field": "table" + }, + { + "type": "string", + "optional": true, + "field": "txId" + }, + { + "type": "string", + "optional": true, + "field": "lsn" + }, + { + "type": "int64", + "optional": true, + "field": "xmin" + } + ], + "optional": false, + "name": "io.debezium.connector.postgresql.Source", + "field": "source" + }, + { + "type": "string", + "optional": false, + "field": "op" + }, + { + "type": "int64", + "optional": true, + "field": "ts_ms" + }, + { + "type": "struct", + "fields": [ + { + "type": "string", + "optional": false, + "field": "id" + }, + { + "type": "int64", + "optional": false, + "field": "total_order" + }, + { + "type": "int64", + "optional": false, + "field": "data_collection_order" + } + ], + "optional": true, + "field": "transaction" + } + ], + "optional": false, + "name": "dbserver1.public.customers.Envelope" + }, + "payload": { --> 3 + "before": null, --> 4 + "after": { --> 5 + "id": { + "value":1 + }, + "name": { + "value":"Vaibhav Kushwaha" + }, + "email": { + "value":"vaibhav@example.com" + } + }, + "source": { --> 6 + "version": "1.9.5.y.11", + "connector": "yugabytedb", + "name": "dbserver1", + "ts_ms": -8898156066356, + "snapshot": "false", + "db": "yugabyte", + "sequence": "[null,\"1:4::0:0\"]", + "schema": "public", + "table": "customers", + "txId": "", + "lsn": "1:4::0:0", + "xmin": null + }, + "op": "c", --> 7 + "ts_ms": 1646145062480, --> 8 + "transaction": null + } +} +``` + +
+ +
+ +The fields in the create event are as follows: + +| Item | Field name | Description | +| :--- | :--------- | :---------- | +| 1 | schema | Specifies a Kafka Connect schema that describes what is in the event's payload portion. | +| 2 | fields | Fields specified in the schema of the table. | +| 3 | payload | The key for the row for which this change event was generated. | +| 4 | before | Optional field specifying the state of the row before the event occurred. This field is null when the `op` field is `c` for create, as in this example, because the change event is for new content. | +| 5 | after | Optional field specifying the state of the row after the event occurred. In this example, the field contains the values of the new row's `id`, `name`, and `email` columns. | +| 6 | source | Mandatory field describing the source metadata for the event. This field contains information you can use to compare this event with other events, with regard to the origin of the events, the order in which the events occurred, and whether the events were part of the same transaction. The source metadata includes:
  • Debezium version
  • Connector type and name
  • Database and table containing the new row
  • Stringified JSON array of additional offset information, where the first value is always the last committed LSN, and the second value is always the current LSN. Either value may be null.
  • Schema name
  • If the event was part of a snapshot
  • ID of the transaction in which the operation was performed
  • Offset of the operation in the database log
  • Timestamp for when the change was made in the database
| +| 7 | op | Mandatory string that describes the type of operation that caused the connector to generate the event. In this example, `c` indicates that the operation created a row. Valid values are:
  • `c` = create
  • `r` = read (applies to only snapshots)
  • `u` = update
  • `d` = delete
| +| 8 | ts_ms | Optional field containing the time at which the connector processed the event. The time is based on the system clock in the JVM running the Kafka Connect task.
In the source object, `ts_ms` indicates the time that the change was made in the database. By comparing the value for `payload.source.ts_ms` with the value for `payload.ts_ms`, you can determine the lag between the source database update and Debezium. | + +### Update events + +The value of a change event for an update in the sample `customers` table has the same schema as a create event for that table. Likewise, the event value's payload has the same structure. However, the event value payload contains different values in an update event. + +Note that updating the columns for a row's **primary/unique key** changes the value of the row's key. When a key changes, Debezium outputs three events: a DELETE event and a [tombstone event](#tombstone-events) with the old key for the row, followed by an event with the new key for the row. See [Primary key updates](#primary-key-updates) for details. + +The following example shows a change event value in an event that the connector generates for an update in the `customers` table: + +```sql +UPDATE customers SET email = 'service@example.com' WHERE id = 1; +``` + +The update event is as follows: + +```output.json +{ + "schema": {...}, + "payload": { + "before": null, --> 1 + "after": { --> 2 + "id": { + "value": 1 + }, + "name": { + "value": "Vaibhav Kushwaha" + }, + "email": { + "value": "service@example.com" + } + }, + "source": { --> 3 + "version": "1.9.5.y.11", + "connector": "yugabytedb", + "name": "dbserver1", + "ts_ms": -8881476960074, + "snapshot": "false", + "db": "yugabyte", + "sequence": "[null,\"1:5::0:0\"]", + "schema": "public", + "table": "customers", + "txId": "", + "lsn": "1:5::0:0", + "xmin": null + }, + "op": "u", --> 4 + "ts_ms": 1646149134341, + "transaction": null + } +} +``` + +The fields in the update event are: + +| Item | Field name | Description | +| :--- | :--------- | :---------- | +| 1 | before | The value of the row before the update operation. | +| 2 | after | Specifies the state of the row after the change event happened. In this example, the value of `email` has now changed to `service@example.com`. | +| 3 | source | Mandatory field that describes the source metadata for the event. The source field structure has the same fields as a create event, but some values are different. The source metadata includes:
  • Debezium version
  • Connector type and name
  • Database and table that contains the new row
  • Schema name
  • If the event was part of a snapshot (always `false` for update events)
  • ID of the transaction in which the operation was performed
  • Offset of the operation in the database log
  • Timestamp for when the change was made in the database
| +| 4 | op | In an update event, this field's value is `u`, signifying that this row changed because of an update. | + +#### Primary key updates + +An UPDATE operation that changes a row's primary key field(s) is known as a primary key change. For a primary key change, in place of sending an UPDATE event record, the connector sends a DELETE event record for the old key, and a CREATE event record for the new (updated) key. These events have the usual structure and content, and in addition, each one has a message header related to the primary key change: + +* The DELETE event record has `__debezium.newkey` as a message header. The value of this header is the new primary key for the updated row. + +* The CREATE event record has `__debezium.oldkey` as a message header. The value of this header is the previous (old) primary key for the updated row. + +### Delete events + +The value in a _delete_ change event has the same schema portion as create and update events for the same table. The _payload_ portion in a delete event for the sample _customers_ table looks like the following: + +```sql +DELETE FROM customers WHERE id = 1; +``` + +```output.json +{ + "schema": {...}, + "payload": { + "before": { --> 1 + "id": { + "value": 1 + }, + "name": null, + "email": null + }, + "after": null, --> 2 + "source": { + "version": "1.9.5.y.11", + "connector": "yugabytedb", + "name": "dbserver1", + "ts_ms": -8876894517738, + "snapshot": "false", + "db": "yugabyte", + "sequence": "[null,\"1:6::0:0\"]", + "schema": "public", + "table": "customers", + "txId": "", + "lsn": "1:6::0:0", + "xmin": null + }, + "op": "d", --> 3 + "ts_ms": 1646150253203, + "transaction": null + } +} +``` + +The fields in this event are: + +| Item | Field name | Description | +| :--: | :--------- | :---------- | +| 1 | before | The value of the row before the delete event occurred. | +| 2 | after | Optional field specifying the state of the row after the event occurred. In a delete event, this field is null, indicating that the row no longer exists. | +| 3 | op | The field value is `d`, indicating that the row was deleted. | + +A `delete` change event record provides a consumer with the information it needs to process the removal of this row. + +#### Tombstone events + +When a row is deleted, the _delete_ event value still works with log compaction, because Kafka can remove all earlier messages that have that same key. However, for Kafka to remove all messages that have that same key, the message value must be `null`. To make this possible, the connector follows a delete event with a special _tombstone_ event that has the same key but a null value. + +If the downstream consumer from the topic relies on tombstone events to process deletions and uses the [YBExtractNewRecordState transformer](../yugabytedb-grpc-transformers/#ybextractnewrecordstate) (SMT), it is recommended to set the `delete.to.tombstone` SMT configuration property to `true`. This ensures that the connector converts the delete records to tombstone events and drops the tombstone events. + +To set the property, follow the SMT configuration conventions. For example: + +```json +"transforms": "flatten", +"transforms.flatten.type": "io.debezium.connector.yugabytedb.transforms.YBExtractNewRecordState", +"transforms.flatten.delete.to.tombstone": "true" +``` + +##### Suppress tombstone events + +You can configure whether a connector emits tombstone events using its `tombstones.on.delete` property. 
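For example, an illustrative snippet of connector configuration that disables tombstone generation (set it alongside the connector's other properties) looks like the following:

```json
"tombstones.on.delete": "false"
```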
+ +Whether you enable the connector to emit tombstones depends on how topics are consumed in your environment, and on the characteristics of the sink consumer. If your sink consumers rely on tombstone records to indicate when to delete records in downstream data stores, you should configure the connector to emit them. + +By default, a connector's `tombstones.on.delete` property is set to `true` so that the connector generates a tombstone after each delete event. + +If you set the property to `false` to prevent the connector from saving tombstone records to Kafka topics, the **absence of tombstone records might lead to unintended consequences if your sink is not designed to handle it properly**. For example, Kafka relies on tombstones during log compaction to remove records related to deleted keys. + +## Datatype mappings + +The connector represents changes to rows with events that are structured like the table in which the row exists. The event contains a field for each column value. How that value is represented in the event depends on the YugabyteDB data type of the column. The following sections describe how the connector maps YugabyteDB data types to a literal type and a semantic type in event fields. + +* The literal type describes how the value is literally represented using Kafka Connect schema types: INT8, INT16, INT32, INT64, FLOAT32, FLOAT64, BOOLEAN, STRING, BYTES, ARRAY, MAP, and STRUCT. +* The semantic type describes how the Kafka Connect schema captures the meaning of the field using the name of the Kafka Connect schema for the field. + +### Default values + +If there is a default value for any column in the YugabyteDB database schema, the connector propagates the same value to the Kafka schema. + +### Basic types + +The following table describes mappings for YugabyteDB basic data types. + +| YugabyteDB data type| Literal type (schema type) | Semantic type (schema name) | +| :------------------ | :------------------------- | :-------------------------- | +| BOOLEAN | BOOLEAN | N/A | +| BIT(1) | STRING | N/A | +| BIT( > 1) | STRING | N/A | +| VARBIT[(M)] | STRING | N/A | +| SMALLINT, SMALLSERIAL | INT16 | N/A | +| INTEGER, SERIAL | INT32 | N/A | +| BIGINT, BIGSERIAL | INT64 | N/A | +| REAL | FLOAT32 | N/A | +| DOUBLE PRECISION | FLOAT64 | N/A | +| CHAR [(M)] | STRING | N/A | +| VARCHAR [(M)] | STRING | N/A | +| TEXT | STRING | N/A | +| TIMESTAMPTZ | STRING | `io.debezium.time.ZonedTimestamp`
A string representation of a timestamp with timezone information, where the timezone is GMT. | +| TIMETZ | STRING | `io.debezium.time.ZonedTime`
A string representation of a time value with timezone information, where the timezone is GMT. | +| INTERVAL [P] | INT64 | `io.debezium.time.MicroDuration` (default)
The approximate number of microseconds for a time interval using the 365.25 / 12.0 formula for days per month average. | +| INTERVAL [P] | STRING | `io.debezium.time.Interval`
(when `interval.handling.mode` is `string`)
The string representation of the interval value that follows the pattern
P\<years>Y\<months>M\<days>DT\<hours>H\<minutes>M\<seconds>S.
For example, `P1Y2M3DT4H5M6.78S`. | +| BYTEA | STRING | A hex encoded string. | +| JSON, JSONB | STRING | `io.debezium.data.Json`
Contains the string representation of a JSON document, array, or scalar. | +| UUID | STRING | `io.debezium.data.Uuid`
Contains the string representation of a YugabyteDB UUID value. | +| DATE | INT32 | Number of days since the UNIX epoch (January 1, 1970). | +| TIME | INT32 | Milliseconds since midnight. | +| TIMESTAMP | INT64 | Milliseconds since the UNIX epoch (1970-01-01 00:00:00). | +| INT4RANGE | STRING | Range of integer. | +| INT8RANGE | STRING | Range of `bigint`. | +| NUMRANGE | STRING | Range of `numeric`. | +| TSRANGE | STRING | The string representation of a timestamp range without a time zone. | +| TSTZRANGE | STRING | The string representation of a timestamp range with the local system time zone. | +| DATERANGE | STRING | The string representation of a date range. Always has an _exclusive_ upper bound. | +| ARRAY | ARRAY | N/A | +| ENUM | STRING | The string representation of the enum label. | + +### Temporal types + +Other than the YugabyteDB TIMESTAMPTZ and TIMETZ data types, which contain time zone information, how temporal types are mapped depends on the value of the `time.precision.mode` connector configuration property. The following sections describe these mappings: + +* [`time.precision.mode=adaptive`](#adaptive-mode) +* [`time.precision.mode=adaptive_time_microseconds`](#adaptive-microseconds-mode) +* [`time.precision.mode=connect`](#connect-mode) + +#### Adaptive mode + +When the `time.precision.mode` property is set to adaptive (the default), the connector determines the literal type and semantic type based on the column's data type definition. This ensures that events exactly represent the values in the database. + +The following table describes mappings when `time.precision.mode` is `adaptive`. + +| YugabyteDB data type| Literal type (schema type) | Semantic type (schema name) | +| :------------------ | :------------------------- | :-------------------------- | +| DATE | INT32 | `io.debezium.time.Date`
The number of days since the epoch. | +| TIME([P]) | INT32 | `io.debezium.time.Time`
The number of milliseconds past midnight, and does not include timezone information. | +| TIMESTAMP([P]) | INT64 | `io.debezium.time.Timestamp`
The number of milliseconds since the epoch, and does not include timezone information. | + +#### Adaptive (microseconds) mode + +When the `time.precision.mode` configuration property is set to `adaptive_time_microseconds`, the connector determines the literal type and semantic type for temporal types based on the column's data type definition. This ensures that events exactly represent the values in the database, except all `TIME` fields are captured as microseconds. + +The following table describes mappings when `time.precision.mode` is `adaptive_time_microseconds`. + +| YugabyteDB data type | Literal type (schema type) | Semantic type (schema name) | +| :------------------- | :------------------------- | :-------------------------- | +| DATE | INT32 | `io.debezium.time.Date`
The number of days since the epoch. | +| TIME([P]) | INT64 | `io.debezium.time.MicroTime`
The time value in microseconds and doesn't include timezone information. YugabyteDB allows precision P to be in the range 0-6 to store up to microsecond precision. | +| TIMESTAMP([P]) | INT64 | `io.debezium.time.Timestamp`
The number of milliseconds since the UNIX epoch, and doesn't include timezone information. | + +#### Connect mode + +When the `time.precision.mode` configuration property is set to `connect`, the connector uses Kafka Connect logical types. This may be beneficial when consumers can handle only the built-in Kafka Connect logical types and are unable to handle variable-precision time values. However, because YugabyteDB supports microsecond precision, the events generated by a connector with the `connect` time precision mode **results in a loss of precision** when the database column has a fractional second precision value that is greater than 3. + +The following table describes mappings when `time.precision.mode` is `connect`. + +| YugabyteDB data type| Literal type (schema type) | Semantic type (schema name) | +| :------------------ | :------------------------- | :-------------------------- | +| DATE| INT32 | `org.apache.kafka.connect.data.Date`
The number of days since the UNIX epoch. | +| TIME([P]) | INT64 | `org.apache.kafka.connect.data.Time`
The number of milliseconds since midnight, and doesn't include timezone information. YugabyteDB allows P to be in the range 0-6 to store up to microsecond precision, though this mode results in a loss of precision when P is greater than 3. | +| TIMESTAMP([P]) | INT64 | `org.apache.kafka.connect.data.Timestamp`
The number of milliseconds since the UNIX epoch, and doesn't include timezone information. YugabyteDB allows P to be in the range 0-6 to store up to microsecond precision, though this mode results in a loss of precision when P is greater than 3. | + +### TIMESTAMP type + +The TIMESTAMP type represents a timestamp without time zone information. Such columns are converted into an equivalent Kafka Connect value based on UTC. For example, the TIMESTAMP value `2022-03-03 16:51:30` is represented by an `io.debezium.time.Timestamp` with the value `1646326290000` when `time.precision.mode` is set to any value other than `connect`. + +The timezone of the JVM running Kafka Connect and Debezium does not affect this conversion. + +YugabyteDB supports using `+/-infinity` values in TIMESTAMP columns. These special values are converted to timestamps with value `9223372036825200000` in case of positive infinity or `-9223372036832400000` in case of negative infinity. + +### Decimal types + +The setting of the connector configuration property `decimal.handling.mode` determines how the connector maps decimal types. + +{{< note title="Note" >}} + +YugabyteDB doesn't currently support the `decimal.handling.mode` property value `precise`. If that is set, YugabyteDB automatically defaults to `double`. + +{{< /note >}} + +When the `decimal.handling.mode` property is set to `double`, the connector represents all DECIMAL, NUMERIC, and MONEY values as Java double values, and encodes them as shown in the following table. + +The following table describes mappings when `decimal.handling.mode` is `double`. + +| YugabyteDB data type | Literal type (schema type) | Semantic type (schema name) | +| :------------------- | :------------------------- | :-------------------------- | +| NUMERIC [(M[,D])] | FLOAT64 | | +| DECIMAL [(M[,D])] | FLOAT64 | | +| MONEY [(M[,D])] | FLOAT64 | | + +The other possible value for `decimal.handling.mode` is `string`. In this case, the connector represents DECIMAL, NUMERIC, and MONEY values as their formatted string representation, and encodes them as shown in the following table. + +The following table describes mappings when `decimal.handling.mode` is `string`. + +| YugabyteDB data type | Literal type (schema type) | Semantic type (schema name) | +| :------------------- | :------------------------- | :-------------------------- | +| NUMERIC [(M[,D])] | STRING | | +| DECIMAL [(M[,D])] | STRING | | +| MONEY [(M[,D])] | STRING | | + +### Network address types + +YugabyteDB has data types that can store IPv4, IPv6, and MAC addresses. You should use these types instead of plain text types to store network addresses, as network address types offer input error checking and specialized operators and functions. + +The following table describes mappings for network address types. + +| YugabyteDB data type | Literal type (schema type) | Semantic type (schema name) | +| :------------------- | :------------------------- | :-------------------------- | +| INET | STRING | IPv4 and IPv6 networks. | +| CIDR | STRING | IPv4 and IPv6 hosts and networks. | +| MACADDR | STRING | MAC addresses. | +| MACADDR8 | STRING | MAC addresses in EUI-64 format. 
| + +### Example data type behaviors + +| Datatype | What you insert in YSQL | What you get in the Kafka topic | Notes | +| :------- | :---------------------- | :------------------------------ | :---- | +| BIGINT | 123456 | 123456 | | +| BIGSERIAL | Cannot insert explicitly | | | +| BIT [ (N) ] | '11011' | "11011" | | +| BIT VARYING [ (n) ] | '11011' | "11011" | | +| BOOLEAN | FALSE | false | | +| BYTEA | E'\\001' | "\x01" | | +| CHARACTER [ (N) ] | 'five5' | "five5" | | +| CHARACTER VARYING [ (n) ] | 'sampletext' | "sampletext" | | +| CIDR | '10.1.0.0/16' | "10.1.0.0/16" | | +| DATE | '2021-11-25' | 18956 | The value in the Kafka topic is the number of days since the Unix epoch (1970-01-01). | +| DOUBLE PRECISION | 567.89 | 567.89 | | +| INET | '192.166.1.1' | "192.166.1.1" | | +| INTEGER | 1 | 1 | | +| INTERVAL [ fields ] [ (p) ] | '2020-03-10 00:00:00'::timestamp - '2020-02-10 00:00:00'::timestamp | 2505600000000 | The output value coming up is the equivalent of the interval value in microseconds. So here 2505600000000 means 29 days. | +| JSON | '{"first_name":"vaibhav"}' | "{\"first_name\":\"vaibhav\"}" | | +| JSONB | '{"first_name":"vaibhav"}' | "{\"first_name\": \"vaibhav\"}" | | +| MACADDR | '2C:54:91:88:C9:E3' | "2c:54:91:88:c9:e3" | | +| MACADDR8 | '22:00:5c:03:55:08:01:02' | "22:00:5c:03:55:08:01:02" | | +| MONEY | '$100.5' | 100.5 | | +| NUMERIC | 34.56 | 34.56 | | +| REAL | 123.4567 | 123.4567 | | +| SMALLINT | 12 | 12 | | +| INT4RANGE | '(4, 14)' | "[5,14)" | | +| INT8RANGE | '(4, 150000)' | "[5,150000)" | | +| NUMRANGE | '(10.45, 21.32)' | "(10.45,21.32)" | | +| TSRANGE | '(1970-01-01 00:00:00, 2000-01-01 12:00:00)' | "(\"1970-01-01 00:00:00\",\"2000-01-01 12:00:00\")" | | +| TSTZRANGE | '(2017-07-04 12:30:30 UTC, 2021-07-04 12:30:30+05:30)' | "(\"2017-07-04 12:30:30+00\",\"2021-07-04 07:00:30+00\")" | | +| DATERANGE | '(2019-10-07, 2021-10-07)' | "[2019-10-08,2021-10-07)" | | +| SMALLSERIAL | Cannot insert explicitly | | | +| SERIAL | Cannot insert explicitly | | | +| TEXT | 'text to verify behaviour' | "text to verify behaviour" | | +| TIME [ (P) ] [ WITHOUT TIME ZONE ] | '12:47:32' | 46052000 | The output value is the number of milliseconds since midnight. | +| TIME [ (p) ] WITH TIME ZONE | '12:00:00+05:30' | "06:30:00Z" | The output value is the equivalent of the inserted time in UTC. The Z stands for Zero Timezone. | +| TIMESTAMP [ (p) ] [ WITHOUT TIME ZONE ] | '2021-11-25 12:00:00' | 1637841600000 | The output value is the number of milliseconds since the UNIX epoch (January 1, 1970, at midnight). | +| TIMESTAMP [ (p) ] WITH TIME ZONE | '2021-11-25 12:00:00+05:30' | "2021-11-25T06:30:00Z" | This output value is the timestamp value in UTC wherein the Z stands for Zero Timezone and T acts as a separator between the date and time. This format is defined by the sensible practical standard ISO 8601. | +| UUID | 'ffffffff-ffff-ffff-ffff-ffffffffffff' | "ffffffff-ffff-ffff-ffff-ffffffffffff" | | + +### Unsupported data types + +Support for the following YugabyteDB data types will be enabled in future releases: + +* BOX +* CIRCLE +* LINE +* LSEG +* PATH +* PG_LSN +* POINT +* POLYGON +* TSQUERY +* TSVECTOR +* TXID_SNAPSHOT + +## Setting up YugabyteDB + +Before using the connector to monitor the changes on a YugabyteDB server, you need to ensure the following: + +* You have a stream ID created on the database you want to monitor the changes for. The stream can be created using the [yb-admin create_change_data_stream](../../../../admin/yb-admin/#create-change-data-stream) command. 
+* The table to be monitored should have a primary key. Only tables which have a primary key can be streamed. + +### WAL disk space consumption + +In certain cases, it is possible for YugabyteDB disk space consumed by WAL files to spike or increase out of proportion. There are some possible reasons for this. + +For example, the connector is lagging behind in streaming the changes. In this case, the latest checkpoint the connector has received is way behind the last record available in WAL. Because the latest changes are not consumed yet, CDC will not allow the WAL files to be cleaned up, thus causing higher disk consumption. This is the expected behaviour and no action is needed. However, the efficiency of the connector can be increased by increasing the number of tasks so that more processing can happen in parallel. + +## Deployment + +To deploy a Debezium connector, you install the YugabyteDB gRPC Connector archive, configure the connector, and start the connector by adding its configuration to Kafka Connect. For complete steps, follow the guide to [running the Debezium connector in YugabyteDB](/preview/integrations/cdc/debezium/). + +### Connector configuration example + +Following is an example of the configuration for a connector that connects to a YugabyteDB server on port 5433 at 127.0.0.1, whose logical name is `dbserver1`. Typically, you configure the connector in a JSON file by setting the configuration properties available for the connector. + +You can choose to produce events for a subset of the schemas and tables in a database. Optionally, you can ignore, mask, or truncate columns that contain sensitive data, are larger than a specified size, or that you do not need. + +```output.json +{ + "name": "ybconnector", --> 1 + "config": { + "connector.class": "io.debezium.connector.yugabytedb.YugabyteDBgRPCConnector", --> 2 + "database.hostname": "127.0.0.1", --> 3 + "database.port": "5433", --> 4 + "database.master.addresses": "127.0.0.1:7100", --> 5 + "database.streamid": "d540f5e4890c4d3b812933cbfd703ed3", --> 6 + "database.user": "yugabyte", --> 7 + "database.password": "yugabyte", --> 8 + "database.dbname": "yugabyte", --> 9 + "database.server.name": "dbserver1", --> 10 + "table.include.list": "public.test" --> 11 + } +} +``` + +1. The name of the connector when registered with a Kafka Connect service. +1. The name of this YugabyteDB gRPC Connector class. +1. The address of this YugabyteDB server. +1. The port number of the YugabyteDB YSQL process. +1. List of comma separated values of master nodes of the YugabyteDB server. Usually in the form `host`:`port`. +1. The DB stream ID created using [yb-admin](../../../../admin/yb-admin/#change-data-capture-cdc-commands). +1. The name of the YugabyteDB user having the privileges to connect to the database. +1. The password for the above specified YugabyteDB user. +1. The name of the YugabyteDB database to connect to. +1. The logical name of the YugabyteDB server/cluster, which forms a namespace and is used in all the names of the Kafka topics to which the connector writes and the Kafka Connect schema names. +1. A list of all tables hosted by this server that this connector will monitor. This is optional, and there are other properties for listing the schemas and tables to include or exclude from monitoring. + +You can send this configuration with a POST command to a running Kafka Connect service. 
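For example, assuming the configuration above is saved in a file named `ybconnector.json` (a hypothetical file name) and the Kafka Connect REST interface is listening on `localhost:8083`, you could register the connector with a command similar to the following:

```sh
curl -X POST \
  -H "Content-Type: application/json" \
  --data @ybconnector.json \
  localhost:8083/connectors/
```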
The service records the configuration and starts one connector task that performs the following actions: + +* Connects to the YugabyteDB database. +* Reads the transaction log. +* Streams change event records to Kafka topics. + +#### Custom record extractor + +YugabyteDB uses a custom record extractor (`YBExtractNewRecordState`) so that the sinks understand the format in which data is sent. For example, if you are using a JDBC sink connector, you need to add two more properties to the sink configuration: + +| Property | Value | +| :------- | :---- | +| `transforms` | `unwrap` | +| `transforms.unwrap.type` | `io.debezium.connector.yugabytedb.transforms.YBExtractNewRecordState` | + +See [Transformers](#transformers). + +### Adding connector configuration + +To run a connector, create a connector configuration and add the configuration to your Kafka Connect cluster. + +#### Prerequisites + +* [YugabyteDB is configured](#setting-up-yugabytedb) for change data capture. +* The YugabyteDB gRPC connector is installed. + +#### Procedure + +1. Create a configuration for the connector. +2. Use the [Kafka Connect REST API](https://kafka.apache.org/documentation/#connect_rest) to add that connector configuration to your Kafka Connect cluster. + +#### Results + +After the connector starts, it will perform a snapshot of the tables depending on the configuration and if the connector is set to take snapshots. The connector then starts generating data change events for row-level operations and streaming change event records to Kafka topics. + +### Connector configuration properties + +The connector has many configuration properties that you can use to achieve the right connector behavior for your application. Many properties have default values. + +The following properties are _required_ unless a default value is available: + +| Property | Default value | Description | +| :------- | :------------ | :---------- | +| connector.class | N/A | Specifies the connector to use to connect Debezium to the database. For YugabyteDB, use `io.debezium.connector.yugabytedb.YugabyteDBgRPCConnector`. | +| database.hostname | N/A | The IP address of the database host machine. For a distributed cluster, use the leader node's IP address. Alternatively, you can specify a comma-separated list of multiple host addresses and corresponding ports (for example,`ip1:port1,ip2:port2,ip3:port3`). This is useful for connection fail-over. | +| database.port | N/A | The port at which the YSQL process is running. | +| database.master.addresses | N/A | Comma-separated list of `host:port` values. | +| database.user | N/A | The user which will be used to connect to the database. | +| database.password | N/A | Password for the given user. | +| database.dbname | N/A | The database from which to stream. | +| database.server.name | N/A | Logical name that identifies and provides a namespace for the particular YugabyteDB database server or cluster for which Debezium is capturing changes. This name must be unique, as it's also used to form the Kafka topic. | +| database.streamid | N/A | Stream ID created using [yb-admin](../../../../admin/yb-admin/#change-data-capture-cdc-commands) for Change data capture. | +| table.include.list | N/A | Comma-separated list of table names and schema names, such as `public.test` or `test_schema.test_table_name`. | +| table.max.num.tablets | 300 | Maximum number of tablets the connector can poll for. This should be greater than or equal to the number of tablets the table is split into. 
| +| database.sslmode | disable | Whether to use an encrypted connection to the YugabyteDB cluster. Supported options are:
  • `disable` uses an unencrypted connection
  • `require` uses an encrypted connection and fails if it can't be established
  • `verify-ca` uses an encrypted connection, verifies the server TLS certificate against the configured Certificate Authority (CA) certificates, and fails if no valid matching CA certificates are found.
| +| database.sslrootcert | N/A | The path to the file which contains the root certificate against which the server is to be validated. | +| database.sslcert | N/A | Path to the file containing the client's SSL certificate. | +| database.sslkey | N/A | Path to the file containing the client's private key. | +| schema.include.list | N/A | An optional, comma-separated list of regular expressions that match names of schemas for which you **want** to capture changes. Any schema name not included in `schema.include.list` is excluded from having its changes captured. By default, all non-system schemas have their changes captured. Do not also set the `schema.exclude.list` property. | +| schema.exclude.list | N/A | An optional, comma-separated list of regular expressions that match names of schemas for which you **do not** want to capture changes. Any schema whose name is not included in `schema.exclude.list` has its changes captured, with the exception of system schemas. Do not also set the `schema.include.list` property. | +| table.include.list | N/A | An optional, comma-separated list of regular expressions that match fully-qualified table identifiers for tables whose changes you want to capture. Any table not included in `table.include.list` does not have its changes captured. Each identifier is of the form _schemaName.tableName_. By default, the connector captures changes in every non-system table in each schema whose changes are being captured. Do not also set the `table.exclude.list` property. | +| table.exclude.list | N/A | An optional, comma-separated list of regular expressions that match fully-qualified table identifiers for tables whose changes you **do not** want to capture. Any table not included in `table.exclude.list` has its changes captured. Each identifier is of the form _schemaName.tableName_. Do not also set the `table.include.list` property. | +| column.include.list | N/A | An optional, comma-separated list of regular expressions that match the fully-qualified names of columns that should be included in change event record values. Fully-qualified names for columns are of the form _schemaName.tableName.columnName_. Do not also set the `column.exclude.list` property. | +| column.exclude.list | N/A | An optional, comma-separated list of regular expressions that match the fully-qualified names of columns that should be excluded from change event record values. Fully-qualified names for columns are of the form _schemaName.tableName.columnName_. Do not also set the `column.include.list` property. | +| column.truncate.to._length_.chars | N/A | An optional, comma-separated list of regular expressions that match the fully-qualified names of character-based columns. Fully-qualified names for columns are of the form _schemaName.tableName.columnName_. In change event records, values in these columns are truncated if they are longer than the number of characters specified by _length_ in the property name. You can specify multiple properties with different lengths in a single configuration. Length must be a positive integer, for example, `column.truncate.to.20.chars`. | +| column.mask.with._length_.chars | N/A | An optional, comma-separated list of regular expressions that match the fully-qualified names of character-based columns. Fully-qualified names for columns are of the form _schemaName.tableName.columnName_. In change event records, the values in the specified table columns are replaced with _length_ number of asterisk (`*`) characters. 
You can specify multiple properties with different lengths in a single configuration. Length must be a positive integer or zero. When you specify zero, the connector replaces a value with an empty string. | +| message.key.columns | _empty string_ | A list of expressions that specify the columns that the connector uses to form custom message keys for change event records that it publishes to the Kafka topics for specified tables.
By default, Debezium uses the primary key column of a table as the message key for records that it emits. In place of the default or to specify a key for tables that lack a primary key, you can configure custom message keys based on one or more columns.

To establish a custom message key for a table, list the table, followed by the columns to use as the message key. Each list entry takes the following format:

`<fully-qualified-table-name>:<column-name>,<column-name>`

To base a table key on multiple column names, insert commas between the column names. Each fully-qualified table name is a regular expression in the following format:

`<schemaName>.<tableName>`

The property can include entries for multiple tables. Use a semicolon to separate table entries in the list. The following example sets the message key for the tables `inventory.customers` and `purchase.orders`:

`inventory.customers:pk1,pk2;purchase.orders:pk3,pk4`

For the table `inventory.customers`, the columns `pk1` and `pk2` are specified as the message key. For the `purchase.orders` table in any schema, the columns `pk3` and `pk4` serve as the message key.

There is no limit to the number of columns that you use to create custom message keys. However, it's best to use the minimum number that are required to specify a unique key. | + +{{< note title="TLS v1.2 only" >}} + +The APIs used to fetch the changes are set up to work only with TLS v1.2. Make sure you're using the proper environment properties for Kafka Connect. + +{{< /note >}} + +{{< note title="Obtaining universe certificates" >}} + +If you have a YugabyteDB cluster with SSL enabled, you need to obtain the root certificate and provide the path of the file in the `database.sslrootcert` configuration property. You can follow these links to get the certificates for your universe: + +* [Local deployments](../../../../secure/tls-encryption/) +* [YugabyteDB Anywhere](../../../../yugabyte-platform/security/enable-encryption-in-transit/#enable-encryption-in-transit) +* [YugabyteDB Aeon](/preview/yugabyte-cloud/cloud-secure-clusters/cloud-authentication/#download-your-cluster-certificate) + +{{< /note >}} + +Advanced connector configuration properties: + +| Property | Default | Description | +| :------- | :------ | :---------- | +| snapshot.mode | N/A | `never` - Don't take a snapshot
`initial` - Take a snapshot when the connector is first started
`initial_only` - Only take a snapshot of the table, do not stream further changes | +| snapshot.include.collection.list | All tables specified in `table.include.list` | An optional, comma-separated list of regular expressions that match the fully-qualified names (`<schemaName>.<tableName>`) of the tables to include in a snapshot. The specified items must also be named in the connector's `table.include.list` property. This property takes effect only if the connector's `snapshot.mode` property is set to a value other than `never`. | +| cdc.poll.interval.ms | 500 | The interval at which the connector will poll the database for the changes. | +| admin.operation.timeout.ms | 60000 | The default timeout used for administrative operations (such as createTable, deleteTable, getTables, etc). | +| operation.timeout.ms | 60000 | The default timeout used for user operations (using sessions and scanners). | +| socket.read.timeout.ms | 60000 | The default timeout to use when waiting on data from a socket. | +| time.precision.mode | adaptive | Time, date, and timestamps can be represented with different kinds of precision:

`adaptive` captures the time and timestamp values exactly as in the database using millisecond precision values based on the database column's type.

`adaptive_time_microseconds` captures the date, datetime and timestamp values exactly as in the database using millisecond precision values based on the database column's type. An exception is `TIME` type fields, which are always captured as microseconds.

`connect` always represents time and timestamp values by using Kafka Connect's built-in representations for Time, Date, and Timestamp, which use millisecond precision regardless of the database columns' precision. See temporal values. | +| decimal.handling.mode | double | The `precise` mode is not currently supported.

`double` maps all the numeric, double, and money types as Java double values (FLOAT64).

`string` represents the numeric, double, and money types as their string-formatted form.

| +| binary.handling.mode | hex | `hex` is the only supported mode. All binary strings are converted to their respective hex format and emitted as their string representation. | +| interval.handling.mode | numeric | Specifies how the connector should handle values for interval columns:

`numeric` represents intervals using approximate number of microseconds.

`string` represents intervals exactly by using the string pattern representation
`P<years>Y<months>M<days>DT<hours>H<minutes>M<seconds>S`.
For example: P1Y2M3DT4H5M6.78S. See [YugabyteDB data types](../../../../api/ysql/datatypes/). | +| transaction.topic | `${database.server.name}`
`.transaction` | Controls the name of the topic to which the connector sends transaction metadata messages. The placeholder `${database.server.name}` can be used for referring to the connector's logical name; defaults to `${database.server.name}.transaction`, for example `dbserver1.transaction`. | +| provide.transaction.metadata | `false` | Determines whether the connector generates events with transaction boundaries and enriches change event envelopes with transaction metadata. Specify `true` if you want the connector to do this. See [Transaction metadata](#transaction-metadata) for details. | +| skipped.operations | N/A | A comma-separated list of operation types to be skipped during streaming. The types are `c` for insert/create operations, `u` for update operations, and `d` for delete operations. By default, no operations are skipped. | +| max.queue.size | 20240 | Positive integer value for the maximum size of the blocking queue. The connector places change events received from streaming replication in the blocking queue before writing them to Kafka. This queue can provide back pressure when, for example, writing records to Kafka is slower than it should be, or when Kafka is not available. | +| max.batch.size | 10240 | Positive integer value that specifies the maximum size of each batch of events that the connector processes. | +| max.queue.size.in.bytes | 0 | Long value for the maximum size in bytes of the blocking queue. The feature is disabled by default; it is activated if set to a positive long value. | +| max.connector.retries | 5 | Positive integer value for the maximum number of times a retry can happen at the connector level itself. | +| connector.retry.delay.ms | 60000 | Delay between subsequent retries at the connector level. | +| ignore.exceptions | `false` | Determines whether the connector ignores exceptions, which should not cause any critical runtime issues. By default, if there is an exception, the connector throws the exception and stops further execution. Specify `true` to have the connector log a warning for any exception and proceed. | +| tombstones.on.delete | `true` | Controls whether a delete event is followed by a tombstone event.

`true` - a delete operation is represented by a delete event and a subsequent tombstone event.

`false` - only a delete event is emitted.

After a source record is deleted, emitting a tombstone event (the default behavior) allows Kafka to completely delete all events that pertain to the key of the deleted row in case log compaction is enabled for the topic. | +| auto.add.new.tables | `true` | Controls whether the connector should keep polling the server to check if any new table has been added to the configured change data stream ID. If a new table has been found in the stream ID and if it has been included in the `table.include.list`, the connector will be restarted automatically. | +| new.table.poll.interval.ms | 300000 | The interval at which the poller thread will poll the server to check if there are any new tables in the configured change data stream ID. | +| transaction.ordering | `false` | Whether to order transactions by their commit time.
{{< warning title="Deprecation Notice" >}} This configuration property has been deprecated. For more details, see [transaction ordering](#transaction-ordering). {{< /warning >}} | + +### Transformers + +The following three transformers are available: YBExtractNewRecordState, ExtractTopic, and PGCompatible. + +#### YBExtractNewRecordState SMT + +Transformer type: `io.debezium.connector.yugabytedb.transforms.YBExtractNewRecordState` + +Unlike the Debezium connector for PostgreSQL, the YugabyteDB gRPC Connector only sends the `after` image of the "set of columns" that are modified. PostgreSQL sends the complete `after` image of the row which has changed. So by default if the column was not changed, it is not a part of the payload that is sent and the default value is set to `null`. + +To differentiate between the case where a column is set to `null` and the case in which it's not modified, the YugabyteDB gRPC Connector changes the value type to a struct. In this structure, an unchanged column is `{'value': null}`, whereas the column changed to a null value is `{'value': null, 'set': true}`. + +A schema registry requires that, once a schema is registered, records must contain only payloads with that schema version. If you're using a schema registry, the YugabyteDB gRPC Connector's approach can be problematic, as the schema may change with every message. For example, if we keep changing the record to only include the value of modified columns, the schema of each record will be different (the total number unique schemas will be a result of making all possible combinations of columns) and thus would require sending a schema with every record. + +To avoid this problem when you're using a schema registry, use the `YBExtractNewRecordState` SMT (Single Message Transformer for Kafka), which interprets these values and sends the record in the correct format (by removing the unmodified columns from the JSON message). Records transformed by `YBExtractNewRecordState` are compatible with all sink implementations. This approach ensures that the schema doesn't change with each new record and it can work with a schema registry. + +#### ExtractTopic + +Transformer type: `io.aiven.kafka.connect.transforms.ExtractTopic` + +This transformer extracts a string value from the record and uses it as the topic name. + +The transformation can use either the whole key or value (in this case, it must have `INT8`, `INT16`, `INT32`, `INT64`, `FLOAT32`, `FLOAT32`, `BOOLEAN`, or `STRING` type) or a field in them (in this case, it must have `STRUCT` type and the field's value must be `INT8`, `INT16`, `INT32`, `INT64`, `FLOAT32`, `FLOAT32`, `BOOLEAN`, or `STRING`). + +ExtractTopic exists in two variants: + +* `io.aiven.kafka.connect.transforms.ExtractTopic$Key` - works on keys +* `io.aiven.kafka.connect.transforms.ExtractTopic$Value` - works on values + +The transformation defines the following configurations: + +* `field.name` - The name of the field which should be used as the topic name. If `null` or empty, the entire key or value is used (and assumed to be a string). By default is `null`. +* `skip.missing.or.null` - In case the source of the new topic name is `null` or missing, should a record be silently passed without transformation. By default, is `false`. + +The following is an example of this transformation configuration: + +```properties +... 
+"transforms":"ExtractTopicFromValueField", +"transforms.ExtractTopicFromValueField.type":"io.aiven.kafka.connect.transforms.ExtractTopic$Value", +"transforms.ExtractTopicFromValueField.field.name":"inner_field_name", +... +``` + +#### PGCompatible SMT + +Transformer type: `io.debezium.connector.yugabytedb.transforms.PGCompatible` + +By default, the YugabyteDB CDC service publishes events with a schema that only includes columns that have been modified. The source connector then sends the value as `null` for columns that are missing in the payload. Each column payload includes a `set` field that is used to signal if a column has been set to `null` because it wasn't present in the payload from YugabyteDB. + +However, some sink connectors may not understand the preceding format. `PGCompatible` transforms the payload to a format that is compatible with the format of the standard change data events. Specifically, it transforms column schema and value to remove the set field and collapse the payload such that it only contains the data type schema and value. + +PGCompatible differs from `YBExtractNewRecordState` by recursively modifying all the fields in a payload. + +## Transaction ordering + +{{< warning title="Deprecation Notice" >}} + +Starting with YugabyteDB v2024.2, and YugabyteDB gRPC Connector `dz.1.9.5.yb.grpc.2024.2`, the configuration `transaction.ordering` is deprecated. This configuration will be removed in future releases. As the PostgreSQL Logical Replication-based [YugabyteDB connector](../../using-logical-replication/yugabytedb-connector) offers the same transactional ordering properties by default, you are advised to use the same for your use cases. + +Currently, for cases where you absolutely need to use transactional ordering with the YugabyteDB gRPC Connector, add the following configurations to use the deprecated configuration: + +```output.json +{ + ... + "transaction.ordering":"true", + "TEST.override.transaction.ordering.deprecation":"true" + ... +} +``` + +{{< /warning >}} + +In a CDC Stream, events from different transactions in different tablets across tables may appear at different times. This works well in use cases such as archiving, or with applications where only eventual consistency is required. There is another class of applications, where the end destination is another OLTP / operational database. These databases can have constraints (such as foreign keys) and strict transactional consistency requirements. In these cases, the stream of events cannot be applied as is, as events of the same transaction may appear out of order because transactions in YugabyteDB can span two tablets. + +The YugabyteDB source connector supports transaction ordering, which guarantees consistent streaming of records in the sorted order based on time. With transaction ordering enabled, the connector can be used to stream change events while honoring the constraints. + +To use transaction ordering, you need to set the configuration property `transaction.ordering` to `true`. Additionally, a transformer [ByLogicalTableRouter](https://debezium.io/documentation/reference/stable/transformations/topic-routing.html) is required to send all the events to a common topic to ensure that the published change events are published in the same sorted order as they are meant to be. + +The following table describes properties for configuring transaction ordering. + +| Property | Definition | +| :--- | :--- | +| transaction.ordering | Whether to enable ordering of transactions by their commit time. 
| +| transforms | Logical name for the transformer to use. For example, the following property definitions use `Reroute`. | +| transforms.Reroute.topic.regex | Specifies a regular expression that the transformation applies to each change event record to determine if it should be routed to a particular topic. | +| transforms.Reroute.topic.replacement | A regular expression that represents the destination topic name. | +| transforms.Reroute.type | Transformer class to be used. | +| transforms.Reroute.key.field.regex | Specifies a regular expression that the transformation applies to the default destination topic name to capture one or more groups of characters. | +| transforms.Reroute.key.field.replacement | Specifies a regular expression for determining the value of the inserted key field in terms of those captured groups. | +| provide.transaction.metadata | Whether to generate events with transaction boundaries. | + +For a usage example, refer to YugabyteDB CDC Consistent Streaming Pipeline in the [example repository](https://github.com/yugabyte/cdc-examples/tree/main/grpc_connector/consistent-streaming). + +### Transaction boundaries + +The connector publishes metadata that can be used to distinguish transaction boundaries for a downstream application to implement atomicity. After the configuration property `provide.transaction.metadata` is enabled, the connector will also publish events indicating the beginning and end of the transaction. For more information, see [Transaction metadata](#transaction-metadata). + +### Prerequisites + +* The stream ID should be created in the EXPLICIT checkpointing mode. For more information, see [yb-admin create\_change\_data_stream](../../../../admin/yb-admin#create-change-data-stream). +* You should always run the connector with a single task, that is, `tasks.max` should always be set to 1. + +### Known limitations + +* Transactional ordering is currently not supported with schema evolution. See issue [18476](https://github.com/yugabyte/yugabyte-db/issues/18476). + +## Monitoring + +For information on monitoring CDC, refer to [Monitor](../cdc-monitor/). + +## Behavior when things go wrong + +Debezium is a distributed system that captures all changes in multiple upstream databases; it never misses or loses an event. When the system is operating normally or being managed carefully, Debezium provides exactly once delivery of every change event record. + +If a fault does happen, the system does not lose any events. However, while it is recovering from the fault, it might repeat some change events. In these abnormal situations, Debezium, like Kafka, provides at least once delivery of change events. + +The rest of this section describes how Debezium handles various kinds of faults and problems. + +### Configuration and startup errors + +In the following situations, the connector fails when trying to start, reports an error/exception in the log, and stops running: + +* The connector's configuration is invalid. +* The connector cannot successfully connect to YugabyteDB by using the specified connection parameters. +* The connector is restarting from a previously-recorded checkpoint and YugabyteDB no longer has that history available. + +In these cases, the error message has details about the problem and possibly a suggested workaround. After you correct the configuration or address the YugabyteDB problem, restart the connector.
+ +### YB-TServer becomes unavailable + +If one of the tablet servers crashes, replicas on other YB-TServer nodes become the leaders for the tablets that were hosted on the crashed server. The YugabyteDB gRPC Connector will figure out the new tablet leaders and start streaming from the checkpoint that Debezium maintains. + +### YugabyteDB server failures + +In case of YugabyteDB server failures, the YugabyteDB gRPC Connector will retry for a configurable amount of time, waiting for the YB-TServer to become available, and will stop if the cluster cannot start. When the cluster is restarted, the connector can be run again and it will start processing changes from the committed checkpoint. + +### Connector unable to find table association with stream ID + +In this case, the connector throws an exception with an error message that the table is not a part of the stream ID. + +This can happen in the following two scenarios: + +* The stream ID you have created might belong to a database other than the one being polled. +* The table you are polling has no primary key. In this case, the table will not be a part of the stream ID. To continue, add a primary key to the table and create a new stream ID on the database. + +### YugabyteDB server becomes unavailable + +When the connector is running, the YugabyteDB server that it is connected to could become unavailable for any number of reasons. If this happens, the connector fails with an error and stops. When the server is available again, restart the connector. + +The connector externally stores the last processed offset in the form of a checkpoint. After a connector restarts and connects to a server instance, the connector communicates with the server to continue streaming from that particular offset. This offset is available as long as the stream ID remains intact. Never delete a stream ID without first deleting all of the connectors associated with it; otherwise, you will lose data. + +## Dropping a table part of the replication + +While the connector is running with a set of tables configured to capture the changes, if one of the tables in the set is dropped, the connector will fail with an error message indicating that the object is not found. + +To avoid or resolve a failure due to a dropped table, follow these steps: + +1. Delete the connector that contains the table that was dropped, or that you want to drop. +2. Edit the configuration and remove the given table from `table.include.list`. +3. Deploy a new connector with the updated configuration. + +### Kafka Connect process stops gracefully + +Suppose that Kafka Connect is being run in distributed mode and a Kafka Connect process is stopped gracefully. Prior to shutting down that process, Kafka Connect migrates the process's connector tasks to another Kafka Connect process in that group. The new connector tasks start processing exactly where the prior tasks stopped. There is a short delay in processing while the connector tasks are stopped gracefully and restarted on the new processes. + +### Kafka Connect process crashes + +If the Kafka Connect process stops unexpectedly, any connector tasks it was running terminate without recording their most recently processed offsets. When Kafka Connect is being run in distributed mode, Kafka Connect restarts those connector tasks on other processes. However, YugabyteDB connectors resume from the last offset that was recorded by the earlier processes.
This means that the new replacement tasks might generate some of the same change events that were processed just prior to the crash. The number of duplicate events depends on the offset flush period and the volume of data changes just before the crash. + +Because there is a chance that some events might be duplicated during a recovery from failure, consumers should always anticipate some duplicate events. Debezium changes are idempotent, so a sequence of events always results in the same state. + +In each change event record, Debezium connectors insert source-specific information about the origin of the event, including the YugabyteDB server's time of the event, the ID of the server transaction, and the position in the write-ahead log where the transaction changes were written. Consumers can keep track of this information, especially the LSN, to determine whether an event is a duplicate. + +### Kafka becomes unavailable + +As the connector generates change events, the Kafka Connect framework records those events in Kafka by using the Kafka producer API. Periodically, at a frequency that you specify in the Kafka Connect configuration, Kafka Connect records the latest offset that appears in those change events. If the Kafka brokers become unavailable, the Kafka Connect process that is running the connectors repeatedly tries to reconnect to the Kafka brokers. In other words, the connector tasks pause until a connection can be re-established, at which point the connectors resume exactly where they left off. + +### Connector is stopped for a duration + +If the connector is gracefully stopped, the database can continue to be used. Any changes are recorded in the YugabyteDB WAL. When the connector restarts, it resumes streaming changes where it left off. That is, it generates change event records for all database changes that were made while the connector was stopped. + +A properly configured Kafka cluster is able to handle massive throughput. Kafka Connect is written according to Kafka best practices, and given enough resources a Kafka Connect connector can also handle very large numbers of database change events. Because of this, after being stopped for a while, when a Debezium connector restarts, it is very likely to catch up with the database changes that were made while it was stopped. How quickly this happens depends on the capabilities and performance of Kafka and the volume of changes being made to the data in YugabyteDB. diff --git a/docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/yugabytedb-grpc-transformers.md b/docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/yugabytedb-grpc-transformers.md new file mode 100644 index 000000000000..8b322e87ff16 --- /dev/null +++ b/docs/content/v2.25/additional-features/change-data-capture/using-yugabytedb-grpc-replication/yugabytedb-grpc-transformers.md @@ -0,0 +1,220 @@ +--- +title: YugabyteDB gRPC connector transformers +headerTitle: YugabyteDB gRPC connector transformers +linkTitle: Connector transformers +description: YugabyteDB gRPC connector transformers for Change Data Capture. +menu: + preview: + parent: debezium-connector-yugabytedb + identifier: yugabytedb-grpc-connector-transformers + weight: 50 +type: docs +--- + +The YugabyteDB gRPC Connector comes bundled with Single Message Transformers (SMTs). SMTs are applied to messages as they flow through Kafka Connect so that sinks understand the format in which data is sent. 
SMTs transform inbound messages after a source connector has produced them, but before they are written to Kafka. SMTs transform outbound messages before they are sent to a sink connector. + +The following SMTs are bundled with the connector jar file available on [GitHub releases](https://github.com/yugabyte/debezium-connector-yugabytedb/releases): + +* YBExtractNewRecordState +* PGCompatible + +To provide examples of output from these transformers, consider a table created using the following statement: + +```sql +CREATE TABLE test (id INT PRIMARY KEY, name TEXT, aura INT); +``` + +The following DML statements are used to demonstrate the payload for each transformer in case of individual replica identities: + +```sql +-- statement 1 +INSERT INTO test VALUES (1, 'Vaibhav', 9876); + +-- statement 2 +UPDATE test SET aura = 9999 WHERE id = 1; + +-- statement 3 +UPDATE test SET name = 'Vaibhav Kushwaha', aura = 10 WHERE id = 1; + +-- statement 4 +UPDATE test SET aura = NULL WHERE id = 1; + +-- statement 5 +DELETE FROM test WHERE id = 1; +``` + +For simplicity, only `before` and `after` fields of the `payload` of the message published by the connector are mentioned in the following example output. Any information pertaining to the record schema, if it is the same as the standard Debezium connector for PostgreSQL, is skipped. + +By default, the YugabyteDB CDC service publishes events with a schema that only includes columns that have been modified. The source connector then sends the value as `null` for columns that are missing in the payload. Each column payload includes a `set` field that is used to signal if a column has been set to `null` because it wasn't present in the payload from YugabyteDB. + +## YBExtractNewRecordState + +**Transformer class:** `io.debezium.connector.yugabytedb.transforms.YBExtractNewRecordState` + +The SMT `YBExtractNewRecordState` is used to flatten the records published by the connector and just keep the payload field in a flattened format. + +The following examples show what the payload would look like for each [before image mode](../cdc-get-started/#before-image-modes). Note that in this example, as you have set the property `delete.handling.mode` to `none` for the transformer, it will not drop the delete records from the stream. Additionally, this SMT works on the `after` field of the payload and since the `after` field for the DELETE record is `null`, the output after applying this transformer on a DELETE record is also `null`. 
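
The following is a minimal sketch of how this SMT might be wired into a Kafka Connect connector configuration using the standard `transforms` properties. The transform alias `flatten` is an arbitrary placeholder, and `delete.handling.mode` is set to `none`, matching the assumption in the examples that follow:

```properties
...
# Hypothetical alias for the transform; any name can be used.
"transforms":"flatten",
"transforms.flatten.type":"io.debezium.connector.yugabytedb.transforms.YBExtractNewRecordState",
# Keep delete records in the stream instead of dropping them.
"transforms.flatten.delete.handling.mode":"none",
...
```

With this transform in place, sinks receive the flattened row images shown in the following examples rather than the struct-wrapped column values emitted by the connector.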
+ +### CHANGE + +```json{.nocopy} +-- statement 1 +{"id":1,"name":"Vaibhav","aura":9876} + +-- statement 2 +{"id":1,"aura":9999} + +-- statement 3 +{"id":1,"name":"Vaibhav Kushwaha","aura":10} + +-- statement 4 +{"id":1,"aura":null} + +-- statement 5 +null +``` + +### FULL_ROW_NEW_IMAGE + +```json{.nocopy} +-- statement 1 +{"id":1,"name":"Vaibhav","aura":9876} + +-- statement 2 +{"id":1,"name":"Vaibhav","aura":9999} + +-- statement 3 +{"id":1,"name":"Vaibhav Kushwaha","aura":10} + +-- statement 4 +{"id":1,"name":"Vaibhav Kushwaha","aura":null} + +-- statement 5 +null +``` + +### MODIFIED_COLUMNS_OLD_AND_NEW_IMAGES + +```json{.nocopy} +-- statement 1 +{"id":1,"name":"Vaibhav","aura":9876} + +-- statement 2 +{"id":1,"name":null,"aura":9999} + +-- statement 3 +{"id":1,"name":"Vaibhav Kushwaha","aura":10} + +-- statement 4 +{"id":1,"name":null,"aura":null} + +-- statement 5 +null +``` + +### ALL + +```json{.nocopy} +-- statement 1 +{"id":1,"name":"Vaibhav","aura":9876} + +-- statement 2 +{"id":1,"name":"Vaibhav","aura":9999} + +-- statement 3 +{"id":1,"name":"Vaibhav Kushwaha","aura":10} + +-- statement 4 +{"id":1,"name":"Vaibhav Kushwaha","aura":null} + +-- statement 5 +null +``` + +## PGCompatible + +**Transformer class:** `io.debezium.connector.yugabytedb.transforms.PGCompatible` + +Some sink connectors may not understand the payload format published by the connector. `PGCompatible` transforms the payload to a format that is compatible with the format of standard change data events. Specifically, it transforms column schema and value to remove the `set` field and collapse the payload such that it only contains the data type schema and value. + +`PGCompatible` differs from `YBExtractNewRecordState` by recursively modifying all the fields in a payload. + +The following examples show what the payload would look like for each [before image mode](../cdc-get-started/#before-image-modes). + +### CHANGE + +```json{.nocopy} +-- statement 1 +"before":null,"after":{"id":1,"name":"Vaibhav","aura":9876} + +-- statement 2 +"before":null,"after":{"id":1,"name":null,"aura":9999} + +-- statement 3 +"before":null,"after":{"id":1,"name":"Vaibhav Kushwaha","aura":10} + +-- statement 4 +"before":null,"after":{"id":1,"name":null,"aura":null} + +-- statement 5 +"before":{"id":1,"name":null,"aura":null},"after":null +``` + +Note that for statements 2 and 4, the columns that were not updated as a part of the UPDATE statement are `null` in the output field. 
+ +### FULL_ROW_NEW_IMAGE + +```json{.nocopy} +-- statement 1 +"before":null,"after":{"id":1,"name":"Vaibhav","aura":9876} + +-- statement 2 +"before":null,"after":{"id":1,"name":"Vaibhav","aura":9999} + +-- statement 3 +"before":null,"after":{"id":1,"name":"Vaibhav Kushwaha","aura":10} + +-- statement 4 +"before":null,"after":{"id":1,"name":"Vaibhav Kushwaha","aura":null} + +-- statement 5 +"before":{"id":1,"name":"Vaibhav Kushwaha","aura":null},"after":null +``` + +### MODIFIED_COLUMNS_OLD_AND_NEW_IMAGES + +```json{.nocopy} +-- statement 1 +"before":null,"after":{"id":1,"name":"Vaibhav","aura":9876} + +-- statement 2 +"before":{"id":1,"name":null,"aura":9876},"after":{"id":1,"name":null,"aura":9999} + +-- statement 3 +"before":{"id":1,"name":"Vaibhav","aura":9999},"after":{"id":1,"name":"Vaibhav Kushwaha","aura":10} + +-- statement 4 +"before":{"id":1,"name":null,"aura":10},"after":{"id":1,"name":null,"aura":null} + +-- statement 5 +"before":{"id":1,"name":null,"aura":null},"after":null +``` + +### ALL + +```json{.nocopy} +-- statement 1 +"before":null,"after":{"id":1,"name":"Vaibhav","aura":9876} + +-- statement 2 +"before":{"id":1,"name":"Vaibhav","aura":9876},"after":{"id":1,"name":"Vaibhav","aura":9999} + +-- statement 3 +"before":{"id":1,"name":"Vaibhav","aura":9999},"after":{"id":1,"name":"Vaibhav Kushwaha","aura":10} + +-- statement 4 +"before":{"id":1,"name":"Vaibhav Kushwaha","aura":10},"after":{"id":1,"name":"Vaibhav Kushwaha","aura":null} + +-- statement 5 +"before":{"id":1,"name":"Vaibhav Kushwaha","aura":null},"after":null +``` diff --git a/docs/content/v2.25/additional-features/colocation.md b/docs/content/v2.25/additional-features/colocation.md new file mode 100644 index 000000000000..46309f50954c --- /dev/null +++ b/docs/content/v2.25/additional-features/colocation.md @@ -0,0 +1,537 @@ +--- +title: Colocating tables and databases +headerTitle: Colocating tables +linkTitle: Colocation +description: Learn how colocated tables aggregate data into a single tablet. +aliases: + - /preview/architecture/docdb/colocated_tables/ + - /preview/architecture/docdb-sharding/colocated-tables/ + - /preview/explore/colocation/ +menu: + preview: + identifier: colocation + parent: additional-features + weight: 50 +rightNav: + hideH4: true +type: docs +--- + +YugabyteDB supports colocating SQL tables. This allows for closely related data in colocated tables to reside together in a single parent tablet called the "colocation tablet." Colocation helps to optimize for low-latency, high-performance data access by reducing the need for additional trips across the network. It also reduces the overhead of creating a tablet for every relation (tables, indexes, and so on) and the storage for these per node. + +Note that all the data in the colocation tablet is still replicated across nodes in accordance with the replication factor of the cluster. + +## Benefits of colocation and considerations by use case + +Colocation can make sense for high-performance, real-time data processing, where low-latency and fast access to data are critical. Colocation has the following benefits: + +- Improved performance and scalability: Using a single tablet instead of creating a tablet per relation reduces storage and compute overhead. +- Faster access to data: By having all the data for a single database from multiple tables stored in a single tablet, you can avoid the overhead of inter-node communication and data shuffling, which can result in faster access to data. 
For example, the speed of joins improves when data across the various colocated tables is local and you no longer have to read data over the network. + +### When to use colocation + +The decision to use colocation should be based on the specific requirements of your use case, including the expected performance, data size, availability, and durability requirements. The following scenarios may benefit from colocation: + +#### Small datasets needing HA or geo-distribution + +Applications with smaller-sized datasets may have the following pattern and requirements: + +- The size of the entire dataset is small. Typically, this entire database is less than 50 GB in size. +- They require a large number of tables, indexes, and other relations created in a single database. +- They need high availability and/or geographic data distribution. +- Scaling the dataset or the number of IOPS is not an immediate concern. + +In this scenario, it is undesirable to have the small dataset spread across multiple nodes as this may affect the performance of certain queries due to more network hops (for example, joins). + +#### Large datasets - a few large tables with many small tables + +Applications that have a large dataset could have the following characteristics: + +- A large number of tables and indexes. +- A handful of tables that are expected to grow large, and thereby need to be scaled out. +- The remaining tables continue to remain small. + +In this case, only the few large tables need to be sharded and scaled out. All other tables benefit from colocation as queries involving these tables do not need network hops. + +#### Scaling the number of databases, each database with a small dataset + +In some scenarios, the number of databases in a cluster grows rapidly, while the size of each database stays small. This is characteristic of a microservices-oriented architecture, where each microservice needs its own database. An example of this would be a multi-tenant SaaS service in which a database is created per customer. The net result is a lot of small databases, with the need to scale the number of databases hosted. Colocated tables allow for the entire dataset in each database to be hosted in one tablet, enabling scalability of the number of databases in a cluster by adding more nodes. + +Colocating all data in a single tablet comes with some trade-offs. It can lead to a potential bottleneck in terms of resource utilization. Ultimately, the size of the dataset is just one factor to consider when determining whether a colocated database is a good fit for your use case. + +## Enable colocation + +Colocation can be enabled at the cluster, database, or table level. For a colocated cluster, all the databases created in the cluster will have colocation enabled by default. You can also choose to configure a single database as colocated to ensure that all the data in the database tables is stored on the single colocation tablet on a node. This can be especially helpful when working with real-time data processing or when querying large amounts of data.
+ +### Clusters + +To enable colocation for all databases in a cluster, when you create the cluster, set the following [flag](../../reference/configuration/yb-master/#ysql-colocate-database-by-default) to true for [YB-Master](../../architecture/yb-master/) and [YB-TServer](../../architecture/yb-tserver/) services as follows: + +```sql +ysql_colocate_database_by_default = true +``` + +You can also set this flag after creating the cluster, but you will need to restart the YB-Masters and YB-TServers. + +Note: For YugabyteDB Aeon, you currently cannot enable colocation for a cluster. Enable colocation for [individual databases](#databases). + +### Databases + +You can create a colocated database in a non-colocated cluster. Tables created in this database are colocated by default. That is, all the tables in the database share a single tablet. To enable this, at database creation time run the following command: + +```sql +CREATE DATABASE with COLOCATION = true +``` + +For a colocation-enabled cluster, you can choose to opt a specific database out of colocation using the following syntax: + +```sql +CREATE DATABASE with COLOCATION = false +``` + +{{< warning title="Deprecated syntax" >}} + +The following syntax to create colocated databases is deprecated in v2.18 and later: + +```sql +CREATE DATABASE WITH colocated = +``` + +You can create a backup of a database that was colocated with the deprecated syntax and restore it in a colocated cluster to recreate it with the new colocation implementation. You can't upgrade a database created with the deprecated syntax. + +{{< /warning >}} + +To check if a database is colocated or not, you can use the `yb_is_database_colocated` function as follows: + +```sql +select yb_is_database_colocated(); +``` + +You should see an output similar to the following: + +```output + yb_is_database_colocated +-------------------------- + t +``` + +### Tables + +All the tables in a colocated database are colocated by default. There is no need to enable colocation when creating tables. You can choose to opt specific tables out of colocation in a colocated database. To do this, use the following command: + +```sql +CREATE TABLE (columns) WITH (COLOCATION = false); +``` + +Note that you cannot create a colocated table in a non-colocated database. + +{{< warning title="Deprecated syntax" >}} + +The following syntax to create colocated tables is deprecated in v2.18 and later: + +```sql +CREATE TABLE (columns) WITH (colocated = ) +``` + +{{< /warning >}} + +To check if a table is colocated or not, you can use the [\d](../../api/ysqlsh-meta-commands/#d-s-pattern-patterns) meta-command in [ysqlsh](../../api/ysqlsh/). You can also retrieve the same information using the `yb_table_properties()` function as follows: + +```sql +select is_colocated from yb_table_properties('table_name'::regclass); +``` + +You should see an output similar to the following: + +```output + is_colocated +-------------- + f +``` + +#### Change table colocation + +To remove a single table from a colocation (for example, if it increases beyond a certain size), you can create a copy of the table using CREATE TABLE AS SELECT with colocation set to false. Do the following: + +1. Rename your colocated table to ensure no further changes modify the table or its contents. +1. Create a new non-colocated table from the original colocated table using CREATE TABLE AS SELECT. You can choose to use the same name as the original table. +1. 
Optionally, drop the original colocated table after confirming reads and writes on the new, non-colocated table. + +You can use the same process to add a non-colocated table to colocation in a colocated database. + +{{< note title="Note" >}} + +Changing table colocation requires some downtime during the creation of the new table. The time taken for this process depends on the size of the table whose colocation is changed. + +{{< /note >}} + +## Metrics and views + +To view metrics such as table size, use the name of the parent colocation table. The colocation table name is in the format `.colocation.parent.tablename`. All the tables in a colocation share the same metric values and these show under the colocation table for each metric. Table and tablet metrics are available at the YB-TServer endpoint (`:9000`) as well as in YugabyteDB Anywhere in the Metrics section for each Universe. + +## Limitations and considerations + +- Metrics for table metrics such as table size are available for the colocation tablet, not for individual colocated tables that are part of the colocation. +- Tablet splitting is disabled for colocated tables. +- To avoid hotspots, do not colocate tables that receive disproportionately high loads. +- xCluster replication automatic mode does not yet support colocated tables. + +### Semantic differences between colocated and non-colocated tables + +Concurrent DML and DDL on different tables in the same colocated database will abort the DML. This is not the case for distributed, non-colocated tables. + +For a colocated table, a TRUNCATE / DROP operation may abort due to conflicts if another session is holding row-level locks on the table. + +## xCluster and colocation + +xCluster replication currently only supports colocated tables for [semi-automatic and fully manual](../../deploy/multi-dc/async-replication/async-transactional-setup-semi-automatic/) modes. + +When setting up xCluster for colocated tables when using manual or +semi-automatic mode, the `colocation_id` for a given table or index +needs to match on the source and target universes. + +To set up xCluster for colocated tables, do the following: + +1. Create the table in the colocated database on the source universe with colocation ID explicitly specified. + + ```SQL + CREATE TABLE WITH (COLOCATION = true, COLOCATION_ID = 20000) + ``` + +1. Create the table in the colocated database on the target universe using the same colocation ID. + + ```SQL + CREATE TABLE WITH (COLOCATION = true, COLOCATION_ID = 20000) + ``` + +1. Create the index in the colocated database on the source universe with colocation ID explicitly specified. + + ```SQL + CREATE INDEX ON TABLE WITH (COLOCATION_ID = 20000) + ``` + +1. Create the index in the colocated database on the target universe using the same colocation ID. + + ```SQL + CREATE INDEX ON TABLE WITH (COLOCATION_ID = 20000) + ``` + +1. Get the parent table UUID for the colocated database. + + ```sh + ./yb-admin --master_addresses list_tables include_table_id | grep -i | grep -i "colocation.parent.uuid" + ``` + + ```output + col_db.00004000000030008000000000004004.colocation.parent.tablename 00004000000030008000000000004004.colocation.parent.uuid + ``` + +1. Set up replication for the parent colocation table using yb-admin. 
+ + ```sh + ./yb-admin --master_addresses setup_universe_replication + ``` + + For example: + + ```sh + ./yb-admin --master_addresses 127.0.0.2 setup_universe_replication A1-B2 127.0.0.1 00004000000030008000000000004004.colocation.parent.uuid + ``` + + ```output + Replication setup successfully + ``` + +If more colocated tables are added to the same colocated database on +both source and target universes with matching colocation IDs, then they +are automatically included in replication. There is no need to set up +the parent table for replication again. + +For information on how to set up xCluster for non-colocated tables, refer to [xCluster deployment](../../deploy/multi-dc/async-replication/). + +## Colocated tables with tablespaces + +{{}}Colocated tables can be placed in [tablespaces](../../explore/going-beyond-sql/tablespaces/). When a colocated table is created in a tablespace, the colocation tablet is placed and replicated exclusively in the tablespace. + +During Early Access, by default colocation support for tablespaces is not enabled. To enable the feature, set the flag `ysql_enable_colocated_tables_with_tablespaces=true`. + +### Create a colocated table in a tablespace + +In a [colocated database](#databases), [tables](#tables) are created with colocation by default. To create a colocated table in a tablespace, use the following command: + +```sql +CREATE TABLE TABLESPACE ; +``` + +Use the same syntax to create colocated indexes and materialized views in a tablespace as follows: + +```sql +CREATE INDEX ON () TABLESPACE ; +CREATE MATERIALIZED VIEW TABLESPACE AS ; +``` + +To create a non-colocated table (in a colocated database) in a tablespace, use the following command: + +```sql +CREATE TABLE WITH (COLOCATION=FALSE) TABLESPACE ; +``` + +### View tablespace and colocation properties + +To check the tablespace and colocation properties of a table, use the [\d](../../api/ysqlsh-meta-commands/#d-s-pattern-patterns) meta-command on the table as follows: + +```sql +\d table_name; +``` + +You should see output similar to the following: + +```output + Table "public.t" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + col | integer | | | +Tablespace: "us_east_1a_zone_tablespace" +Colocation: true +``` + +### List all tablegroups and associated tablespaces + +To list all tablegroups and their associated tablespaces, use the following query: + +```sql +SELECT * FROM pg_yb_tablegroup; +``` + +You should see output similar to the following: + +```output + grpname | grpowner | grptablespace | grpacl | grpoptions +-----------------+----------+---------------+--------+------------ +default | 10 | 0 | | +colocation_16384 | 10 | 16384 | | +(2 rows) +``` + +The `grpname` column represent the tablegroup's name and the `grptablespace` column shows the OID of the associated tablespace. + +### Geo-partitioned colocated tables with tablespaces + +YugabyteDB supports [row-level geo-partitioning](../../explore/multi-region-deployments/row-level-geo-partitioning/), which distributes each row across child tables based on the region specified by respective tablespaces. This capability enhances data access speed and helps meet compliance policies that require specific data locality. 
+ +In a colocated database, you can create colocated geo-partitioned tables to benefit from both colocation and geo-partitioning as follows: + +```sql +CREATE TABLE PARTITION BY LIST(region); + +CREATE TABLE PARTITION OF FOR VALUES IN () TABLESPACE ; + +CREATE TABLE PARTITION OF FOR VALUES IN () TABLESPACE ; + +CREATE TABLE PARTITION OF FOR VALUES IN () TABLESPACE ; +``` + +### Alter tablespaces for colocated relations + +Colocated relations (the strategy of storing related data together to optimize performance and query efficiency) cannot be moved independently from one tablespace to another; instead, you can either move all colocated relations or none. The tablespace to which the colocated relations are being moved must not contain any other colocated relations prior to the move. + +To move all relations from one tablespace to another, use the following syntax: + +```sql +ALTER TABLE ALL IN TABLESPACE tablespace_1 SET TABLESPACE tablespace_2 CASCADE; +``` + +This command moves all relations in `tablespace_1` to `tablespace_2`, including any non-colocated relations. Note that keyword CASCADE is required to move the colocated relations. + +To move only colocated relations, you can specify a table to colocate with using the following syntax: + +```sql +ALTER TABLE ALL IN TABLESPACE tablespace_1 COLOCATED WITH table1 SET TABLESPACE tablespace_2 CASCADE; +``` + +This command moves only the relations present in `tablespace_1` that are colocated with `table1`, where `table1` must be a colocated table. + +Move a single non-colocated table using the following syntax: + +```sql +ALTER TABLE SET TABLESPACE ; + +``` + +#### Failure scenarios for altering tablespaces + +The following failure scenarios are applicable to the alter commands from the [Alter tablespaces for colocated relations](#alter-tablespaces-for-colocated-relations) section. + +**Scenario 1**: Moving to a tablespace which already contains colocated relations + +Consider the following example: + +```sql +CREATE TABLE t1 (col INT PRIMARY KEY, col2 INT) TABLESPACE tsp1; +CREATE TABLE t2 (col INT PRIMARY KEY, col2 INT) TABLESPACE tsp1; +CREATE TABLE t3 (col INT PRIMARY KEY, col2 INT) TABLESPACE tsp2; + +ALTER TABLE ALL IN TABLESPACE tsp1 SET TABLESPACE tsp2; +``` + +```output +ERROR: cannot move colocated relations to tablespace tsp2, as it contains existing colocated relation +``` + +```sql +CREATE TABLE t4 (col INT PRIMARY KEY, col2 INT) WITH (COLOCATION = FALSE) TABLESPACE tsp1; + +ALTER TABLE t4 SET TABLESPACE tsp2; +``` + +```output +NOTICE: Data movement for table t4 is successfully initiated. +DETAIL: Data movement is a long running asynchronous process and can be monitored by checking the tablet placement in http://:7000/tables +ALTER TABLE +``` + +Tables `t1` and `t2` are colocated tables belonging to tablespace `tsp1`, while table `t3` is a colocated table in tablespace `tsp2`. Moving colocated tables (for example, `t1` and `t2`) from `tsp1` to `tsp2` is not allowed. However, a non-colocated table (for example, `t4`) from `tsp1` can be moved to `tsp2`. + +**Scenario 2**: Moving colocated relations without the keyword CASCADE + +If you move colocated relations without the CASCADE keyword, it results in the following error: + +```sql +ALTER TABLE ALL IN TABLESPACE tsp1 SET TABLESPACE tsp2; +``` + +```output +ERROR: cannot move colocated relations present in tablespace tsp1 +HINT: Use ALTER ... CASCADE to move colcated relations. 
+``` + +**Scenario 3**: Using non-colocated tables in COLOCATED WITH syntax + +Consider the following example: + +```sql +CREATE TABLE t1 (col INT PRIMARY KEY, col2 INT) TABLESPACE tsp1; +CREATE TABLE t2 (col INT PRIMARY KEY, col2 INT) WITH (COLOCATION = FALSE) TABLESPACE tsp1; + +ALTER TABLE ALL IN TABLESPACE tsp1 COLOCATED WITH t2 SET TABLESPACE tsp2 CASCADE; +``` + +```output +ERROR: the specified relation is non-colocated which can't be moved using this command +``` + +### Back up and restore colocated database using ysql_dump + +You can back up and restore a database with colocated tables and tablespaces in two ways: + +- With `--use_tablespaces` option in [ysql_dump](../../admin/ysql-dump/). Using this option during backup and restore includes tablespace information. The restored database requires the target universe to contain the necessary nodes for tablespace creation. + +- Without `--use_tablespaces` option. When this option is omitted, tablespace information is not stored during backup and restore, and all relations are restored to the YugabyteDB default tablespace `pg_default`. + + After the restore, all colocated entities remain colocated, and you can create tablespaces post-restore as needed. Tables can then be moved to the desired tablespaces using the ALTER syntax. + + - To move non-colocated tables, use the syntax to [alter the tablespace of a non-colocated table](#create-a-colocated-table-in-a-tablespace). + + - To move colocated relations, use the following variant of the previously mentioned command: + + ```sql + ALTER TABLE ALL IN TABLESPACE pg_default COLOCATED WITH table1 SET TABLESPACE new_tablespace CASCADE; + ``` + + This command moves all restored colocated relations in the default tablespace `pg_default` that are colocated with `table1` to `new_tablespace`. 
+ + - Consider the following example schema for before and after backup or restore operation: + + ```sql + CREATE TABLE t1 (a INT, region VARCHAR, c INT, PRIMARY KEY(a, region)) PARTITION BY LIST (region); + CREATE TABLE t1_1 PARTITION OF t1 FOR VALUES IN ('USWEST') TABLESPACE tsp1; + CREATE TABLE t1_2 PARTITION OF t1 FOR VALUES IN ('USEAST') TABLESPACE tsp2; + CREATE TABLE t1_3 PARTITION OF t1 FOR VALUES IN ('APSOUTH') TABLESPACE tsp3; + CREATE TABLE t1_default PARTITION OF t1 DEFAULT; + + CREATE TABLE t2 (a INT, region VARCHAR, c INT, PRIMARY KEY(a, region)) PARTITION BY LIST (region); + CREATE TABLE t2_1 PARTITION OF t2 FOR VALUES IN ('USWEST') TABLESPACE tsp1; + CREATE TABLE t2_2 PARTITION OF t2 FOR VALUES IN ('USEAST') TABLESPACE tsp2; + CREATE TABLE t2_3 PARTITION OF t2 FOR VALUES IN ('APSOUTH') TABLESPACE tsp3; + CREATE TABLE t2_default PARTITION OF t2 DEFAULT; + ``` + + The tablegroup information would look like the following: + + ```sql + \dgrt + ``` + + ```output + List of tablegroup tables + Group Name | Group Owner | Name | Type | Owner + ------------------+-------------+------------+-------+---------- + colocation_16384 | postgres | t2_1 | table | yugabyte + colocation_16384 | postgres | t1_1 | table | yugabyte + colocation_16385 | postgres | t2_2 | table | yugabyte + colocation_16385 | postgres | t1_2 | table | yugabyte + colocation_16386 | postgres | t2_3 | table | yugabyte + colocation_16386 | postgres | t1_3 | table | yugabyte + default | postgres | t2_default | table | yugabyte + default | postgres | t1_default | table | yugabyte + default | postgres | t2 | table | yugabyte + default | postgres | t1 | table | yugabyte + (10 rows) + ``` + + The Group Name column shows which entities are colocated. + + Each tablegroup belongs to a different tablespace, as shown in the grptablespace column in the following table. + + ```sql + SELECT * FROM pg_yb_tablegroup; + ``` + + ```output + grpname | grpowner | grptablespace | grpacl | grpoptions + ------------------+----------+---------------+--------+------------ + default | 10 | 0 | | + colocation_16384 | 10 | 16384 | | + colocation_16385 | 10 | 16385 | | + colocation_16386 | 10 | 16386 | | + (4 rows) + ``` + + The same information after backup or restore without the `--use_tablespaces` option looks like the following: + + ```sql + \dgrt + ``` + + ```output + List of tablegroup tables + Group Name | Group Owner | Name | Type | Owner + --------------------------+-------------+------------+-------+---------- + colocation_restore_16393 | postgres | t2_1 | table | yugabyte + colocation_restore_16393 | postgres | t1_1 | table | yugabyte + colocation_restore_16399 | postgres | t2_2 | table | yugabyte + colocation_restore_16399 | postgres | t1_2 | table | yugabyte + colocation_restore_16405 | postgres | t2_3 | table | yugabyte + colocation_restore_16405 | postgres | t1_3 | table | yugabyte + default | postgres | t2_default | table | yugabyte + default | postgres | t1_default | table | yugabyte + default | postgres | t2 | table | yugabyte + default | postgres | t1 | table | yugabyte + (10 rows) + ``` + + The colocation property is still maintained after the backup or restore. 
But all the tables now reside in the same tablespace (the default one): + + ```sql + SELECT * FROM pg_yb_tablegroup; + ``` + + ```output + grpname | grpowner | grptablespace | grpacl | grpoptions + --------------------------+----------+---------------+--------+------------ + default | 10 | 0 | | + colocation_restore_16393 | 10 | 0 | | + colocation_restore_16399 | 10 | 0 | | + colocation_restore_16405 | 10 | 0 | | + (4 rows) + ``` diff --git a/docs/content/v2.25/additional-features/connection-manager-ysql/_index.md b/docs/content/v2.25/additional-features/connection-manager-ysql/_index.md new file mode 100644 index 000000000000..d8fa05cf089f --- /dev/null +++ b/docs/content/v2.25/additional-features/connection-manager-ysql/_index.md @@ -0,0 +1,51 @@ +--- +title: YSQL Connection Manager +headerTitle: YSQL Connection Manager +linkTitle: YSQL Connection Manager +description: Built-in server-side connection pooler for YSQL +headcontent: Built-in server-side connection pooler for YSQL +tags: + feature: early-access +menu: + preview: + identifier: connection-manager + parent: additional-features + weight: 10 +type: indexpage +--- + +YugabyteDB includes a built-in connection pooler, YSQL Connection Manager. Because the manager is bundled with the product, it is convenient to manage, monitor, and configure the server connections without additional third-party tools. When combined with [smart drivers](../../drivers-orms/smart-drivers/), YSQL Connection Manager simplifies application architecture and enhances developer productivity. + +{{}} + + {{}} + + {{}} + + {{}} + + {{}} + + {{}} + +{{}} diff --git a/docs/content/v2.25/additional-features/connection-manager-ysql/ycm-best-practices.md b/docs/content/v2.25/additional-features/connection-manager-ysql/ycm-best-practices.md new file mode 100644 index 000000000000..be53bf4dc94b --- /dev/null +++ b/docs/content/v2.25/additional-features/connection-manager-ysql/ycm-best-practices.md @@ -0,0 +1,63 @@ +--- +title: YSQL Connection Manager best practices +headerTitle: Best practices +linkTitle: Best practices +description: Best practices +headcontent: How to get the most from YSQL Connection Manager +menu: + preview: + identifier: ycm-best-practices + parent: connection-manager + weight: 20 +type: docs +--- + +## Reduce stickiness to maximize sharing + +Where possible, design application sessions to avoid using session-level state, temporary tables, or settings that require [sticky connections](../ycm-setup/#sticky-connections). Reducing stickiness allows more efficient reuse of server connections across clients. + +- Prepared statements: Use protocol-level prepared statements instead of SQL-level prepared statements to avoid stickiness (this may provide better performance than PostgreSQL if in optimized mode). +- Using superuser connections: In some corner cases, providing temporary superuser access to a user on a connection can break the connection after revoking the superuser privileges. If there are no cases of temporarily providing superuser privileges to any user, then you can safely set the [ysql_conn_mgr_superuser_sticky](../ycm-setup/#configure) flag to false. + + +## Use sticky connections for long-running workloads + +Sticky connections are ideal for workloads where avoiding the overhead of connection pooling context switches is important. Currently, you cannot explicitly request a sticky connection through configuration or connection parameters (this capability is planned for a future release). 
+ +In the meantime, you can explicitly request sticky connections by using a role with superuser privileges, as all connections initiated by superusers are treated as sticky by default. This approach is particularly recommended for administrative tasks, long-running analytical queries, or debugging sessions where stickiness avoids context switching overhead. + +## Coordinate connection scaling using a smart driver + +YugabyteDB YSQL [Smart Drivers](../../../drivers-orms/smart-drivers/) and Connection Manager are designed to work together and complement each other for optimal scalability and performance. + +A smart driver intelligently routes connections across nodes in a distributed YugabyteDB cluster, ensuring that application traffic is load-balanced efficiently, and can dynamically route queries to appropriate TServers. + +Connection Manager operates at the node level, handling pooling and management of server and client connections in each TServer. It ensures efficient usage of backend resources, reduces the cost of idle connections, and smooths out connection spikes. + +By using a smart driver and Connection Manager together, you benefit from end-to-end optimization: + +- Connections are intelligently spread across the cluster (Smart Driver) +- In each node, connections are pooled, shared, and throttled effectively (Connection Manager) + +This layered architecture enables high concurrency, efficient resource use, and operational simplicity, especially in large-scale, multi-tenant environments. + +## Configure optimal memory settings + +In a YugabyteDB node, the TServer and PostgreSQL (YSQL) processes run side-by-side and share the memory available on the node. + +- The TServer handles data storage, replication (DocDB), and tablet-level operations. +- The PostgreSQL process provides the SQL layer (YSQL), but it's tightly integrated and communicates with the TServer over RPCs. + +For optimal distribution of memory between TServer and PostgreSQL processes, set the `use_memory_defaults_optimized_for_ysql` flag to true when you create a cluster. Refer to [Memory division smart defaults](../../../reference/configuration/smart-defaults/#memory-division-smart-defaults) for more details. + +Note that when Connection Manager is enabled, an instance of the odyssey process is also run on each database node, which can take up to 200MB of RAM. + +## Right-size the cluster + +When sizing your YugabyteDB cluster, be sure to use server (actual) connections, not just client connection pool settings, to guide your calculations. Client connection pools may multiplex multiple client connections over a single server connection, so it's the concurrent active server connections that impact resource usage and should drive cluster sizing. + +As a baseline, plan for a maximum of 15 server connections per vCPU. However, to right-size your cluster you should also consider the expected number of concurrent active YSQL connections and the p99 latency requirements of your workload. As the number of active connections increases, so does the cost of CPU context switching, which can negatively impact p99 latencies. For latency-sensitive or high-throughput workloads, it's often better to use fewer than 15 connections per core when calculating your cluster size. + +For example, if your workload typically has 600 concurrent active connections, and you assume a safe limit of 10 active connections per vCPU, you'll need at least 60 vCPUs in total.
In a 6-node cluster with a replication factor (RF) of 3, the data is evenly distributed, so you can plan for 16 vCPUs per node to meet that requirement. This setup ensures each node has enough compute capacity to handle its share of active connections without introducing excessive latency. + +Run performance benchmarks with your specific workload to validate your sizing, and fine-tune the connections-per-core ratio as needed. Keep in mind that you have to think beyond just the number of connections when you're sizing your cluster. diff --git a/docs/content/v2.25/additional-features/connection-manager-ysql/ycm-migrate.md b/docs/content/v2.25/additional-features/connection-manager-ysql/ycm-migrate.md new file mode 100644 index 000000000000..e7c0b3e4e48a --- /dev/null +++ b/docs/content/v2.25/additional-features/connection-manager-ysql/ycm-migrate.md @@ -0,0 +1,74 @@ +--- +title: Migrate to YSQL Connection Manager +headerTitle: Migrate +linkTitle: Migrate +description: Migrate to YSQL Connection Manager +headcontent: How to migrate from your current pooling solution +menu: + preview: + identifier: ycm-migrate + parent: connection-manager + weight: 50 +type: docs +--- + +## PgBouncer + +PgBouncer is a generic PostgreSQL connection pooler and Connection Manager is purpose-built for YugabyteDB, optimizing connection management for distributed PostgreSQL (YSQL). Both support transaction pooling, session pooling, or statement pooling. + +The following table describes key differences between PgBouncer and YugabyteDB Connection Manager. + +| Feature | PgBouncer | YSQL Connection Manager | +| :--- | :--- | :--- | +| Architecture | Single node | Designed for distributed multi-node connections | +| Pooling mode | Transaction level (default) | Transaction level only | +| Pool configuration | Creates pool for every combination of users and databases | Creates pool for every (user,db) combination | +| SQL limitations | No support for SQL features such as TEMP TABLE, SET statements, CURSORS, and so on. | No equivalent limitations. | +| Configuration parameters | max_db_connections | ysql_max_connections (core database flag) | +| | max_db_client_connections | ysql_conn_mgr_max_client_connections | +| | min_pool_size | ysql_conn_mgr_min_conns_per_db | +| | server_idle_timeout | ysql_conn_mgr_idle_time | +| | server_lifetime | ysql_conn_mgr_server_lifetime | +| | tcp_keepalive | ysql_conn_mgr_tcp_keepalive | +| | tcp_keepintvl | ysql_conn_mgr_tcp_keepalive_keep_interval | +| | listen_port | ysql_conn_mgr_port | +| Connection string | `postgresql://:@:/?sslmode=require` | `postgresql://:@:/?sslmode=require`

Connection Manager remains transparent: the connection string (by default) is the same as without Connection Manager enabled. | +| Scalability | Single process/thread. To scale, you need to start multiple instances of PgBouncer. | The number of threads for multiplexing is configurable using `ysql_conn_mgr_worker_threads` (default is CPU cores divided by 2). | + +### Migrate + +After reviewing the differences between PgBouncer and Connection Manager, migrate from PgBouncer as follows: + +1. Make sure your application works with session-level pooling. + + Connection Manager uses session-level pooling. If you are using transaction pooling with PgBouncer, you may need to make changes to your application. + +1. Deploy YugabyteDB Connection Manager. + + Enable and configure YugabyteDB Connection Manager. Refer to [setup](../ycm-setup/). + +1. Update connection strings. + + Change your application's database connection URL to point to the YugabyteDB endpoint (default port is 5433). + +1. Test in a staging environment. + + - Validate connection handling when running your workload. + - Simulate failover scenarios and node failures. + +1. Set up [monitoring](../ycm-monitor/). + + - Integrate YugabyteDB metrics into Prometheus, Grafana, Datadog, or your preferred APM. + - Track active connections, error rates, and latency. + +1. Understand how YugabyteDB scales horizontally. + + PgBouncer is a single process/thread, and to scale you need to start multiple instances of PgBouncer. Connection Manager, on the other hand, is multi-threaded and creates a number of threads based on the available CPU. + +1. Update your failover and high availability strategy. + + Make sure your application handles dynamic leader changes. Remove any PgBouncer-specific manual failover scripts that you use to fail over and bring your application online. + + diff --git a/docs/content/v2.25/additional-features/connection-manager-ysql/ycm-monitor.md b/docs/content/v2.25/additional-features/connection-manager-ysql/ycm-monitor.md new file mode 100644 index 000000000000..8d4573a36b6a --- /dev/null +++ b/docs/content/v2.25/additional-features/connection-manager-ysql/ycm-monitor.md @@ -0,0 +1,117 @@ +--- +title: YSQL Connection Manager metrics +headerTitle: Observability and metrics +linkTitle: Observability +description: YSQL Connection Manager observability and metrics +headcontent: Monitor YSQL Connection Manager +menu: + preview: + identifier: ycm-monitor + parent: connection-manager + weight: 30 +type: docs +--- + +## Metrics + +Use the following metrics to monitor connections when using YSQL Connection Manager. + +Access metrics at the `:13000/connections` endpoint. + +| Metric Name | Description | +| :--- | :--- | +| database_name (DB OID) | Specifies information regarding the database being used in each pool. | +| user_name (User OID) | Specifies information regarding the user being used in each pool. | +| active_logical_connections | Specifies on a pool-by-pool basis the number of active logical (client) connections.
An "active" client connection corresponds to a session in an active transaction on a server connection. | +| queued_logical_connections | Specifies on a pool-by-pool basis the number of queued client connections.
A "queued" client connection corresponds to a session that is queued up to attach to a server connection. | +| waiting_logical_connections | Specifies on a pool-by-pool basis the number of waiting/idle client connections.
A session that is neither queued to attach to a server connection nor currently using a server connection is in a "waiting" state. | +| active_physical_connections | Specifies on a pool-by-pool basis the number of active physical (server) connections.
(At the start of a transaction) After a server connection is picked up from the connection pool (or freshly created) to serve a client connection, it is marked as "active". | +| idle_physical_connections | Specifies on a pool-by-pool basis the number of idle server connections.
(At the end of a transaction) Once a server connection detaches from its client connection and returns to the server connection pool, it is marked as "idle". | +| sticky_connections | Specifies on a pool-by-pool basis the number of [sticky connections](../ycm-setup/#sticky-connections).
Server connections that do not return to the connection pool at the end of a transaction remain stuck to the client connection for the lifetime of the session. | +| avg_wait_time_ns | Specifies on a pool-by-pool basis the average time (in nanoseconds) that clients are queued before attaching to a server connection. | +| qps / tps | Specifies on a pool-by-pool basis some basic performance metrics.
qps = queries per second
tps = transactions per second | + +### Logical and server connections + +The sum of waiting, queued, and active client connections provides the number of client connections that are currently open. + +The sum of idle and active server connections provides the number of server-side backend processes that have been spawned. + +The number of active client connections will always be equal to the number of active server connections. + +### Pool use (idle server connections/waiting client connections) + +In general, you can have idle server connections, as they can be used for connection burst scenarios. Configure the [timeout for idle connections](../ycm-setup/#configure) using the `ysql_conn_mgr_idle_time` flag, depending on your use case. + +You can reduce `ysql_max_connections` such that the active to idle ratio is higher, provided that idle connections are not completely extinguished in the long run. + +### Queued clients + +You can have some queued state clients. However, if clients start timing out or query latency is too high, increase `ysql_max_connections`. + +### Sticky connections + +[Sticky connections](../ycm-setup/#sticky-connections) can be the cause of higher connection acquisition latency in some cases (sticky connections are destroyed once used). + +They may also be the cause for connection exhaustion or client wait timeouts. + +## Logging + +Connection Manager provides the following log levels that you can set using the `ysql_conn_mgr_log_settings` flag: + +- log_debug +- log_query +- log_config +- log_session +- log_stats + +The structure of a log line is as follows: + +```prolog +PID YYYY-MM-DD HH:MM:SS UTC log_level [clientID serverID] (context) This is a sample log! +``` + +For example: + +```prolog +2986790 2025-04-22 20:55:08.236 UTC debug [c960b6b7a6030 scb5ee95439f2] (reset) ReadyForQuery +``` + +Connection Manager logs are stored in the same directory as [TServer logs](../../../explore/observability/logging/), depending on your cluster setup. They are rotated daily and have the following file naming convention: + +```sh +ysql-conn-mgr-YYYY-MM-DD_HHMMSS.log.PID +``` + +For example: + +```sh +ysql-conn-mgr-2025-04-22_205456.log.2986790 +``` + +The log file was created for a Connection Manager process with a PID of 2986790, which started logging at 20:54:56 UTC on 22nd April 2025. + + diff --git a/docs/content/v2.25/additional-features/connection-manager-ysql/ycm-setup.md b/docs/content/v2.25/additional-features/connection-manager-ysql/ycm-setup.md new file mode 100644 index 000000000000..0c29eff55735 --- /dev/null +++ b/docs/content/v2.25/additional-features/connection-manager-ysql/ycm-setup.md @@ -0,0 +1,173 @@ +--- +title: Set up YSQL Connection Manager +headerTitle: Set up YSQL Connection Manager +linkTitle: Setup +description: Set up YSQL Connection Manager +headcontent: YSQL Connection Manager flags and settings +menu: + preview: + identifier: ycm-setup + parent: connection-manager + weight: 10 +type: docs +--- + +## Start YSQL Connection Manager + + + +
+
+ +To start a YugabyteDB cluster with YSQL Connection Manager, set the [yb-tserver](../../../reference/configuration/yb-tserver/) flag `enable_ysql_conn_mgr` to true. + +For example, to create a single-node cluster with YSQL Connection Manager using [yugabyted](../../../reference/configuration/yugabyted/), use the following command: + +```sh +./bin/yugabyted start --tserver_flags "enable_ysql_conn_mgr=true" --ui false +``` + +When `enable_ysql_conn_mgr` is set, each YB-TServer starts the YSQL Connection Manager process along with the PostgreSQL process. You should see one YSQL Connection Manager process per YB-TServer. + +{{< note >}} + +To create a large number of client connections, ensure that "SHMMNI" (the maximum number of concurrent shared memory segments an OS allows) as well as [ulimit](../../../deploy/manual-deployment/system-config/#set-ulimits) is set correctly as follows: + +1. Open the file `/etc/sysctl.conf`. +1. Add `kernel.shmmni = 32768` (support for 30000 clients) at the end of the file. +1. To refresh the settings, use `sudo sysctl -p`. +{{< /note >}} + +
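+
+For example, to also set the Connection Manager idle timeout and raise the client connection limit (a sketch using flags described in [Configure](#configure); the values shown are illustrative, not recommendations), you can pass additional flags in the same command:
+
+```sh
+./bin/yugabyted start \
+  --tserver_flags "enable_ysql_conn_mgr=true,ysql_conn_mgr_idle_time=120,ysql_conn_mgr_max_client_connections=20000" \
+  --ui false
+```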
+
+ +{{}}While in Early Access, YSQL Connection Manager is not available in YugabyteDB Anywhere by default. To make connection pooling available, set the **Allow users to enable or disable connection pooling** Global Runtime Configuration option (config key `yb.universe.allow_connection_pooling`) to true. Refer to [Manage runtime configuration settings](../../../yugabyte-platform/administer-yugabyte-platform/manage-runtime-config/). You must be a Super Admin to set global runtime configuration flags. + +To enable built-in connection pooling for universes deployed using YugabyteDB Anywhere: + +- Turn on the **Connection pooling** option when creating a universe. Refer to [Create a multi-zone universe](../../../yugabyte-platform/create-deployments/create-universe-multi-zone/#advanced-configuration). +- Edit connection pooling on an existing universe. Refer to [Edit connection pooling](../../../yugabyte-platform/manage-deployments/edit-universe/#edit-connection-pooling). + +Note that when managing universes using YugabyteDB Anywhere, do not set connection pooling flags, `enable_ysql_conn_mgr`, `ysql_conn_mgr_port`, and `pgsql_proxy_bind_address`. + +**Connect** + +To connect to the YSQL Connection Manager, use the [ysqlsh](../../../api/ysqlsh/) command with the [`-h `](../../../api/ysqlsh/#h-hostname-host-hostname) flag, instead of specifying the Unix-domain socket directory. + +Using the socket directory along with [`-p`](../../../api/ysqlsh/#p-port-port-port) (custom PostgreSQL port or default 6433) will connect you to the PostgreSQL process, not the YSQL connection manager process. + +
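+
+For example, assuming a node at IP address 10.0.0.1 (an illustrative address) with Connection Manager listening on the default port 5433, you can connect as follows:
+
+```sh
+./bin/ysqlsh -h 10.0.0.1 -p 5433 -U yugabyte
+```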
+
+ +{{}}You can enable built-in connection pooling on YugabyteDB Aeon clusters in the following ways: + +- When [creating a cluster](../../../yugabyte-cloud/cloud-basics/create-clusters/), turn on the **Connection Pooling** option. (Connection Pooling is enabled by default for [Sandbox clusters](../../../yugabyte-cloud/cloud-basics/create-clusters/create-clusters-free/).) +- For clusters that are already created, navigate to the cluster **Settings>Connection Pooling** tab. + +Enabling connection pooling on an Aeon cluster gives 10 client connections for every server connection by default. + +
+
+ +## Configure + +By default, when YSQL Connection Manager is enabled, it uses the port 5433, and the backend database is assigned a random free port. + +To explicitly set a port for YSQL, you should specify ports for the flags `ysql_conn_mgr_port` and [ysql_port](../../../reference/configuration/yugabyted/#advanced-flags). + +The following table describes YB-TServer flags related to YSQL Connection Manager: + +| flag | Description | +|:---- | :---------- | +| enable_ysql_conn_mgr | Enables YSQL Connection Manager for the cluster. YB-TServer starts a YSQL Connection Manager process as a child process.
Default: false | +| enable_ysql_conn_mgr_stats | Enable statistics collection from YSQL Connection Manager. These statistics are displayed at the endpoint `:13000/connections`.
Default: true | +| ysql_conn_mgr_idle_time | Specifies the maximum idle time (in seconds) allowed for database connections created by YSQL Connection Manager. If a database connection remains idle without serving a client connection for a duration equal to, or exceeding this value, it is automatically closed by YSQL Connection Manager.
Default: 60 | +| ysql_conn_mgr_max_client_connections | Maximum number of concurrent client connections allowed.
Default: 10000 | +| ysql_conn_mgr_min_conns_per_db | Minimum number of server connections that is present in the pool. This limit is not considered while closing a broken server connection.
Default: 1 | +| ysql_conn_mgr_num_workers | Number of worker threads used by YSQL Connection Manager. If set to 0, the number of worker threads will be half of the number of CPU cores.
Default: 0 | +| ysql_conn_mgr_stats_interval | Interval (in seconds) for updating the YSQL Connection Manager statistics.
Default: 1 | +| ysql_conn_mgr_superuser_sticky | Make superuser connections sticky.
Default: true | +| ysql_conn_mgr_port | YSQL Connection Manager port to which clients can connect. This must be different from the PostgreSQL port set via `pgsql_proxy_bind_address`.
Default: 5433 | +| ysql_conn_mgr_server_lifetime | The maximum duration (in seconds) that a backend PostgreSQL connection managed by YSQL Connection Manager can remain open after creation.
Default: 3600 | +| ysql_conn_mgr_log_settings | Comma-separated list of log settings for YSQL Connection Manager. Can include 'log_debug', 'log_config', 'log_session', 'log_query', and 'log_stats'.
Default: "" | +| ysql_conn_mgr_use_auth_backend | Enable the use of the auth-backend for authentication of client connections. When false, the older auth-passthrough implementation is used.
Default: true | +| ysql_conn_mgr_readahead_buffer_size | Size of the per-connection buffer (in bytes) used for IO read-ahead operations in YSQL Connection Manager.
Default: 8192 | +| ysql_conn_mgr_tcp_keepalive | TCP keepalive time (in seconds) in YSQL Connection Manager. Set to zero to disable keepalive.
Default: 15 | +| ysql_conn_mgr_tcp_keepalive_keep_interval | TCP keepalive interval (in seconds) in YSQL Connection Manager. Only applicable if 'ysql_conn_mgr_tcp_keepalive' is enabled.
Default: 75 | +| ysql_conn_mgr_tcp_keepalive_probes | Number of TCP keepalive probes in YSQL Connection Manager. Only applicable if 'ysql_conn_mgr_tcp_keepalive' is enabled.
Default: 9 | +| ysql_conn_mgr_tcp_keepalive_usr_timeout | TCP user timeout (in milliseconds) in YSQL Connection Manager. Only applicable if 'ysql_conn_mgr_tcp_keepalive' is enabled.
Default: 9 | +| ysql_conn_mgr_pool_timeout | Server pool wait timeout (in milliseconds) in YSQL Connection Manager. This is the time clients wait for an available server, after which they are disconnected. If set to zero, clients wait for server connections indefinitely.
Default: 0 | +| ysql_conn_mgr_sequence_support_mode | Sequence support mode when YSQL connection manager is enabled. When set to 'pooled_without_curval_lastval', the currval() and lastval() functions are not supported. When set to 'pooled_with_curval_lastval', the currval() and lastval() functions are supported. For both settings, monotonic sequence order is not guaranteed if `ysql_sequence_cache_method` is set to `connection`. To also support monotonic order, set this flag to `session`.
Default: pooled_without_curval_lastval | +| ysql_conn_mgr_optimized_extended_query_protocol | Enables optimization of [extended-query protocol](https://www.postgresql.org/docs/current/protocol-overview.html#PROTOCOL-QUERY-CONCEPTS) to provide better performance; note that while optimization is enabled, you may have correctness issues if you alter the schema of objects used in prepared statements. If set to false, extended-query protocol handling is always fully correct but unoptimized.
Default: true | + +## Authentication methods + +The following table outlines the various authentication methods supported by YugabyteDB and their compatibility with the YSQL Connection Manager when a connection matches an HBA (Host-Based Authentication) record. + +| | Auth Method | Description | +|:--| :---------------------| :------------ | :---- | +| {{}} | Ident Authentication | Server contacts client's OS to verify username that initiated connection, trusting OS-level identity.| +| {{}} | Peer Authentication | For local/Unix socket connections, server checks that the connecting UNIX user matches the requested database user, relying on OS user identity. | +| {{}} | Plain/Clear Text Password | Standard password-based authentication, though storing passwords in plain text is not recommended. | +| {{}} | JWT Authentication (OIDC) | Uses JSON Web Tokens (JWT) from an external Identity Provider (IDP) to securely transmit authentication and authorization information. | +| {{}} | LDAP Authentication | Verifies users against a centralized directory service using Lightweight Directory Access Protocol (LDAP). | +| {{}} | GSS API or Kerberos| Enables Kerberos-based authentication through a standardized API, allowing secure, enterprise-grade Single Sign-On (SSO) logins without passwords.
**Note**: Testing of this feature with YugabyteDB is currently limited.| +| {{}} | SCRAM-SHA-256 | A secure password-based authentication that protects credentials using hashing, salting, and challenge-response. | +| {{}} | MD5 | Password-based authentication where the user's password is by default stored in MD5 encryption format in the database. | +| {{}} | Cert | Certificate-based authentication requires the client to provide certificates to the server over a TLS connection for authentication. | + +## Sticky connections + +YSQL Connection Manager enables a larger number of client connections to efficiently share a smaller pool of backend processes using a many-to-one multiplexing model. However, in certain cases, a backend process may enter a state that prevents connection multiplexing between transactions. When this occurs, the backend process remains dedicated to a single client connection (hence the term "sticky connection") for the entire session rather than just a single transaction. This behavior deviates from the typical use case, where backend processes are reassigned after each transaction. + +Currently, once formed, sticky connections remain sticky until the end of the session. At the end of the session, the backend process corresponding to a sticky connection is destroyed along with the connection, and the connection does not return to the pool. + +When using YSQL Connection Manager, sticky connections can form in the following circumstances: + +- Creating TEMP tables. +- Declaring a CURSOR using the WITH HOLD attribute. +- Using a PREPARE query (not to be confused with protocol-level preparation of statements). +- Superuser connections; if you want superuser connections to not be sticky, set the `ysql_conn_mgr_superuser_sticky` flag to false. +- Using a SEQUENCE with `ysql_conn_mgr_sequence_support_mode` set to `session`. (Other values for this flag provide lesser support without stickiness.) +- Replication connections. +- Setting the following configuration parameters during the session: + - `session_authorization` + - `role` + - `default_tablespace` + - `temp_tablespaces` + - Any string-type variables of extensions + - `yb_read_after_commit_visibility` + +## Limitations + +- Changes to [configuration parameters](../../../reference/configuration/yb-tserver/#postgresql-configuration-parameters) for a user or database that are set using ALTER ROLE SET or ALTER DATABASE SET queries may reflect in other pre-existing active sessions. +- YSQL Connection Manager can route up to 10,000 connection pools. This includes pools corresponding to dropped users and databases. +- Prepared statements may be visible to other sessions in the same connection pool. {{}} +- Attempting to use DEALLOCATE/DEALLOCATE ALL queries can result in unexpected behavior. {{}} +- Currently, you can't apply custom configurations to individual pools. The YSQL Connection Manager configuration applies to all pools. +- When YSQL Connection Manager is enabled, the backend PID stored using JDBC drivers may not be accurate. This does not affect backend-specific functionalities (for example, cancel queries), but this PID should not be used to identify the backend process. +- By default, `currval` and `nextval` functions do not work when YSQL Connection Manager is enabled. They can be supported with the help of the `ysql_conn_mgr_sequence_support_mode` flag. +- YSQL Connection Manager does not yet support IPv6 connections. 
{{}} +- Currently, [auth-method](https://docs.yugabyte.com/preview/secure/authentication/host-based-authentication/#auth-method) `cert` is not supported for host-based authentication. {{}} +- Although the use of auth-backends (`ysql_conn_mgr_use_auth_backend=true`) to authenticate client connections can result in higher connection acquisition latencies, using auth-passthrough (`ysql_conn_mgr_use_auth_backend=false`) may not be suitable depending on your workload. Contact {{% support-general %}} before setting `ysql_conn_mgr_use_auth_backend` to false. {{}} +- Salted Challenge Response Authentication Mechanism ([SCRAM](https://docs.yugabyte.com/preview/secure/authentication/password-authentication/#scram-sha-256)) is not supported with YSQL Connection Manager. {{}} +- Unix socket connections to YSQL Connection Manager are not supported. {{}} diff --git a/docs/content/v2.25/additional-features/connection-manager-ysql/ycm-troubleshoot.md b/docs/content/v2.25/additional-features/connection-manager-ysql/ycm-troubleshoot.md new file mode 100644 index 000000000000..f17761eec7c4 --- /dev/null +++ b/docs/content/v2.25/additional-features/connection-manager-ysql/ycm-troubleshoot.md @@ -0,0 +1,64 @@ +--- +title: YSQL Connection Manager Troubleshooting +headerTitle: Troubleshoot +linkTitle: Troubleshoot +description: Troubleshooting YSQL Connection Manager problems +headcontent: Troubleshoot issues with YSQL Connection Manager +menu: + preview: + identifier: ycm-troubleshoot + parent: connection-manager + weight: 60 +type: docs +--- + +For information on YSQL Connection Manager limitations, refer to [Limitations](../ycm-setup/#limitations). + +## Connection exhaustion due to sticky connections + +**Symptoms** + +- Partially exhausted pool: Higher query execution latencies, reflected in higher `queued_logical_connections` and `avg_wait_time_ns` metrics. +- Completely exhausted server connection pool: Clients appear to hang or timeout while trying to authenticate or execute queries. + +**Verify** + +To offer complete correctness/range of support, Connection Manager makes connections for some features [sticky by default](../ycm-setup/#sticky-connections). You can verify this using the [sticky_connections metric](../ycm-monitor/#metrics). Use the `13000/connections` endpoint and search for `sticky_connections`. + +Search for "sticky" in Connection Manager logs with `log_debug` enabled. See [Logging](../ycm-monitor/#logging). + +Depending on your use case, you can enable flags or workarounds to avoid stickiness. + +## Timeouts (connection exhaustion/high multiplexity factor) + +**Symptoms** + +For higher query execution latencies, timeouts may occur based on application layer handling. + +**Verify** + +Higher latencies will be reflected in the time taken for the client connection to attach to a server connection. + +Look at `avg_wait_time_ns` on the `13000/connections` endpoint; higher values account for higher latencies. + +If you have high multiplexity (many more client connections than server connections), clients may be waiting too long to attach to a server connection. Consider increasing the [ysql_max_connections setting](../ycm-setup/#configure). + +## Unsupported authentication methods + +- SCRAM. Salted Challenge Response Authentication Mechanism is not currently supported by Connection Manager. SCRAM is a secure method for user authentication where the client and server verify each other's identity without ever transmitting the actual password in plain text. 
This provides protection against password sniffing on an unreliable network. It's a challenge-response mechanism, in which the server sends a challenge and the client returns a value calculated from its password and the salt. With Connection Manager in the path, no challenge is issued to the client, so no response is received from the client. + +- TCP IPv6 client connections. Connection Manager assumes all client connections use IPv4. If an IPv6 client tries to connect, Connection Manager still matches it against the IPv4 entries in the host column of the HBA file. + +- CERT authentication. Connection Manager does not support CERT authentication (verify-full/verify-ca). CERT authentication requires connections to be SSL encrypted. Authentication with Connection Manager still happens on the database side, so Connection Manager must forward all client credentials (for example, the password) and set up the SSL context on the database during authentication. The client presents its certificates to Connection Manager, and it is difficult to pass the same certificates on to the database to perform authentication. Even if they could be passed on, the server connections are Unix socket connections (no SSL/encryption), which makes it difficult to set up the SSL context in which the client certificates would be processed for certificate authentication via Connection Manager. Without Connection Manager, client certificates are loaded during the client's initial SSL handshake with the postmaster process. + +## SSL behaviour + +Although Connection Manager supports all SSL modes that clients can set in a connection, the behaviour can be slightly different. The following corner cases can result in different behaviour when using Connection Manager compared to a direct database connection: + +- Enable TLS in the cluster, add `{host all all all trust}` in the HBA file, and try making a connection using sslmode=disable. The connection will fail with Connection Manager, whereas it will be successfully created if connected directly to the database port. + +- Enable TLS in the cluster, add `{host all all all trust}` in the HBA file, and try making a connection using sslmode=allow. An encrypted connection will be created with Connection Manager, whereas when connecting to a database port an unencrypted connection will be created. + +- Enable TLS in the cluster and create a connection using sslmode=disable. Connection Manager will throw the following error: `odyssey: c8240c445726f: SSL is required`; whereas when connecting to the database port, the error message is `FATAL: no pg_hba.conf entry for host`. + +The main reason for these differences in behaviour is that authentication is sometimes performed at the Connection Manager layer itself, rather than following the standard authentication mechanism (where authentication happens on the server based on credentials forwarded by Connection Manager). diff --git a/docs/content/v2.25/admin/_index.md b/docs/content/v2.25/admin/_index.md new file mode 100644 index 000000000000..8150abca462f --- /dev/null +++ b/docs/content/v2.25/admin/_index.md @@ -0,0 +1,67 @@ +--- +title: CLIs and command line tools +headerTitle: Command line tools +linkTitle: CLIs +description: Use these CLIs and command line tools to interact with YugabyteDB.
+headcontent: Tools for interacting with, configuring, and managing YugabyteDB +menu: + preview: + identifier: admin + parent: reference + weight: 1400 +type: indexpage +--- + +YugabyteDB ships with a variety of tools to interact with, manage, and configure your cluster. Each tool has been designed for a specific purpose. The following illustration shows which tools operate on which parts of the cluster. + + +![Tools and their purpose](/images/admin/tools_functionalities1.png) + +For information about [yugabyted](../reference/configuration/yugabyted/) and configuring [YB-Master](../reference/configuration/yb-master/) and [YB-TServer](../reference/configuration/yb-tserver/) services, refer to [Configuration](../reference/configuration/). + +For information about YugabyteDB API clients (YSQL shell and YCQL shell), refer to [Client shells](../api/#client-shells). + +{{}} +For all the command line tools, when passing in an argument with a value that starts with a hyphen (for example, `-1`), add a double hyphen (`--`) at the end of other arguments followed by the argument name and value. This tells the binary to treat those arguments as positional. For example, to specify `set_flag ysql_select_parallelism -1`, you need to do the following: + +```bash +yb-ts-cli [other arguments] -- set_flag ysql_select_parallelism -1 +``` + +{{}} + +## Tools + +{{}} + + {{}} + + {{}} + + {{}} + + {{}} + + {{}} + +{{}} diff --git a/docs/content/v2.25/admin/yb-admin.md b/docs/content/v2.25/admin/yb-admin.md new file mode 100644 index 000000000000..06905b9b4a98 --- /dev/null +++ b/docs/content/v2.25/admin/yb-admin.md @@ -0,0 +1,2919 @@ +--- +title: yb-admin - command line tool for advanced YugabyteDB administration +headerTitle: yb-admin +linkTitle: yb-admin +description: Use the yb-admin command line tool for advanced administration of YugabyteDB clusters. +menu: + preview: + identifier: yb-admin + parent: admin + weight: 30 +type: docs +--- + +The yb-admin utility, located in the `bin` directory of YugabyteDB home, provides a command line interface for administering clusters. + +It invokes the [yb-master](../../reference/configuration/yb-master/) and [yb-tserver](../../reference/configuration/yb-tserver/) servers to perform the necessary administration. + +## Syntax + +To use yb-admin from the YugabyteDB home directory, run `./bin/yb-admin` using the following syntax. + +```sh +yb-admin \ + [ --master_addresses ] \ + [ --init_master_addrs ] \ + [ --timeout_ms ] \ + [ --certs_dir_name ] \ + [ command_flags ] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* `init_master_addrs`: Allows specifying a single YB-Master address from which the rest of the YB-Masters are discovered. +* `timeout_ms`: The RPC timeout, in milliseconds. Default 60000. A value of 0 means don't wait; -1 means wait indefinitely. +* `certs_dir_name`: The directory with certificates to use for secure server connections. Default is `""`. + + To connect to a cluster with TLS enabled, you must include the `-certs_dir_name` flag with the directory location where the root certificate is located. +* **command**: The operation to be performed. See [Commands](#commands) for syntax details and examples. +* **command_flags**: Configuration flags that can be applied to the command. + +### Online help + +To display the online help, run `yb-admin --help` from the YugabyteDB home directory. 
+ +```sh +./bin/yb-admin --help +``` + +## Commands + +* [Universe and cluster](#universe-and-cluster-commands) +* [Table](#table-commands) +* [Backup and snapshot](#backup-and-snapshot-commands) +* [Deployment topology](#deployment-topology-commands) + * [Multi-zone and multi-region](#multi-zone-and-multi-region-deployment-commands) + * [Read replica](#read-replica-deployment-commands) +* [Security](#security-commands) + * [Encryption at rest](#encryption-at-rest-commands) +* [Change data capture (CDC)](#change-data-capture-cdc-commands) +* [xCluster replication](#xcluster-replication-commands) +* [Decommissioning](#decommissioning-commands) +* [Rebalancing](#rebalancing-commands) +* [Upgrade](#upgrade) + +--- + +### Universe and cluster commands + +#### get_universe_config + +Gets the configuration for the universe. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + get_universe_config +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +#### change_config + +Changes the configuration of a tablet. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + change_config \ + [ ADD_SERVER | REMOVE_SERVER ] \ + \ + [ PRE_VOTER | PRE_OBSERVER ] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *tablet-id*: The identifier (ID) of the tablet. +* ADD_SERVER | REMOVE_SERVER: Subcommand to add or remove the server. +* *peer-uuid*: The UUID of the tablet server hosting the peer tablet. +* PRE_VOTER | PRE_OBSERVER: Role of the new peer joining the quorum. Required when using the ADD_SERVER subcommand. + +**Notes:** + +If you need to take a node down temporarily, but intend to bring it back up, you should not need to use the REMOVE_SERVER subcommand. + +* If the node is down for less than 15 minutes, it will catch up through RPC calls when it comes back online. +* If the node is offline longer than 15 minutes, then it will go through Remote Bootstrap, where the current leader will forward all relevant files to catch up. + +If you do not intend to bring a node back up (perhaps you brought it down for maintenance, but discovered that the disk is bad), then you want to decommission the node (using the REMOVE_SERVER subcommand) and then add in a new node (using the ADD_SERVER subcommand). + +#### change_master_config + +Changes the master configuration. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + change_master_config \ + [ ADD_SERVER|REMOVE_SERVER ] \ + \ + [] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* ADD_SERVER | REMOVE_SERVER: Adds or removes a new YB-Master server. + + After adding or removing a node, verify the status of the YB-Master server on the YB-Master UI page () or run the yb-admin [dump_masters_state](#dump-masters-state) command. +* *ip-addr*: The IP address of the server node. +* *port*: The port of the server node. +* *uuid*: The UUID for the server that is being added/removed. + +#### list_tablet_servers + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + list_tablet_servers +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *tablet-id*: The identifier (ID) of the tablet. + +#### list_tablets + +Lists all tablets and their replica locations for a particular table. + +Use this to find out who the LEADER of a tablet is. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + list_tablets . 
[] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *keyspace-type*: Type of the keyspace, ysql or ycql. +* *keyspace-name*: The namespace, or name of the database or keyspace. +* *table*: The name of the table. +* *max-tablets*: The maximum number of tables to be returned. Default is `10`. Set to `0` to return all tablets. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + list_tablets ysql.db_name table_name 0 +``` + +```output +Tablet UUID Range Leader +cea3aaac2f10460a880b0b4a2a4b652a partition_key_start: "" partition_key_end: "\177\377" 127.0.0.1:9100 +e509cf8eedba410ba3b60c7e9138d479 partition_key_start: "\177\377" partition_key_end: "" +``` + +#### list_all_tablet_servers + +Lists all tablet servers. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + list_all_tablet_servers +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +#### list_all_masters + +Displays a list of all YB-Master servers in a table listing the master UUID, RPC host and port, state (`ALIVE` or `DEAD`), and role (`LEADER`, `FOLLOWER`, or `UNKNOWN_ROLE`). + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + list_all_masters +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses node7:7100,node8:7100,node9:7100 \ + list_all_masters +``` + +```output +Master UUID RPC Host/Port State Role +... node8:7100 ALIVE FOLLOWER +... node9:7100 ALIVE FOLLOWER +... node7:7100 ALIVE LEADER +``` + +#### list_replica_type_counts + +Prints a list of replica types and counts for the specified table. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + list_replica_type_counts +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *keyspace*: The name of the database or keyspace. +* *table-name*: The name of the table. + +#### dump_masters_state + +Prints the status of the YB-Master servers. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + dump_masters_state +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +#### list_tablet_server_log_locations + +List the locations of the tablet server logs. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + list_tablet_server_log_locations +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +#### list_tablets_for_tablet_server + +Lists all tablets for the specified tablet server (YB-TServer). + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + list_tablets_for_tablet_server +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *ts-uuid*: The UUID of the tablet server (YB-TServer). + +#### split_tablet + +Splits the specified hash-sharded tablet and computes the split point as the middle of tablet's sharding range. + +```sh +yb-admin \ + --master_addresses \ + split_tablet +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *tablet-id-to-split*: The identifier of the tablet to split. 
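+
+**Example**
+
+The following is a sketch using a hypothetical tablet ID; use [list_tablets](#list-tablets) to look up real tablet IDs.
+
+```sh
+./bin/yb-admin \
+    --master_addresses ip1:7100,ip2:7100,ip3:7100 \
+    split_tablet 9df647cbd5274ba2914ee338e8ff1252  # placeholder tablet ID
+```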
+ +For more information on tablet splitting, see: + +* [Tablet splitting](../../architecture/docdb-sharding/tablet-splitting) — Architecture overview +* [Automatic Re-sharding of Data with Tablet Splitting](https://github.com/yugabyte/yugabyte-db/blob/master/architecture/design/docdb-automatic-tablet-splitting.md) — Architecture design document in the GitHub repository. + +#### master_leader_stepdown + +Forces the master leader to step down. The specified YB-Master node will take its place as leader. + +{{< note title="Note" >}} + +* Use this command only if recommended by Yugabyte support. + +* There is a possibility of downtime. + +{{< /note >}} + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + master_leader_stepdown [ ] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *new-leader-id*: (Optional) The identifier (ID) of the new YB-Master leader. If not specified, the new leader is automatically elected. + +#### ysql_catalog_version + +Prints the current YSQL schema catalog version. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + ysql_catalog_version +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + ysql_catalog_version +``` + +The version output displays: + +```output +Version:1 +``` + +--- + +### Table commands + +#### list_tables + +Prints a list of all tables. Optionally, include the database type, table ID, and the table type. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + list_tables \ + [ include_db_type ] [ include_table_id ] [ include_table_type ] +``` + +```sh +yb-admin \ + --master_addresses list_tables +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* `include_db_type`: (Optional) Add this flag to include the database type for each table. +* `include_table_id`: (Optional) Add this flag to include the unique UUID associated with the table. +* `include_table_type`: (Optional) Add this flag to include the table type for each table. + +Returns tables in the following format, depending on the flags used: + +```output +.. +``` + +* *db-type*: The type of database. Valid values are `ysql`, `ycql`, and `unknown`. +* *namespace*: The name of the database (for YSQL) or keyspace (for YCQL). +* *table-name*: The name of the table. +* *table-id*: The UUID of the table. +* *table-type*: The type of table. Valid values are `catalog`, `table`, `index`, and `other`. + +{{< note title="Tip" >}} + +To display a list of tables and their UUID (`table_id`) values, open the **YB-Master UI** (`:7000/`) and click **Tables** in the navigation bar. + +{{< /note >}} + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + list_tables +``` + +```output +... +yugabyte.pg_range +template1.pg_attrdef +template0.pg_attrdef_adrelid_adnum_index +template1.pg_conversion +system_platform.pg_opfamily +postgres.pg_opfamily_am_name_nsp_index +system_schema.functions +template0.pg_statistic +system.local +template1.pg_inherits_parent_index +template1.pg_amproc +system_platform.pg_rewrite +yugabyte.pg_ts_config_cfgname_index +template1.pg_trigger_tgconstraint_index +template1.pg_class +template1.pg_largeobject +system_platform.sql_parts +template1.pg_inherits +... +``` + +#### compact_table + +Triggers manual compaction on a table. 
+ +**Syntax 1: Using table name** + +```sh +yb-admin \ + --master_addresses \ + compact_table .
[] [ADD_INDEXES] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *db-type*: The type of database. Valid values include `ysql` and `ycql`. +* *namespace*: The name of the database (for YSQL) or keyspace (for YCQL). +* *table*: The name of the table to compact. +* *timeout-in-seconds*: Specifies duration (in seconds) yb-admin waits for compaction to end. Default is `20`. +* ADD_INDEXES: Whether to compact the secondary indexes associated with the table. Default is `false`. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses $MASTER_RPC_ADDRS \ + compact_table ysql.yugabyte table_name +``` + +```output +Compacted [yugabyte.table_name] tables. +``` + +**Syntax 2: Using table ID** + +```sh +yb-admin \ + --master_addresses \ + compact_table tableid. [] [ADD_INDEXES] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *table-id*: The unique UUID associated with the table. +* *timeout-in-seconds*: Specifies duration (in seconds) yb-admin waits for compaction to end. Default is `20`. +* ADD_INDEXES: Whether to compact the secondary indexes associated with the table. Default is `false`. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses $MASTER_RPC_ADDRS \ + compact_table tableid.000033eb000030008000000000004002 +``` + +```output +Compacted [000033eb000030008000000000004002] tables. +``` + +#### compaction_status + +Show the status of full compaction on a table. + +```sh +yb-admin \ + --master_addresses \ + compaction_status .
[show_tablets] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *db-type*: The type of database. Valid values are `ysql` and `ycql`. +* *namespace*: The name of the database (for YSQL) or keyspace (for YCQL). +* *table*: The name of the table to show the full compaction status. +* `show_tablets`: Show the compactions status of individual tablets. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses $MASTER_RPC_ADDRS \ + compaction_status ysql.yugabyte table_name show_tablets +``` + +```output +tserver uuid: 1b9486461cdd48f59eb46b33992cd73a + tablet id | full compaction state | last full compaction completion time + + 93c6933407e24adf8b3f12c11499673a IDLE 2025-06-03 15:36:22.395586 + 9c1cddfe33ec440cbcd70770563c62ca IDLE 2025-06-03 15:36:22.743703 + b739745bba254330805c259459c61a7e IDLE 2025-06-03 15:36:23.416460 + 9d0155aa77b3441e8e8c78cc433b995c IDLE 2025-06-03 15:36:23.504400 + 2b9f283301d14be2add2c3f2a0016531 IDLE 2025-06-03 15:36:23.892202 + eff101a879f348778ed599cb79498c44 IDLE 2025-06-03 15:36:24.706769 + +tserver uuid: c0505f1d31774a3d88fae26ce14cde10 + tablet id | full compaction state | last full compaction completion time + + 93c6933407e24adf8b3f12c11499673a IDLE 2025-06-03 15:36:22.769900 + 9c1cddfe33ec440cbcd70770563c62ca IDLE 2025-06-03 15:36:23.142609 + b739745bba254330805c259459c61a7e IDLE 2025-06-03 15:36:23.871247 + 9d0155aa77b3441e8e8c78cc433b995c IDLE 2025-06-03 15:36:23.877126 + 2b9f283301d14be2add2c3f2a0016531 IDLE 2025-06-03 15:36:24.294265 + eff101a879f348778ed599cb79498c44 IDLE 2025-06-03 15:36:25.107964 + +tserver uuid: f7b5e6fc38974cbabc330d944d564974 + tablet id | full compaction state | last full compaction completion time + + 93c6933407e24adf8b3f12c11499673a IDLE 2025-06-03 15:36:22.415413 + 9c1cddfe33ec440cbcd70770563c62ca IDLE 2025-06-03 15:36:22.793145 + b739745bba254330805c259459c61a7e IDLE 2025-06-03 15:36:23.473077 + 9d0155aa77b3441e8e8c78cc433b995c IDLE 2025-06-03 15:36:23.475270 + 2b9f283301d14be2add2c3f2a0016531 IDLE 2025-06-03 15:36:23.888733 + eff101a879f348778ed599cb79498c44 IDLE 2025-06-03 15:36:24.705576 + +Last full compaction completion time: 2025-06-03 15:36:22.395586 +Last admin compaction request time: 2025-06-03 15:36:22.061267 +``` + +#### modify_table_placement_info + +Modifies the placement information (cloud, region, and zone) for a table. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + modify_table_placement_info \ + [ ] +``` + +or alternatively: + +```sh +yb-admin \ + --master_addresses \ + modify_table_placement_info tableid. \ + [ ] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *keyspace*: The namespace, or name of the database or keyspace. +* *table-name*: The name of the table. +* *table-id*: The unique UUID associated with the table whose placement policy is being changed. +* *placement-info*: Comma-delimited list of placements for *cloud*.*region*.*zone*. Default is `cloud1.datacenter1.rack1`. +* *replication-factor*: The number of replicas for each tablet. +* *placement-id*: Identifier of the primary cluster. Optional. If set, it has to match the placement ID specified for the primary cluster in the cluster configuration. 
+ +**Example** + +```sh +./bin/yb-admin \ + --master_addresses $MASTER_RPC_ADDRS \ + modify_table_placement_info testdatabase testtable \ + aws.us-west.us-west-2a,aws.us-west.us-west-2b,aws.us-west.us-west-2c 3 +``` + +Verify this in the Master UI by opening the **YB-Master UI** (`:7000/`) and clicking **Tables** in the navigation bar. Navigate to the appropriate table whose placement information you're changing, and check the Replication Info section. + +{{< note title="Notes" >}} + +Setting placement for tables is not supported for clusters with read-replicas or leader affinity policies enabled. + +Use this command to create custom placement policies only for YCQL tables or transaction status tables. For YSQL tables, use [Tablespaces](../../explore/going-beyond-sql/tablespaces) instead. +{{< /note >}} + +#### create_transaction_table + +Creates a transaction status table to be used in a region. This command should always be followed by [modify_table_placement_info](#modify-table-placement-info) to set the placement information for the newly-created transaction status table. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + create_transaction_table \ + +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *table-name*: The name of the transaction status table to be created; this must start with `transactions_`. + +The transaction status table will be created as `system.`. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses $MASTER_RPC_ADDRS \ + create_transaction_table \ + transactions_us_east +``` + +Verify this in the Master UI by opening the **YB-Master UI** (`:7000/`) and clicking **Tables** in the navigation bar. You should see a new system table with keyspace `system` and table name `transactions_us_east`. + +Next, set the placement on the newly created transactions table: + +```sh +./bin/yb-admin \ + --master_addresses $MASTER_RPC_ADDRS \ + modify_table_placement_info system transactions_us_east \ + aws.us-east.us-east-1a,aws.us-east.us-east-1b,aws.us-east.us-east-1c 3 +``` + +After the load balancer runs, all tablets of `system.transactions_us_east` should now be solely located in the AWS us-east region. + +{{< note title="Note" >}} + +The preferred way to create transaction status tables with YSQL is to create a tablespace with the appropriate placement. YugabyteDB automatically creates a transaction table using the tablespace's placement when you create the first table using the new tablespace. + +{{< /note >}} + +#### add_transaction_tablet + +Add a tablet to a transaction status table. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + add_transaction_tablet \ + +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *table-id*: The unique UUID associated with the table to be compacted. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + add_transaction_tablet 000033eb000030008000000000004002 +``` + +To verify that the new status tablet has been created, run the [list_tablets](#list-tablets) command. + +#### flush_table + +Flush the memstores of the specified table on all tablet servers to disk. + +**Syntax 1: Using table name** + +```sh +yb-admin \ + --master_addresses \ + flush_table .
[] [ADD_INDEXES] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *db-type*: The type of database. Valid values are `ysql` and `ycql`. +* *namespace*: The name of the database (for YSQL) or keyspace (for YCQL). +* *table*: The name of the table to flush. +* *timeout-in-seconds*: Specifies duration (in seconds) yb-admin waits for flushing to end. Default is `20`. +* ADD_INDEXES: Whether to flush the secondary indexes associated with the table. Default is `false`. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses $MASTER_RPC_ADDRS \ + flush_table ysql.yugabyte table_name + +``` + +```output +Flushed [yugabyte.table_name] tables. +``` + +**Syntax 2: Using table ID** + +```sh +yb-admin \ + --master_addresses \ + flush_table tableid. [] [ADD_INDEXES] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *table-id*: The unique UUID associated with the table. +* *timeout-in-seconds*: Specifies duration (in seconds) yb-admin waits for flushing to end. Default is `20`. +* ADD_INDEXES: Whether to flush the secondary indexes associated with the table. Default is `false`. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses $MASTER_RPC_ADDRS \ + flush_table tableid.000033eb000030008000000000004002 +``` + +```output +Flushed [000033eb000030008000000000004002] tables. +``` + +#### backfill_indexes_for_table + +Backfill all DEFERRED indexes in a YCQL table. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + backfill_indexes_for_table +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *keyspace*: Specifies the keyspace `ycql.keyspace-name`. +* *table-name*: Specifies the table name. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + backfill_indexes_for_table ybdemo table_name +``` + +A new backfill job is created for all the `DEFERRED` indexes of the table. The command does not have any output. 
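+
+For reference, the DEFERRED indexes that this command backfills are created in YCQL using the DEFERRED keyword (the indexed table must have transactions enabled). The following is a sketch, with illustrative keyspace, table, index, and column names, using ycqlsh:
+
+```sh
+./bin/ycqlsh -e "CREATE DEFERRED INDEX idx_table_name_col ON ybdemo.table_name (col);"
+```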
+ +--- + +### Backup and snapshot commands + +The following backup and snapshot commands are available: + +* [**create_database_snapshot**](#create-database-snapshot) creates a snapshot of the specified YSQL database +* [**create_keyspace_snapshot**](#create-keyspace-snapshot) creates a snapshot of the specified YCQL keyspace +* [**list_snapshots**](#list-snapshots) returns a list of all snapshots, restores, and their states +* [**create_snapshot**](#create-snapshot) creates a snapshot of one or more YCQL tables and indexes +* [**restore_snapshot**](#restore-snapshot) restores a snapshot +* [**list_snapshot_restorations**](#list-snapshot-restorations) returns a list of all snapshot restorations +* [**export_snapshot**](#export-snapshot) creates a snapshot metadata file +* [**import_snapshot**](#import-snapshot) imports a snapshot metadata file +* [**import_snapshot_selective**](#import-snapshot-selective) imports a specified snapshot metadata file +* [**delete_snapshot**](#delete-snapshot) deletes a snapshot's information +* [**create_snapshot_schedule**](#create-snapshot-schedule) sets the schedule for snapshot creation +* [**list_snapshot_schedules**](#list-snapshot-schedules) returns a list of all snapshot schedules +* [**restore_snapshot_schedule**](#restore-snapshot-schedule) restores all objects in a scheduled snapshot +* [**delete_snapshot_schedule**](#delete-snapshot-schedule) deletes the specified snapshot schedule + +{{< note title="YugabyteDB Anywhere" >}} + +If you are using YugabyteDB Anywhere to manage point-in-time-recovery (PITR) for a universe, you must initiate and manage PITR using the YugabyteDB Anywhere UI. If you use the yb-admin CLI to make changes to the PITR configuration of a universe managed by YugabyteDB Anywhere, including creating schedules and snapshots, your changes are not reflected in YugabyteDB Anywhere. + +{{< /note >}} + +#### create_database_snapshot + +Creates a snapshot of the specified YSQL database. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + create_database_snapshot +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *database*: The name of the YSQL database. + +When this command runs, a `snapshot_id` is generated and printed. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + create_database_snapshot +``` + +To see if the database snapshot creation has completed, run the [yb-admin list_snapshots](#list-snapshots) command. + +#### create_keyspace_snapshot + +Creates a snapshot of the specified YCQL keyspace. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + create_keyspace_snapshot +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *keyspace*: The name of the YCQL keyspace. + +When this command runs, a `snapshot_id` is generated and printed. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + create_keyspace_snapshot +``` + +To see if the database snapshot creation has completed, run the [yb-admin list_snapshots](#list-snapshots) command. + +#### list_snapshots + +Prints a list of all snapshot IDs, restoration IDs, and states. Optionally, prints details (including keyspaces, tables, and indexes) in JSON format. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + list_snapshots \ + [ show_details ] [ not_show_restored ] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. 
Default is `localhost:7100`. +* `show_details`: (Optional) Print snapshot details, including the keyspaces, tables, and indexes. +* `not_show_restored`: (Optional) Do not show successful "restorations" (that is, `COMPLETE`). Use to see a list of only uncompleted or failed restore operations. +* `show_deleted`: (Optional) Show snapshots that are deleted, but still retained in memory. + +Possible `state` values for creating and restoring snapshots: + +* `create_snapshot`: `CREATING`, `COMPLETE`, `DELETING`, `DELETED`, or `FAILED`. +* `restore_snapshot`: `COMPLETE`, `DELETING`, `DELETED`, or `FAILED`. + +By default, the `list_snapshots` command prints the current state of the following operations: + +* `create_snapshot`: `snapshot_id`, `keyspace`, `table`, `state` +* `restore_snapshot`: `snapshot_id`, `restoration_id`, `state`. +* `delete_snapshot`: `snapshot_id`, `state`. + +When `show_details` is included, the `list_snapshots` command prints the following details in JSON format: + +* `type`: `NAMESPACE` + * `id`: `` or `` + * `data`: + * `name`: `""` + * `database_type`: `"YQL_DATABASE_CQL"` + * `colocated`: `true` or `false` + * `state`: `""` +* `type`: `TABLE` <== Use for table or index + * `id`: `""` or `""` + * `data`: + * `name`: `""` or `""` + * `version`: `""` + * `state`: `""` + * `state_msg`: `""` + * `next_column_id`: `""` + * `table_type`: `"YQL_TABLE_TYPE"` + * `namespace_id`: `""` + * `indexed_table_id` (index only): `` + * `is_local_index` (index only): `true` or `false` + * `is_unique_index` (index only): `true` or `false` + +**Example** + +In this example, the optional `show_details` flag is added to generate the snapshot details. + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + list_snapshots show_details +``` + +Because `show_details` was included, `list_snapshots` prints the details in JSON format, like this: + +```output +f566b03b-b85e-41a0-b903-875cd305c1c5 COMPLETE +{"type":"NAMESPACE","id":"8053dd55d478437cba57d9f67caac154","data":{"name":"yugabyte","database_type":"YQL_DATABASE_CQL","colocated":false,"state":"RUNNING"}} +{"type":"TABLE","id":"a7e940e724ef497ebe94bf69bfe507d9","data":{"name":"tracking1","version":1,"state":"RUNNING","state_msg":"Current schema version=1","next_column_id":13,"table_type":"YQL_TABLE_TYPE","namespace_id":"8053dd55d478437cba57d9f67caac154"}} +{"type":"NAMESPACE","id":"8053dd55d478437cba57d9f67caac154","data":{"name":"yugabyte","database_type":"YQL_DATABASE_CQL","colocated":false,"state":"RUNNING"}} +{"type":"TABLE","id":"b48f4d7695f0421e93386f7a97da4bac","data":{"name":"tracking1_v_idx","version":0,"state":"RUNNING","next_column_id":12,"table_type":"YQL_TABLE_TYPE","namespace_id":"8053dd55d478437cba57d9f67caac154","indexed_table_id":"a7e940e724ef497ebe94bf69bfe507d9","is_local_index":false,"is_unique_index":false}} +``` + +If `show_details` is not included, `list_snapshots` prints the `snapshot_id` and `state`: + +```output +f566b03b-b85e-41a0-b903-875cd305c1c5 COMPLETE +``` + +#### create_snapshot + +Creates a snapshot of the specified YCQL tables and their indexes. Prior to v.2.1.8, indexes were not automatically included. You can specify multiple tables, even from different keyspaces. + +{{< note title="Snapshots don't auto-expire" >}} + +Snapshots you create via `create_snapshot` persist on disk until you remove them using the [delete_snapshot](#delete-snapshot) command. 
+
+Use the [create_snapshot_schedule](#create-snapshot-schedule) command to create snapshots that expire after a specified time interval.
+
+{{< /note >}}
+
+**Syntax**
+
+```sh
+yb-admin \
+    --master_addresses <master-addresses> \
+    create_snapshot <keyspace> <table-name> | <table-id> \
+    [<keyspace> <table-name> | <table-id>]... \
+    [<flush-timeout-in-seconds>]
+```
+
+* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`.
+* *keyspace*: The name of the database or keyspace, formatted as `<db-type>.<keyspace-name>`.
+* *table-name*: The name of the table.
+* *table-id*: The unique UUID associated with the table.
+* *flush-timeout-in-seconds*: Specifies the duration (in seconds) to wait for flushing before taking the snapshot. Default is `60`. To skip flushing, set the value to `0`.
+
+When this command runs, a `snapshot_id` is generated and printed.
+
+**Example**
+
+```sh
+./bin/yb-admin \
+    --master_addresses ip1:7100,ip2:7100,ip3:7100 \
+    create_snapshot ydb test_tb
+```
+
+```output
+Started flushing table ydb.test_tb
+Flush request id: fe0db953a7a5416c90f01b1e11a36d24
+Waiting for flushing...
+Flushing complete: SUCCESS
+Started snapshot creation: 4963ed18fc1e4f1ba38c8fcf4058b295
+```
+
+To see if the snapshot creation has finished, run the [yb-admin list_snapshots](#list-snapshots) command.
+
+#### restore_snapshot
+
+Restores the specified snapshot, including the tables and indexes. When the operation starts, a `restoration_id` is generated.
+
+**Syntax**
+
+```sh
+yb-admin \
+    --master_addresses <master-addresses> \
+    restore_snapshot <snapshot-id> [<restore-target>]
+```
+
+* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`.
+* *snapshot-id*: The identifier (ID) for the snapshot.
+* *restore-target*: The time to which to restore the snapshot. This can be either an absolute Unix time, or a relative time such as `minus 5m` (to restore to 5 minutes ago). Optional; omit to restore to the given snapshot's creation time.
+
+**Example**
+
+```sh
+./bin/yb-admin restore_snapshot 72ad2eb1-65a2-4e88-a448-7ef4418bc469
+```
+
+When the restore starts, the `snapshot_id` and the generated `restoration_id` are displayed.
+
+```output
+Started restoring snapshot: 72ad2eb1-65a2-4e88-a448-7ef4418bc469
+Restoration id: 5a9bc559-2155-4c38-ac8b-b6d0f7aa1af6
+```
+
+To see if the snapshot was successfully restored, you can run the [yb-admin list_snapshots](#list-snapshots) command.
+
+```sh
+./bin/yb-admin list_snapshots
+```
+
+In this example, the restore failed, so the following is displayed:
+
+```output
+Restoration UUID                      State
+5a9bc559-2155-4c38-ac8b-b6d0f7aa1af6  FAILED
+```
+
+#### list_snapshot_restorations
+
+Lists snapshot restorations.
+
+Returns one or more restorations in JSON format.
+
+**restorations list** entries contain:
+
+* the restoration's unique ID
+* the snapshot's unique ID
+* state of the restoration
+
+**Syntax**
+
+```sh
+yb-admin \
+    --master_addresses <master-addresses> \
+    list_snapshot_restorations [<restoration-id>]
+```
+
+* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`.
+* *restoration-id*: The snapshot restoration's unique identifier. Optional; omit the ID to return all restorations in the system.
+ +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + list_snapshot_restorations 26ed9053-0c26-4277-a2b8-c12d0fa4c8cf +``` + +```output.json +{ + "restorations": [ + { + "id": "26ed9053-0c26-4277-a2b8-c12d0fa4c8cf", + "snapshot_id": "ca8f3763-5437-4594-818d-713fb0cddb96", + "state": "RESTORED" + } + ] +} +``` + +#### export_snapshot + +Generates a metadata file for the specified snapshot, listing all the relevant internal UUIDs for various objects (table, tablet, etc.). + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + export_snapshot +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *snapshot-id*: The identifier (ID) for the snapshot. +* *file-name*: The name of the file to contain the metadata. Recommended file extension is `.snapshot`. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + export_snapshot 4963ed18fc1e4f1ba38c8fcf4058b295 \ + test_tb.snapshot +``` + +```output +Exporting snapshot 4963ed18fc1e4f1ba38c8fcf4058b295 (COMPLETE) to file test_tb.snapshot +Snapshot meta data was saved into file: test_tb.snapshot +``` + +#### import_snapshot + +Imports the specified snapshot metadata file. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + import_snapshot \ + [ [ ]...] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *file-name*: The name of the snapshot file to import +* *keyspace*: The name of the database or keyspace +* *table-name*: The name of the table + +{{< note title="Note" >}} + +The *keyspace* and the *table* can be different from the exported one. + +{{< /note >}} + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + import_snapshot test_tb.snapshot ydb test_tb +``` + +```output +Read snapshot meta file test_tb.snapshot +Importing snapshot 4963ed18fc1e4f1ba38c8fcf4058b295 (COMPLETE) +Target imported table name: ydb.test_tb +Table being imported: ydb.test_tb +Successfully applied snapshot. +Object Old ID New ID +Keyspace c478ed4f570841489dd973aacf0b3799 c478ed4f570841489dd973aacf0b3799 +Table ff4389ee7a9d47ff897d3cec2f18f720 ff4389ee7a9d47ff897d3cec2f18f720 +Tablet 0 cea3aaac2f10460a880b0b4a2a4b652a cea3aaac2f10460a880b0b4a2a4b652a +Tablet 1 e509cf8eedba410ba3b60c7e9138d479 e509cf8eedba410ba3b60c7e9138d479 +Snapshot 4963ed18fc1e4f1ba38c8fcf4058b295 4963ed18fc1e4f1ba38c8fcf4058b295 +``` + +#### import_snapshot_selective + +Imports only the specified tables from the specified snapshot metadata file. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + import_snapshot_selective \ + [ [ ]...] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *file-name*: The name of the snapshot file to import +* *keyspace*: The name of the database or keyspace +* *table-name*: The name of the table + +{{< note title="Note" >}} + +The *keyspace* can be different from the exported one. The name of the table needs to be the same. + +{{< /note >}} + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + import_snapshot_selective test_tb.snapshot ydb test_tb +``` + +```output +Read snapshot meta file test_tb.snapshot +Importing snapshot 4963ed18fc1e4f1ba38c8fcf4058b295 (COMPLETE) +Target imported table name: ydb.test_tb +Table being imported: ydb.test_tb +Successfully applied snapshot. 
+Object Old ID New ID +Keyspace c478ed4f570841489dd973aacf0b3799 c478ed4f570841489dd973aacf0b3799 +Table ff4389ee7a9d47ff897d3cec2f18f720 ff4389ee7a9d47ff897d3cec2f18f720 +Tablet 0 cea3aaac2f10460a880b0b4a2a4b652a cea3aaac2f10460a880b0b4a2a4b652a +Tablet 1 e509cf8eedba410ba3b60c7e9138d479 e509cf8eedba410ba3b60c7e9138d479 +Snapshot 4963ed18fc1e4f1ba38c8fcf4058b295 4963ed18fc1e4f1ba38c8fcf4058b295 +``` + +#### delete_snapshot + +Deletes the specified snapshot. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + delete_snapshot +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *snapshot-id*: The identifier (ID) of the snapshot. + +#### create_snapshot_schedule + +Creates a snapshot schedule. A schedule consists of a list of objects to be included in a snapshot, a time interval at which to take snapshots for them, and a retention time. + +Returns a schedule ID in JSON format. + +**Syntax** + +```sh +yb-admin create_snapshot_schedule \ + --master_addresses \ + \ + \ + +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *snapshot-interval*: The frequency at which to take snapshots, in minutes. +* *retention-time*: The number of minutes to keep a snapshot before deleting it. +* *filter-expression*: The set of objects to include in the snapshot. + +The filter expression is a list of acceptable objects, which can be either raw tables, keyspaces (YCQL) in the format `keyspace_name`, or databases (YSQL) in the format `ysql.database_name`. For proper consistency guarantees, set this up _per-keyspace_ (YCQL) or _per-database_ (YSQL). + +**Example** + +Take a snapshot of the YSQL database `yugabyte` once per minute, and retain each snapshot for 10 minutes: + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + create_snapshot_schedule 1 10 ysql.yugabyte +``` + +The equivalent command for the YCQL keyspace `yugabyte` would be the following: + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + create_snapshot_schedule 1 10 yugabyte +``` + +```output.json +{ + "schedule_id": "6eaaa4fb-397f-41e2-a8fe-a93e0c9f5256" +} +``` + +#### list_snapshot_schedules + +Lists the snapshots associated with a given schedule. Or, lists all schedules and their associated snapshots. + +Returns one or more schedule lists in JSON format. + +**Schedule list** entries contain: + +* schedule ID +* schedule options (interval and retention time) +* a list of snapshots that the system has automatically taken + +**Snapshot list** entries include: + +* the snapshot's unique ID +* the snapshot's creation time +* the previous snapshot's creation time, if available. Use this time to make sure that, on restore, you pick the correct snapshot, which is guaranteed to have the data you want to bring back. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + list_snapshot_schedules +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *schedule-id*: The snapshot schedule's unique identifier. Optional; omit the ID to return all schedules in the system. 
+ +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + list_snapshot_schedules 6eaaa4fb-397f-41e2-a8fe-a93e0c9f5256 +``` + +```output.json +{ + "schedules": [ + { + "id": "6eaaa4fb-397f-41e2-a8fe-a93e0c9f5256", + "options": { + "interval": "60.000s", + "retention": "600.000s" + }, + "snapshots": [ + { + "id": "386740da-dc17-4e4a-9a2b-976968b1deb5", + "snapshot_time_utc": "2021-04-28T13:35:32.499002+0000" + }, + { + "id": "aaf562ca-036f-4f96-b193-f0baead372e5", + "snapshot_time_utc": "2021-04-28T13:36:37.501633+0000", + "previous_snapshot_time_utc": "2021-04-28T13:35:32.499002+0000" + } + ] + } + ] +} +``` + +#### restore_snapshot_schedule + +Schedules group a set of items into a single tracking object (the *schedule*). When you restore, you can choose a particular schedule and a point in time, and revert the state of all affected objects back to the chosen time. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + restore_snapshot_schedule +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *schedule-id*: The identifier (ID) of the schedule to be restored. +* *restore-target*: The time to which to restore the snapshots in the schedule. This can be either an absolute Unix timestamp, or a relative time such as `minus 5m` (to restore to 5 minutes ago). + +You can also use a [YSQL timestamp](../../api/ysql/datatypes/type_datetime/) or [YCQL timestamp](../../api/ycql/type_datetime/#timestamp) with the restore command, if you like. + +In addition to restoring to a particular timestamp, you can also restore from a relative time, such as "ten minutes ago". + +When you specify a relative time, you can specify any or all of *days*, *hours*, *minutes*, and *seconds*. For example: + +* `minus 5m` to restore from five minutes ago +* `minus 1h` to restore from one hour ago +* `minus 3d` to restore from three days ago +* `minus 1h 5m` to restore from one hour and five minutes ago + +Relative times can be in any of the following formats (again, note that you can specify any or all of days, hours, minutes, and seconds): + +* ISO 8601: `3d 4h 5m 6s` +* Abbreviated PostgreSQL: `3 d 4 hrs 5 mins 6 secs` +* Traditional PostgreSQL: `3 days 4 hours 5 minutes 6 seconds` +* SQL standard: `D H:M:S` + +**Examples** + +Restore from an absolute timestamp: + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + restore_snapshot_schedule 6eaaa4fb-397f-41e2-a8fe-a93e0c9f5256 1617670679185100 +``` + +Restore from a relative time: + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + restore_snapshot_schedule 6eaaa4fb-397f-41e2-a8fe-a93e0c9f5256 minus 60s +``` + +In both cases, the output is similar to the following: + +```output.json +{ + "snapshot_id": "6eaaa4fb-397f-41e2-a8fe-a93e0c9f5256", + "restoration_id": "b1b96d53-f9f9-46c5-b81c-6937301c8eff" +} +``` + +#### delete_snapshot_schedule + +Deletes the snapshot schedule with the given ID, **and all of the snapshots** associated with that schedule. + +Returns a JSON object with the `schedule_id` that was just deleted. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + delete_snapshot_schedule +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *schedule-id*: The snapshot schedule's unique identifier. 
+
+**Example**
+
+```sh
+./bin/yb-admin \
+    --master_addresses ip1:7100,ip2:7100,ip3:7100 \
+    delete_snapshot_schedule 6eaaa4fb-397f-41e2-a8fe-a93e0c9f5256
+```
+
+The output shows the schedule ID that was just deleted.
+
+```output.json
+{
+    "schedule_id": "6eaaa4fb-397f-41e2-a8fe-a93e0c9f5256"
+}
+```
+
+---
+
+### Multi-zone and multi-region deployment commands
+
+#### modify_placement_info
+
+Modifies the placement information (cloud, region, and zone) for a deployment.
+
+**Syntax**
+
+```sh
+yb-admin \
+    --master_addresses <master-addresses> \
+    modify_placement_info \
+    <placement-info> <replication-factor> \
+    [ <placement-id> ]
+```
+
+* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`.
+* *placement-info*: Comma-delimited list of placements for *cloud*.*region*.*zone*. Optionally, after each placement block, you can also specify a minimum replica count separated by a colon. This count indicates the minimum number of replicas of each tablet to place in that placement block. Its default value is 1. Rather than repeating the same placement multiple times, specify the total count after the colon. However, if you do specify a placement multiple times, the total count from all mentions is used.
+* *replication-factor*: The number of replicas for each tablet. This value should be greater than or equal to the total of the replica counts specified in *placement-info*.
+* *placement-id*: The identifier of the primary cluster, which can be any unique string. Optional; if not set, a randomly-generated ID is used.
+
+**Example**
+
+```sh
+./bin/yb-admin \
+    --master_addresses $MASTER_RPC_ADDRS \
+    modify_placement_info \
+    aws.us-west.us-west-2a:2,aws.us-west.us-west-2b:2,aws.us-west.us-west-2c 5
+```
+
+This places a minimum of:
+
+1. 2 replicas in aws.us-west.us-west-2a
+2. 2 replicas in aws.us-west.us-west-2b
+3. 1 replica in aws.us-west.us-west-2c
+
+You can verify the new placement information by running the following `curl` command:
+
+```sh
+curl -s http://<master-host>:7000/cluster-config
+```
+
+Use the wildcard `*` to allow placement in any zone in a specific region or any region in a specific cloud. For example:
+
+```sh
+./bin/yb-admin \
+    --master_addresses $MASTER_RPC_ADDRS \
+    modify_placement_info \
+    aws.*.*:5 5
+```
+
+This requests a placement of 5 replicas anywhere in the `aws` cloud. Similarly:
+
+```sh
+./bin/yb-admin \
+    --master_addresses $MASTER_RPC_ADDRS \
+    modify_placement_info \
+    aws.us-east-1.*:3 3
+```
+
+This requests a placement of 3 replicas anywhere in the `us-east-1` region of the `aws` cloud.
+
+#### set_preferred_zones
+
+Sets the preferred availability zones (AZs) and regions. Tablet leaders are placed in alive and healthy nodes of AZs in order of preference. When no healthy node is available in the most preferred AZs (preference value 1), alive and healthy nodes from the next preferred AZs are picked. AZs with no preference are equally eligible to host tablet leaders.
+
+Having all tablet leaders reside in a single region reduces the number of network hops for the database to write transactions, which increases performance and reduces latency.
+
+{{< note title="Note" >}}
+
+* Make sure you've already run the [modify_placement_info](#modify-placement-info) command beforehand.
+
+* By default, the transaction status tablet leaders don't respect these preferred zones and are balanced across all nodes.
Transactions include a roundtrip from the user to the transaction status tablet serving the transaction - using the leader closest to the user rather than forcing a roundtrip to the preferred zone improves performance. + +* Leader blacklisted nodes don't host any leaders irrespective of their preference. + +* Cluster configuration stores preferred zones in either affinitized_leaders or multi_affinitized_leaders object. + +* Tablespaces don't inherit cluster-level placement information, leader preference, or read replica configurations. + +* If the client application uses a smart driver, set the [topology keys](../../drivers-orms/smart-drivers/#topology-aware-load-balancing) to target the preferred zones. + +{{< /note >}} + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + set_preferred_zones [:] \ + [[:]]... +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *cloud.region.zone*: Specifies the cloud, region, and zone. Default is `cloud1.datacenter1.rack1`. +* *preference*: Specifies the leader preference for a zone. Values have to be contiguous non-zero integers. Multiple zones can have the same value. Default is 1. + +**Example** + +Suppose you have a deployment in the following regions: `gcp.us-west1.us-west1-a`, `gcp.us-west1.us-west1-b`, `gcp.asia-northeast1.asia-northeast1-a`, and `gcp.us-east4.us-east4-a`. Looking at the cluster configuration: + +```sh +curl -s http://:7000/cluster-config +``` + +The following is a sample configuration: + +```output +replication_info { + live_replicas { + num_replicas: 5 + placement_blocks { + cloud_info { + placement_cloud: "gcp" + placement_region: "us-west1" + placement_zone: "us-west1-a" + } + min_num_replicas: 1 + } + placement_blocks { + cloud_info { + placement_cloud: "gcp" + placement_region: "us-west1" + placement_zone: "us-west1-b" + } + min_num_replicas: 1 + } + placement_blocks { + cloud_info { + placement_cloud: "gcp" + placement_region: "us-east4" + placement_zone: "us-east4-a" + } + min_num_replicas: 2 + } + placement_blocks { + cloud_info { + placement_cloud: "gcp" + placement_region: "us-asia-northeast1" + placement_zone: "us-asia-northeast1-a" + } + min_num_replicas: 1 + } + } +} +``` + +The following command sets the preferred region to `gcp.us-west1` and the fallback to zone `gcp.us-east4.us-east4-a`: + +```sh +ssh -i $PEM $ADMIN_USER@$MASTER1 \ + ~/master/bin/yb-admin --master_addresses $MASTER_RPC_ADDRS \ + set_preferred_zones \ + gcp.us-west1.us-west1-a:1 \ + gcp.us-west1.us-west1-b:1 \ + gcp.us-east4.us-east4-a:2 +``` + +Verify by running the following. 
+ +```sh +curl -s http://:7000/cluster-config +``` + +Looking again at the cluster configuration you should see `multi_affinitized_leaders` added: + +```output +replication_info { + live_replicas { + num_replicas: 5 + placement_blocks { + cloud_info { + placement_cloud: "gcp" + placement_region: "us-west1" + placement_zone: "us-west1-a" + } + min_num_replicas: 1 + } + placement_blocks { + cloud_info { + placement_cloud: "gcp" + placement_region: "us-west1" + placement_zone: "us-west1-b" + } + min_num_replicas: 1 + } + placement_blocks { + cloud_info { + placement_cloud: "gcp" + placement_region: "us-east4" + placement_zone: "us-east4-a" + } + min_num_replicas: 2 + } + placement_blocks { + cloud_info { + placement_cloud: "gcp" + placement_region: "us-asia-northeast1" + placement_zone: "us-asia-northeast1-a" + } + min_num_replicas: 1 + } + } + multi_affinitized_leaders { + zones { + placement_cloud: "gcp" + placement_region: "us-west1" + placement_zone: "us-west1-a" + } + zones { + placement_cloud: "gcp" + placement_region: "us-west1" + placement_zone: "us-west1-b" + } + } + multi_affinitized_leaders { + zones { + placement_cloud: "gcp" + placement_region: "us-east4" + placement_zone: "us-east4-a" + } + } +} +``` + +### Read replica deployment commands + +#### add_read_replica_placement_info + +Add a read replica cluster to the master configuration. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + add_read_replica_placement_info \ + \ + [ ] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *placement-info*: A comma-delimited list of read replica placements for *cloud*.*region*.*zone*, using the format `:,:,...`. Default is `cloud1.datacenter1.rack1`. + Read replica availability zones must be uniquely different from the primary availability zones. To use the same cloud, region, and availability zone for a read replica as a primary cluster, you can suffix the zone with `_rr` (for read replica). For example, `c1.r1.z1` vs `c1.r1.z1_rr:1`. +* *replication-factor*: The total number of read replicas. +* *placement-id*: The identifier of the read replica cluster, which can be any unique string. If not set, a randomly-generated ID will be used. Primary and read replica clusters must use different placement IDs. + +#### modify_read_replica_placement_info + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + modify_read_replica_placement_info \ + \ + [ ] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *placement-info*: A comma-delimited list of placements for *cloud*.*region*.*zone*. Default is `cloud1.datacenter1.rack1`. +* *replication-factor*: The number of replicas. +* *placement-id*: The identifier of the read replica cluster. + +#### delete_read_replica_placement_info + +Delete the read replica. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + delete_read_replica_placement_info +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +--- + +### Security commands + +#### Encryption at rest commands + +For details on using encryption at rest, see [Encryption at rest](../../secure/encryption-at-rest). + +#### add_universe_key_to_all_masters + +Sets the contents of *key-path* in-memory on each YB-Master node. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + add_universe_key_to_all_masters +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. 
Default is `localhost:7100`. +* *key-id*: Universe-unique identifier (can be any string, such as a string of a UUID) that will be associated to the universe key contained in the contents of *key-path* as a byte[]. +* *key-path*: The path to the file containing the universe key. + +{{< note title="Note" >}} + +After adding the universe keys to all YB-Master nodes, you can verify the keys exist using the [all_masters_have_universe_key_in_memory](#all-masters-have-universe-key-in-memory) command and enable encryption using the [rotate_universe_key_in_memory](#rotate-universe-key-in-memory) command. + +{{< /note >}} + +#### all_masters_have_universe_key_in_memory + +Checks whether the universe key associated with the provided *key-id* exists in-memory on each YB-Master node. + +```sh +yb-admin \ + --master_addresses all_masters_have_universe_key_in_memory +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *key-id*: Universe-unique identifier (can be any string, such as a string of a UUID) that will be associated to the universe key contained in the contents of *key-path* as a byte[]. + +#### rotate_universe_key_in_memory + +Rotates the in-memory universe key to start encrypting newly-written data files with the universe key associated with the provided *key-id*. + +{{< note title="Note" >}} + +The [all_masters_have_universe_key_in_memory](#all-masters-have-universe-key-in-memory) value must be true for the universe key to be successfully rotated and enabled). + +{{< /note >}} + +**Syntax** + +```sh +yb-admin \ + --master_addresses rotate_universe_key_in_memory +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *key-id*: Universe-unique identifier (can be any string, such as a string of a UUID) that will be associated to the universe key contained in the contents of *key-path* as a byte[]. + +#### disable_encryption_in_memory + +Disables the in-memory encryption at rest for newly-written data files. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + disable_encryption_in_memory +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +#### is_encryption_enabled + +Checks if cluster-wide encryption is enabled. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + is_encryption_enabled +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +Returns message: + +```output +Encryption status: ENABLED with key id +``` + +The new key ID (``) should be different from the previous one (``). + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + is_encryption_enabled +``` + +```output +Encryption status: ENABLED with key id +``` + +### Change Data Capture (CDC) commands + +#### create_change_data_stream + +Create a change data capture (CDC) DB stream for the specified namespace using the following command. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + create_change_data_stream ysql. +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *namespace-name*: The namespace on which the DB stream ID is to be created. 
+
+For example:
+
+```sh
+./bin/yb-admin \
+    --master_addresses 127.0.0.1:7100 \
+    create_change_data_stream ysql.yugabyte
+```
+
+##### Creating a stream for Transactional CDC
+
+Use the following command to create a change data capture (CDC) DB stream for the specified namespace that can be used for Transactional CDC.
+To enable this feature, set the [yb_enable_cdc_consistent_snapshot_streams](../../reference/configuration/yb-tserver/#yb-enable-cdc-consistent-snapshot-streams) flag.
+
+**Syntax**
+
+```sh
+yb-admin \
+    --master_addresses <master-addresses> \
+    create_change_data_stream ysql.<namespace-name> [EXPLICIT] [<before-image-mode>] [USE_SNAPSHOT | NOEXPORT_SNAPSHOT]
+```
+
+* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`.
+* *namespace-name*: The namespace on which the DB stream ID is to be created.
+* EXPLICIT: Checkpointing type on the server. See [Creating stream in EXPLICIT checkpointing mode](#creating-stream-in-explicit-checkpointing-mode).
+* *before-image-mode*: Record type indicating to the server that the stream should send only the new values of the changed columns. See [Enabling before image](#enabling-before-image).
+* USE_SNAPSHOT: Snapshot option indicating the client's intention to consume the snapshot. If you don't want the client to consume the snapshot, use the NOEXPORT_SNAPSHOT option.
+
+For example:
+
+```sh
+./bin/yb-admin \
+    --master_addresses 127.0.0.1:7100 \
+    create_change_data_stream ysql.yugabyte EXPLICIT CHANGE USE_SNAPSHOT
+```
+
+##### Enabling before image
+
+To create a change data capture (CDC) DB stream which also supports sending the before image of the record, use the following command.
+
+**Syntax**
+
+```sh
+yb-admin \
+    --master_addresses <master-addresses> \
+    create_change_data_stream ysql.<namespace-name> [EXPLICIT] <before-image-mode>
+```
+
+* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`.
+* *namespace-name*: The namespace on which the DB stream ID is to be created.
+* EXPLICIT: Checkpointing type on the server. See [Creating stream in EXPLICIT checkpointing mode](#creating-stream-in-explicit-checkpointing-mode).
+* *before-image-mode*: Record type indicating to the server which old (before image) and new values of the changed columns the stream should send. Refer to [Before image modes](../../additional-features/change-data-capture/using-yugabytedb-grpc-replication/cdc-get-started/#before-image-modes).
+
+A successful operation of the above command returns a message with a DB stream ID:
+
+```output
+CDC Stream ID: d540f5e4890c4d3b812933cbfd703ed3
+```
+
+##### Creating stream in EXPLICIT checkpointing mode
+
+To create a change data capture (CDC) DB stream which works in the EXPLICIT checkpointing mode, where the client is responsible for managing the checkpoints, use the following command:
+
+**Syntax**
+
+```sh
+yb-admin \
+    --master_addresses <master-addresses> \
+    create_change_data_stream ysql.<namespace-name> EXPLICIT
+```
+
+* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`.
+* *namespace-name*: The namespace on which the DB stream ID is to be created.
+* EXPLICIT: Checkpointing type on the server.
+
+A successful operation of the above command returns a message with a DB stream ID:
+
+```output
+CDC Stream ID: d540f5e4890c4d3b812933cbfd703ed3
+```
+
+{{< note title="IMPLICIT checkpointing is deprecated" >}}
+
+It is recommended that you create streams in EXPLICIT checkpointing mode only (the default). IMPLICIT checkpointing mode will be completely removed in future releases.
+ +{{< /note >}} + +#### list_change_data_streams + +Lists all the created CDC DB streams. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + list_change_data_streams [namespace-name] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *namespace-name*: (Optional) The namespace name for which to list the streams. If not specified, all streams are listed without filtering. + +**Example:** + +```sh +./bin/yb-admin \ + --master_addresses 127.0.0.1:7100 \ + list_change_data_streams +``` + +This command results in the following response. It will have all the table IDs associated with the stream ID: + +```output +CDC Streams: +streams { + stream_id: "d540f5e4890c4d3b812933cbfd703ed3" + table_id: "000033e1000030008000000000004000" + options { + key: "id_type" + value: "NAMESPACEID" + } + options { + key: "checkpoint_type" + value: "EXPLICIT" + } + options { + key: "source_type" + value: "CDCSDK" + } + options { + key: "record_format" + value: "PROTO" + } + options { + key: "record_type" + value: "CHANGE" + } + options { + key: "state" + value: "ACTIVE" + } +} +``` + +#### get_change_data_stream_info + +Get the information associated with a particular CDC DB stream. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + get_change_data_stream_info +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *db-stream-id*: The CDC DB stream ID to get the info of. + +**Example:** + +```sh +./bin/yb-admin \ + --master_addresses 127.0.0.1:7100 \ + get_change_data_stream_info d540f5e4890c4d3b812933cbfd703ed3 +``` + +The previous command results in the following response. It will have the table_id(s) associated with the stream and the namespace_id on which the stream is created: + +```output +CDC DB Stream Info: +table_info { + stream_id: "d540f5e4890c4d3b812933cbfd703ed3" + table_id: "000033e1000030008000000000004000" +} +namespace_id: "000033e1000030008000000000000000" +``` + +#### delete_change_data_stream + +Delete the specified CDC DB stream. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + delete_change_data_stream +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *db-stream-id*: The CDC DB stream ID to be deleted. + +**Example:** + +```sh +./bin/yb-admin \ + --master_addresses 127.0.0.1:7100 \ + delete_change_data_stream d540f5e4890c4d3b812933cbfd703ed3 +``` + +The above command results in the following response: + +```output +Successfully deleted CDC DB Stream ID: d540f5e4890c4d3b812933cbfd703ed3 +``` + +### xCluster Replication Commands + +For detailed step-by-step instructions on deploying xCluster, refer to the [Deploy xCluster](../../deploy/multi-dc/async-replication). For monitoring xCluster, refer to [Monitor xCluster](../../launch-and-manage/monitor-and-alert/xcluster-monitor). + +#### setup_universe_replication + +Sets up the universe replication for the specified source universe. Use this command only if no tables have been configured for replication. If tables are already configured for replication, use [alter_universe_replication](#alter-universe-replication) to add more tables. + +To verify if any tables are already configured for replication, use [list_cdc_streams](#list-cdc-streams). 
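+
+For example, a quick pre-check on the source universe might look like the following (the addresses are illustrative); if nothing is returned, no tables are configured for replication yet:
+
+```sh
+# Illustrative: list existing xCluster outbound streams on the source universe
+./bin/yb-admin \
+    --master_addresses 127.0.0.1:7100,127.0.0.2:7100,127.0.0.3:7100 \
+    list_cdc_streams
+```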
+ +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + setup_universe_replication \ + \ + \ + \ + [ ] \ + [ transactional ] +``` + +* *target-master-addresses*: Comma-separated list of target YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The replication group identifier. +* *source-master-addresses*: Comma-separated list of the source master addresses. +* *source-table-ids*: Comma-separated list of source universe table identifiers (`table_id`). +* *bootstrap-ids*: Comma-separated list of source universe bootstrap identifiers (`bootstrap_id`). Obtain these with [bootstrap_cdc_producer](#bootstrap-cdc-producer-comma-separated-list-of-table-ids), using a comma-separated list of source universe table IDs. +* `transactional`: identifies the universe as Active in a transactional xCluster deployment. + +{{< warning title="Important" >}} +Enter the source universe bootstrap IDs in the same order as their corresponding table IDs. +{{< /warning >}} + +{{< note title="Tip" >}} + +To display a list of tables and their UUID (`table_id`) values, open the **YB-Master UI** (`:7000/`) and click **Tables** in the navigation bar. + +{{< /note >}} + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses 127.0.0.11:7100,127.0.0.12:7100,127.0.0.13:7100 \ + setup_universe_replication e260b8b6-e89f-4505-bb8e-b31f74aa29f3_xClusterSetup1 \ + 127.0.0.1:7100,127.0.0.2:7100,127.0.0.3:7100 \ + 000030a5000030008000000000004000,000030a5000030008000000000004005,dfef757c415c4b2cacc9315b8acb539a +``` + +#### alter_universe_replication + +Changes the universe replication for the specified source universe. Use this command to do the following: + +* Add or remove tables in an existing replication UUID. +* Modify the source master addresses. + +If no tables have been configured for replication, use [setup_universe_replication](#setup-universe-replication). + +To check if any tables are configured for replication, use [list_cdc_streams](#list-cdc-streams). + +**Syntax** + +Use the `set_master_addresses` subcommand to replace the source master address list. Use this if the set of masters on the source changes: + +```sh +yb-admin --master_addresses \ + alter_universe_replication \ + set_master_addresses +``` + +* *target-master-addresses*: Comma-separated list of target YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The replication group identifier. +* *source-master-addresses*: Comma-separated list of the source master addresses. + +Use the `add_table` subcommand to add one or more tables to the existing list: + +```sh +yb-admin --master_addresses \ + alter_universe_replication \ + add_table \ + [ ] +``` + +* *target-master-addresses*: Comma-separated list of target YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The replication group identifier. +* *source-table-ids*: Comma-separated list of source universe table identifiers (`table_id`). +* *bootstrap-ids*: Comma-separated list of source universe bootstrap identifiers (`bootstrap_id`). Obtain these with [bootstrap_cdc_producer](#bootstrap-cdc-producer-comma-separated-list-of-table-ids), using a comma-separated list of source universe table IDs. + +{{< warning title="Important" >}} +Enter the source universe bootstrap IDs in the same order as their corresponding table IDs. 
+{{< /warning >}} + +Use the `remove_table` subcommand to remove one or more tables from the existing list: + +```sh +yb-admin --master_addresses \ + alter_universe_replication \ + remove_table [ignore-errors] +``` + +* *target-master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The replication group identifier. +* *source-table-ids*: Comma-separated list of source universe table identifiers (`table_id`). +* `ignore-errors`: Execute the command, ignoring any errors. It is recommended that you contact support before using this option. + +Use the `rename_id` subcommand to rename xCluster replication streams. + +```sh +yb-admin --master_addresses \ + alter_universe_replication \ + rename_id +``` + +* *target-master-addresses*: Comma-separated list of target YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The existing replication group identifier. +* *new-replication-group-id*: The new replication group identifier. + +#### delete_universe_replication + +Deletes universe replication for the specified source universe. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + delete_universe_replication +``` + +* *target-master-addresses*: Comma-separated list of target YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The replication group identifier. + +#### set_universe_replication_enabled + +Sets the universe replication to be enabled or disabled. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + set_universe_replication_enabled [0|1] +``` + +* *target-master-addresses*: Comma-separated list of target YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The replication group identifier. +* `0` | `1`: Disabled (`0`) or enabled (`1`). Default is `1`. + +#### get_xcluster_safe_time + +Reports the current xCluster safe time for each namespace, which is the time at which reads will be performed. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + get_xcluster_safe_time \ + [include_lag_and_skew] +``` + +* *target-master-addresses*: Comma-separated list of target YB-Master hosts and ports. Default is `localhost:7100`. +* `include_lag_and_skew`: Display the `safe_time_lag_sec` and `safe_time_skew_sec`. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses 127.0.0.11:7100,127.0.0.12:7100,127.0.0.13:7100 \ + get_xcluster_safe_time include_lag_and_skew +``` + +```output +{ + "namespace_id": "000033f1000030008000000000000000", + "namespace_name": "yugabyte", + "safe_time": "2023-04-14 18:34:18.429430", + "safe_time_epoch": "1681522458429430", + "safe_time_lag_sec": "15.66", + "safe_time_skew_sec": "14.95" +} +``` + +* *namespace_id*: ID of the stream. +* *namespace_name*: Name of the stream. +* *safe_time*: Safe time in timestamp format. +* *safe_time_epoch*: The `epoch` of the safe time. +* *safe_time_lag_sec*: Safe time lag is computed as `(current time - current safe time)`. +* *safe_time_skew_sec*: Safe time skew is computed as `(safe time of most caught up tablet - safe time of laggiest tablet)`. + +#### wait_for_replication_drain + +Verify when the producer and consumer are in sync for a given list of `stream_ids` at a given timestamp. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + wait_for_replication_drain \ + [ | minus ] +``` + +* *source-master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. 
+* *stream-ids*: Comma-separated list of stream IDs. +* *timestamp*: The time to which to wait for replication to drain. If not provided, it will be set to current time in the YB-Master API. +* `minus `: The same format as described in [Restore from a relative time](../../explore/cluster-management/point-in-time-recovery-ysql/#restore-from-a-relative-time), or see [restore_snapshot_schedule](#restore-snapshot-schedule). + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses 127.0.0.1:7100,127.0.0.2:7100,127.0.0.3:7100 \ + wait_for_replication_drain 000033f1000030008000000000000000,200033f1000030008000000000000002 minus 1m +``` + +If all streams are caught-up, the API outputs `All replications are caught-up.` to the console. + +Otherwise, it outputs the non-caught-up streams in the following format: + +```output +Found undrained replications: +- Under Stream : + - Tablet: + - Tablet: + // ...... +// ...... +``` + +#### list_cdc_streams + +Lists the xCluster outbound streams. + +{{< note title="Tip" >}} + +Use this command when setting up xCluster replication to verify if any tables are configured for replication. If not, run [setup_universe_replication](#setup-universe-replication); if tables are already configured for replication, use [alter_universe_replication](#alter-universe-replication) to add more tables. + +{{< /note >}} + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + list_cdc_streams +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses 127.0.0.11:7100,127.0.0.12:7100,127.0.0.13:7100 \ + list_cdc_streams +``` + +#### delete_cdc_stream + +Deletes underlying xCluster outbound streams. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + delete_cdc_stream \ + [force_delete] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *stream-id*: The ID of the xCluster stream. +* `force_delete`: Force the delete operation. + +{{< note title="Note" >}} +This command should only be needed for advanced operations, such as doing manual cleanup of old bootstrapped streams that were never fully initialized, or otherwise failed replication streams. For normal xCluster replication cleanup, use [delete_universe_replication](#delete-universe-replication-source-universe-uuid). +{{< /note >}} + +#### bootstrap_cdc_producer + +Mark a set of tables in preparation for setting up xCluster replication. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + bootstrap_cdc_producer +``` + +* *source-master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *source-table-ids*: Comma-separated list of unique UUIDs associated with the tables (`table_id`). + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses 172.0.0.11:7100,127.0.0.12:7100,127.0.0.13:7100 \ + bootstrap_cdc_producer 000030ad000030008000000000004000 +``` + +```output +table id: 000030ad000030008000000000004000, CDC bootstrap id: dd5ea73b5d384b2c9ebd6c7b6d05972c +``` + +{{< note title="Note" >}} +The xCluster bootstrap IDs are the ones that should be used with [setup_universe_replication](#setup-universe-replication) and [alter_universe_replication](#alter-universe-replication). +{{< /note >}} + +#### get_replication_status + +Returns the xCluster replication status of all inbound replication groups. 
If *replication-group-id* is provided, this will only return streams that belong to an associated replication group. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + get_replication_status [ ] +``` + +* *target-master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The replication group identifier. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses 172.0.0.11:7100,127.0.0.12:7100,127.0.0.13:7100 \ + get_replication_status e260b8b6-e89f-4505-bb8e-b31f74aa29f3 +``` + +```output +statuses { + table_id: "03ee1455f2134d5b914dd499ccad4377" + stream_id: "53441ad2dd9f4e44a76dccab74d0a2ac" + errors { + error: REPLICATION_MISSING_OP_ID + error_detail: "Unable to find expected op id on the producer" + } +} +``` + +#### create_xcluster_checkpoint + +Checkpoint namespaces for use in xCluster replication. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + create_xcluster_checkpoint \ + \ + \ + [automatic_ddl_mode] +``` + +* *source-master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The replication group identifier. +* *namespace_names*: Comma-separated list of namespaces. +* `automatic_ddl_mode`: Use Automatic xCluster mode. {{}} + +#### is_xcluster_bootstrap_required + +Checks if the databases of a previously checkpointed replication group requires a bootstrap (backup/restore) of the database to the target universe. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + is_xcluster_bootstrap_required \ + \ + +``` + +* *source-master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The replication group identifier. +* *namespace-names*: Comma-separated list of namespaces. + +#### setup_xcluster_replication + +Setup xCluster replication using a previously created [checkpoint](#create-xcluster-checkpoint). + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + setup_xcluster_replication \ + \ + +``` + +* *source-master-addresses*: Comma-separated list of source universe YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The replication group identifier. +* *target-master-addresses*: Comma-separated list of target universe YB-Master hosts and ports. Default is `localhost:7100`. + +#### drop_xcluster_replication + +Drops the xCluster replication group. If *target-master-addresses* are provided, it will also drop the replication on the target universe. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + drop_xcluster_replication \ + \ + [] +``` + +* *source-master-addresses*: Comma-separated list of source universe YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The replication group identifier. +* *target-master-addresses*: Comma-separated list of target universe YB-Master hosts and ports. Default is `localhost:7100`. + +#### add_namespace_to_xcluster_checkpoint + +Adds a database to an existing xCluster checkpoint. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + add_namespace_to_xcluster_checkpoint \ + \ + +``` + +* *source-master-addresses*: Comma-separated list of source universe YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The replication group identifier. +* *namespace-name*: The namespace to checkpoint. 
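+
+A minimal sketch of adding a database to an existing checkpoint, following the syntax above (the addresses, the replication group name `repl_group1`, and the database `yugabyte` are illustrative):
+
+```sh
+# Illustrative: checkpoint an additional database for an existing replication group
+./bin/yb-admin \
+    --master_addresses 127.0.0.1:7100,127.0.0.2:7100,127.0.0.3:7100 \
+    add_namespace_to_xcluster_checkpoint repl_group1 yugabyte
+```
+
+As with [create_xcluster_checkpoint](#create-xcluster-checkpoint), you can then run [is_xcluster_bootstrap_required](#is-xcluster-bootstrap-required) to check whether the new database needs a backup/restore before adding it to replication.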
+ +#### add_namespace_to_xcluster_replication + +Adds a database to an existing xCluster replication after it has been checkpointed (and bootstrapped if needed). + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + add_namespace_to_xcluster_replication \ + \ + \ + +``` + +* *source-master-addresses*: Comma-separated list of source universe YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The replication group identifier. +* *namespace-name*: The namespace name. +* *target-master-addresses*: Comma-separated list of target universe YB-Master hosts and ports. Default is `localhost:7100`. + +#### remove_namespace_from_xcluster_replication + +Removes a database from an existing xCluster replication. If target master addresses are provided, it will also remove the database from the target universe xCluster metadata. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + remove_namespace_from_xcluster_replication \ + \ + \ + [] +``` + +* *source-master-addresses*: Comma-separated list of source universe YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The replication group identifier. +* *namespace-name*: The namespace name. +* *target-master-addresses*: Comma-separated list of target universe YB-Master hosts and ports. Default is `localhost:7100`. + +#### list_xcluster_outbound_replication_groups + +List The replication group identifiers for all outbound xCluster replications. If namespace-id is provided, only the replication groups for that namespace will be returned. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + list_xcluster_outbound_replication_groups \ + [] +``` + +* *source-master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *namespace-id*: The namespace UUID. + +#### get_xcluster_outbound_replication_group_info + +Display the status of a specific outbound xCluster replication group. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + get_xcluster_outbound_replication_group_info \ + +``` + +* *source-master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *replication-group-id*: The replication group identifier. + +#### list_universe_replications + +List The replication group identifiers for all inbound xCluster replications. If *namespace-id* is provided, only the replication groups for that namespace will be returned. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + list_universe_replications \ + [] +``` + +* *target-master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *namespace-id*: The namespace UUID. + +--- + +### Decommissioning commands + +#### get_leader_blacklist_completion + +Gets the tablet load move completion percentage for blacklisted nodes. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + get_leader_blacklist_completion +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + get_leader_blacklist_completion +``` + +#### change_blacklist + +Changes the blacklist for YB-TServer servers. + +After old YB-TServer servers are terminated, you can use this command to clean up the blacklist. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + change_blacklist [ ADD | REMOVE ] : \ + [ : ]... 
+``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* ADD | REMOVE: Adds or removes the specified YB-TServer server from blacklist. +* *ip_addr:port*: The IP address and port of the YB-TServer. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + change_blacklist \ + ADD node1:9100 node2:9100 node3:9100 node4:9100 node5:9100 node6:9100 +``` + +#### change_leader_blacklist + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + change_leader_blacklist [ ADD | REMOVE ] : \ + [ : ]... +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* ADD | REMOVE: Adds or removes the specified YB-Master, or YB-TServer from leader blacklist. +* *ip_addr:port*: The IP address and port of the YB-TServer. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + change_leader_blacklist \ + ADD node1:9100 node2:9100 node3:9100 node4:9100 node5:9100 node6:9100 +``` + +#### leader_stepdown + +Forces the YB-TServer leader of the specified tablet to step down. + +{{< note title="Note" >}} + +Use this command only if recommended by Yugabyte support. + +There is a possibility of downtime. + +{{< /note >}} + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + leader_stepdown +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *tablet-id*: The identifier (ID) of the tablet. +* *dest-ts-uuid*: The destination identifier (UUID) for the new YB-TServer leader. To move leadership **from** the current leader, when you do not need to specify a new leader, use `""` for the value. If you want to transfer leadership intentionally **to** a specific new leader, then specify the new leader. + +{{< note title="Note" >}} + +If specified, *dest-ts-uuid* becomes the new leader. If the argument is empty (`""`), then a new leader will be elected automatically. In a future release, this argument will be optional. See GitHub issue [#4722](https://github.com/yugabyte/yugabyte-db/issues/4722) + +{{< /note >}} + +--- + +### Rebalancing commands + +For information on YB-Master load balancing, see [Data placement and load balancing](../../architecture/yb-master/#tablet-assignments). + +For YB-Master load balancing flags, see [Load balancing flags](../../reference/configuration/yb-master/#load-balancing-flags). + +#### set_load_balancer_enabled + +Enables or disables the load balancer. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + set_load_balancer_enabled [ 0 | 1 ] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* `0` | `1`: Enabled (`1`) is the default. To disable, set to `0`. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + set_load_balancer_enabled 0 +``` + +#### get_load_balancer_state + +Returns the cluster load balancer state. + +**Syntax** + +```sh +yb-admin \ + --master_addresses get_load_balancer_state +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +#### get_load_move_completion + +Checks the percentage completion of the data move. + +You can rerun this command periodically until the value reaches `100.0`, indicating that the data move has completed. 
+ +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + get_load_move_completion +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +{{< note title="Note" >}} + +The time needed to complete a data move depends on the following: + +* number of tablets and tables +* size of each of those tablets +* SSD transfer speeds +* network bandwidth between new nodes and existing ones + +{{< /note >}} + +For an example of performing a data move and the use of this command, refer to [Change cluster configuration](../../manage/change-cluster-config/). + +**Example** + +In the following example, the data move is `66.6` percent done. + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + get_load_move_completion +``` + +Returns the following percentage: + +```output +66.6 +``` + +#### get_is_load_balancer_idle + +Finds out if the load balancer is idle. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + get_is_load_balancer_idle +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + get_is_load_balancer_idle +``` + +--- + +### Upgrade + +Refer to [Upgrade a deployment](../../manage/upgrade-deployment/) to learn about how to upgrade a YugabyteDB cluster. + +For information on AutoFlags and how it secures upgrades with new data formats, refer to [AutoFlags](https://github.com/yugabyte/yugabyte-db/blob/master/architecture/design/auto_flags.md). + +#### get_auto_flags_config + +Returns the current AutoFlags configuration of the universe. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + get_auto_flags_config +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +**Example** + +```sh +./bin/yb-admin --master_addresses ip1:7100,ip2:7100,ip3:7100 get_auto_flags_config +``` + +If the operation is successful you should see output similar to the following: + +```output +AutoFlags config: +config_version: 1 +promoted_flags { + process_name: "yb-master" + flags: "enable_automatic_tablet_splitting" + flags: "master_enable_universe_uuid_heartbeat_check" + flag_infos { + promoted_version: 1 + } + flag_infos { + promoted_version: 1 + } +} +promoted_flags { + process_name: "yb-tserver" + flags: "regular_tablets_data_block_key_value_encoding" + flags: "remote_bootstrap_from_leader_only" + flags: "ysql_yb_enable_expression_pushdown" + flag_infos { + promoted_version: 1 + } + flag_infos { + promoted_version: 1 + } + flag_infos { + promoted_version: 1 + } +} +``` + +#### promote_auto_flags + +After all YugabyteDB processes have been upgraded to the new version, these features can be enabled by promoting their AutoFlags. + +Note that `promote_auto_flags` is a cluster-level operation; you don't need to run it on every node. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + promote_auto_flags \ + [ [ [force]]] +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. +* *max-flags-class*: The maximum AutoFlag class to promote. Allowed values are `kLocalVolatile`, `kLocalPersisted` and `kExternal`. Default is `kExternal`. +* *promote-non-runtime-flags*: Weather to promote non-runtime flags. Allowed values are `true` and `false`. Default is `true`. 
+* `force`: Forces the generation of a new AutoFlag configuration and sends it to all YugabyteDB processes even if there are no new AutoFlags to promote. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + promote_auto_flags kLocalPersisted +``` + +If the operation is successful you should see output similar to the following: + +```output +PromoteAutoFlags status: +New AutoFlags were promoted. Config version: 2 +``` + +OR + +```output +PromoteAutoFlags status: +No new AutoFlags to promote +``` + +#### upgrade_ysql + +Upgrades the YSQL system catalog after a successful [YugabyteDB cluster upgrade](../../manage/upgrade-deployment/). + +YSQL upgrades are not required for clusters where [YSQL is not enabled](../../reference/configuration/yb-tserver/#ysql). + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + upgrade_ysql +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +**Example** + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + upgrade_ysql +``` + +A successful upgrade returns the following message: + +```output +YSQL successfully upgraded to the latest version +``` + +In certain scenarios, a YSQL upgrade can take longer than 60 seconds, which is the default timeout value for yb-admin. To account for that, run the command with a higher timeout value: + +```sh +./bin/yb-admin \ + --master_addresses ip1:7100,ip2:7100,ip3:7100 \ + --timeout_ms 180000 \ + upgrade_ysql +``` + +Running `upgrade_ysql` is an online operation and doesn't require stopping a running cluster. `upgrade_ysql` is also a cluster-level operation; you don't need to run it on every node. + +{{< note title="Note" >}} +Concurrent operations in a cluster can lead to various transactional conflicts, catalog version mismatches, and read restart errors. This is expected, and should be addressed by rerunning the upgrade command. +{{< /note >}} + +#### finalize_upgrade + +Finalizes an upgrade after a successful [YSQL major upgrade](../../manage/ysql-major-upgrade-local/). You can run this command from any node in the cluster. + +Note that `finalize_upgrade` is a cluster-level operation; you don't need to run it on every node. + +**Syntax** + +```sh +yb-admin \ + --master_addresses \ + finalize_upgrade +``` + +* *master-addresses*: Comma-separated list of YB-Master hosts and ports. Default is `localhost:7100`. + +**Example** + +```sh +./bin/yb-admin --master_addresses 127.0.0.1:7100,127.0.0.2:7100,127.0.0.3:7100 finalize_upgrade +``` + +```output +Finalizing YSQL major catalog upgrade +Finalize successful + +Promoting auto flags +PromoteAutoFlags completed successfully +New AutoFlags were promoted +New config version: 2 + +Upgrading YSQL +YSQL successfully upgraded to the latest version + +Upgrade successfully finalized +``` diff --git a/docs/content/v2.25/admin/yb-ctl.md b/docs/content/v2.25/admin/yb-ctl.md new file mode 100644 index 000000000000..18a88dc0bf0c --- /dev/null +++ b/docs/content/v2.25/admin/yb-ctl.md @@ -0,0 +1,566 @@ +--- +title: yb-ctl - command line tool for administering local YugabyteDB clusters +headerTitle: yb-ctl +linkTitle: yb-ctl +description: Use the yb-ctl command line tool to administer local YugabyteDB clusters used for development and learning. 
+menu: + preview: + identifier: yb-ctl + parent: admin + weight: 90 +aliases: + - /admin/yb-ctl +type: docs +rightNav: + hideH4: true +--- + +## Overview + +The yb-ctl utility provides a command line interface for administering local clusters used for development and learning. It invokes the [yb-tserver](../../reference/configuration/yb-tserver/) and [yb-master](../../reference/configuration/yb-master/) servers to perform the necessary orchestration. + +yb-ctl is meant for managing local clusters only. This means that a single host machine like a local laptop is used to simulate YugabyteDB clusters even though the YugabyteDB cluster can have 3 nodes or more. For creating multi-host clusters, follow the instructions in the [Deploy](../../deploy/) section. + +yb-ctl can manage a cluster if and only if it was initially created via yb-ctl. This means that clusters created through any other means including those in the [Deploy](../../deploy/) section cannot be administered using yb-ctl. + +{{% note title="Running on macOS" %}} + +Running YugabyteDB on macOS requires additional settings. For more information, refer to [Running on macOS](#running-on-macos). + +{{% /note %}} + +### Installation + +yb-ctl is installed with YugabyteDB and is located in the `bin` directory of the YugabyteDB home directory. + +## Syntax + +Run yb-ctl commands from the YugabyteDB home directory. + +```sh +./bin/yb-ctl [ command ] [ flag1, flag2, ... ] +``` + +### Online help + +To display the online help, run `yb-ctl --help` from the YugabyteDB home directory. + +```sh +$ ./bin/yb-ctl --help +``` + +## Commands + +##### create + +Creates a local YugabyteDB cluster. With no flags, creates a 1-node cluster. + +For more details and examples, see [Create a local cluster](#create-a-local-cluster), [Create a cluster across multiple zones, regions, and clouds](#create-a-cluster-across-multiple-zones-regions-and-clouds), and [Create a local cluster with custom flags](#create-a-local-cluster-with-custom-flags). + +##### start + +Starts the existing cluster or, if not existing, creates and starts the cluster. + +##### stop + +Stops the cluster, if running. + +##### destroy + +Destroys the current cluster. + +For details and examples, see [Destroy a local cluster](#destroy-a-local-cluster). + +##### status + +Displays the current status of the cluster. + +For details and examples, see [Check cluster status](#check-cluster-status). + +##### restart + +Restarts the current cluster all at once. + +For details and examples, see [Restart a cluster](#restart-a-cluster) and [Restart with custom flags](#restart-with-custom-flags). + +##### wipe_restart + +Stops the current cluster, wipes all data files and starts the cluster as before (losing all flags). + +For details and examples, see [Wipe and restart with placement info flags](#wipe-and-restart-with-placement-info-flags). + +##### add_node + +Adds a new node to the current cluster. It also takes an optional flag `--master`, which denotes that the server to add is a yb-master. + +For details and examples, see [Add nodes](#add-nodes) and [Create a cluster across multiple zones, regions, and clouds](#create-a-cluster-across-multiple-zones-regions-and-clouds). + +##### remove_node + +Stops a particular node in the running cluster. It also takes an optional flag `--master`, which denotes that the server is a yb-master. + +For details and examples, see [Stop and remove nodes](#stop-and-remove-nodes). + +##### start_node + +Starts a specified node in the running cluster. 
It also takes an optional flag `--master`, which denotes that the server is a yb-master. + +##### stop_node + +Stops the specified node in the running cluster. It also takes an optional flag `--master`, which denotes that the server is a yb-master. + +For details and examples, see [Stop and remove nodes](#stop-and-remove-nodes). + +##### restart_node + +Restarts the specified node in a running cluster. It also takes an optional flag `--master`, which denotes that the server is a yb-master. + +For details and examples, see [Restart node with placement information](#restart-node-with-placement-information). + +## Flags + +##### --help, -h + +Shows the help message and then exits. + +##### --binary_dir + +Specifies the directory in which to find the YugabyteDB yb-master and yb-tserver binary files. + +Default: `/bin/` + +##### --data_dir + +Specifies the data directory for YugabyteDB. + +Default: `$HOME/yugabyte-data/` + +Changing the value of this flag after the cluster has already been created is not supported. + +##### --master_flags + +Specifies a list of YB-Master flags, separated by commas. + +For details and examples, see [Create a local cluster with custom flags](#create-a-local-cluster-with-custom-flags). + +##### --tserver_flags + +Specifies a list of YB-TServer flags, separated by commas. + +For details and examples, see [Create a local cluster with custom flags](#create-a-local-cluster-with-custom-flags). + +**Example** + +To enable [YSQL authentication](../../secure/enable-authentication/authentication-ysql/), you can use the `--tserver_flags` flag to add the yb-tserver [`--ysql_enable_auth`](../../reference/configuration/yb-tserver/#ysql-enable-auth) flag to the `yb-ctl create | start | restart` commands. + +```sh +$./bin/yb-ctl create --tserver_flags "ysql_enable_auth=true" +``` + +##### --placement_info + +Specifies the cloud, region, and zone as `cloud.region.zone`, separated by commas. + +Default: `cloud1.datacenter1.rack1` + +For details and examples, see [Create a cluster across multiple zones, regions, and clouds](#create-a-cluster-across-multiple-zones-regions-and-clouds), [Restart node with placement information](#restart-node-with-placement-information), +and [Wipe and restart with placement info flags](#wipe-and-restart-with-placement-info-flags). + +##### --replication_factor, -rf + +Specifies the number of replicas for each tablet. This parameter is also known as Replication Factor (RF). Should be an odd number so that a majority consensus can be established. A minimum value of `3` is needed to create a fault-tolerant cluster as `1` signifies that there is no only 1 replica with no fault tolerance. + +This value also sets the default number of YB-Master servers. + +Default: `1` + +##### --require_clock_sync + +Specifies whether YugabyteDB requires clock synchronization between the nodes in the cluster. + +Default: `false` + +##### --listen_ip + + Specifies the IP address, or port, for a 1-node cluster to listen on. To enable external access of the YugabyteDB APIs and administration ports, set the value to `0.0.0.0`. Note that this flag is not applicable to multi-node clusters. + +Default: `127.0.0.1` + +##### --num_shards_per_tserver + +Number of shards (tablets) to start per tablet server for each table. + +Default: `2` + +##### --timeout-yb-admin-sec + +Timeout, in seconds, for operations that call yb-admin and wait on the cluster. + +##### --timeout-processes-running-sec + +Timeout, in seconds, for operations that wait on the cluster. 
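+
+**Example**
+
+For illustration, the timeout flags are passed alongside a yb-ctl command in the same way as the other flags on this page; the value shown here is a placeholder:
+
+```sh
+$ ./bin/yb-ctl start --timeout-processes-running-sec 120
+```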
+ +##### --verbose + +Flag to log internal debug messages to `stderr`. + +## Using yb-ctl + +### Running on macOS + +#### Port conflicts + +macOS Monterey enables AirPlay receiving by default, which listens on port 7000. This conflicts with YugabyteDB and causes `yb-ctl start` to fail. Use the [--master_flags](#master-flags) flag when you start the cluster to change the default port number, as follows: + +```sh +./bin/yb-ctl start --master_flags "webserver_port=7001" +``` + +Alternatively, you can disable AirPlay receiving, then start YugabyteDB normally, and then, optionally, re-enable AirPlay receiving. + +#### Loopback addresses + +On macOS, every additional node after the first needs a loopback address configured to simulate the use of multiple hosts or nodes. For example, for a three-node cluster, you add two additional addresses as follows: + +```sh +sudo ifconfig lo0 alias 127.0.0.2 +sudo ifconfig lo0 alias 127.0.0.3 +``` + +The loopback addresses do not persist upon rebooting your computer. + +### Create a local cluster + +To create a local YugabyteDB cluster for development and learning, use the `yb-ctl create` command. + +To ensure that all of the replicas for a given tablet can be placed on different nodes, the number of nodes created with the initial create command is always equal to the replication factor. To expand or shrink the cluster, use the [add_node](#add-nodes) and [remove_node](#stop-and-remove-nodes) commands. + +Each of these initial nodes run a yb-tserver server and a yb-master server. Note that the number of YB-Master servers in a cluster must equal the replication factor for the cluster to be considered operating normally. + +If you are running YugabyteDB on your local computer, you can't run more than one cluster at a time. To set up a new local YugabyteDB cluster using yb-ctl, first [destroy the currently running cluster](#destroy-a-local-cluster). + +#### Create a local 1-node cluster with replication factor of 1 + +```sh +$ ./bin/yb-ctl create +``` + +Note that the default replication factor is 1. + +#### Create a 4-node cluster with replication factor of 3 + +First create a 3-node cluster with replication factor of `3`. + +```sh +$ ./bin/yb-ctl --rf 3 create +``` + +Use `yb-ctl add_node` command to add a node and make it a 4-node cluster. + +```sh +$ ./bin/yb-ctl add_node +``` + +#### Create a 5-node cluster with replication factor of 5 + +```sh +$ ./bin/yb-ctl --rf 5 create +``` + +### Destroy a local cluster + +The following command stops all the nodes and deletes the data directory of the cluster. + +```sh +$ ./bin/yb-ctl destroy +``` + +### Enable external access + +There are essentially two modes with yb-ctl: + +- 1-node RF1 cluster where the bind IP address for all ports can be bound to `0.0.0.0` using the `listen_ip` flag. This is the mode you use if you want to have external access for the database APIs and admin UIs. + + ```sh + $ ./bin/yb-ctl create --listen_ip=0.0.0.0 + ``` + +- Multi-node (say 3-node RF3) cluster where the bind IP addresses are the loopback IP addresses since binding to `0.0.0.0` is no longer possible. Hence, this mode is only meant for internal access. + +### Check cluster status + +To get the status of your local cluster, including the Admin UI URLs for the YB-Master and YB-TServer, run the `yb-ctl status` command. + +```sh +$ ./bin/yb-ctl status +``` + +Following is the output shown for a 3-node RF3 cluster. 
+ +```output +---------------------------------------------------------------------------------------------------- +| Node Count: 3 | Replication Factor: 3 | +---------------------------------------------------------------------------------------------------- +| JDBC : jdbc:postgresql://127.0.0.1:5433/yugabyte | +| YSQL Shell : bin/ysqlsh | +| YCQL Shell : bin/ycqlsh | +| Web UI : http://127.0.0.1:7000/ | +| Cluster Data : /Users/testuser12/yugabyte-data | +---------------------------------------------------------------------------------------------------- +---------------------------------------------------------------------------------------------------- +| Node 1: yb-tserver (pid 27389), yb-master (pid 27380) | +---------------------------------------------------------------------------------------------------- +| JDBC : jdbc:postgresql://127.0.0.1:5433/yugabyte | +| YSQL Shell : bin/ysqlsh | +| YCQL Shell : bin/ycqlsh | +| data-dir[0] : /Users/testuser12/yugabyte-data/node-1/disk-1/yb-data | +| yb-tserver Logs : /Users/testuser12/yugabyte-data/node-1/disk-1/yb-data/tserver/logs | +| yb-master Logs : /Users/testuser12/yugabyte-data/node-1/disk-1/yb-data/master/logs | +---------------------------------------------------------------------------------------------------- +---------------------------------------------------------------------------------------------------- +| Node 2: yb-tserver (pid 27392), yb-master (pid 27383) | +---------------------------------------------------------------------------------------------------- +| JDBC : jdbc:postgresql://127.0.0.2:5433/yugabyte | +| YSQL Shell : bin/ysqlsh -h 127.0.0.2 | +| YCQL Shell : bin/ycqlsh 127.0.0.2 | +| data-dir[0] : /Users/testuser12/yugabyte-data/node-2/disk-1/yb-data | +| yb-tserver Logs : /Users/testuser12/yugabyte-data/node-2/disk-1/yb-data/tserver/logs | +| yb-master Logs : /Users/testuser12/yugabyte-data/node-2/disk-1/yb-data/master/logs | +---------------------------------------------------------------------------------------------------- +---------------------------------------------------------------------------------------------------- +| Node 3: yb-tserver (pid 27395), yb-master (pid 27386) | +---------------------------------------------------------------------------------------------------- +| JDBC : jdbc:postgresql://127.0.0.3:5433/yugabyte | +| YSQL Shell : bin/ysqlsh -h 127.0.0.3 | +| YCQL Shell : bin/ycqlsh 127.0.0.3 | +| data-dir[0] : /Users/testuser12/yugabyte-data/node-3/disk-1/yb-data | +| yb-tserver Logs : /Users/testuser12/yugabyte-data/node-3/disk-1/yb-data/tserver/logs | +| yb-master Logs : /Users/testuser12/yugabyte-data/node-3/disk-1/yb-data/master/logs | +---------------------------------------------------------------------------------------------------- +``` + +### Start and stop an existing cluster + +Start the existing cluster, or create and start a cluster (if one doesn't exist) by running the `yb-ctl start` command. + +```sh +$ ./bin/yb-ctl start +``` + +Stop a cluster so that you can start it later by running the `yb-ctl stop` command. + +```sh +$ ./bin/yb-ctl stop +``` + +### Add and remove nodes + +#### Add nodes + +This will start a new YB-TServer server and give it a new `node_id` for tracking purposes. + +```sh +$ ./bin/yb-ctl add_node +``` + +#### Stop and remove nodes + +We can stop a node by executing the `yb-ctl stop` command. The command takes the `node_id` of the node that has to be removed as input. 
Stop node command expects a node id which denotes the index of the server that needs to be stopped. It also takes an optional flag `--master`, which denotes that the server is a yb-master. + +```sh +$ ./bin/yb-ctl stop_node 3 +``` + +We can also pass an optional flag `--master`, which denotes that the server is a yb-master. + +```sh +$ ./bin/yb-ctl stop_node 3 --master +``` + +Currently `stop_node` and `remove_node` implement exactly the same behavior. So they can be used interchangeably. + +#### Test failure of a node + +You can test the failure of a node in a 3-node RF3 cluster by killing 1 instance of yb-tserver and 1 instance of yb-master by using the following commands. + +```sh +./bin/yb-ctl destroy +./bin/yb-ctl --rf 3 create +./bin/yb-ctl stop_node 3 +./bin/yb-ctl stop_node 3 --master +./bin/yb-ctl start_node 3 +./bin/yb-ctl start_node 3 --master +``` + +The command `./bin/yb-ctl start_node 3` starts the third YB-TServer. This displays an error, though the command succeeds. This is because only 2 YB-Masters are present in the cluster at this point. This is not an error in the cluster configuration but rather a warning to highlight that the cluster is under-replicated and does not have enough YB-Masters to ensure continued fault tolerance. See [issue 3506](https://github.com/yugabyte/yugabyte-db/issues/3506). + +## Default directories for local clusters + +YugabyteDB clusters created using the yb-ctl utility are created locally on the same host and simulate a distributed multi-host cluster. + +### Data directory + +YugabyteDB cluster data is installed in `$HOME/yugabyte-data/`, containing the following: + +```sh +cluster_config.json +initdb.log +node-#/ +node-#/disk-#/ +``` + +#### Node directories + +For each simulated YugabyteDB node, a `yugabyte-data` subdirectory, named `node-#` (where # is the number of the node), is created. + +Example: `/yugabyte-data/node-#/` + +Each `node-#` directory contains the following: + +```sh +yugabyte-data/node-#/disk-#/ +``` + +#### Disk directories + +For each simulated disk, a `disk-#` subdirectory is created in each `/yugabyte-data/node-#` directory. + +Each `disk-#` directory contains the following: + +```sh +master.err +master.out +pg_data/ +tserver.err +tserver.out +yb-data/ +``` + +#### Logs + +YB-Master logs are added in the following location: + +```sh +yugabyte-data/node-#/disk-#/master.out +yugabyte-data/node-#/disk-#/yb-data/master/logs +``` + +YB-TServer logs are added in the following location: + +```sh +yugabyte-data/node-#/disk-#/tserver.out +yugabyte-data/node-#/disk-#/yb-data/tserver/logs +``` + +## Advanced commands + +### Create a cluster across multiple zones, regions, and clouds + +You can pass the placement information for nodes in a cluster from the command line. The placement information is provided as a set of (cloud, region, zone) tuples separated by commas. Each cloud, region and zone entry is separated by dots. + +```sh +$ ./bin/yb-ctl --rf 3 create --placement_info "cloud1.region1.zone1,cloud2.region2.zone2" +``` + +The total number of placement information entries cannot be more than the replication factor (this is because you would not be able to satisfy the data placement constraints for this replication factor). If the total number of placement information entries is lesser than the replication factor, the placement information is passed down to the node in a round robin approach. 
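+
+For example, the following sketch (the cloud, region, and zone names are placeholders) creates an RF3 cluster with only two placement entries; nodes 1 and 2 receive the first and second entries, and node 3 wraps around to the first entry again:
+
+```sh
+$ ./bin/yb-ctl --rf 3 create --placement_info "cloud1.region1.zone1,cloud1.region1.zone2"
+```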
+ +To add a node: + +```sh +$ ./bin/yb-ctl add_node --placement_info "cloud1.region1.zone1" +``` + +### Create a local cluster with custom flags + +When you use yb-ctl, you can pass "custom" flags (flags unavailable directly in yb-ctl) to the YB-Master and YB-TServer servers. + +```sh +$ ./bin/yb-ctl --rf 1 create --master_flags "log_cache_size_limit_mb=128,log_min_seconds_to_retain=20,master_backup_svc_queue_length=70" --tserver_flags "log_inject_latency=false,log_segment_size_mb=128,raft_heartbeat_interval_ms=1000" +``` + +To add a node with custom YB-TServer flags: + +```sh +$ ./bin/yb-ctl add_node --tserver_flags "log_inject_latency=false,log_segment_size_mb=128" +``` + +To add a node with custom YB-Master flags: + +```sh +$ ./bin/yb-ctl add_node --master_flags "log_cache_size_limit_mb=128,log_min_seconds_to_retain=20" +``` + +To handle flags whose value contains commas or equals, quote the whole key-value pair with double-quotes: + +```sh +$ ./bin/yb-ctl create --tserver_flags 'ysql_enable_auth=false,"vmodule=tablet_service=1,pg_doc_op=1",ysql_prefetch_limit=1000' +``` + +### Restart a cluster + +The `yb-ctl restart` command can be used to restart a cluster. Please note that if you restart the cluster, all custom defined flags and placement information will be lost. Nevertheless, you can pass the placement information and custom flags in the same way as they are passed in the `yb-ctl create` command. + +```sh +$ ./bin/yb-ctl restart +``` + +- Restart with cloud, region and zone flags + +```sh +$ ./bin/yb-ctl restart --placement_info "cloud1.region1.zone1" +``` + +### Restart with custom flags + +```sh +$ ./bin/yb-ctl restart --master_flags "log_cache_size_limit_mb=128,log_min_seconds_to_retain=20,master_backup_svc_queue_length=70" --tserver_flags "log_inject_latency=false,log_segment_size_mb=128,raft_heartbeat_interval_ms=1000" +``` + +### Restart a node + +The `yb-ctl restart` first stops the node and then starts it again. At this point of time, the node is not decommissioned from the cluster. Thus one of the primary advantages of this command is that it can be used to clear old flags and pass in new ones. Just like create, you can pass the cloud/region/zone and custom flags in the `yb-ctl restart` command. + +```sh +$ ./bin/yb-ctl restart_node 2 + +``` + +#### Restart yb-master on a node + +```sh +$ ./bin/yb-ctl restart_node 2 --master +``` + +#### Restart node with placement information + +```sh +$ ./bin/yb-ctl restart_node 2 --placement_info "cloud1.region1.zone1" +``` + +#### Restart node with flags + +```sh +$ ./bin/yb-ctl restart_node 2 --master --master_flags "log_cache_size_limit_mb=128,log_min_seconds_to_retain=20" +``` + +### Wipe and restart a cluster + +The `yb-ctl wipe_restart` command stops all the nodes, removes the underlying data directories, and then restarts with the same number of nodes that you had in your previous configuration. + +Just like the `yb-ctl restart` command, the custom-defined flags and placement information will be lost during `wipe_restart`, though you can pass placement information and custom flags in the same way as they are passed in the `yb-ctl create` command. 
+ +```sh +$ ./bin/yb-ctl wipe_restart +``` + +#### Wipe and restart with placement info flags + +```sh +$ ./bin/yb-ctl wipe_restart --placement_info "cloud1.region1.zone1" +``` + +#### Wipe and restart with custom flags + +```sh +$ ./bin/yb-ctl wipe_restart --master_flags "log_cache_size_limit_mb=128,log_min_seconds_to_retain=20,master_backup_svc_queue_length=70" --tserver_flags "log_inject_latency=false,log_segment_size_mb=128,raft_heartbeat_interval_ms=1000" +``` diff --git a/docs/content/v2.25/admin/yb-docker-ctl.md b/docs/content/v2.25/admin/yb-docker-ctl.md new file mode 100644 index 000000000000..f2c64443ef39 --- /dev/null +++ b/docs/content/v2.25/admin/yb-docker-ctl.md @@ -0,0 +1,248 @@ +--- +title: yb-docker-ctl - command line tool for administering local Docker-based clusters +headerTitle: yb-docker-ctl +linkTitle: yb-docker-ctl +description: Use the yb-docker-ctl command line tool to administer local Docker-based YugabyteDB clusters for development and learning. +menu: + preview: + identifier: yb-docker-ctl + parent: admin + weight: 100 +type: docs +--- + +{{< warning title="yb-docker-ctl is deprecated" >}} + +yb-docker-ctl is no longer maintained. The recommended method to run YugabyteDB in Docker is to use [yugabyted](../../reference/configuration/yugabyted/#create-a-multi-region-cluster-in-docker). For more information, see the [Quick Start](/preview/quick-start/docker/). + +{{< /warning >}} + +The `yb-docker-ctl` utility provides a basic command line interface (CLI), or shell, for administering a local Docker-based cluster for development and learning. It manages the [YB-Master](../../reference/configuration/yb-master/) and [YB-TServer](../../reference/configuration/yb-tserver/) containers to perform the necessary administration. + +{{% note title="macOS Monterey" %}} + +macOS Monterey enables AirPlay receiving by default, which listens on port 7000. This conflicts with YugabyteDB and causes `yb-docker-ctl create` to fail. Use the `--master_flags` flag when you start the cluster to change the default port number, as follows: + +```sh +./bin/yb-docker-ctl create --master_flags "webserver_port=7001" +``` + +Alternatively, you can disable AirPlay receiving, then start YugabyteDB normally, and then, optionally, re-enable AirPlay receiving. + +{{% /note %}} + +## Download + +```sh +$ mkdir ~/yugabyte && cd ~/yugabyte +``` + +```sh +$ wget https://raw.githubusercontent.com/yugabyte/yugabyte-db/master/bin/yb-docker-ctl && chmod +x yb-docker-ctl +``` + +## Online help + +Run `yb-docker-ctl --help` to display the online help. + +```sh +$ ./yb-docker-ctl -h +``` + +## Syntax + +```sh +yb-docker-ctl [ command ] [ arguments ] + +``` + +## Commands + +### create + +Creates a local YugabyteDB cluster. + +### add_node + +Adds a new local YugabyteDB cluster node. + +### status + +Displays the current status of the local YugabyteDB cluster. + +### destroy + +Destroys the local YugabyteDB cluster. + +### stop_node + +Stops the specified local YugabyteDB cluster node. + +### start_node + +Starts the specified local YugabyteDB cluster node. + +### stop + +Stops the local YugabyteDB cluster so that it can be started later. + +### start + +Starts the local YugabyteDB cluster, if it already exists. + +### remove_node + +Stops the specified local YugabyteDB cluster node. + +## Flags + +### --help, -h + +Displays the online help and then exits. + +### --tag + +Use with `create` and `add_node` commands to specify a specific Docker image tag (version). If not included, then latest Docker image is used. 
+ +## Create a cluster + +Use the `yb-docker-ctl create` command to create a local Docker-based cluster for development and learning. + +The number of nodes created when you use the `yb-docker-ctl create` command is always equal to the replication factor (RF), ensuring that all of the replicas for a given tablet can be placed on different nodes. With the [`add_node`](#add-a-node) and [`remove_node`](#remove-a-node) commands, the size of the cluster can thereafter be expanded or shrunk as needed. + +### Specify a docker image tag + +By default, the `create` and `add_node` commands pull the latest Docker Hub `yugabytedb/yugabyte` image to create clusters or add nodes. + +To pull an earlier Docker image tag (version), add the `--tag ` flag to use an earlier release. + +In the following example, a 1-node YugabyteDB cluster is created using the earlier v1.3.2.1 release that has a tag of `1.3.2.1-b2`. + +```sh +$ ./yb-docker-ctl create --tag 1.3.2.1-b2 +``` + +To get the correct tag value, see the [Docker Hub listing of tags for `yugabytedb/yugabyte`](https://hub.docker.com/r/yugabytedb/yugabyte/tags). + +### Create a 1-node local cluster with replication factor of 1 + +To create a 1-node local YugabyteDB cluster for development and learning, run the default yb-docker-ctl command. By default, this creates a 1-node cluster with a replication factor (RF) of 1. Note that the `yb-docker-ctl create` command pulls the latest `yugabytedb/yugabyte` image at the outset, in case the image has not yet downloaded or is not the latest version. + +```sh +$ ./yb-docker-ctl create +``` + +### Create a 3-node local cluster with replication factor of 3 + +When you create a 3-node local Docker-based cluster using the `yb-docker-ctl create` command, each of the initial nodes run a yb-tserver process and a yb-master process. Note that the number of YB-Masters in a cluster has to equal to the replication factor (RF) for the cluster to be considered as operating normally and the number of YB-TServers is equal to be the number of nodes. + +To create a 3-node local Docker-based cluster for development and learning, run the following yb-docker-ctl command. 
+ +```sh +$ ./yb-docker-ctl create --rf 3 +``` + +```output +docker run --name yb-master-n1 --privileged -p 7000:7000 --net yb-net --detach yugabytedb/yugabyte:latest /home/yugabyte/yb-master --fs_data_dirs=/mnt/disk0,/mnt/disk1 --master_addresses=yb-master-n1:7100,yb-master-n2:7100,yb-master-n3:7100 --rpc_bind_addresses=yb-master-n1:7100 +Adding node yb-master-n1 +docker run --name yb-master-n2 --privileged --net yb-net --detach yugabytedb/yugabyte:latest /home/yugabyte/yb-master --fs_data_dirs=/mnt/disk0,/mnt/disk1 --master_addresses=yb-master-n1:7100,yb-master-n2:7100,yb-master-n3:7100 --rpc_bind_addresses=yb-master-n2:7100 +Adding node yb-master-n2 +docker run --name yb-master-n3 --privileged --net yb-net --detach yugabytedb/yugabyte:latest /home/yugabyte/yb-master --fs_data_dirs=/mnt/disk0,/mnt/disk1 --master_addresses=yb-master-n1:7100,yb-master-n2:7100,yb-master-n3:7100 --rpc_bind_addresses=yb-master-n3:7100 +Adding node yb-master-n3 +docker run --name yb-tserver-n1 --privileged -p 9000:9000 -p 9042:9042 -p 6379:6379 --net yb-net --detach yugabytedb/yugabyte:latest /home/yugabyte/yb-tserver --fs_data_dirs=/mnt/disk0,/mnt/disk1 --tserver_master_addrs=yb-master-n1:7100,yb-master-n2:7100,yb-master-n3:7100 --rpc_bind_addresses=yb-tserver-n1:9100 +Adding node yb-tserver-n1 +docker run --name yb-tserver-n2 --privileged --net yb-net --detach yugabytedb/yugabyte:latest /home/yugabyte/yb-tserver --fs_data_dirs=/mnt/disk0,/mnt/disk1 --tserver_master_addrs=yb-master-n1:7100,yb-master-n2:7100,yb-master-n3:7100 --rpc_bind_addresses=yb-tserver-n2:9100 +Adding node yb-tserver-n2 +docker run --name yb-tserver-n3 --privileged --net yb-net --detach yugabytedb/yugabyte:latest /home/yugabyte/yb-tserver --fs_data_dirs=/mnt/disk0,/mnt/disk1 --tserver_master_addrs=yb-master-n1:7100,yb-master-n2:7100,yb-master-n3:7100 --rpc_bind_addresses=yb-tserver-n3:9100 +Adding node yb-tserver-n3 +PID Type Node URL Status Started At +11818 tserver yb-tserver-n3 http://172.19.0.7:9000 Running 2017-11-28T23:33:00.369124907Z +11632 tserver yb-tserver-n2 http://172.19.0.6:9000 Running 2017-11-28T23:32:59.874963849Z +11535 tserver yb-tserver-n1 http://172.19.0.5:9000 Running 2017-11-28T23:32:59.444064946Z +11350 master yb-master-n3 http://172.19.0.4:9000 Running 2017-11-28T23:32:58.899308826Z +11231 master yb-master-n2 http://172.19.0.3:9000 Running 2017-11-28T23:32:58.403788411Z +11133 master yb-master-n1 http://172.19.0.2:9000 Running 2017-11-28T23:32:57.905097927Z +``` + +### Create a 5-node local cluster with replication factor of 5 + +```sh +$ ./yb-docker-ctl create --rf 5 +``` + +## Check cluster status + +Get the status of your local cluster, including the URLs for the Admin UI for each YB-Master and YB-TServer. + +```sh +$ ./yb-docker-ctl status +``` + +```output +PID Type Node URL Status Started At +11818 tserver yb-tserver-n3 http://172.19.0.7:9000 Running 2017-11-28T23:33:00.369124907Z +11632 tserver yb-tserver-n2 http://172.19.0.6:9000 Running 2017-11-28T23:32:59.874963849Z +11535 tserver yb-tserver-n1 http://172.19.0.5:9000 Running 2017-11-28T23:32:59.444064946Z +11350 master yb-master-n3 http://172.19.0.4:9000 Running 2017-11-28T23:32:58.899308826Z +11231 master yb-master-n2 http://172.19.0.3:9000 Running 2017-11-28T23:32:58.403788411Z +11133 master yb-master-n1 http://172.19.0.2:9000 Running 2017-11-28T23:32:57.905097927Z +``` + +## Add a node + +Add a new node to the cluster. This will start a new yb-tserver process and give it a new `node_id` for tracking purposes. 
+ +```sh +$ ./yb-docker-ctl add_node +``` + +```output +docker run --name yb-tserver-n4 --net yb-net --detach yugabytedb/yugabyte:latest /home/yugabyte/yb-tserver --fs_data_dirs=/mnt/disk0,/mnt/disk1 --tserver_master_addrs=04:7100,04:7100,04:7100 --rpc_bind_addresses=yb-tserver-n4:9100 +Adding node yb-tserver-n4 +``` + +## Remove a node + +Remove a node from the cluster by executing the following command. The command takes the `node_id` of the node to be removed as input. + +### Help + +```sh +$ ./yb-docker-ctl remove_node --help +``` + +```output +usage: yb-docker-ctl remove_node [-h] node + +positional arguments: + node_id Index of the node to remove + +optional arguments: + -h, --help show this help message and exit +``` + +### Example + +```sh +$ ./yb-docker-ctl remove_node 3 +``` + +```output +Stopping node :yb-tserver-n3 +``` + +## Destroy cluster + +The `yb-docker-ctl destroy` command below destroys the local cluster, including deletion of the data directories. + +```sh +$ ./yb-docker-ctl destroy +``` + +## Upgrade container image + +The following `docker pull` command below upgrades the Docker image of YugabyteDB to the latest version. + +```sh +$ docker pull yugabytedb/yugabyte +``` diff --git a/docs/content/v2.25/admin/yb-ts-cli.md b/docs/content/v2.25/admin/yb-ts-cli.md new file mode 100644 index 000000000000..37a3bb1ce5cd --- /dev/null +++ b/docs/content/v2.25/admin/yb-ts-cli.md @@ -0,0 +1,339 @@ +--- +title: yb-ts-cli - command line tool for advanced yb-tserver operations +headerTitle: yb-ts-cli +linkTitle: yb-ts-cli +description: Use the yb-ts-cli command line utility to perform advanced YB-TServer operations. +menu: + preview: + identifier: yb-ts-cli + parent: admin + weight: 50 +type: docs +--- + +yb-ts-cli is a command line tool that can be used to perform an operation on a particular tablet server ([yb-tserver](../../reference/configuration/yb-tserver/)). Some of the commands perform operations similar to [yb-admin commands](../yb-admin/). The yb-admin commands focus on cluster administration, the yb-ts-cli commands apply to specific YB-TServer nodes. + +yb-ts-cli is a binary file installed with YugabyteDB and is located in the `bin` directory of the YugabyteDB home directory. + +## Syntax + +```sh +yb-ts-cli [ --server_address=: ] +``` + +* *host*:*port*: The *host* and *port* of the tablet server. Default is `localhost:9100`. +* *command*: The operation to be performed. See [Commands](#commands). +* *flags*: The flags to be applied to the command. See [Flags](#flags). + +### Online help + +To display the available online help, run yb-ts-cli without any commands or flags at the YugabyteDB home directory. + +```sh +./bin/yb-ts-cli +``` + +## Commands + +### are_tablets_running + +If all tablets are running, returns "All tablets are running". + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] are_tablets_running +``` + +* *host*:*port*: The *host* and *port* of the tablet server. Default is `localhost:9100`. + +### is_server_ready + +Prints the number of tablets that have not yet bootstrapped. +If all tablets have bootstrapped, returns "Tablet server is ready". + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] is_server_ready +``` + +* *host*:*port*: The *host* and *port* of the tablet server. Default is `localhost:9100`. + +### clear_server_metacache + +Clears all metacaches that are stored on a specified server. Works on both YB-Master (port 9100) and YB-TServer (port 7100) processes. 
Tablet servers and masters use MetaCaches to cache information about which tablet server hosts which tablet. Because these caches could become stale in some cases, you may want to use this command to clear the MetaCaches on a particular tablet server or master. + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] clear_server_metacache +``` + +* *host*:*port*: The *host* and *port* of the tablet/master server. Default is `localhost:9100`. + +### compact_all_tablets + +Compact all tablets on the tablet server. + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] compact_all_tablets +``` + +* *host*:*port*: The *host* and *port* of the tablet server. Default is `localhost:9100`. + +### compact_tablet + +Compact the specified tablet on the tablet server. + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] compact_tablet +``` + +* *host*:*port*: The *host* and *port* of the tablet server. Default is `localhost:9100`. +* *tablet_id*: The identifier of the tablet to compact. + +### count_intents + +Print the count of uncommitted intents (or [provisional records](../../architecture/transactions/distributed-txns/#provisional-records)). Helpful for debugging transactional workloads. + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] count_intents +``` + +* *host*:*port*: The *host* and *port* of the tablet server. Default is `localhost:9100`. + +### current_hybrid_time + +Prints the value of the current [hybrid time](../../architecture/transactions/transactions-overview/#hybrid-logical-clocks). + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] current_hybrid_time +``` + +* *host*:*port*: The *host* and *port* of the tablet server. Default is `localhost:9100`. + +### delete_tablet + +Deletes the tablet with the specified tablet ID (`tablet_id`) and reason. + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] delete_tablet "" +``` + +* *host*:*port*: The *host* and *port* of the tablet server. Default is `localhost:9100`. +* *tablet_id*: The identifier (ID) for the tablet. +* *reason-string*: Text string providing information on why the tablet was deleted. + +### dump_tablet + +Dump, or export, the specified tablet ID (`tablet_id`). + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] dump_tablet +``` + +* *host*:*port*: The *host* and *port* of the tablet server. Default is `localhost:9100`. +* *tablet_id*: The identifier (ID) for the tablet. + +### flush_all_tablets + +Flush all tablets on the tablet server. + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] flush_all_tablets +``` + +* *host*:*port*: The *host* and *port* of the tablet server. Default is `localhost:9100`. + +### flush_tablet + +Flush the specified tablet on the tablet server. + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] flush_tablet +``` + +* *host*:*port*: The *host* and *port* of the tablet server. Default is `localhost:9100`. +* *tablet_id*: The identifier of the tablet to compact. + +### list_tablets + +Lists the tablets on the specified tablet server, displaying the following properties: column name, tablet ID, state, table name, shard, and schema. + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] list_tablets +``` + +* *host*:*port*: The *host* and *port* of the tablet server. Default is `localhost:9100`. + +### reload_certificates + +Trigger a reload of TLS certificates and private keys from disk on the specified (master or tablet) server. 
+ +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] reload_certificates +``` + +* *host*:*port*: The *host* and *port* of the master or tablet server. Default is `localhost:9100`. + +### remote_bootstrap + +Trigger a remote bootstrap of a tablet from another tablet server to the specified tablet server. + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] remote_bootstrap +``` + +* *host*:*port*: The *host* and *port* of the tablet server running the remote bootstrap. Default is `localhost:9100`. +* *source_host*: The *host* or *host* and *port* of the tablet server to bootstrap from. +* *tablet_id*: The identifier of the tablet to trigger a remote bootstrap for. + +See [Manual remote bootstrap of failed peer](/preview/troubleshoot/cluster/replace_failed_peers/) for example usage. + +### set_flag + +Sets the specified configuration flag for the tablet server. + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] set_flag [ --force ] +``` + +* *host*:*port*: The *host* and *port* of the tablet server. Default is `localhost:9100`. +* `--force`: Flag to allow a change to a flag that is not explicitly marked as runtime-settable. Note that the change may be ignored on the server or may cause the server to crash, if unsafe values are provided. See [--force](#force). +* *flag*: The yb-tserver configuration flag (without the `--` prefix) to be set. See [yb-tserver](../../reference/configuration/yb-tserver/) +* *value*: The value to be applied. + +{{< note title="Important" >}} + +The `set_flag` command changes the in-memory value of the specified flag, atomically, for a running server, and can alter its behavior. **The change does NOT persist across restarts.** + +In practice, there are some flags that are runtime safe to change (runtime-settable) and some that are not. For example, the bind address of the server cannot be changed at runtime, because the server binds just once at startup. While most of the flags are probably runtime-settable, you need to review the flags and note in the configuration pages which flags are not runtime-settable. (See GitHub issue [#3534](https://github.com/yugabyte/yugabyte-db/issues/3534)). + +One typical operational flow is that you can use this to modify runtime flags in memory and then out of band also modify the configuration file that the server uses to start. This allows for flags to be changed on running servers, without executing a restart of the server. + +{{< /note >}} + +### status + +Prints the status of the tablet server, including information on the node instance, bound RPC addresses, bound HTTP addresses, and version information. + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] status +``` + +* *host*:*port*: The *host* and *port* of the tablet server. Default is `localhost:9100`. + +For an example, see [Return the status of a tablet server](#return-the-status-of-a-tablet-server) + +### refresh_flags + +Refresh flags that are loaded from the configuration file. Works on both YB-Master (port 9100) and YB-TServer (port 7100) process. No parameters needed. + +Each process needs to have the following command issued, for example, issuing the command on one YB-TServer won't update the flags on the other YB-TServers. + +**Syntax** + +```sh +yb-ts-cli [ --server_address=: ] refresh_flags +``` + +* *host*:*port*: The *host* and *port* of the YB-Master or YB-TServer. Default is `localhost:9100`. + +## Flags + +The following flags can be used, when specified, with the commands above. 
+ +### --force + +Use this flag with the [`set_flag`](#set-flag) command to allow a change to a flag that is not explicitly marked as runtime-settable. Note that the change may be ignored on the server or may cause the server to crash, if unsafe values are provided. + +Default: `false` + +### --server-address + +The address (*host* and *port*) of the tablet server to run against. + +Default: `localhost:9100` + +### --timeout_ms + +The duration, in milliseconds (ms), before the RPC request times out. + +Default: `60000` (1000 ms = 1 sec) + +### --certs_dir_name + +To connect to a cluster with TLS enabled, you must include the `--certs_dir_name` flag with the directory location where the root certificate is located. + +Default: `""` + +## Examples + +### Return the status of a tablet server + +```sh +./bin/yb-ts-cli --server_address=127.0.0.1 --certs_dir_name="/path/to/dir/name" status +``` + +```output +node_instance { + permanent_uuid: "237678d61086489991080bdfc68a28db" + instance_seqno: 1579278624770505 +} +bound_rpc_addresses { + host: "127.0.0.1" + port: 9100 +} +bound_http_addresses { + host: "127.0.0.1" + port: 9000 +} +version_info { + git_hash: "83610e77c7659c7587bc0c8aea76db47ff8e2df1" + build_hostname: "yb-macmini-6.dev.yugabyte.com" + build_timestamp: "06 Jan 2020 17:47:22 PST" + build_username: "jenkins" + build_clean_repo: true + build_id: "743" + build_type: "RELEASE" + version_number: "2.0.10.0" + build_number: "4" +} +``` + +### Display the current hybrid time + +```sh +./bin/yb-ts-cli --server_address=yb-tserver-1:9100 current_hybrid_time +``` + +```output +6470519323472437248 +``` diff --git a/docs/content/v2.25/admin/ysql-dump.md b/docs/content/v2.25/admin/ysql-dump.md new file mode 100644 index 000000000000..2bfeb3b72bd0 --- /dev/null +++ b/docs/content/v2.25/admin/ysql-dump.md @@ -0,0 +1,365 @@ +--- +title: ysql_dump +headerTitle: ysql_dump +linkTitle: ysql_dump +description: Back up a specified YSQL database into plain-text, SQL script file. +headcontent: Extract a YugabyteDB database into a SQL script file. +menu: + preview: + identifier: ysql-dump + parent: admin + weight: 70 +type: docs +--- + +## Overview + +ysql_dump is a utility for backing up a YugabyteDB database into a plain-text, SQL script file. ysql_dump makes consistent backups, even if the database is being used concurrently. ysql_dump does not block other users accessing the database (readers or writers). + +ysql_dump only dumps a single database. To backup global objects that are common to all databases in a cluster, such as roles, use [ysql_dumpall](../ysql-dumpall/). + +Dumps are output in plain-text, SQL script files. Script dumps are plain-text files containing the SQL statements required to reconstruct the database to the state it was in at the time it was saved. To restore from such a script, import it using the [ysqlsh \i](../../api/ysqlsh-meta-commands/#i-filename-include-filename) meta-command. Script files can be used to reconstruct the database even on other machines and other architectures; with some modifications, even on other SQL database products. + +While running ysql_dump, you should examine the output for any warnings (printed on standard error). + +The ysql_dump utility is derived from the PostgreSQL [pg_dump](https://www.postgresql.org/docs/10/app-pgdump.html) utility. + +### Installation + +ysql_dump is installed with YugabyteDB and is located in the `postgres/bin` directory of the YugabyteDB home directory. + +### Online help + +Run `ysql_dump --help` to display the online help. 
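+
+For example, from the YugabyteDB home directory (the `postgres/bin` path follows the installation location noted above):
+
+```sh
+$ ./postgres/bin/ysql_dump --help
+```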
+ +## Syntax + +```sh +ysql_dump [ ... ] [ ... ] [ ] +``` + +- *connection-option*: See [Database connection options](#database-connection-options). +- *content-output-format-option*: See [Content and output format options](#content-and-output-format-options) +- *dbname*: The name of the database. + +## Content and output format options + +The following command line options control the content and format of the output. + +#### *dbname* + +Specifies the name of the database to be dumped. If this is not specified, the environment variable PGDATABASE is used. If that is not set, the user name specified for the connection is used. + +#### -a, --data-only + +Dump only the data, not the schema (data definitions). Table data, large objects, and sequence values are dumped. + +#### -b, --blobs + +Include large objects in the dump. This is the default behavior except when [-n|--schema](#n-schema-schema-schema), [-t|--table](#t-table-table-table), or [-s|--schema-only](#s-schema-only) is specified. The `-b|--blobs` option is therefore only useful to add large objects to dumps where a specific schema or table has been requested. Note that blobs are considered data and therefore will be included when `-a|--data-only` is used, but not when [-s|--schema-only](#s-schema-only) is used. + +#### -B, --no-blobs + +Exclude large objects in the dump. + +When both `-b|--blobs` and `-B|--no-blobs` are given, the behavior is to output large objects, when data is being dumped, see `-b|--blobs` option. + +#### -c, --clean + +Output statements to clean (drop) database objects prior to outputting the statements for creating them. (Unless `--if-exists` is also specified, restore might generate some harmless error messages, if any objects were not present in the destination database.) + +#### -C, --create + +Begin the output with a statement to create the database itself and reconnect to the created database. (With a script of this form, it doesn't matter which database in the destination installation you connect to before running the script.) If `-c|--clean` is also specified, the script drops and recreates the target database before reconnecting to it. + +#### -E *encoding*, --encoding=*encoding* + +Create the dump in the specified character set encoding. By default, the dump is created in the database encoding. (Another way to get the same result is to set the PGCLIENTENCODING environment variable to the desired dump encoding.) + +#### -f *file*, --file=*file* + +Send output to the specified file. This parameter can be omitted for file-based output formats, in which case the standard output is used. + +#### -m *addresses*, --masters=*addresses* + +Comma-separated list of YB-Master hosts and ports. + +#### -n *schema*, --schema=*schema* + +Dump only schemas matching *schema*; this selects both the schema itself, and all its contained objects. When this option is not specified, all non-system schemas in the target database will be dumped. Multiple schemas can be selected by writing multiple `-n|--schema` options. Also, the *schema* parameter is interpreted as a pattern according to the same rules used by the `ysqlsh \d` commands, so multiple schemas can also be selected by writing wildcard characters in the pattern. When using wildcards, be careful to quote the pattern if needed to prevent the shell from expanding the wildcards. + +{{< note title="Note" >}} + +When `-n|--schema` is specified, ysql_dump makes no attempt to dump any other database objects that the selected schemas might depend upon. 
Therefore, there is no guarantee that the results of a specific-schema dump can be successfully restored by themselves into a clean database. + +{{< /note >}} + +{{< note title="Note" >}} + +Non-schema objects, such as blobs, are not dumped when `-n|--schema` is specified. You can add blobs back to the dump with the `-b|--blobs` option. + +{{< /note >}} + +#### -N *schema*, --exclude-schema=*schema* + +Do not dump any schemas matching the schema pattern. The pattern is interpreted according to the same rules as for [-n|--schema](#n-schema-schema-schema) option. `-N|--exclude-schema` can be given more than once to exclude schemas matching any of several patterns. + +When both `-n|--schema` and `-N|--exclude-schema` are given, the behavior is to dump just the schemas that match at least one `-n|--schema` option but no `-N|--exclude-schema` options. If `-N|--exclude-schema` appears without `-n|--schema`, then schemas matching `-N|--exclude-schema` are excluded from what is otherwise a normal dump. + +#### -o, --oids + +Dump object identifiers (OIDs) as part of the data for every table. Use this option if your application references the OID columns in some way (for example, in a foreign key constraint). Otherwise, this option should not be used. + +#### -O, --no-owner + +Do not output statements to set ownership of objects to match the original database. By default, ysql_dump issues ALTER OWNER or SET SESSION AUTHORIZATION statements to set ownership of created database objects. These statements will fail when the script is run unless it is started by a superuser (or the same user that owns all of the objects in the script). To make a script that can be restored by any user, but will give that user ownership of all the objects, specify `-O|--no-owner`. + +#### -s, --schema-only + +Dump only the object definitions (schema), not data. + +This option is the inverse of [-a|--data-only](#a-data-only). + +(Do not confuse this with the [-n|--schema](#n-schema-schema-schema) option, which uses the word "schema" in a different meaning.) + +To exclude table data for only a subset of tables in the database, see [--exclude-table-data](#exclude-table-data-table). + +#### -S *username*, --superuser=*username* + +Specify the superuser username to use when disabling triggers. This is relevant only if [--disable-triggers](#disable-triggers) is used. (Usually, it's better to leave this out, and instead start the resulting script as superuser.) + +#### -t *table*, --table=*table* + +Dump only tables with names matching *table*. For this purpose, "table" includes views, materialized views, sequences, and foreign tables. Multiple tables can be selected by writing multiple `-t|--table` options. Also, the table parameter is interpreted as a pattern according to the same rules used by `ysqlsh \d` commands, so multiple tables can also be selected by writing wildcard characters in the pattern. When using wildcards, be careful to quote the pattern if needed to prevent the shell from expanding the wildcards. + +The [-n|--schema](#n-schema-schema-schema) and `-N|--exclude-schema` options have no effect when `-t|--table` is used, because tables selected by `-t|--table` will be dumped regardless of those options, and non-table objects will not be dumped. + +{{< note title="Note" >}} + +When `-t|--table` is specified, ysql_dump makes no attempt to dump any other database objects that the selected tables might depend upon. 
Therefore, there is no guarantee that the results of a specific-table dump can be successfully restored by themselves into a clean database. + +{{< /note >}} + +#### -T *table*, --exclude-table=*table* + +Do not dump any tables matching the table pattern. The pattern is interpreted according to the same rules as for [-t](#t-table-table-table). `-T|--exclude-table` can be given more than once to exclude tables matching any of several patterns. + +When both `-t|--table` and `-T|--exclude-table` are given, the behavior is to dump just the tables that match at least one `-t|--table` option but no `-T|--exclude-table` options. If `-T|--exclude-table` appears without `-t|--table`, then tables matching `-T|--exclude-table` are excluded from what is otherwise a normal dump. + +#### -v, --verbose + +Specifies verbose mode. This causes ysql_dump to output detailed object comments and start and stop times to the dump file, and progress messages to standard error. + +#### -V, --version + +Print the ysql_dump version and exit. + +#### -x, --no-privileges, --no-acl + +Prevent dumping of access privileges (GRANT and REVOKE statements). + +#### -Z *0..9*, --compress=*0..9* + +Specify the compression level to use. Zero (`0`) means no compression. For plain text output, setting a nonzero compression level causes the entire output file to be compressed, as though it had been fed through `gzip`; but the default is not to compress. + +#### --column-inserts, --attribute-inserts + +Dump data as INSERT statements with explicit column names (`INSERT INTO table (column, ...) VALUES ...`). This makes restoration very slow; it is mainly helpful for making dumps that can be loaded into non-YugabyteDB databases. However, as this option generates a separate statement for each row, an error in reloading a row causes only that row to be lost rather than the entire table contents. + +#### --disable-dollar-quoting + +This option disables the use of dollar quoting for function bodies, and forces them to be quoted using SQL standard string syntax. + +#### --disable-triggers + +This option is relevant only when creating a data-only dump. It instructs ysql_dump to include statements to temporarily disable triggers on the target tables while the data is reloaded. Use this if you have referential integrity checks or other triggers on the tables that you do not want to invoke during data reload. + +Presently, the statements emitted for `--disable-triggers` must be done as superuser. So, you should also specify a superuser name with `-S|--superuser`, or preferably be careful to start the resulting script as a superuser. + +#### --enable-row-security + +This option is relevant only when dumping the contents of a table which has row security. By default, ysql_dump sets `row_security` to `off`, to ensure that all data is dumped from the table. If the user does not have sufficient privileges to bypass row security, then an error is thrown. This parameter instructs ysql_dump to set `row_security` to `on` instead, allowing the user to dump the parts of the contents of the table that they have access to. + +Note that if you use this option currently, you probably also want the dump be in INSERT format, as the COPY FROM during restore does not support row security. + +#### --exclude-table-data=*table* + +Do not dump data for any tables matching the table pattern. The pattern is interpreted according to the same rules as for [-t|--table](#t-table-table-table). 
The `--exclude-table-data` option can be given more than once to exclude tables matching any of several patterns. This option is helpful when you need the definition of a particular table even though you do not need the data in it. + +To exclude data for all tables in the database, see [-s|--schema-only](#s-schema-only). + +#### --if-exists + +Use conditional statements (that is, add an IF EXISTS clause) when cleaning database objects. This option is not valid unless `-c|--clean` is also specified. + +#### --inserts + +Dump data as INSERT statements (rather than COPY statements). This will make restoration very slow; it is mainly helpful for making dumps that can be loaded into non-YugabyteDB databases. However, as this option generates a separate statement for each row, an error in reloading a row causes only that row to be lost rather than the entire table contents. Note that the restore might fail altogether if you have rearranged column order. The `--column-inserts` option is safe against column order changes, though even slower. + +#### --lock-wait-timeout=*timeout* + +Do not wait forever to acquire shared table locks at the beginning of the dump. Instead fail if unable to lock a table in the specified timeout. The timeout may be specified in any of the formats accepted by `SET statement_timeout`. (Allowed formats vary depending on the server version you are dumping from, but an integer number of milliseconds is accepted by all versions.) + +#### --no-publications + +Do not dump publications. + +#### --no-security-labels + +Do not dump security labels. + +#### --no-subscriptions + +Do not dump subscriptions. + +#### --no-sync + +By default, ysql_dump waits for all files to be written safely to disk. This option causes ysql_dump to return without waiting, which is faster, but means that a subsequent operating system crash can leave the dump corrupt. Generally, this option is helpful for testing but should not be used when dumping data from production installation. + +#### --no-unlogged-table-data + +Do not dump the contents of unlogged tables. This option has no effect on whether or not the table definitions (schema) are dumped; it only suppresses dumping the table data. Data in unlogged tables is always excluded when dumping from a standby server. + +#### --quote-all-identifiers + +Force quoting of all identifiers. This option is recommended when dumping a database from a server whose YugabyteDB major version is different from ysql_dump, or when the output is intended to be loaded into a server of a different major version. By default, ysql_dump quotes only identifiers that are reserved words in its own major version. This sometimes results in compatibility issues when dealing with servers of other versions that may have slightly different sets of reserved words. Using `--quote-all-identifiers` prevents such issues, at the price of a harder-to-read dump script. + +#### --section=*sectionname* + +Only dump the named section. The section name can be pre-data, data, or post-data. This option can be specified more than once to select multiple sections. The default is to dump all sections. + +The data section contains actual table data, large-object contents, and sequence values. Post-data items include definitions of indexes, triggers, rules, and constraints other than validated check constraints. Pre-data items include all other data definition items. 
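+
+For example, to dump the schema definitions and table data while deferring indexes, triggers, and constraints to a separate step, you might select only the pre-data and data sections (the database and output file names are illustrative):
+
+```sh
+$ ysql_dump --section=pre-data --section=data mydb > mydb_schema_and_data.sql
+```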
+ +#### --no-serializable-deferrable + +Use the `--no-serializable-deferrable` flag to disable the default `serializable-deferrable` transaction mode. The `serializable-deferrable` mode ensures that the snapshot used is consistent with later database states by waiting for a point in the transaction stream at which no anomalies can be present, so that there is no risk of the dump failing or causing other transactions to roll back with a `serialization_failure`. + +If there are active read-write transactions, the maximum wait time until the start of the dump will be `50ms` (based on the default [--max_clock_skew_usec](../../reference/configuration/yb-tserver/#max-clock-skew-usec) for YB-TServer and YB-Master servers.) If there are no active read-write transactions when ysql_dump is started, this option will not make any difference. Once running, performance with or without the option is the same. + +#### --snapshot=*snapshotname* + +Use the specified synchronized snapshot when making a dump of the database. This option is helpful when needing to synchronize the dump with a logical replication slot or with a concurrent session. In the case of a parallel dump, the snapshot name defined by this option is used rather than taking a new snapshot. + +#### --strict-names + +Require that each schema ([-n|--schema](#n-schema-schema-schema)) and table ([-t|--table](#t-table-table-table)) qualifier match at least one schema or table in the database to be dumped. Note that if none of the schema or table qualifiers find matches, ysql_dump generates an error even without `--strict-names`. + +This option has no effect on [-N|--exclude-schema](#n-schema-exclude-schema-schema), [-T|--exclude-table](#t-table-exclude-table-table), or [--exclude-table-data](#exclude-table-data-table). An exclude pattern failing to match any objects is not considered an error. + +#### --use-set-session-authorization + +Output SQL-standard SET SESSION AUTHORIZATION statements instead of ALTER OWNER statements to determine object ownership. This makes the dump more standards-compatible, but depending on the history of the objects in the dump, might not restore properly. Also, a dump using SET SESSION AUTHORIZATION statements will certainly require superuser privileges to restore correctly, whereas ALTER OWNER statements requires lesser privileges. + +#### -?, --help + +Show help about ysql_dump command line arguments and then exit. + +## Database connection options + +The following command line options control the database connection parameters. + +#### -d *dbname*, --dbname=*dbname* + +Specifies the name of the database to connect to. This is equivalent to specifying `dbname` as the first non-option argument on the command line. + +If this parameter contains an equal sign (`=`) or starts with a valid URI prefix (`yugabytedb://`), it is treated as a `conninfo` string. + +#### -h *host*, --host=*host* + +Specifies the host name of the machine on which the server is running. If the value begins with a slash (`/`), it is used as the directory for the Unix domain socket. Defaults to the compiled-in host of `127.0.0.1` else a Unix domain socket connection is attempted. + +#### -p *port*, --port=*port* + +Specifies the TCP port or local Unix domain socket file extension on which the server is listening for connections. Defaults to the compiled-in port of `5433`. + +#### -U *username*, --username=*username* + +The username to connect as. + +#### -w, --no-password + +Never issue a password prompt. 
If the server requires password authentication and a password is not available by other means such as a `~/.pgpass` file, the connection attempt will fail. This option can be useful in batch jobs and scripts where no user is present to enter a password. + +#### -W, --password + +Force ysql_dump to prompt for a password before connecting to a database. + +This option is never essential, as ysql_dump automatically prompts for a password if the server demands password authentication. However, ysql_dump will waste a connection attempt finding out that the server wants a password. In some cases it is worth typing `-W|--password` to avoid the extra connection attempt. + +#### --role=*rolename* + +Specifies a role name to be used to create the dump. This option causes ysql_dump to issue a `SET ROLE ` statement after connecting to the database. It is useful when the authenticated user (specified by [-U|--username](#u-username-username-username)) lacks privileges needed by ysql_dump, but can switch to a role with the required rights. Some installations have a policy against logging in directly as a superuser, and use of this option allows dumps to be made without violating the policy. + +## Environment + +The following PostgreSQL environment variables, referenced in some ysql_dump options, are used by YugabyteDB for PostgreSQL compatibility: + +- `PGHOST` +- `PGPORT` +- `PGOPTIONS` +- `PGUSER` +- `PGDATABASE` +- `PGCLIENTENCODING` + +This utility also uses the environment variables supported by `libpq`. + +## Diagnostics + +ysql_dump internally executes SELECT statements. If you have problems running ysql_dump, make sure you are able to select information from the database using, for example, [ysqlsh](../../api/ysqlsh/). Also, any default connection settings and environment variables used by the `libpq` front-end library will apply. + +The database activity of ysql_dump is normally collected by the statistics collector. If this is undesirable, you can set parameter `track_counts` to `false` using PGOPTIONS or the [ALTER USER](../../api/ysql/the-sql-language/statements/dcl_alter_user) statement. + +## Notes + +If your YugabyteDB cluster has any local additions to the `template1` database, be careful to restore the output of ysql_dump into a truly empty database; otherwise you are likely to get errors due to duplicate definitions of the added objects. To make an empty database without any local additions, copy from `template0` not `template1`, for example: + +```plpgsql +CREATE DATABASE foo WITH TEMPLATE template0; +``` + +When a data-only dump is chosen and the option [--disable-triggers](#disable-triggers) is used, ysql_dump emits statements to disable triggers on user tables before inserting the data, and then statements to re-enable them after the data has been inserted. If the restore is stopped in the middle, the system catalogs might be left in the wrong state. + +The dump file produced by ysql_dump does not contain the statistics used by the optimizer to make query planning decisions. Therefore, running ANALYZE after restoring from a dump file can ensure optimal performance. + +Because ysql_dump is used to transfer data to newer versions of YugabyteDB, the output of ysql_dump can be expected to load into YugabyteDB versions newer than the ysql_dump version. ysql_dump can also dump from YugabyteDB servers older than its own version. However, ysql_dump cannot dump from YugabyteDB servers newer than its own major version; it will refuse to even try, rather than risk making an invalid dump. 
Also, it is not guaranteed that the ysql_dump output can be loaded into a server of an older major version — not even if the dump was taken from a server of that version. Loading a dump file into an older server may require manual editing of the dump file to remove syntax not understood by the older server. Use of the [--quote-all-identifiers](#quote-all-identifiers) option is recommended in cross-version cases, as it can prevent problems arising from varying reserved-word lists in different YugabyteDB versions. + +## Examples + +#### Dump a database into a SQL script file + +```sh +$ ysql_dump mydb > mydb.sql +``` + +#### Dump a single table named `mytable` + +```sh +$ ysql_dump -t mytable mydb -f mytable_mydb.sql +``` + +#### Dump schemas based on filters + +The following command dumps all schemas whose names start with `east` or `west` and end in `gsm`, excluding any schema whose names contain the word `test`: + +```sh +$ ysql_dump -n 'east*gsm' -n 'west*gsm' -N '*test*' mydb > myschemas_mydb.sql +``` + +Here's the same example, using regular expression notation to consolidate the options: + +```sh +$ ysql_dump -n '(east|west)*gsm' -N '*test*' mydb > myschemas_mydb.sql +``` + +#### Dump all database objects based on a filter + +The following command dumps all database objects except for tables whose names begin with `ts_`: + +```sh +$ ysql_dump -T 'ts_*' mydb > objects_mydb.sql +``` + +## See also + +- [ysql_dumpall](../ysql-dumpall/) +- [ysqlsh](../../api/ysqlsh/) diff --git a/docs/content/v2.25/admin/ysql-dumpall.md b/docs/content/v2.25/admin/ysql-dumpall.md new file mode 100644 index 000000000000..71d4b6a4f69f --- /dev/null +++ b/docs/content/v2.25/admin/ysql-dumpall.md @@ -0,0 +1,252 @@ +--- +title: ysql_dumpall +headerTitle: ysql_dumpall +linkTitle: ysql_dumpall +description: ysql_dumpall +headcontent: Back up all YSQL databases and roles into a SQL script file. +menu: + preview: + identifier: ysql-dumpall + parent: admin + weight: 80 +type: docs +--- + +## Overview + +ysql_dumpall is a utility for writing out ("dumping") all YugabyteDB databases of a cluster into one plain-text, SQL script file. The script file contains SQL statements that can be used as input to ysqlsh to restore the databases. It does this by calling [ysql_dump](../ysql-dump/) for each database in the YugabyteDB cluster. ysql_dumpall also dumps global objects that are common to all databases, such as database roles. (ysql_dump does not export roles.) + +Because ysql_dumpall reads tables from all databases, you will most likely have to connect as a database superuser in order to produce a complete dump. Also, you will need superuser privileges to execute the saved script in order to be allowed to add roles and create databases. + +The SQL script will be written to the standard output. Use the [`-f|--file`](#f-filename-file-filename) option or shell operators to redirect it into a file. + +ysql_dumpall needs to connect multiple times (once per database) to the YugabyteDB cluster. If you use password authentication, it will ask for a password each time. It is convenient to have a `~/.pgpass` file in such cases. + +### Installation + +ysql_dumpall is installed with YugabyteDB and is located in the `postgres/bin` directory of the YugabyteDB home directory. + +### Online help + +Run `ysql_dumpall --help` to display the online help. + +## Syntax + +```sh +ysql_dumpall [ ... ] [ ... ] +``` + +- *connection-option*: See [Connection options](#connection-options). 
+- *content-output-format-option*: See [Content and output format options](#content-and-output-format-options) + +## Content and output format options + +The following command line options control the content and format of the output. + +#### -a, --data-only + +Dump only the data, not the schema (data definitions). + +#### -c, --clean + +Include SQL statements to clean (drop) databases before recreating them. `DROP` statements for roles are added as well. + +#### -E encoding, --encoding=*encoding* + +Create the dump in the specified character set encoding. By default, the dump is created in the database encoding. (Another way to get the same result is to set the `PGCLIENTENCODING` environment variable to the desired dump encoding.) + +#### -f filename, --file=*filename* + +Send output to the specified file. If this is omitted, the standard output is used. + +#### -g, --globals-only + +Dump only global objects (roles), no databases. + +#### -o, --oids + +Dump object identifiers (OIDs) as part of the data for every table. Use this option if your application references the OID columns in some way (that is, in a foreign key constraint). Otherwise, this option should not be used. + +#### -O, --no-owner + +Do not output statements to set ownership of objects to match the original database. By default, ysql_dumpall issues `ALTER OWNER` or `SET SESSION AUTHORIZATION` statements to set ownership of created schema elements. These statements will fail when the script is run unless it is started by a superuser (or the same user that owns all of the objects in the script). To make a script that can be restored by any user, but will give that user ownership of all the objects, specify [`-O|--no-owner`](#o-no-owner). + +#### -r, --roles-only + +Dump only roles, no databases. + +#### -s, --schema-only + +Dump only the object definitions (schema), not data. + +#### -S *username*, --superuser=*username* + +Specify the superuser username to use when disabling triggers. This is relevant only if [`--disable-triggers`](#disable-triggers) is used. (Usually, it's better to leave this out, and instead start the resulting script as superuser.) + +#### -v, --verbose + +Specifies verbose mode. This causes ysql_dumpall to output start and stop times to the dump file, and progress messages to standard error. It will also enable verbose output in [ysql_dump](../ysql-dump/). + +#### --version, -V + +Print the ysql_dumpall version and exit. + +#### -x, --no-privileges, --no-acl + +Prevent dumping of access privileges (`GRANT` and `REVOKE` statements). + +#### --column-inserts, --attribute-inserts + +Dump data as `INSERT` statements with explicit column names (`INSERT INTO table (column, ...) VALUES ...`). This will make restoration very slow; it is mainly useful for making dumps that can be loaded into non-YugabyteDB databases. + +#### --disable-dollar-quoting + +This option disables the use of dollar quoting for function bodies, and forces them to be quoted using SQL standard string syntax. + +#### --disable-triggers + +This option is relevant only when creating a data-only dump. It instructs ysql_dumpall to include statements to temporarily disable triggers on the target tables while the data is reloaded. Use this if you have referential integrity checks or other triggers on the tables that you do not want to invoke during data reload. + +Presently, the statements emitted for `--disable-triggers` must be done as superuser. 
So, you should also specify a superuser name with [`-S|--superuser`](#s-username-superuser-username), or preferably be careful to start the resulting script as a superuser. + +#### --if-exists + +Use conditional statements (that is, add an `IF EXISTS` clause) to drop databases and other objects. This option is not valid unless [`-c|--clean`](#c-clean) is also specified. + +#### --inserts + +Dump data as `INSERT` statements (rather than `COPY` statements). This will make restoration very slow; it is mainly useful for making dumps that can be loaded into non-YugabyteDB databases. Note that the restore might fail altogether if you have rearranged column order. The [`--column-inserts`](#column-inserts-attribute-inserts) option is safer, though even slower. + +#### --load-via-partition-root + +When dumping data for a table partition, make the COPY or INSERT statements target the root of the partitioning hierarchy that contains it, rather than the partition itself. This causes the appropriate partition to be re-determined for each row when the data is loaded. This may be useful when reloading data on a server where rows do not always fall into the same partitions as they did on the original server. That could happen, for example, if the partitioning column is of type text, and the two systems have different definitions of the collation used to sort the partitioning column. + +#### --lock-wait-timeout=*timeout* + +Do not wait forever to acquire shared table locks at the beginning of the dump. Instead, fail if unable to lock a table within the specified timeout. The timeout may be specified in any of the formats accepted by `SET statement_timeout`. Allowed values vary depending on the server version you are dumping from, but an integer number of milliseconds is accepted by all versions. + +#### --no-comments + +Do not dump comments. + +#### --no-publications + +Do not dump publications. + +#### --no-role-passwords + +Do not dump passwords for roles. When restored, roles will have a null password, and password authentication will always fail until the password is set. As password values aren't needed when this option is specified, the role information is read from the catalog view `pg_roles` instead of `pg_authid`. Therefore, this option also helps if access to `pg_authid` is restricted by some security policy. Note: YugabyteDB uses the `pg_roles` and `pg_authid` system tables for PostgreSQL compatibility. + +#### --no-security-labels + +Do not dump security labels. + +#### --no-subscriptions + +Do not dump subscriptions. + +#### --no-sync + +By default, ysql_dumpall waits for all files to be written safely to disk. This option causes ysql_dumpall to return without waiting, which is faster, but means that a subsequent operating system crash can leave the dump corrupt. Generally, this option is helpful for testing but should not be used when dumping data from production installation. + +#### --no-unlogged-table-data + +Do not dump the contents of unlogged tables. This option has no effect on whether or not the table definitions (schema) are dumped; it only suppresses dumping the table data. + +#### --quote-all-identifiers + +Force quoting of all identifiers. This option is recommended when dumping a database from a server whose YugabyteDB major version is different from the ysql_dumpall version, or when the output is intended to be loaded into a server of a different major version. By default, ysql_dumpall quotes only identifiers that are reserved words in its own major version. 
This sometimes results in compatibility issues when dealing with servers of other versions that may have slightly different sets of reserved words. Using `--quote-all-identifiers` prevents such issues, at the price of a harder-to-read dump script. + +#### --use-set-session-authorization + +Output SQL-standard `SET SESSION AUTHORIZATION` statements instead of `ALTER OWNER` statements to determine object ownership. This makes the dump more standards compatible, but depending on the history of the objects in the dump, might not restore properly. + +### -?, --help + +Show help about ysql_dumpall command line arguments and then exit. + +## Connection options + +The following command line options control the database connection parameters. + +#### -d *connstr*, --dbname=*connstr* + +Specifies parameters used to connect to the server, as a connection string. + +The option is called `-d|--dbname` for consistency with other client applications, but because ysql_dumpall needs to connect to many databases, the database name in the connection string will be ignored. Use the [`-l|--database`](#l-dbname-database-database) option to specify the name of the database used for the initial connection, which will dump global objects and discover what other databases should be dumped. + +#### -h *host*, --host *host* + +Specifies the host name of the machine on which the database server is running. If the value begins with a slash, it is used as the directory for the Unix domain socket. The default is taken from the `PGHOST` environment variable, if set, else a Unix domain socket connection is attempted. + +#### -l *dbname*, --database=*database* + +Specifies the name of the database to connect to for dumping global objects and discovering what other databases should be dumped. If not specified, the `yugabyte` database will be used, and if that does not exist, `template1` will be used. + +#### -p *port*, --port=*port* + +Specifies the TCP port or local Unix domain socket file extension on which the server is listening for connections. Defaults to the `PGPORT` environment variable, if set, or the compiled-in default. + +#### -U *username*, --username=*username* + +The username to connect as. + +#### -w, --no-password + +Never issue a password prompt. If the server requires password authentication and a password is not available by other means such as a `~/.pgpass` file, the connection attempt will fail. This option can be helpful in batch jobs and scripts where no user is present to enter a password. + +#### -W, --password + +Force ysql_dumpall to prompt for a password before connecting to a database. + +This option is never essential, because ysql_dumpall automatically prompts for a password if the server demands password authentication. However, ysql_dumpall will waste a connection attempt finding out that the server wants a password. In some cases it is worth typing `-W|--password` to avoid the extra connection attempt. + +{{< note title="Note" >}} + +For each database to be dumped, a password prompt will occur. To avoid having to manually enter passwords each time, you can set up a `~/.pgpass` file. + +{{< /note >}} + +#### --role=*rolename* + +Specifies a role name to be used to create the dump. This option causes ysql_dumpall to issue a `SET ROLE ` statement after connecting to the database. It is helpful when the authenticated user (specified by [`-U|--username`](#u-username-username-username)) lacks privileges needed by ysql_dumpall, but can switch to a role with the required rights. 
Some installations have a policy against logging in directly as a superuser, and use of this option allows dumps to be made without violating the policy. + +## Environment + +The following PostgreSQL environment variables, referenced in some ysql_dumpall and ysql_dump options, are used by YugabyteDB for PostgreSQL compatibility: + +- `PGHOST` +- `PGPORT` +- `PGOPTIONS` +- `PGUSER` +- `PGCLIENTENCODING` + +This utility also uses the environment variables supported by `libpq`. + +## Notes + +- Because ysql_dumpall calls [ysql_dump](../ysql-dump/) internally, some diagnostic messages will refer to ysql_dump. +- The [`-c|--clean`](#c-clean) option can be helpful even when your intention is to restore the dump script into a fresh cluster. Use of `-c|--clean` authorizes the script to drop and recreate the built-in `yugabyte`, `postgres`, and `template1` databases, ensuring that those databases will retain the same properties (for instance, locale and encoding) that they had in the source cluster. Without the option, those databases will retain their existing database-level properties, as well as any pre-existing contents. +- Once restored, it is recommended to run `ANALYZE` on each database so the optimizer has helpful statistics. You can also run `vacuumdb -a -z` to analyze all databases. +- The dump script should not be expected to run completely without errors. In particular, because the script will issue `CREATE ROLE` statements for every role existing in the source cluster, it is certain to get a `role already exists` error for the bootstrap superuser, unless the destination cluster was initialized with a different bootstrap superuser name. This error is harmless and should be ignored. Use of the [`-c|--clean`](#c-clean) option is likely to produce additional harmless error messages about non-existent objects, although you can minimize those by adding [`--if-exists`](#if-exists). + +## Examples + +#### Dump all databases + +```sh +$ ./postgres/bin/ysql_dumpall > db.out +``` + +To reload databases from this file, you can use: + +```sh +$ ./bin/ysqlsh -f db.out yugabyte +``` + +The database to which you connect is not important because the script file created by ysql_dumpall will contain the appropriate statements to create and connect to the saved databases. An exception is that if you specified [`-c|--clean`](#c-clean), you must connect to the `postgres` database initially; the script will attempt to drop other databases immediately, and that will fail for the database you are connected to. + +## See Also + +- [ysql_dump](../ysql-dump/) +- [ysqlsh](../../api/ysqlsh/) diff --git a/docs/content/v2.25/api/_index.md b/docs/content/v2.25/api/_index.md new file mode 100644 index 000000000000..e244bf2dbc95 --- /dev/null +++ b/docs/content/v2.25/api/_index.md @@ -0,0 +1,80 @@ +--- +title: YugabyteDB API reference (for YSQL and YCQL) +headerTitle: API +linkTitle: API +description: YugabyteDB API reference for PostgreSQL-compatible YSQL and Cassandra-compatible YCQL +headcontent: YugabyteDB API reference +type: indexpage +showRightNav: true +--- + + +## SQL APIs + +YugabyteDB supports two flavors of distributed SQL: + +- [YSQL](ysql/) is a fully-relational SQL API that is wire compatible with the SQL language in PostgreSQL. It is best fit for RDBMS workloads that need horizontal write scalability and global data distribution while also using relational modeling features such as JOINs, distributed transactions and referential integrity (such as foreign keys). 
+- [YCQL](ycql/) is a semi-relational SQL API that is best fit for internet-scale OLTP and HTAP applications needing massive data ingestion and blazing-fast queries. It supports distributed transactions, strongly consistent secondary indexes and a native JSON column type. YCQL has its roots in the Cassandra Query Language. + +Note that the APIs are isolated and independent from one another, and you need to select an API first before undertaking detailed database schema and query design and implementation. + +{{}} + + {{}} + + {{}} + +{{}} + +## Client shells + +YugabyteDB ships with command line interface (CLI) shells for interacting with each SQL API. + +{{}} + + {{}} + + {{}} + +{{}} + +## Management APIs + +YugabyteDB Anywhere and Aeon both provide APIs that can be used to deploy and manage universes, query system status, manage accounts, and more. + +{{< sections/2-boxes >}} + {{< sections/bottom-image-box + title="YugabyteDB Anywhere API" + description="Manage YugabyteDB Anywhere using the API." + buttonText="API Documentation" + buttonUrl="https://api-docs.yugabyte.com/docs/yugabyte-platform/f10502c9c9623-yugabyte-db-anywhere-api-overview" + >}} + + {{< sections/bottom-image-box + title="YugabyteDB Aeon API" + description="Manage YugabyteDB Aeon using the API." + buttonText="API Documentation" + buttonUrl="https://api-docs.yugabyte.com/docs/managed-apis/9u5yqnccbe8lk-yugabyte-db-aeon-rest-api" + >}} + +{{< /sections/2-boxes >}} diff --git a/docs/content/v2.25/api/ycql/_index.md b/docs/content/v2.25/api/ycql/_index.md new file mode 100644 index 000000000000..59a1d7304ab4 --- /dev/null +++ b/docs/content/v2.25/api/ycql/_index.md @@ -0,0 +1,137 @@ +--- +title: YCQL API reference +headerTitle: YCQL API reference +linkTitle: YCQL +description: YCQL is a semi-relational API that is best fit for internet-scale OLTP & HTAP applications. +summary: Reference for the YCQL API +headcontent: Cassandra-compatible API +showRightNav: true +type: indexpage +--- + +## Introduction + +Yugabyte Cloud Query Language (YCQL) is a semi-relational SQL API that is best fit for internet-scale OLTP and HTAP applications needing massive data ingestion and blazing-fast queries. It supports strongly consistent secondary indexes, a native JSON column type, and distributed transactions. It has its roots in the [Cassandra Query Language (CQL)](http://cassandra.apache.org/doc/latest/cql/index.html). + +This page covers the following YCQL features. + +- Data definition language (DDL) statements. +- Data manipulation language (DML) statements. +- Builtin functions and Expression operators. +- Primitive user-defined data types. + +## DDL statements + +Data definition language (DDL) statements are instructions for the following database operations. 
+ +- Create, alter, and drop database objects +- Create, grant, and revoke users and roles + +Statement | Description | +----------|-------------| +[`ALTER TABLE`](ddl_alter_table) | Alter a table | +[`ALTER KEYSPACE`](ddl_alter_keyspace) | Alter a keyspace | +[`CREATE INDEX`](ddl_create_index/) | Create a new index on a table | +[`CREATE KEYSPACE`](ddl_create_keyspace) | Create a new keyspace | +[`CREATE TABLE`](ddl_create_table) | Create a new table | +[`CREATE TYPE`](ddl_create_type) | Create a user-defined data type | +[`DROP INDEX`](ddl_drop_index) | Remove an index | +[`DROP KEYSPACE`](ddl_drop_keyspace) | Remove a keyspace | +[`DROP TABLE`](ddl_drop_table) | Remove a table | +[`DROP TYPE`](ddl_drop_type) | Remove a user-defined data type | +[`USE`](ddl_use) | Use an existing keyspace for subsequent commands | + +## DDL security statements + +Security statements are instructions for managing and restricting operations on the database objects. + +- Create, grant, and revoke users and roles +- Grant, and revoke permissions on database objects + +This feature is enabled by setting the YB-TServer configuration flag [`--use_cassandra_authentication`](../../reference/configuration/yb-tserver/#use-cassandra-authentication) to `true`. + +Statement | Description | +----------|-------------| +[`ALTER ROLE`](ddl_alter_role) | Alter a role | +[`CREATE ROLE`](ddl_create_role) | Create a new role | +[`DROP ROLE`](ddl_drop_role) | Remove a role | +[`GRANT PERMISSION`](ddl_grant_permission) | Grant a permission on an object to a role | +[`REVOKE PERMISSION`](ddl_revoke_permission) | Revoke a permission on an object from a role | +[`GRANT ROLE`](ddl_grant_role) | Grant a role to another role | +[`REVOKE ROLE`](ddl_revoke_role) | Revoke a role from another role | + +## DML statements + +Data manipulation language (DML) statements are used to read from and write to the existing database objects. YugabyteDB implicitly commits any updates by DML statements (similar to how Apache Cassandra behaves). + +Statement | Description | +----------|-------------| +[`INSERT`](dml_insert) | Insert rows into a table | +[`SELECT`](dml_select/) | Select rows from a table | +[`UPDATE`](dml_update/) | Update rows in a table | +[`DELETE`](dml_delete/) | Delete specific rows from a table | +[`TRANSACTION`](dml_transaction) | Makes changes to multiple rows in one or more tables in a transaction | +[`TRUNCATE`](dml_truncate) | Remove all rows from a table | + +## Expressions + +An expression is a finite combination of one or more values, operators, functions, and expressions that specifies a computation. Expressions can be used in the following components. + +- The select list of [`SELECT`](dml_select/) statement. For example, `SELECT id + 1 FROM sample_table;`. +- The WHERE clause in [`SELECT`](dml_select/), [`DELETE`](dml_delete/), [`INSERT`](dml_insert), or [`UPDATE`](dml_update/). +- The IF clause in [`DELETE`](dml_delete/), [`INSERT`](dml_insert), or [`UPDATE`](dml_update/). +- The VALUES clause in [`INSERT`](dml_insert). +- The SET clause in [`UPDATE`](dml_update/). + +Currently, the following expressions are supported. + +Expression | Description | +-----------|-------------| +[Simple Value](expr_simple) | Column, constant, or null. Column alias cannot be used in expression yet. 
| +[Subscript `[]`](expr_subscript) | Subscripting columns of collection data types | +[Operator Call](expr_ocall) | Builtin operators only | +[Function Call](expr_fcall/) | Builtin function calls only | + +## Data types + +The following table lists all supported primitive types. + +Primitive Type | Allowed in Key | Type Parameters | Description | +---------------|----------------|-----------------|-------------| +[`BIGINT`](type_int) | Yes | - | 64-bit signed integer | +[`BLOB`](type_blob) | Yes | - | String of binary characters | +[`BOOLEAN`](type_bool) | Yes | - | Boolean | +[`COUNTER`](type_int) | No | - | 64-bit signed integer | +[`DECIMAL`](type_number) | Yes | - | Exact, arbitrary-precision number, no upper-bound on decimal precision | +[`DATE`](type_datetime/) | Yes | - | Date | +[`DOUBLE`](type_number) | Yes | - | 64-bit, inexact, floating-point number | +[`FLOAT`](type_number) | Yes | - | 64-bit, inexact, floating-point number | +[`FROZEN`](type_frozen) | Yes | 1 | Collection in binary format | +[`INET`](type_inet) | Yes | - | String representation of IP address | +[`INT` | `INTEGER`](type_int) | Yes | - | 32-bit signed integer | +[`LIST`](type_collection) | No | 1 | Collection of ordered elements | +[`MAP`](type_collection) | No | 2 | Collection of pairs of key-and-value elements | +[`SET`](type_collection) | No | 1 | Collection of unique elements | +[`SMALLINT`](type_int) | Yes | - | 16-bit signed integer | +[`TEXT` | `VARCHAR`](type_text) | Yes | - | String of Unicode characters | +[`TIME`](type_datetime/) | Yes | - | Time of day | +[`TIMESTAMP`](type_datetime/) | Yes | - | Date-and-time | +[`TIMEUUID`](type_uuid) | Yes | - | Timed UUID | +[`TINYINT`](type_int) | Yes | - | 8-bit signed integer | +[`UUID`](type_uuid) | Yes | - | Standard UUID | +[`VARINT`](type_int) | Yes | - | Arbitrary-precision integer | +[`JSONB`](type_jsonb) | No | - | JSON data type similar to PostgreSQL jsonb | + +[User-defined data types](ddl_create_type) are also supported. + +## Learn more + +- [Advantages of YCQL over Cassandra](/preview/faq/comparisons/cassandra) +- [YCQL - Cassandra 3.4 compatibility](../../explore/ycql-language/cassandra-feature-support) diff --git a/docs/content/v2.25/api/ycql/batch.md b/docs/content/v2.25/api/ycql/batch.md new file mode 100644 index 000000000000..e05e6adf06d3 --- /dev/null +++ b/docs/content/v2.25/api/ycql/batch.md @@ -0,0 +1,168 @@ +--- +title: BATCH requests [YCQL] +headerTitle: BATCH +linkTitle: BATCH +summary: Execute multiple DML in 1 request +description: Use batch to update multiple rows in 1 request. +menu: + preview_api: + parent: api-cassandra + weight: 19991 +type: docs +--- + +Batch operations let you send multiple operations in a single RPC call to the database. The larger the batch size, the higher the latency for the entire batch. Although the latency for the entire batch of operations is higher than the latency of any single operation, the throughput of the batch of operations is much higher. + +## Example in Java + +To perform a batch insert operation in Java: + +1. Create a BatchStatement object. +1. Add the desired number of prepared and bound insert statements to it. +1. Execute the batch object. + +```java +// Create a batch statement object. +BatchStatement batch = new BatchStatement(); + +// Create a prepared statement object to add to the batch. +PreparedStatement insert = client.prepare("INSERT INTO table (k, v) VALUES (?, ?)"); + +// Bind values to the prepared statement and add them to the batch. +for (...) 
{
+  batch.add(insert.bind( ... ... ));
+}
+
+// Execute the batch operation.
+ResultSet resultSet = client.execute(batch);
+```
+
+## Example in Python using RETURNS STATUS AS ROW
+
+The following example uses the Python client and the RETURNS STATUS AS ROW clause:
+
+```sql
+ycqlsh> create keyspace if not exists yb_demo;
+ycqlsh> CREATE TABLE if not exists yb_demo.test_rs_batch(h int, r bigint, v1 int, v2 varchar, primary key (h, r));
+ycqlsh> INSERT INTO yb_demo.test_rs_batch(h,r,v1,v2) VALUES (1,1,1,'a');
+ycqlsh> INSERT INTO yb_demo.test_rs_batch(h,r,v2) VALUES (3,3,'b');
+ycqlsh> select * from yb_demo.test_rs_batch;
+
+ h | r | v1   | v2
+---+---+------+----
+ 1 | 1 |    1 |  a
+ 3 | 3 | null |  b
+
+(2 rows)
+```
+
+Getting status from DML operations in Python:
+
+```python
+from cassandra.cluster import Cluster
+from cassandra.query import BatchStatement
+
+# Create a cluster and a session connected to the yb_demo keyspace
+cluster = Cluster(['127.0.0.1'])
+session = cluster.connect('yb_demo')
+
+# Create a batch statement object.
+b = BatchStatement()
+
+# Add multiple queries
+b.add("INSERT INTO test_rs_batch(h, r, v1, v2) VALUES (1, 1, 1 ,'a') RETURNS STATUS AS ROW")
+b.add("UPDATE test_rs_batch SET v2='z' WHERE h=3 AND r=3 IF v2='z' RETURNS STATUS AS ROW")
+b.add("DELETE FROM test_rs_batch WHERE h=2 AND r=2 IF EXISTS RETURNS STATUS AS ROW")
+
+# Execute the batch operation.
+result = session.execute(b, trace=True)
+
+# Print status for each DML operation
+for row in result:
+    print(row)
+```
+
+The output generated is:
+
+```python
+Row(applied=True, message=None, h=None, r=None, v1=None, v2=None)
+Row(applied=False, message=None, h=3, r=3, v1=None, v2='b')
+Row(applied=False, message=None, h=None, r=None, v1=None, v2=None)
+```
+
+## Row Status
+
+When executing a batch in YCQL, the protocol allows returning only one error or return status for the entire batch.
+
+If one statement fails with an error, or if some conditional DMLs are not applied because their IF condition is false, the driver or application cannot identify the affected statements; it receives only a single error or return status for the whole batch.
+
+Therefore, the application cannot react to such failures appropriately (for example, by retrying, aborting, or changing parameters for either the entire batch or just the affected statements).
+
+You can address this limitation by using the `RETURNS STATUS AS ROW` feature.
+
+If used, the write statement returns its status (whether applied, unapplied, or errored-out with a message) as a regular CQL row that the application can inspect to decide what to do.
+
+For a batch, it is required that either none or all statements use `RETURNS STATUS AS ROW`.
+
+When executing `n` statements in a batch with `RETURNS STATUS AS ROW`, `n` rows are returned, in the same order as the statements, and the application can easily inspect the result.
+
+For batches containing conditional DMLs, `RETURNS STATUS AS ROW` must be used.
+
+For conditional DMLs (which are not normally allowed in batches), any subset of the statements could fail their `IF` condition, so without a status row for each statement it would be impossible to identify which ones were actually applied.
+
+To distinguish between the two not-applied cases (error versus a false condition), there is an error `message` column in the returned row that is null for not-applied statements and filled in for errors.
+
+Conversely, there is one column for each table column, which is `null` for errors but filled in for not-applied statements (showing why the statement was not applied).
+
+For instance:
+
+1.
Set up a simple table: + +```sql +cqlsh:sample> CREATE TABLE test(h INT, r INT, v LIST, PRIMARY KEY(h,r)) WITH transactions={'enabled': true}; +cqlsh:sample> INSERT INTO test(h,r,v) VALUES (1,1,[1,2]); +``` + +1. Unapplied update when `IF` condition is false: + +```sql +cqlsh:sample> UPDATE test SET v[2] = 4 WHERE h = 1 AND r = 1 IF v[1] = 3 RETURNS STATUS AS ROW; + [applied] | [message] | h | r | v +-----------+-----------+---+---+-------- + False | null | 1 | 1 | [1, 2] +``` + +1. Unapplied update when `IF` condition true but error: + +```sql +cqlsh:sample> UPDATE test SET v[20] = 4 WHERE h = 1 AND r = 1 IF v[1] = 2 RETURNS STATUS AS ROW; + [applied] | [message] | h | r | v +-----------+----------------------------------------------------------------------------------------+------+------+------ + False | Unable to replace items into list, expecting index 20, reached end of list with size 2 | null | null | null +``` + +1. Applied update when `IF` condition true: + +```sql +cqlsh:sample> UPDATE test SET v[0] = 4 WHERE h = 1 AND r = 1 IF v[1] = 2 RETURNS STATUS AS ROW; + [applied] | [message] | h | r | v +-----------+-----------+------+------+------ + True | null | null | null | null +``` + +1. Final table result: + +```sql +cqlsh:sample> SELECT * FROM test; + h | r | v +---+---+-------- + 1 | 1 | [4, 2] +(1 rows) +``` + +{{< note title="Note" >}} + +`BEGIN/END TRANSACTION` doesn't currently support `RETURNS STATUS AS ROW`. + +{{< /note >}} diff --git a/docs/content/v2.25/api/ycql/ddl_alter_keyspace.md b/docs/content/v2.25/api/ycql/ddl_alter_keyspace.md new file mode 100644 index 000000000000..142a066a7d85 --- /dev/null +++ b/docs/content/v2.25/api/ycql/ddl_alter_keyspace.md @@ -0,0 +1,95 @@ +--- +title: ALTER KEYSPACE statement [YCQL] +headerTitle: ALTER KEYSPACE +linkTitle: ALTER KEYSPACE +description: Use the ALTER KEYSPACE statement to change the properties of an existing keyspace. +menu: + preview_api: + parent: api-cassandra + weight: 1200 +aliases: + - /preview/api/cassandra/ddl_alter_keyspace + - /preview/api/ycql/ddl_alter_keyspace +type: docs +--- + +## Synopsis + +Use the `ALTER KEYSPACE` statement to change the properties of an existing keyspace. + +This statement is supported for compatibility reasons only, and has no effect internally (no-op statement). + +The statement can fail if the specified keyspace does not exist or if the user (role) has no permissions for the keyspace ALTER operation. + +## Syntax + +### Diagram + +#### alter_keyspace + +ALTERKEYSPACESCHEMAkeyspace_namekeyspace_properties + +#### keyspace_properties + +WITHREPLICATION={,keyspace_property}ANDDURABLE_WRITES=truefalse + +### Grammar + +```ebnf +alter_keyspace ::= ALTER { KEYSPACE | SCHEMA } keyspace_name + [ WITH REPLICATION '=' '{' keyspace_property '}'] + [ AND DURABLE_WRITES '=' { true | false } ] + +keyspace_property ::= property_name = property_value +``` + +Where + +- `keyspace_name` and `property_name` are identifiers. +- `property_value` is a literal of either [boolean](../type_bool), [text](../type_text), or [map](../type_collection) data type. + +## Semantics + +- An error is raised if the specified `keyspace_name` does not exist. +- An error is raised if the user (used role) has no ALTER permission for this specified keyspace and no ALTER permission for ALL KEYSPACES. +- YCQL keyspace properties are supported in the syntax but have no effect internally (where YugabyteDB defaults are used instead). 
+
+## Examples
+
+```sql
+ycqlsh> ALTER KEYSPACE example;
+```
+
+```sql
+ycqlsh> ALTER KEYSPACE example WITH DURABLE_WRITES = true;
+```
+
+```sql
+ycqlsh> ALTER KEYSPACE example WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': '3'} AND DURABLE_WRITES = true;
+```
+
+```sql
+ycqlsh> ALTER SCHEMA keyspace_example;
+```
+
+```output
+SQL error: Keyspace Not Found.
+ALTER SCHEMA keyspace_example;
+               ^^^^^^
+```
+
+```sql
+ycqlsh> ALTER KEYSPACE example;
+```
+
+```output
+SQL error: Unauthorized. User test_role has no ALTER permission on or any of its parents.
+ALTER KEYSPACE example;
+^^^^^^
+```
+
+## See also
+
+- [`CREATE KEYSPACE`](../ddl_create_keyspace)
+- [`DROP KEYSPACE`](../ddl_drop_keyspace)
+- [`USE`](../ddl_use)
diff --git a/docs/content/v2.25/api/ycql/ddl_alter_role.md b/docs/content/v2.25/api/ycql/ddl_alter_role.md
new file mode 100644
index 000000000000..8ba2228d6a8e
--- /dev/null
+++ b/docs/content/v2.25/api/ycql/ddl_alter_role.md
@@ -0,0 +1,72 @@
+---
+title: ALTER ROLE statement [YCQL]
+headerTitle: ALTER ROLE
+linkTitle: ALTER ROLE
+description: Use the ALTER ROLE statement to change the properties of an existing role.
+menu:
+  preview_api:
+    parent: api-cassandra
+    weight: 1210
+aliases:
+  - /preview/api/cassandra/ddl_alter_role
+  - /preview/api/ycql/ddl_alter_role
+type: docs
+---
+
+## Synopsis
+
+Use the `ALTER ROLE` statement to change the properties of an existing role.
+It allows modifying the properties `SUPERUSER`, `PASSWORD`, and `LOGIN`.
+
+This statement is enabled by setting the YB-TServer flag [`--use_cassandra_authentication`](../../../reference/configuration/yb-tserver/#use-cassandra-authentication) to `true`.
+
+## Syntax
+
+### Diagram
+
+ALTERROLErole_nameWITHANDrole_property
+
+### Grammar
+
+```ebnf
+alter_role ::= ALTER ROLE role_name WITH role_property [ AND role_property ...];
+
+role_property ::= PASSWORD = '<Text Literal>'
+                | LOGIN = '<Boolean Literal>'
+                | SUPERUSER = '<Boolean Literal>'
+```
+
+Where
+
+- `role_name` is a text identifier.
+
+## Semantics
+
+An error is raised if `role_name` does not exist.
+
+## Examples
+
+```sql
+ycqlsh:example> CREATE ROLE finance;
+```
+
+```sql
+ycqlsh:example> ALTER ROLE finance with LOGIN = true;
+```
+
+```sql
+ycqlsh:example> ALTER ROLE finance with SUPERUSER = true;
+```
+
+```sql
+ycqlsh:example> ALTER ROLE finance with PASSWORD = 'jsfp9ajhufans2' AND SUPERUSER = false;
+```
+
+## See also
+
+- [`CREATE ROLE`](../ddl_create_role)
+- [`DROP ROLE`](../ddl_drop_role)
- [`GRANT ROLE`](../ddl_grant_role)
+- [`REVOKE ROLE`](../ddl_revoke_role)
+- [`GRANT PERMISSION`](../ddl_grant_permission)
+- [`REVOKE PERMISSION`](../ddl_revoke_permission)
diff --git a/docs/content/v2.25/api/ycql/ddl_alter_table.md b/docs/content/v2.25/api/ycql/ddl_alter_table.md
new file mode 100644
index 000000000000..dc477557c24e
--- /dev/null
+++ b/docs/content/v2.25/api/ycql/ddl_alter_table.md
@@ -0,0 +1,156 @@
+---
+title: ALTER TABLE statement [YCQL]
+headerTitle: ALTER TABLE
+linkTitle: ALTER TABLE
+description: Use the ALTER TABLE statement to change the schema or definition of an existing table.
+menu:
+  preview_api:
+    parent: api-cassandra
+    weight: 1220
+aliases:
+  - /preview/api/cassandra/ddl_alter_table
+  - /preview/api/ycql/ddl_alter_table
+type: docs
+---
+
+## Synopsis
+
+Use the `ALTER TABLE` statement to change the schema or definition of an existing table.
+It allows adding, dropping, or renaming a column as well as updating a table property.
+ +## Syntax + +### Diagram + +ALTERTABLEtable_nameADD,column_namecolumn_typeDROP,column_nameRENAME,column_nameTOcolumn_nameWITHANDproperty_name=property_literal + +### Grammar + +```ebnf +alter_table ::= ALTER TABLE table_name alter_operator [ alter_operator ...] + +alter_operator ::= add_op | drop_op | rename_op | property_op + +add_op ::= ADD column_name column_type [ ',' column_name column_type ...] + +drop_op ::= DROP column_name [ ',' column_name ...] + +rename_op ::= RENAME column_name TO column_name [ ',' column_name TO column_name ...] + +property_op ::= WITH property_name '=' property_literal [ AND property_name '=' property_literal ...] +``` + +Where + +- `table_name`, `column_name`, and `property_name` are identifiers (`table_name` may be qualified with a keyspace name). +- `property_literal` is a literal of either [boolean](../type_bool), [text](../type_text), or [map](../type_collection) data type. + +## Semantics + +- An error is raised if `table_name` does not exist in the associated keyspace. +- Columns that are part of `PRIMARY KEY` cannot be altered. +- When adding a column, its value for all existing rows in the table defaults to `null`. +- After dropping a column, all values currently stored for that column in the table are discarded (if any). + +## Examples + +### Add a column to a table + +```sql +ycqlsh:example> CREATE TABLE employees (id INT, name TEXT, salary FLOAT, PRIMARY KEY((id), name)); +``` + +```sql +ycqlsh:example> ALTER TABLE employees ADD title TEXT; +``` + +```sql +ycqlsh:example> DESCRIBE TABLE employees; +``` + +Following result would be shown. + +```output +CREATE TABLE example.employees ( + id int, + name text, + salary float, + title text, + PRIMARY KEY (id, name) +) WITH CLUSTERING ORDER BY (name ASC); +``` + +### Remove a column from a table + +```sql +ycqlsh:example> ALTER TABLE employees DROP salary; +``` + +```sql +ycqlsh:example> DESCRIBE TABLE employees; +``` + +Following result would be shown. + +```output +CREATE TABLE example.employees ( + id int, + name text, + title text, + PRIMARY KEY (id, name) +) WITH CLUSTERING ORDER BY (name ASC); +``` + +### Rename a column in a table + +```sql +ycqlsh:example> ALTER TABLE employees RENAME title TO job_title; +``` + +```sql +ycqlsh:example> DESCRIBE TABLE employees; +``` + +Following result would be shown. + +```output +CREATE TABLE example.employees ( + id int, + name text, + job_title text, + PRIMARY KEY (id, name) +) WITH CLUSTERING ORDER BY (name ASC); +``` + +### Update a table property + +You can do this as follows: + +```sql +ycqlsh:example> ALTER TABLE employees WITH default_time_to_live = 5; +``` + +```sql +ycqlsh:example> DESCRIBE TABLE employees; +``` + +Following result would be shown. 
+ +```output +CREATE TABLE example.employees ( + id int, + name text, + job_title text, + PRIMARY KEY (id, name) +) WITH CLUSTERING ORDER BY (name ASC) + AND default_time_to_live = 5; +``` + +## See also + +- [`CREATE TABLE`](../ddl_create_table) +- [`DELETE`](../dml_delete/) +- [`DROP TABLE`](../ddl_drop_table) +- [`INSERT`](../dml_insert) +- [`SELECT`](../dml_select/) +- [`UPDATE`](../dml_update/) diff --git a/docs/content/v2.25/api/ycql/ddl_create_index.md b/docs/content/v2.25/api/ycql/ddl_create_index.md new file mode 100644 index 000000000000..e1152eec1a73 --- /dev/null +++ b/docs/content/v2.25/api/ycql/ddl_create_index.md @@ -0,0 +1,437 @@ +--- +title: CREATE INDEX statement [YCQL] +headerTitle: CREATE INDEX +linkTitle: CREATE INDEX +summary: Create a new index on a table +description: Use the CREATE INDEX statement to create a new index on a table. +menu: + preview_api: + parent: api-cassandra + weight: 1225 +aliases: + - /preview/api/ycql/ddl_create_index +type: docs +--- + +## Synopsis + +Use the `CREATE INDEX` statement to create a new index on a table. It defines the index name, index columns, and additional columns to include. + +{{}} +In YugabyteDB, indexes are global and are implemented just like tables. They are split into tablets and distributed across the different nodes in the cluster. The sharding of indexes is based on the primary key of the index and is independent of how the main table is sharded and distributed. Indexes are not colocated with the base table. +{{}} + +## Syntax + +### Diagram + +#### create_index + +CREATEUNIQUEDEFERREDINDEXIFNOTEXISTSindex_nameONtable_name(partition_key_columns,clustering_key_columns)covering_columnsindex_propertiesWHEREindex_predicate + +#### partition_key_columns + +index_column(,index_column) + +#### clustering_key_columns + +,index_column + +#### index_properties + +WITHANDproperty_name=property_literalCLUSTERINGORDERBY(,index_columnASCDESC) + +#### index_column + +column_namejsonb_attribute + +#### jsonb_attribute + +column_name->'attribute_name'->>'attribute_name' + +#### covering_columns + +COVERINGINCLUDE(,column_name) + +#### index_predicate + +where_expression + +### Grammar + +```ebnf +create_index ::= CREATE [ UNIQUE ] [ DEFERRED ] INDEX + [ IF NOT EXISTS ] index_name ON table_name ( + partition_key_columns , [ clustering_key_columns ] ) + [ covering_columns ] [ index_properties ] + [ WHERE index_predicate ] + +partition_key_columns ::= index_column | ( index_column [ , ... ] ) + +clustering_key_columns ::= index_column [ , ... ] + +index_properties ::= WITH + { property_name = property_literal + | CLUSTERING ORDER BY ( + { index_column [ ASC | DESC ] } [ , ... ] ) } + [ AND ... ] + +index_column ::= column_name | jsonb_attribute + +jsonb_attribute ::= column_name [ -> 'attribute_name' [ ... ] ] ->> 'attribute_name' + +covering_columns ::= { COVERING | INCLUDE } ( column_name [ , ... ] ) + +index_predicate ::= where_expression +``` + +Where + +- `index_name`, `table_name`, `property_name`, and `column_name` are identifiers. +- `table_name` may be qualified with a keyspace name. +- `index_name` cannot be qualified with a keyspace name because an index must be created in the table's keyspace. +- `property_literal` is a literal of either [boolean](../type_bool), [text](../type_text), or [map](../type_collection) data type. +- `index_column` can be any data type except `MAP`, `SET`, `LIST`, `JSONB`, `USER_DEFINED_TYPE`. 
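+
+For example, the following sketch shows a partition column, a clustering column, and a covering column spelled out explicitly, assuming an `orders` table created with `transactions = { 'enabled' : true }` as in the examples later on this page:
+
+```sql
+ycqlsh:example> CREATE INDEX orders_by_warehouse_date
+                ON orders ((warehouse_id), order_date)
+                INCLUDE (amount);
+```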
+
+
+## Semantics
+
+- An error is raised if transactions have not been enabled using the `WITH transactions = { 'enabled' : true }` clause on the table to be indexed. This is because secondary indexes internally use distributed transactions to ensure ACID guarantees in the updates to the secondary index and the associated primary key. More details [here](https://www.yugabyte.com/blog/yugabyte-db-1-1-new-feature-speeding-up-queries-with-secondary-indexes/).
+- An error is raised if `index_name` already exists in the associated keyspace unless the `IF NOT EXISTS` option is used.
+
+{{< note title="Note" >}}
+
+When an index is created on an existing table, YugabyteDB will automatically backfill existing data into the index in an online manner (that is, while continuing to serve other concurrent writes and traffic). For more details on how this is done, see [Online Index Backfill](https://github.com/yugabyte/yugabyte-db/blob/master/architecture/design/online-index-backfill.md).
+
+{{< /note >}}
+
+### User-enforced consistency
+
+{{}}
+Opt for user-enforced consistency only when there is no other solution to your problem. User-enforced consistency requires considerable user effort to keep the index and table in sync.
+{{}}
+
+Indexes require transactions to have been enabled on the table. For cases where the table was created without enabling transactions, `consistency_level` has to be set to `user_enforced`, as follows:
+
+```sql
+CREATE TABLE orders (id int PRIMARY KEY, warehouse int);
+CREATE INDEX ON orders (warehouse)
+      WITH transactions = { 'enabled' : false, 'consistency_level' : 'user_enforced' };
+```
+
+{{< warning title="Syncing table and index">}}
+When using an index without transactions enabled, it is the responsibility of the application to retry any insert/update/delete failures to make sure that the table and index are in sync.
+
+Also, if the index is created after data has been added to the table, the index may **not** be backfilled automatically depending on the setting of the `disable_index_backfill_for_non_txn_tables` flag. If set to `true`, then it is the responsibility of the user to trigger a backfill using the [yb-admin backfill_indexes_for_table](../../../admin/yb-admin/#backfill-indexes-for-table) command, which will trigger the backfill after a small delay of about a minute. This delay is controlled by the `index_backfill_upperbound_for_user_enforced_txn_duration_ms` flag.
+{{< /warning >}}
+
+### PARTITION KEY
+
+- Partition key is required and defines a split of the index into _partitions_.
+
+### CLUSTERING KEY
+
+- Clustering key is optional and defines an ordering for index rows within a partition.
+- Default ordering is ascending (`ASC`) but can be set for each clustering column as ascending or descending using the `CLUSTERING ORDER BY` property.
+- Any primary key column of the table not indexed explicitly in `index_columns` is added as a clustering column to the index implicitly. This is necessary so that the whole primary key of the table is indexed.
+
+### *index_properties*
+
+- The `CLUSTERING ORDER BY` property can be used to set the ordering for each clustering column individually (default is `ASC`).
+- The `TABLETS = <num>` property specifies the number of tablets to be used for the specified YCQL index. Setting this property overrides the value from the [`--yb_num_shards_per_tserver`](../../../reference/configuration/yb-tserver/#yb-num-shards-per-tserver) option.
For an example, see [Create an index specifying the number of tablets](#create-an-index-specifying-the-number-of-tablets). +- Use the `AND` operator to use multiple index properties. +- When setting a TTL on the index using `default_time_to_live`, please ensure that the TTL value is the same as that of the table's TTL. If they are different, it would lead to the index and the table being out of sync and would lead to unexpected behavior. + +{{}} +**Caveat**: Row-level TTL cannot be set on a table with a secondary index during INSERTS/UPDATES. {{}} +{{}} + +### INCLUDED COLUMNS + +- Included columns are optional table columns whose values are copied into the index in addition to storing them in the table. When additional columns are included in the index, they can be used to respond to queries directly from the index without querying the table. + +- The following can't be added to an index's included columns: static columns of a table, expressions, and table columns with the following types: frozen, map, set, list, tuple, jsonb, and user defined. + +### UNIQUE INDEX + +- A unique index disallows duplicate values from being inserted into the indexed columns. It can be used to ensure uniqueness of index column values. + +### DEFERRED INDEX + +Currently, an "index backfill" job is launched for each index that is created. For the case where you create a table and add multiple indexes, the main table needs to be scanned multiple times to populate each index. This is unnecessary, and can also cause issues with the single touch and multi touch block cache algorithm. + +After creating a set of indexes with their backfill deferred, you can then trigger a backfill job for the entire batch of indexes (on the same table) in one of the following ways: + +- Create a new index that is not deferred: + + ```cql + CREATE DEFERRED INDEX idx_1 on table_name(col_1); // No backfill launched. + CREATE DEFERRED INDEX idx_2 on table_name(col_2); // No backfill launched. + CREATE DEFERRED INDEX idx_9 on table_name(col_9); // No backfill launched. + + + // To launch backfill ... + CREATE INDEX idx_10 on table_name(col_10); // Will launch backfill for idx_10 and + // all deferred indexes idx_1 .. idx_9 + // on the same table viz: table_name. + ``` + +- Use yb-admin to launch backfill for deferred indexes on the table. + + ```cql + CREATE DEFERRED INDEX idx_1 on table_name(col_1); // No backfill launched. + CREATE DEFERRED INDEX idx_2 on table_name(col_2); // No backfill launched. + ... + CREATE DEFERRED INDEX idx_9 on table_name(col_9); // No backfill launched. + CREATE DEFERRED INDEX idx_10 on table_name(col_10); // No backfill launched. + ``` + + Launch a backfill job for backfilling all the deferred indexes using the `backfill_indexes_for_table` command as follows: + + ```bash + bin/yb-admin --master_addresses backfill_indexes_for_table ycql.ybdemo table_name + ``` +- Use the [`--defer_index_backfill`](../../../reference/configuration/yb-master#defer-index-backfill) YB-Master flag to force all indexes to be DEFERRED, and run `yb-admin backfill_indexes_for_table` to backfill indexes. + +### PARTIAL INDEX + +- If a `WHERE` clause is specified, only rows which satisfy the `index_predicate` are indexed. +- An `index_predicate` can have sub-expressions on columns of these data types: `TINYINT`, `SMALLINT`, `INT/INTEGER`, `BIGINT`, `VARINT`, `BOOLEAN` and `TEXT` along with these operators (when applicable): `=, !=, >, <, >=, <=`. +- Partial indexes can be `UNIQUE`. 
A UNIQUE partial index enforces the constraint that for each possible tuple of indexed columns, only one row that satisfies the `index_predicate` is allowed in the table. +- `SELECT` queries can use a partial index for scanning if the `SELECT` statement's `where_expression` => (logically implies) `index_predicate`. + + {{< note title="Note" >}} + +- A partial index might not be chosen even if the implication holds in case there are better query plans. +- The logical implication holds if all sub-expressions of the `index_predicate` are present as is in the `where_expression`. For example, assume `where_expression = A AND B AND C`, `index_predicate_1 = A AND B`, `index_predicate_2 = A AND B AND D`, `index_predicate_3 = A AND B AND C AND D`. Then `where_expression` only implies `index_predicate_1` + +- Currently, valid mathematical implications are not taken into account when checking for logical implication. For example, even if `where_expression = x > 5` and `index_predicate = x > 4`, the `SELECT` query will not use the index for scanning. This is because the two sub-expressions `x > 5` and `x > 4` differ. + + {{< /note >}} + +- When using a prepared statement, the logical implication check (to decide if a partial index is usable), will only consider those sub-expressions of `where_expression` that don't have a bind variable. This is because the query plan is decided before execution (i.e., when a statement is prepared). + +```sql +ycqlsh:example> CREATE TABLE orders (customer_id INT, + order_date TIMESTAMP, + product JSONB, + warehouse_id INT, + amount DOUBLE, + PRIMARY KEY ((customer_id), order_date)) + WITH transactions = { 'enabled' : true }; + +ycqlsh:example> CREATE INDEX idx ON orders (warehouse_id) + WHERE warehouse_id < 100; + +ycqlsh:example> EXPLAIN SELECT product FROM orders + WHERE warehouse_id < 100 AND order_date >= ?; // Idx can be used +``` + +```output + QUERY PLAN +------------------------------------------ + Index Scan using temp.idx on temp.orders + Filter: (order_date >= :order_date) +``` + +```sql +ycqlsh:example> EXPLAIN SELECT product FROM orders + WHERE warehouse_id < ? and order_date >= ?; // Idx cannot be used +``` + +```output + QUERY PLAN +-------------------------------------------------------------------------- + Seq Scan on temp.orders + Filter: (warehouse_id < :warehouse_id) AND (order_date >= :order_date) +``` + +- Without partial indexes, we do not allow many combinations of operators together on the same column in a `SELECT`'s where expression e.g.: `WHERE v1 != NULL and v1 = 5`. But if there was a partial index that subsumes some clauses of the `SELECT`'s where expression, two or more operators otherwise not supported together, might be supported. + +```sql +ycqlsh:example> EXPLAIN SELECT product FROM orders + WHERE warehouse_id != NULL AND warehouse_id = ?; +``` + +```output +SyntaxException: Invalid CQL Statement. 
Illogical condition for where clause +EXPLAIN SELECT product from orders where warehouse_id != NULL and warehouse_id = ?; + ^^^^^^^^^^^^ + (ql error -12) +``` + +```sql +ycqlsh:example> CREATE INDEX warehouse_idx ON orders (warehouse_id) + WHERE warehouse_id != NULL; +ycqlsh:example> EXPLAIN SELECT product FROM orders + WHERE warehouse_id != NULL AND warehouse_id = ?; // warehouse_idx can be used +``` + +```output + QUERY PLAN +---------------------------------------------------- + Index Scan using temp.warehouse_idx on temp.orders + Key Conditions: (warehouse_id = :warehouse_id) +``` + +## Examples + +### Create a table to be indexed + +'customer_id' is the partitioning column and 'order_date' is the clustering column. + +```sql +ycqlsh:example> CREATE TABLE orders (customer_id INT, + order_date TIMESTAMP, + product JSONB, + warehouse_id INT, + amount DOUBLE, + PRIMARY KEY ((customer_id), order_date)) + WITH transactions = { 'enabled' : true }; +``` + +### Create an index for query by the `order_date` column + +```sql +ycqlsh:example> CREATE INDEX orders_by_date ON orders (order_date) INCLUDE (amount); +``` + +### Create an index for query by the JSONB attribute `product->>'name'` + +```sql +ycqlsh:example> CREATE INDEX product_name + ON orders (product->>'name') INCLUDE (amount); +``` + +### Create an index for query by the `warehouse_id` column + +```sql +ycqlsh:example> CREATE INDEX orders_by_warehouse + ON orders (warehouse_id, order_date) INCLUDE (amount); +``` + +### Insert some data + +```sql +ycqlsh:example> INSERT INTO orders (customer_id, order_date, product, warehouse_id, amount) + VALUES (1001, '2018-01-10', '{ "name":"desk" }', 107, 100.30); +ycqlsh:example> INSERT INTO orders (customer_id, order_date, product, warehouse_id, amount) + VALUES (1002, '2018-01-11', '{ "name":"chair" }', 102, 50.45); +ycqlsh:example> INSERT INTO orders (customer_id, order_date, product, warehouse_id, amount) + VALUES (1001, '2018-04-09', '{ "name":"pen" }', 102, 20.25); +ycqlsh:example> INSERT INTO orders (customer_id, order_date, product, warehouse_id, amount) + VALUES (1003, '2018-04-09', '{ "name":"pencil" }', 108, 200.80); +``` + +### Query by the partition column `customer_id` in the table + +```sql +ycqlsh:example> SELECT SUM(amount) FROM orders + WHERE customer_id = 1001 AND order_date >= '2018-01-01'; +``` + +```output + sum(amount) +------------- + 120.55 +``` + +### Query by the partition column `order_date` in the index `orders_by_date` + +```sql +ycqlsh:example> SELECT SUM(amount) FROM orders + WHERE order_date = '2018-04-09'; +``` + +```output + sum(amount) +------------- + 221.05 +``` + +### Query by the partition column `product->>'name'` in the index `product_name` + +```sql +ycqlsh:example> SELECT SUM(amount) FROM orders + WHERE product->>'name' = 'desk'; +``` + +```output + sum(amount) +------------- + 100.30 +``` + +### Query by the partition column `warehouse_id` column in the index `orders_by_warehouse` + +```sql +ycqlsh:example> SELECT SUM(amount) FROM orders + WHERE warehouse_id = 102 AND order_date >= '2018-01-01'; +``` + +```output + sum(amount) +------------- + 70.7 +``` + +### Create a table with a unique index + +You can do this as follows: + +```sql +ycqlsh:example> CREATE TABLE emp (enum INT primary key, + lastname VARCHAR, + firstname VARCHAR, + userid VARCHAR) + WITH transactions = { 'enabled' : true }; +ycqlsh:example> CREATE UNIQUE INDEX emp_by_userid ON emp (userid); +``` + +### Insert values into the table and verify no duplicate `userid` is inserted + +```sql 
+ycqlsh:example> INSERT INTO emp (enum, lastname, firstname, userid) + VALUES (1001, 'Smith', 'John', 'jsmith'); +ycqlsh:example> INSERT INTO emp (enum, lastname, firstname, userid) + VALUES (1002, 'Smith', 'Jason', 'jsmith'); +``` + +```output +InvalidRequest: Error from server: code=2200 [Invalid query] message="SQL error: Execution Error. Duplicate value disallowed by unique index emp_by_userid +INSERT INTO emp (enum, lastname, firstname, userid) + ^^^^ +VALUES (1002, 'Smith', 'Jason', 'jsmith'); + (error -300)" +``` + +```sql +ycqlsh:example> INSERT INTO emp (enum, lastname, firstname, userid) + VALUES (1002, 'Smith', 'Jason', 'jasmith'); +ycqlsh:example> SELECT * FROM emp; +``` + +```output + enum | lastname | firstname | userid +------+----------+-----------+--------- + 1002 | Smith | Jason | jasmith + 1001 | Smith | John | jsmith +``` + +### Create an index specifying the number of tablets + +You can use the `CREATE INDEX` statement with the `WITH tablets = ` clause to specify the number of tablets for an index. This is useful to scale the index up or down based on requirements. +For example, for smaller or partial indexes, it may be wasteful to have a large number of shards (tablets). In that case, you can use this to reduce the number of tablets created for the index. +Similarly, for a very large index, you can use this statement to presplit the index into a large number of shards to get improved performance. + +Note that YugabyteDB, by default, presplits an index in `yb_num_shards_per_tserver * num_of_tserver` shards. This clause can be used to override that setting on per-index basis. + +```sql +ycqlsh:example> CREATE TABLE tracking (id int PRIMARY KEY, a TEXT) WITH transactions = { 'enabled' : true }; +ycqlsh:example> CREATE INDEX my_indx ON tracking(a) WITH tablets = 10; +``` + +## See also + +- [`CREATE TABLE`](../ddl_create_table) +- [`DROP INDEX`](../ddl_drop_index) diff --git a/docs/content/v2.25/api/ycql/ddl_create_keyspace.md b/docs/content/v2.25/api/ycql/ddl_create_keyspace.md new file mode 100644 index 000000000000..8fe731bcaf8d --- /dev/null +++ b/docs/content/v2.25/api/ycql/ddl_create_keyspace.md @@ -0,0 +1,88 @@ +--- +title: CREATE KEYSPACE statement [YCQL] +headerTitle: CREATE KEYSPACE +linkTitle: CREATE KEYSPACE +description: Use the CREATE KEYSPACE statement to create a keyspace that functions as a grouping mechanism for database objects, such as tables or types. +menu: + preview_api: + parent: api-cassandra + weight: 1230 +aliases: + - /preview/api/cassandra/ddl_create_keyspace + - /preview/api/ycql/ddl_create_keyspace +type: docs +--- + +## Synopsis + +Use the `CREATE KEYSPACE` statement to create a `keyspace` that functions as a grouping mechanism for database objects, (such as [tables](../ddl_create_table) or [types](../ddl_create_type)). + +## Syntax + +### Diagram + +#### create_keyspace + +CREATEKEYSPACESCHEMAIFNOTEXISTSkeyspace_namekeyspace_properties + +#### keyspace_properties + +WITHREPLICATION={,keyspace_property}ANDDURABLE_WRITES=truefalse + +### Grammar + +```ebnf +create_keyspace ::= CREATE { KEYSPACE | SCHEMA } [ IF NOT EXISTS ] keyspace_name + [ WITH REPLICATION '=' '{' keyspace_property '}'] + [ AND DURABLE_WRITES '=' { true | false } ] + +keyspace_property ::= property_name = property_value +``` + +Where + +- `keyspace_name` and `property_name` are identifiers. +- `property_value` is a literal of either [boolean](../type_bool), [text](../type_text), or [map](../type_collection) data type. 
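+
+For illustration, the properties can be spelled out when creating a keyspace (a sketch; as noted under Semantics below, these Cassandra-compatible properties are accepted in the syntax but YugabyteDB applies its own replication defaults):
+
+```sql
+ycqlsh> CREATE KEYSPACE IF NOT EXISTS example WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': '3'} AND DURABLE_WRITES = true;
+```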
+
+## Semantics
+
+- An error is raised if the specified `keyspace_name` already exists unless the `IF NOT EXISTS` option is present.
+- Cassandra's CQL keyspace properties are supported in the syntax but have no effect internally (where YugabyteDB defaults are used instead).
+
+## Examples
+
+```sql
+ycqlsh> CREATE KEYSPACE example;
+```
+
+```sql
+ycqlsh> DESCRIBE KEYSPACES;
+```
+
+```output
+example  system_schema  system_auth  system
+```
+
+```sql
+ycqlsh> DESCRIBE example;
+```
+
+```sql
+CREATE KEYSPACE example WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': '3'} AND DURABLE_WRITES = true;
+```
+
+```sql
+ycqlsh> CREATE SCHEMA example;
+```
+
+```output
+SQL error: Keyspace Already Exists
+CREATE SCHEMA example;
+^^^^^^
+```
+
+## See also
+
+- [`ALTER KEYSPACE`](../ddl_alter_keyspace)
+- [`DROP KEYSPACE`](../ddl_drop_keyspace)
+- [`USE`](../ddl_use)
diff --git a/docs/content/v2.25/api/ycql/ddl_create_role.md b/docs/content/v2.25/api/ycql/ddl_create_role.md
new file mode 100644
index 000000000000..9fe9488bbfd7
--- /dev/null
+++ b/docs/content/v2.25/api/ycql/ddl_create_role.md
@@ -0,0 +1,87 @@
+---
+title: CREATE ROLE statement [YCQL]
+headerTitle: CREATE ROLE
+linkTitle: CREATE ROLE
+description: Use the `CREATE ROLE` statement to create a new role that is used to authenticate into YCQL and as a group of permissions used to restrict operations on the database objects.
+menu:
+  preview_api:
+    parent: api-cassandra
+    weight: 1235
+aliases:
+  - /preview/api/cassandra/ddl_create_role
+  - /preview/api/ycql/ddl_create_role
+type: docs
+---
+
+## Synopsis
+
+Use the `CREATE ROLE` statement to create a new role that is used to authenticate into YCQL and as a group of permissions used to restrict operations on the database objects. Note that users are specific roles that are login enabled. There is no explicit `CREATE USER` command in YCQL.
+
+This statement is enabled by setting the YB-TServer flag [`--use_cassandra_authentication`](../../../reference/configuration/yb-tserver/#use-cassandra-authentication) to `true`.
+
+## Syntax
+
+### Diagram
+
+#### create_role
+
+CREATEROLEIFNOTEXISTSrole_nameWITHANDrole_property
+
+#### role_property
+
+PASSWORD=<Text Literal>LOGIN=<Boolean Literal>SUPERUSER=<Boolean Literal>
+
+### Grammar
+
+```ebnf
+create_role ::= CREATE ROLE [ IF NOT EXISTS ] role_name
+                [ WITH role_property [ AND ... ] ]
+
+role_property ::= PASSWORD = <Text Literal>
+                | LOGIN = <Boolean Literal>
+                | SUPERUSER = <Boolean Literal>
+```
+
+Where
+
+- `role_name` is a text identifier.
+
+## Semantics
+
+- An error is raised if `role_name` already exists unless the `IF NOT EXISTS` option is used.
+- By default, a role does not possess the `LOGIN` privilege nor `SUPERUSER` status.
+- A role with the `SUPERUSER` status possesses all the permissions on all the objects in the database even though they are not explicitly granted.
+- Only a role with the `SUPERUSER` status can create another `SUPERUSER` role.
+- A role with the `LOGIN` privilege can be used to authenticate into YCQL.
+- Only a client with the permission `CREATE` on `ALL ROLES` or with the `SUPERUSER` status can create another role.
+
+## Examples
+
+### Create a simple role with no properties
+
+```sql
+ycqlsh:example> CREATE ROLE role1;
+```
+
+### Create a `SUPERUSER` role
+
+```sql
+ycqlsh:example> CREATE ROLE role2 WITH SUPERUSER = true;
+```
+
+### Create a regular user with ability to log in
+
+You can create a regular user with login privileges as shown below. Note the `SUPERUSER` set to `false`.
+ +```sql +ycqlsh:example> CREATE ROLE role3 WITH SUPERUSER = false AND LOGIN = true AND PASSWORD = 'aid8134' +``` + +## See also + +- [`ALTER ROLE`](../ddl_alter_role) +- [`DROP ROLE`](../ddl_drop_role) +- [`GRANT ROLE`](../ddl_grant_role) +- [`REVOKE ROLE`](../ddl_revoke_role) +- [`GRANT PERMISSION`](../ddl_grant_permission) +- [`REVOKE PERMISSION`](../ddl_revoke_permission) diff --git a/docs/content/v2.25/api/ycql/ddl_create_table.md b/docs/content/v2.25/api/ycql/ddl_create_table.md new file mode 100644 index 000000000000..306c1b29e564 --- /dev/null +++ b/docs/content/v2.25/api/ycql/ddl_create_table.md @@ -0,0 +1,273 @@ +--- +title: CREATE TABLE statement [YCQL] +headerTitle: CREATE TABLE +linkTitle: CREATE TABLE +description: Use the CREATE TABLE statement to create a new table in a keyspace. +menu: + preview_api: + parent: api-cassandra + weight: 1240 +aliases: + - /preview/api/ycql/ddl_create_table +type: docs +--- + +## Synopsis + +Use the `CREATE TABLE` statement to create a new table in a keyspace. It defines the table name, column names and types, primary key, and table properties. + +## Syntax + +### Diagram + +#### create_table + +CREATETABLEIFNOTEXISTStable_name(table_schema)table_properties + +#### table_schema + +,column_namecolumn_typePRIMARYKEYSTATICPRIMARYKEY((,column_name),column_name) + +#### table_properties + +WITHANDproperty_name=property_literalCLUSTERINGORDERBY(,column_nameASCDESC)COMPACTSTORAGE + +### Grammar + +```ebnf +create_table ::= CREATE TABLE [ IF NOT EXISTS ] table_name + '(' table_element [ ',' table_element ...] ')' + [WITH table_properties]; + +table_element ::= table_column | table_constraints + +table_column ::= column_name column_type [ column_constraint ...] + +column_constraint ::= PRIMARY KEY | STATIC + +table_constraints ::= PRIMARY KEY '(' partition_key_column_list clustering_key_column_list ')' + +partition_key_column_list ::= '(' column_name [ ',' column_name ...] ')' | column_name + +clustering_key_column_list ::= [ ',' column_name ...] + +table_properties = [table_options] + [[AND] CLUSTERING ORDER BY '(' column_ordering_property [ ',' column_ordering_property ...] ')'] + [[AND] COMPACT STORAGE] + +table_options = property_name '=' property_literal [AND property_name '=' property_literal ...] + +column_ordering_property ::= column_name [ ASC | DESC ] +``` + +Where + +- `table_name`, `column_name`, and `property_name` are identifiers (`table_name` may be qualified with a keyspace name). +- `property_literal` is a literal of either [boolean](../type_bool), [text](../type_text), or [map](../type_collection) data type. + +## Semantics + +- An error is raised if `table_name` already exists in the associated keyspace unless the `IF NOT EXISTS` option is used. + +### PRIMARY KEY + +- Primary key must be defined in either `column_constraint` or `table_constraint` but not in both of them. +- Each row in a table is uniquely identified by its primary key. +- Primary key columns are either _partitioning_ columns or _clustering_ columns (described below). +- If primary key is set as a column constraint, then that column is the partition column and there are no clustering columns. +- If primary key is set as a table constraint then: + - The partition columns are given by the first entry in the primary key list: the nested column list (if given), otherwise the first column. + - The clustering columns are the rest of the columns in the primary key list (if any). +- Types `MAP`, `SET`, `LIST`, `JSONB`, `USER_DEFINED_TYPE` cannot be used in the primary key. 
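+
+For example, in the following sketch (the `sensor_readings` table is illustrative only), `device_id` and `metric` are the partition columns because they appear in the nested column list, and `ts` is the clustering column:
+
+```sql
+ycqlsh:example> CREATE TABLE sensor_readings(device_id INT,
+                                             metric TEXT,
+                                             ts TIMESTAMP,
+                                             value DOUBLE,
+                                             PRIMARY KEY((device_id, metric), ts));
+```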
+ +#### PARTITION KEY + +- Partition key is required and defines a split of rows into _partitions_. +- Rows that share the same partition key form a partition and will be colocated on the same replica node. + +#### CLUSTERING KEY + +- Clustering key is optional and defines an ordering for rows within a partition. +- Default ordering is ascending (`ASC`) but can be set for each clustering column as ascending or descending using the `CLUSTERING ORDER BY` table property. + +### STATIC COLUMNS + +- Columns declared as `STATIC` will share the same value for all rows within a partition (that is, rows having the same partition key). +- Columns in the primary key cannot be static. +- A table without clustering columns cannot have static columns (without clustering columns the primary key and the partition key are identical so static columns would be the same as regular columns). + +### *table_properties* + +- The `CLUSTERING ORDER BY` property can be used to set the ordering for each clustering column individually (default is `ASC`). +- The `default_time_to_live` property sets the default expiration time (TTL) in seconds for a table. The expiration time can be overridden by setting TTL for individual rows. The default value is `0` and means rows do not expire. +- The `transactions` property specifies if distributed transactions are enabled in the table. To enable distributed transactions, use `transactions = { 'enabled' : true }`. +- Use the `AND` operator to use multiple table properties. +- The other YCQL table properties are allowed in the syntax but are currently ignored internally (have no effect). +- The `TABLETS = ` property specifies the number of tablets to be used for the specified YCQL table. Setting this property overrides the value from the [`--yb_num_shards_per_tserver`](../../../reference/configuration/yb-tserver/#yb-num-shards-per-tserver) option. For an example, see [Create a table specifying the number of tablets](#create-a-table-specifying-the-number-of-tablets). +- `COMPACT STORAGE` is only for syntax compatibility with Cassandra. It doesn't affect the underlying storage. + +## Examples + +### Use column constraint to define primary key + +'user_id' is the partitioning column and there are no clustering columns. + +```sql +ycqlsh:example> CREATE TABLE users(user_id INT PRIMARY KEY, full_name TEXT); +``` + +### Use table constraint to define primary key + +'supplier_id' and 'device_id' are the partitioning columns and 'model_year' is the clustering column. 
+ +```sql +ycqlsh:example> CREATE TABLE devices(supplier_id INT, + device_id INT, + model_year INT, + device_name TEXT, + PRIMARY KEY((supplier_id, device_id), model_year)); +``` + +### Use column constraint to define a static column + +You can do this as follows: + +```sql +ycqlsh:example> CREATE TABLE items(supplier_id INT, + item_id INT, + supplier_name TEXT STATIC, + item_name TEXT, + PRIMARY KEY((supplier_id), item_id)); +``` + +```sql +ycqlsh:example> INSERT INTO items(supplier_id, item_id, supplier_name, item_name) + VALUES (1, 1, 'Unknown', 'Wrought Anvil'); +``` + +```sql +ycqlsh:example> INSERT INTO items(supplier_id, item_id, supplier_name, item_name) + VALUES (1, 2, 'Acme Corporation', 'Giant Rubber Band'); +``` + +```sql +ycqlsh:example> SELECT * FROM items; +``` + +```output + supplier_id | item_id | supplier_name | item_name +-------------+---------+------------------+------------------- + 1 | 1 | Acme Corporation | Wrought Anvil + 1 | 2 | Acme Corporation | Giant Rubber Band +``` + +### Use table property to define the order (ascending or descending) for clustering columns + +Timestamp column 'ts' will be stored in descending order (latest values first). + +```sql +ycqlsh:example> CREATE TABLE user_actions(user_id INT, + ts TIMESTAMP, + action TEXT, + PRIMARY KEY((user_id), ts)) + WITH CLUSTERING ORDER BY (ts DESC); +``` + +```sql +ycqlsh:example> INSERT INTO user_actions(user_id, ts, action) VALUES (1, '2000-12-2 12:30:15', 'log in'); +``` + +```sql +ycqlsh:example> INSERT INTO user_actions(user_id, ts, action) VALUES (1, '2000-12-2 12:30:25', 'change password'); +``` + +```sql +ycqlsh:example> INSERT INTO user_actions(user_id, ts, action) VALUES (1, '2000-12-2 12:30:35', 'log out'); +``` + +```sql +ycqlsh:example> SELECT * FROM user_actions; +``` + +```output + user_id | ts | action +---------+---------------------------------+----------------- + 1 | 2000-12-02 19:30:35.000000+0000 | log out + 1 | 2000-12-02 19:30:25.000000+0000 | change password + 1 | 2000-12-02 19:30:15.000000+0000 | log in +``` + +### Use table property to define the default expiration time for rows + +You can do this as follows: + +```sql +ycqlsh:example> CREATE TABLE sensor_data(sensor_id INT, + ts TIMESTAMP, + value DOUBLE, + PRIMARY KEY((sensor_id), ts)) + WITH default_time_to_live = 5; +``` + +First insert at time T (row expires at T + 5). + +```sql +ycqlsh:example> INSERT INTO sensor_data(sensor_id, ts, value) VALUES (1, '2017-10-1 11:22:31', 3.1); +``` + +Second insert 3 seconds later (row expires at T + 8). + +```sql +ycqlsh:example> INSERT INTO sensor_data(sensor_id, ts, value) VALUES (2, '2017-10-1 11:22:34', 3.4); +``` + +First select 3 seconds later (at time T + 6). + +```sql +ycqlsh:example> SELECT * FROM sensor_data; +``` + +```output + sensor_id | ts | value +-----------+---------------------------------+------- + 2 | 2017-10-01 18:22:34.000000+0000 | 3.4 +``` + +Second select 3 seconds later (at time T + 9). + +```sql +ycqlsh:example> SELECT * FROM sensor_data; +``` + +```output + sensor_id | ts | value +-----------+----+------- + +``` + +### Create a table specifying the number of tablets + +You can use the `CREATE TABLE` statement with the `WITH tablets = ` clause to specify the number of tablets for a table. This is useful to scale the table up or down based on requirements. For example, for smaller static tables, it may be wasteful to have a large number of shards (tablets). In that case, you can use this to reduce the number of tablets created for the table. 
Similarly, for a very large table, you can use this statement to presplit the table into a large number of shards to get improved performance. + +Note that YugabyteDB, by default, presplits a table in `yb_num_shards_per_tserver * num_of_tserver` shards. This clause can be used to override that setting on per-table basis. + +```sql +ycqlsh:example> CREATE TABLE tracking (id int PRIMARY KEY) WITH tablets = 10; +``` + +If you create an index for these tables, you can also specify the number of tablets for the index. + +You can also use `AND` to add other table properties, like in this example. + +```sql +ycqlsh:example> CREATE TABLE tracking (id int PRIMARY KEY) WITH tablets = 10 AND transactions = { 'enabled' : true }; +``` + +## See also + +- [`ALTER TABLE`](../ddl_alter_table) +- [`DELETE`](../dml_delete/) +- [`DROP TABLE`](../ddl_drop_table) +- [`INSERT`](../dml_insert) +- [`SELECT`](../dml_select/) +- [`UPDATE`](../dml_update/) diff --git a/docs/content/v2.25/api/ycql/ddl_create_type.md b/docs/content/v2.25/api/ycql/ddl_create_type.md new file mode 100644 index 000000000000..634f76480b65 --- /dev/null +++ b/docs/content/v2.25/api/ycql/ddl_create_type.md @@ -0,0 +1,87 @@ +--- +title: CREATE TYPE statement [YCQL] +headerTitle: CREATE TYPE +linkTitle: CREATE TYPE +description: Use the CREATE TYPE statement to create a new user-defined data type in a keyspace. +menu: + preview_api: + parent: api-cassandra + weight: 1250 +aliases: + - /preview/api/cassandra/ddl_create_type + - /preview/api/ycql/ddl_create_type +type: docs +--- + +## Synopsis + +Use the `CREATE TYPE` statement to create a new user-defined data type in a keyspace. It defines the name of the user-defined type and the names and data types for its fields. + +## Syntax + +### Diagram + +CREATETYPEIFNOTEXISTStype_name(,field_namefield_type) + +### Grammar + +```ebnf +create_type ::= CREATE TYPE [ IF NOT EXISTS ] type_name + (field_name field_type [ ',' field_name field_type ...]); +``` + +Where + +- `type_name` and `field_name` are identifiers (`type_name` may be qualified with a keyspace name). +- `field_type` is a data type. + +## Semantics + +- An error is raised if the specified `type_name` already exists in the associated keyspace unless the `IF NOT EXISTS` option is used. +- Each `field_name` must each be unique (a type cannot have two fields of the same name). +- Each `field_type` must be either a [non-parametric type](../#data-types) or a [frozen type](../type_frozen). + +## Examples + +Collection types must be frozen to be used inside a user-defined type. 
+
+```sql
+ycqlsh:example> CREATE TYPE person(first_name TEXT, last_name TEXT, emails FROZEN<LIST<TEXT>>);
+```
+
+```sql
+ycqlsh:example> DESCRIBE TYPE person;
+```
+
+```output
+CREATE TYPE example.person (
+    first_name text,
+    last_name text,
+    emails frozen<list<text>>
+);
+```
+
+```sql
+ycqlsh:example> CREATE TABLE employees(employee_id INT PRIMARY KEY, employee person);
+```
+
+```sql
+ycqlsh:example> INSERT INTO employees(employee_id, employee)
+                VALUES (1, {first_name : 'John', last_name : 'Doe', emails : ['jdoe@example.com']});
+```
+
+```sql
+ycqlsh:example> SELECT * FROM employees;
+```
+
+```output
+ employee_id | employee
+-------------+-----------------------------------------------------------------------
+           1 | {first_name: 'John', last_name: 'Doe', emails: ['jdoe@example.com']}
+
+```
+
+## See also
+
+- [`CREATE TABLE`](../ddl_create_table)
+- [`DROP TYPE`](../ddl_drop_type)
diff --git a/docs/content/v2.25/api/ycql/ddl_drop_index.md b/docs/content/v2.25/api/ycql/ddl_drop_index.md
new file mode 100644
index 000000000000..2196fda039f5
--- /dev/null
+++ b/docs/content/v2.25/api/ycql/ddl_drop_index.md
@@ -0,0 +1,58 @@
+---
+title: DROP INDEX statement [YCQL]
+headerTitle: DROP INDEX
+linkTitle: DROP INDEX
+description: Use the DROP INDEX statement to remove an index and all of its data from the database.
+menu:
+  preview_api:
+    parent: api-cassandra
+    weight: 1255
+aliases:
+  - /preview/api/cassandra/ddl_drop_index
+  - /preview/api/ycql/ddl_drop_index
+type: docs
+---
+
+## Synopsis
+
+Use the `DROP INDEX` statement to remove an index and all of its data from the database.
+
+## Syntax
+
+### Diagram
+
+DROPINDEXIFEXISTSindex_name
+
+### Grammar
+
+```ebnf
+drop_index ::= DROP INDEX [ IF EXISTS ] index_name;
+```
+
+Where
+
+- `index_name` is an identifier (possibly qualified with a keyspace name).
+
+## Semantics
+
+- An error is raised if the specified `index_name` does not exist unless `IF EXISTS` option is present.
+- Associated objects to `index_name` such as prepared statements will be eventually invalidated after the drop statement is completed.
+
+## Examples
+
+```sql
+ycqlsh:example> CREATE TABLE users(id INT PRIMARY KEY, name TEXT) WITH transactions = { 'enabled' : true };
+```
+
+```sql
+ycqlsh:example> CREATE INDEX users_by_name ON users(name);
+```
+
+```sql
+ycqlsh:example> DROP INDEX users_by_name;
+```
+
+## See also
+
+- [`CREATE TABLE`](../ddl_create_table)
+- [`CREATE INDEX`](../ddl_create_index/)
diff --git a/docs/content/v2.25/api/ycql/ddl_drop_keyspace.md b/docs/content/v2.25/api/ycql/ddl_drop_keyspace.md
new file mode 100644
index 000000000000..ceb8e9671051
--- /dev/null
+++ b/docs/content/v2.25/api/ycql/ddl_drop_keyspace.md
@@ -0,0 +1,59 @@
+---
+title: DROP KEYSPACE statement [YCQL]
+headerTitle: DROP KEYSPACE
+linkTitle: DROP KEYSPACE
+description: Use the DROP KEYSPACE statement to remove a keyspace from the system.
+menu:
+  preview_api:
+    parent: api-cassandra
+    weight: 1260
+aliases:
+  - /preview/api/cassandra/ddl_drop_keyspace
+  - /preview/api/ycql/ddl_drop_keyspace
+type: docs
+---
+
+## Synopsis
+
+Use the `DROP KEYSPACE` statement to remove a keyspace from the system.
+
+## Syntax
+
+### Diagram
+
+DROPKEYSPACESCHEMAIFEXISTSkeyspace_name
+
+### Grammar
+
+```ebnf
+drop_keyspace ::= DROP { KEYSPACE | SCHEMA } [ IF EXISTS ] keyspace_name;
+```
+
+Where
+
+- `keyspace_name` is an identifier.
+
+## Semantics
+
+- An error is raised if the specified `keyspace_name` does not exist unless `IF EXISTS` option is present.
+- An error is raised if the specified keyspace is non-empty (contains tables or types). + +## Examples + +```sql +ycqlsh> CREATE KEYSPACE example; +``` + +```sql +ycqlsh> DROP KEYSPACE example; +``` + +```sql +ycqlsh> DROP KEYSPACE IF EXISTS example; +``` + +## See also + +- [`ALTER KEYSPACE`](../ddl_alter_keyspace) +- [`CREATE KEYSPACE`](../ddl_create_keyspace) +- [`USE`](../ddl_use) diff --git a/docs/content/v2.25/api/ycql/ddl_drop_role.md b/docs/content/v2.25/api/ycql/ddl_drop_role.md new file mode 100644 index 000000000000..0cff9686adce --- /dev/null +++ b/docs/content/v2.25/api/ycql/ddl_drop_role.md @@ -0,0 +1,63 @@ +--- +title: DROP ROLE statement [YCQL] +headerTitle: DROP ROLE +linkTitle: DROP ROLE +description: Use the DROP ROLE statement to delete an existing role. +menu: + preview_api: + parent: api-cassandra + weight: 1265 +aliases: + - /preview/api/cassandra/ddl_drop_role + - /preview/api/ycql/ddl_drop_role +type: docs +--- + +## Synopsis + +Use the `DROP ROLE` statement to delete an existing role. + +This statement is enabled by setting the YB-TServer flag [`use_cassandra_authentication`](../../../reference/configuration/yb-tserver/#use-cassandra-authentication) to `true`. + +## Syntax + +### Diagram + +### drop_role + +DROPROLEIFEXISTSrole_name + +### Grammar + +```ebnf +drop_role ::= DROP ROLE [ IF EXISTS ] role_name +``` + +Where + +- `role_name` is a text identifier. + +## Semantics + +- An error is raised if `role_name` does not exist unless IF EXISTS option is present. +- Only a role with the `SUPERUSER` status can delete another `SUPERUSER` role. +- Only a client with the permission `DROP` on `ALL ROLES` or on the specified `role_name`, or with the `SUPERUSER` status can delete another role. + +## Examples + +```sql +ycqlsh:example> DROP ROLE role1; +``` + +```sql +ycqlsh:example> DROP ROLE IF EXISTS role2; +``` + +## See also + +- [`ALTER ROLE`](../ddl_alter_role) +- [`CREATE ROLE`](../ddl_drop_role) +- [`GRANT ROLE`](../ddl_grant_role) +- [`REVOKE ROLE`](../ddl_revoke_role) +- [`GRANT PERMISSION`](../ddl_grant_permission) +- [`REVOKE PERMISSION`](../ddl_revoke_permission) diff --git a/docs/content/v2.25/api/ycql/ddl_drop_table.md b/docs/content/v2.25/api/ycql/ddl_drop_table.md new file mode 100644 index 000000000000..e73975002c5a --- /dev/null +++ b/docs/content/v2.25/api/ycql/ddl_drop_table.md @@ -0,0 +1,58 @@ +--- +title: DROP TABLE statement [YCQL] +headerTitle: DROP TABLE +linkTitle: DROP TABLE +description: Use the DROP TABLE statement to remove a table and all of its data from the database. +menu: + preview_api: + parent: api-cassandra + weight: 1270 +aliases: + - /preview/api/cassandra/ddl_drop_table + - /preview/api/ycql/ddl_drop_table +type: docs +--- + +## Synopsis + +Use the `DROP TABLE` statement to remove a table and all of its data from the database. + +## Syntax + +### Diagram + +DROPTABLEIFEXISTStable_name + +### Grammar + +```ebnf +drop_table ::= DROP TABLE [ IF EXISTS ] table_name; +``` + +Where + +- `table_name` is an identifier (possibly qualified with a keyspace name). + +## Semantics + +- An error is raised if the specified `table_name` does not exist unless `IF EXISTS` option is present. +- Associated objects to `table_name` such as prepared statements will be eventually invalidated after the drop statement is completed. 
+ +## Examples + +```sql +ycqlsh:example> CREATE TABLE users(id INT PRIMARY KEY, name TEXT); +``` + +```sql +ycqlsh:example> DROP TABLE users; +``` + +## See also + +- [`ALTER TABLE`](../ddl_alter_table) +- [`CREATE TABLE`](../ddl_create_table) +- [`DELETE`](../dml_delete/) +- [`INSERT`](../dml_insert) +- [`SELECT`](../dml_select/) +- [`UPDATE`](../dml_update/) diff --git a/docs/content/v2.25/api/ycql/ddl_drop_type.md b/docs/content/v2.25/api/ycql/ddl_drop_type.md new file mode 100644 index 000000000000..a38741ec544c --- /dev/null +++ b/docs/content/v2.25/api/ycql/ddl_drop_type.md @@ -0,0 +1,54 @@ +--- +title: DROP TYPE statement [YCQL] +headerTitle: DROP TYPE +linkTitle: DROP TYPE +description: Use the DROP TYPE statement to remove an existing user-defined data type. +menu: + preview_api: + parent: api-cassandra + weight: 1280 +aliases: + - /preview/api/cassandra/ddl_drop_type + - /preview/api/ycql/ddl_drop_type +type: docs +--- + +## Synopsis + +Use the `DROP TYPE` statement to remove an existing user-defined data type. + +## Syntax + +### Diagram + +DROPTYPEIFEXISTStype_name + +### Grammar + +```ebnf +drop_type ::= DROP TYPE [ IF EXISTS ] type_name; +``` + +Where + +- `type_name` is an identifier (possibly qualified with a keyspace name). + +## Semantics + +- An error is raised if the specified `type_name` does not exist unless `IF EXISTS` option is used. +- A user-defined `type_name` cannot be dropped if it is currently used in a table or another type. + +## Examples + +```sql +ycqlsh:example> CREATE TYPE person(first_name TEXT, last_name TEXT, email TEXT); +``` + +```sql +ycqlsh:example> DROP TYPE person; +``` + +## See also + +- [`CREATE TABLE`](../ddl_create_table) +- [`DROP KEYSPACE`](../ddl_drop_keyspace) diff --git a/docs/content/v2.25/api/ycql/ddl_grant_permission.md b/docs/content/v2.25/api/ycql/ddl_grant_permission.md new file mode 100644 index 000000000000..dadce9c0238f --- /dev/null +++ b/docs/content/v2.25/api/ycql/ddl_grant_permission.md @@ -0,0 +1,119 @@ +--- +title: GRANT PERMISSION statement [YCQL] +headerTitle: GRANT PERMISSION +linkTitle: GRANT PERMISSION +description: Use the GRANT PERMISSION statement to grant a permission (or all the available permissions) to a role. +menu: + preview_api: + parent: api-cassandra + weight: 1281 +aliases: + - /preview/api/cassandra/ddl_grant_permission + - /preview/api/ycql/ddl_grant_permission +type: docs +--- + +## Synopsis + +Use the GRANT PERMISSION statement to grant a permission (or all the available permissions) to a role. + +When a database object is created (keyspace, table, or role), an automatic and explicit grant of all the permissions relevant to the object are granted to the role creating it. + +This statement is enabled by setting the YB-TServer flag [--use_cassandra_authentication](../../../reference/configuration/yb-tserver/#ycql) to `true`. 
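+
+For example, the automatic grant can be observed by querying the `system_auth.role_permissions` table after creating an object (a sketch; it assumes you are connected as a role that is allowed to create tables in an existing `performance_tests` keyspace):
+
+```sql
+ycqlsh:example> CREATE TABLE performance_tests.metrics(id INT PRIMARY KEY, value DOUBLE);
+ycqlsh:example> SELECT * FROM system_auth.role_permissions;
+```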
+ +## Syntax + +### Diagram + +#### grant_permission + +GRANTall_permissionspermissionONresourceTOrole_name + +#### all_permissions + +ALLPERMISSIONS + +#### permission + +CREATEALTERDROPSELECTMODIFYAUTHORIZEDESCRIBEEXECUTEPERMISSION + +#### resource + +ALLKEYSPACESROLESKEYSPACEkeyspace_nameTABLEtable_nameROLErole_name + +### Grammar + +```ebnf +grant_permission := GRANT all_permission | permission ON resource TO role_name; +all_permissions := ALL [ PERMISSIONS ] +permission := ( CREATE | ALTER | DROP | SELECT | MODIFY | AUTHORIZE | DESCRIBE | EXECUTE ) [ PERMISSION ] +resource := ALL ( KEYSPACES | ROLES ) | KEYSPACE keyspace_name | [ TABLE ] table_name | ROLE role_name; +``` + +Where + +- `keyspace_name`, `table_name`, and `role_name` are text identifiers (`table_name` may be qualified with a keyspace name). + +## Semantics + +- Permission `AUTHORIZE` on `ALL ROLES` or on the role being used in the statement is necessary. Otherwise, an unauthorized error will be returned. + +## Permissions + +This section describes the permissions (represented by `ALTER`, `AUTHORIZE`, `CREATE`, `DESCRIBE`, `DROP`, `MODIFY`, and `SELECT`) that are necessary to execute operations on the database objects. A permission can be granted on a specific object (represented by resources `KEYSPACE`, `TABLE`, and `ROLE`) or on a whole group of objects (represented by resources `ALL KEYSPACES`, and `ALL ROLES`). Some permissions are granted implicitly, which means that you will never see them listed when you query `system_auth.role_permissions` table. Implicitly granted permissions follow these rules: + +- Any permission granted on `ALL KEYSPACES` is implicitly granted on every keyspace and table in the database. +- Any permission granted on a specific `KEYSPACE` is implicitly granted to any table in that keyspace. +- Any permission granted on `ALL ROLES` is implicitly granted on every role. 
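+
+For example, granting `SELECT` on a keyspace implicitly lets the grantee read every table in that keyspace, including tables created later (a sketch; the `qa` role and the `performance_tests` keyspace are assumed to already exist):
+
+```sql
+ycqlsh:example> GRANT SELECT ON KEYSPACE performance_tests TO qa;
+```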
+ +### Permissions needed to execute specific operations on a database object + +Operation| Permission| Resource| +---------|-----------|---------| +`ALTER KEYSPACE`| `ALTER`| `ALL KEYSPACES`, or `KEYSPACE` +`ALTER ROLE`| `ALTER`| `ALL ROLES`, or `ROLE` +`ALTER TABLE`| `ALTER`| `ALL KEYSPACES`, `KEYSPACE`, or `TABLE` +`CREATE KEYSPACE`| `CREATE`| `ALL KEYSPACES` +`CREATE ROLE`| `CREATE`| `ALL ROLES` +`CREATE TABLE`| `CREATE`| `ALL KEYSPACES`, `KEYSPACE` +`DROP KEYSPACE`| `DROP`| `ALL KEYSPACES`, or `KEYSPACE` +`DROP ROLE`| `DROP`| `ALL ROLES`, or `ROLE` +`DROP TABLE`| `DROP`| `ALL KEYSPACES`, `KEYSPACE`, or `TABLE` +`GRANT PERMISSION` or `REVOKE PERMISSION` on `ALL KEYSPACES`| `AUTHORIZE`| `ALL KEYSPACES` +`GRANT PERMISSION` or `REVOKE PERMISSION` on `ALL ROLES`| `AUTHORIZE`| `ALL ROLES` +`GRANT PERMISSION` or `REVOKE PERMISSION` on a keyspace| `AUTHORIZE`| `ALL KEYSPACES`, or `KEYSPACE` +`GRANT PERMISSION` or `REVOKE PERMISSION` on a role| `AUTHORIZE` | `ALL ROLES`, or `ROLE` +`GRANT PERMISSION` or `REVOKE PERMISSION` on a table| `AUTHORIZE`| `ALL KEYSPACES`, `KEYSPACE`, or `TABLE` +`GRANT ROLE` or `REVOKE ROLE`| `AUTHORIZE` | `ALL ROLES`, or `ROLE` +`INSERT`, `UPDATE`, `DELETE`, or `TRUNCATE`| `MODIFY`| `ALL KEYSPACES`, `KEYSPACE`, or `TABLE` +`LIST ROLES` (not yet implemented)| `DESCRIBE`| `ALL ROLES` +`SELECT`| `SELECT`| `ALL KEYSPACES`, `KEYSPACE`, or `TABLE` + +## Examples + +### Grant `MODIFY` permission on a table so role `qa` can insert rows into a table + +```sql +ycqlsh:example> GRANT MODIFY ON TABLE performance_tests.metrics TO qa; +``` + +### Grant `SELECT` permission on a table so role `qa` can read the table + +```sql +ycqlsh:example> GRANT SELECT ON performance_tests.metrics TO qa; +``` + +### Grant `CREATE` permission on `ALL KEYSPACES` so role `tests` can create new keyspaces + +```sql +ycqlsh:example> GRANT CREATE ON ALL KEYSPACES TO tests; +``` + +## See also + +- [`ALTER ROLE`](../ddl_alter_role) +- [`DROP ROLE`](../ddl_drop_role) +- [`CREATE ROLE`](../ddl_create_role) +- [`REVOKE ROLE`](../ddl_revoke_role) +- [`GRANT PERMISSION`](../ddl_grant_permission) +- [`REVOKE PERMISSION`](../ddl_revoke_permission) diff --git a/docs/content/v2.25/api/ycql/ddl_grant_role.md b/docs/content/v2.25/api/ycql/ddl_grant_role.md new file mode 100644 index 000000000000..114d9b2f74b7 --- /dev/null +++ b/docs/content/v2.25/api/ycql/ddl_grant_role.md @@ -0,0 +1,61 @@ +--- +title: GRANT ROLE statement [YCQL] +headerTitle: GRANT ROLE +linkTitle: GRANT ROLE +description: Use the GRANT ROLE statement to grant a role's permissions and SUPERUSER status to another role. +menu: + preview_api: + parent: api-cassandra + weight: 1282 +aliases: + - /preview/api/cassandra/ddl_grant_role + - /preview/api/ycql/ddl_grant_role +type: docs +--- + +## Synopsis + +Use the `GRANT ROLE` statement to grant a role's permissions and SUPERUSER status to another role. More than one role can be granted to another role, and the receiving role will possess the union of all the permissions from the roles granted to it (either directly of indirectly through inheritance) plus the SUPERUSER status if any of the roles granted to it has it. For example, if A is granted to B, and B is granted to C, C will be granted all the permissions from A and B, and if either A or B is a SUPERUSER, then C will also be a SUPERUSER. + +Granted roles form an acyclic graph, in other words, a role cannot be granted to any of the roles granted to it either directly or indirectly. 
For example, if A is granted to B, and B granted to C, C cannot be granted to neither A, B, nor C. + +This statement is enabled by setting the YB-TServer flag [--use_cassandra_authentication](../../../reference/configuration/yb-tserver/#ycql) to `true`. + +## Syntax + +### Diagram + +#### grant_role + +GRANTrole_nameTOrole_name + +### Grammar + +```ebnf +grant_role ::= GRANT ROLE role_name TO role_name +``` + +Where + +- `role_name` is a text identifier. + +## Semantics + +- Both roles must exist or an error will be raised. +- Permission `AUTHORIZE` on `ALL ROLES` or on the roles being used in the statement is necessary. Otherwise, an unauthorized error will be returned. +- If a role is granted to any role granted to it (either directly or indirectly), an error will be raised. + +## Examples + +```sql +ycqlsh:example> GRANT ROLE eng to robert; +``` + +## See also + +- [`ALTER ROLE`](../ddl_alter_role) +- [`DROP ROLE`](../ddl_drop_role) +- [`CREATE ROLE`](../ddl_create_role) +- [`REVOKE ROLE`](../ddl_revoke_role) +- [`GRANT PERMISSION`](../ddl_grant_permission) +- [`REVOKE PERMISSION`](../ddl_revoke_permission) diff --git a/docs/content/v2.25/api/ycql/ddl_revoke_permission.md b/docs/content/v2.25/api/ycql/ddl_revoke_permission.md new file mode 100644 index 000000000000..392ab683fe68 --- /dev/null +++ b/docs/content/v2.25/api/ycql/ddl_revoke_permission.md @@ -0,0 +1,74 @@ +--- +title: REVOKE PERMISSION statement [YCQL] +headerTitle: REVOKE PERMISSION +linkTitle: REVOKE PERMISSION +description: Use the REVOKE PERMISSION statement to revoke a permission (or all the granted permissions) from a role. +menu: + preview_api: + parent: api-cassandra + weight: 1283 +aliases: + - /preview/api/cassandra/ddl_revoke_permission + - /preview/api/ycql/ddl_revoke_permission +type: docs +--- + +## Synopsis + +Use the `REVOKE PERMISSION` statement to revoke a permission (or all the granted permissions) from a role. + +When a database object is deleted (keyspace, table, or role), all the permissions on that object are automatically deleted. + +This statement is enabled by setting the YB-TServer flag [--use_cassandra_authentication](../../../reference/configuration/yb-tserver/#ycql) to `true`. + +## Syntax + +### Diagram + +#### revoke_permission + +REVOKEall_permissionspermissionONresourceFROMrole_name + +#### all_permissions + +ALLPERMISSIONS + +#### permission + +CREATEALTERDROPSELECTMODIFYAUTHORIZEDESCRIBEEXECUTEPERMISSION + +#### resource + +ALLKEYSPACESROLESKEYSPACEkeyspace_nameTABLEtable_nameROLErole_name + +### Grammar + +```ebnf +revoke_permission := REVOKE all_permission | permission ON resource FROM role_name; +all_permissions := ALL [ PERMISSIONS ] +permission := ( CREATE | ALTER | DROP | SELECT | MODIFY | AUTHORIZE | DESCRIBE | EXECUTE ) [ PERMISSION ] +resource := ALL ( KEYSPACES | ROLES ) | KEYSPACE keyspace_name | [ TABLE ] table_name | ROLE role_name; +``` + +Where + +- `keyspace_name`, `table_name`, and `role_name` are text identifiers (`table_name` may be qualified with a keyspace name). + +## Semantics + +Permission `AUTHORIZE` on `ALL ROLES` or on the role being used in the statement is necessary. Otherwise, an unauthorized error will be returned. 
+ +## Examples + +```sql +ycqlsh:example> REVOKE CREATE ON KEYSPACE qa FROM fred; +``` + +## See also + +- [`ALTER ROLE`](../ddl_alter_role) +- [`DROP ROLE`](../ddl_drop_role) +- [`CREATE ROLE`](../ddl_create_role) +- [`GRANT ROLE`](../ddl_grant_role) +- [`REVOKE ROLE`](../ddl_revoke_role) +- [`GRANT PERMISSION`](../ddl_grant_permission) diff --git a/docs/content/v2.25/api/ycql/ddl_revoke_role.md b/docs/content/v2.25/api/ycql/ddl_revoke_role.md new file mode 100644 index 000000000000..736204af108e --- /dev/null +++ b/docs/content/v2.25/api/ycql/ddl_revoke_role.md @@ -0,0 +1,59 @@ +--- +title: REVOKE ROLE statement [YCQL] +headerTitle: REVOKE ROLE +linkTitle: REVOKE ROLE +description: Use the `REVOKE ROLE` statement to revoke a role (which represents a group of permissions and the SUPERUSER status) from another role. +menu: + preview_api: + parent: api-cassandra + weight: 1284 +aliases: + - /preview/api/cassandra/ddl_revoke_role + - /preview/api/ycql/ddl_revoke_role +type: docs +--- + +## Synopsis + +Use the `REVOKE ROLE` statement to revoke a role (which represents a group of permissions and the SUPERUSER status) from another role. + +This statement is enabled by setting the YB-TServer flag [--use_cassandra_authentication](../../../reference/configuration/yb-tserver/#ycql) to `true`. + +## Syntax + +### Diagram + +#### revoke_role + +REVOKErole_nameFROMrole_name + +### Grammar + +```ebnf +revoke_role ::= REVOKE ROLE role_name FROM role_name +``` + +Where + +- `role_name` is a text identifier. + +## Semantics + +- Both roles must exist or an error will be raised. +- Permission `AUTHORIZE` on `ALL ROLES` or on the roles being used in the statement is necessary. Otherwise, an unauthorized error will be returned. +- You cannot revoke a role that hasn't been granted or an error will be raised. + +## Examples + +```sql +ycqlsh:example> REVOKE ROLE project_y from diana; +``` + +## See also + +- [`ALTER ROLE`](../ddl_alter_role) +- [`DROP ROLE`](../ddl_drop_role) +- [`CREATE ROLE`](../ddl_create_role) +- [`GRANT ROLE`](../ddl_grant_role) +- [`GRANT PERMISSION`](../ddl_grant_permission) +- [`REVOKE PERMISSION`](../ddl_revoke_permission) diff --git a/docs/content/v2.25/api/ycql/ddl_use.md b/docs/content/v2.25/api/ycql/ddl_use.md new file mode 100644 index 000000000000..33f67bd3b19a --- /dev/null +++ b/docs/content/v2.25/api/ycql/ddl_use.md @@ -0,0 +1,89 @@ +--- +title: USE statement [YCQL] +headerTitle: USE +linkTitle: USE +description: Use the USE statement to specify a default keyspace for the current client session. +menu: + preview_api: + parent: api-cassandra + weight: 1290 +aliases: + - /preview/api/cassandra/ddl_use + - /preview/api/ycql/ddl_use +type: docs +--- + +## Synopsis + +Use the `USE` statement to specify a default keyspace for the current client session. When a database object (such as [table](../ddl_create_table) or [type](../ddl_create_type)) name does not identify a keyspace, this default keyspace is used. + +## Syntax + +### Diagram + +USEkeyspace_name + +### Grammar + +```ebnf +use_keyspace ::= USE keyspace_name; +``` + +Where + +- `keyspace_name` must be an identifier that cannot be any reserved keyword and cannot contains whitespaces, or it has to be double-quoted. + +## Semantics + +- If the specified keyspace does not exist, an error is raised. +- Any unqualified table or type name will use the current default keyspace (or raise an error if no keyspace is set). 
+ +## Examples + +### Create and use keyspaces + +```sql +ycqlsh> CREATE KEYSPACE example; +``` + +```sql +ycqlsh> CREATE KEYSPACE other_keyspace; +``` + +```sql +ycqlsh> USE example; +``` + +### Create a table in the current keyspace + +``` sql +ycqlsh:example> CREATE TABLE test(id INT PRIMARY KEY); +ycqlsh:example> INSERT INTO test(id) VALUES (1); +ycqlsh:example> SELECT * FROM test; +``` + +```output + id +---- + 1 +``` + +### Create a table in another keyspace + +``` sql +ycqlsh:example> CREATE TABLE other_keyspace.test(id INT PRIMARY KEY); +ycqlsh:example> INSERT INTO other_keyspace.test(id) VALUES (2); +ycqlsh:example> SELECT * FROM other_keyspace.test; +``` + +```output + id +---- + 2 +``` + +## See also + +- [`ALTER KEYSPACE`](../ddl_alter_keyspace) +- [`CREATE KEYSPACE`](../ddl_create_keyspace) +- [`DROP KEYSPACE`](../ddl_drop_keyspace) diff --git a/docs/content/v2.25/api/ycql/dml_delete.md b/docs/content/v2.25/api/ycql/dml_delete.md new file mode 100644 index 000000000000..4ad9057a75c7 --- /dev/null +++ b/docs/content/v2.25/api/ycql/dml_delete.md @@ -0,0 +1,294 @@ +--- +title: DELETE statement [YCQL] +headerTitle: DELETE +linkTitle: DELETE +description: Use the DELETE statement to remove rows from a specified table that meet a given condition. +menu: + preview_api: + parent: api-cassandra + weight: 1330 +aliases: + - /preview/api/cassandra/ddl_delete + - /preview/api/ycql/ddl_delete +type: docs +--- + +## Synopsis + +Use the `DELETE` statement to remove rows from a specified table that meet a given condition. + +## Syntax + +### Diagram + +DELETEFROMtable_nameUSINGTIMESTAMPtimestamp_expressionWHEREwhere_expressionIFNOTEXISTSif_expressionRETURNS STATUS AS ROW + +### Grammar + +```ebnf +delete ::= DELETE FROM table_name + [ USING TIMESTAMP timestamp_expression ] WHERE + where_expression [ IF { [ NOT ] EXISTS | if_expression } ] + [ RETURNS STATUS AS ROW ] +``` + +Where + +- `table_name` is an identifier (possibly qualified with a keyspace name). +- Restrictions on `where_expression` and `if_expression` are covered in the Semantics section. +- See [Expressions](..#expressions) for more information on syntax rules. + +## Semantics + +- An error is raised if the specified `table_name` does not exist. +- The `where_expression` and `if_expression` must evaluate to [boolean](../type_bool) values. +- The `USING TIMESTAMP` clause indicates you would like to perform the DELETE as if it was done at the + timestamp provided by the user. The timestamp is the number of microseconds since epoch. +- **Note**: You should either use the `USING TIMESTAMP` clause in all of your statements or none of + them. Using a mix of statements where some have `USING TIMESTAMP` and others do not will lead to + very confusing results. +- `DELETE` is always done at `QUORUM` consistency level irrespective of setting. + +### WHERE Clause + +- The `where_expression` must specify conditions for all primary-key columns. +- The `where_expression` must not specify conditions for any regular columns. +- The `where_expression` can only apply `AND` and `=` operators. Other operators are not yet supported. + +### IF Clause + +- The `if_expression` can only apply to non-key columns (regular columns). +- The `if_expression` can contain any logical and boolean operators. +- Deleting only some column values from a row is not yet supported. +- `IF EXISTS` and `IF NOT EXISTS` options are mostly for symmetry with the [`INSERT`](../dml_insert) and [`UPDATE`](../dml_update/) commands. 
+ - `IF EXISTS` works like a normal delete but additionally returns whether the delete was applied (a row was found with that primary key). + - `IF NOT EXISTS` is effectively a no-op since rows that do not exist cannot be deleted (but returns whether no row was found with that primary key). + +### `USING` Clause + +The `timestamp_expression` must be an integer value (or a bind variable marker for prepared statements). + +## Examples + +### Delete a row from a table + +```sql +ycqlsh:example> CREATE TABLE employees(department_id INT, + employee_id INT, + name TEXT, + PRIMARY KEY(department_id, employee_id)); +``` + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 1, 'John'); +``` + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 2, 'Jane'); +``` + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (2, 1, 'Joe'); +``` + +```sql +ycqlsh:example> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 1 | 1 | John + 1 | 2 | Jane + 2 | 1 | Joe +``` + +Delete statements identify rows by the primary key columns. + +```sql +ycqlsh:example> DELETE FROM employees WHERE department_id = 1 AND employee_id = 1; +``` + +Deletes on non-existent rows are no-ops. + +```sql +ycqlsh:example> DELETE FROM employees WHERE department_id = 3 AND employee_id = 1; +``` + +```sql +ycqlsh:example> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 1 | 2 | Jane + 2 | 1 | Joe +``` + +### Conditional delete using the `IF` clause + +'IF' clause conditions will return whether they were applied or not. + +```sql +ycqlsh:example> DELETE FROM employees WHERE department_id = 2 AND employee_id = 1 IF name = 'Joe'; +``` + +```output + [applied] +----------- + True +``` + +```sql +ycqlsh:example> DELETE FROM employees WHERE department_id = 3 AND employee_id = 1 IF EXISTS; +``` + +```output + [applied] +----------- + False +``` + +```sql +ycqlsh:example> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 1 | 2 | Jane +``` + +### Delete several rows with the same partition key + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 1, 'John'); +``` + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (2, 1, 'Joe'); +``` + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (2, 2, 'Jack'); +``` + +```sql +ycqlsh:example> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 1 | 1 | John + 1 | 2 | Jane + 2 | 1 | Joe + 2 | 2 | Jack +``` + +Delete all entries for a partition key. + +```sql +ycqlsh:example> DELETE FROM employees WHERE department_id = 1; +``` + +```sql +ycqlsh:example> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 2 | 1 | Joe + 2 | 2 | Jack +``` + +Delete a range of entries within a partition key. 
+ +```sql +ycqlsh:example> DELETE FROM employees WHERE department_id = 2 AND employee_id >= 2 AND employee_id < 4; +``` + +```sql +ycqlsh:example> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 2 | 1 | Joe +``` + +### Delete with the `USING TIMESTAMP` clause + +You can do this as follows: + +```sql +ycqlsh:foo> INSERT INTO employees(department_id, employee_id, name) VALUES (4, 4, 'Ted') USING TIMESTAMP 1000; +``` + +```sql +ycqlsh:foo> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 4 | 4 | Ted + 2 | 1 | Joe + +(2 rows) +``` + +```sql +ycqlsh:foo> DELETE FROM employees USING TIMESTAMP 500 WHERE department_id = 4 AND employee_id = 4; +``` + +Not applied since timestamp is lower than 1000 + +```sql +ycqlsh:foo> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 4 | 4 | Ted + 2 | 1 | Joe + +(2 rows) +``` + +```sql +ycqlsh:foo> DELETE FROM employees USING TIMESTAMP 1500 WHERE department_id = 4 AND employee_id = 4; +``` + +Applied since timestamp is higher than 1000. + +```sql +ycqlsh:foo> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 2 | 1 | Joe + +(1 rows) +``` + +### RETURNS STATUS AS ROW + +When executing a batch in YCQL, the protocol returns only one error or return status. The `RETURNS STATUS AS ROW` feature addresses this limitation and adds a status row for each statement. + +See examples in [batch docs](../batch#row-status). + +## See also + +- [`CREATE TABLE`](../ddl_create_table) +- [`INSERT`](../dml_insert) +- [`SELECT`](../dml_select/) +- [`UPDATE`](../dml_update/) +- [`TRUNCATE`](../dml_truncate) +- [`Expression`](..#expressions) diff --git a/docs/content/v2.25/api/ycql/dml_insert.md b/docs/content/v2.25/api/ycql/dml_insert.md new file mode 100644 index 000000000000..aa8efabda674 --- /dev/null +++ b/docs/content/v2.25/api/ycql/dml_insert.md @@ -0,0 +1,275 @@ +--- +title: INSERT statement [YCQL] +headerTitle: INSERT +linkTitle: INSERT +description: Use the INSERT statement to add a row to a specified table. +menu: + preview_api: + parent: api-cassandra + weight: 1300 +aliases: + - /preview/api/cassandra/dml_insert + - /preview/api/ycql/dml_insert +type: docs +--- + +## Synopsis + +Use the `INSERT` statement to add a row to a specified table. + +## Syntax + +### Diagram + +INSERTINTOtable_name(,column_name)VALUES(,expression)IFNOTEXISTSif_expressionUSINGusing_expressionRETURNS STATUS AS ROW + +### using_expression + +```ebnf +using_expression = ttl_or_timestamp_expression { 'AND' ttl_or_timestamp_expression }; +``` + +ANDttl_or_timestamp_expression + +### ttl_or_timestamp_expression + +```ebnf +ttl_or_timestamp_expression = 'TTL' ttl_expression | 'TIMESTAMP' timestamp_expression; +``` + +TTLttl_expressionTIMESTAMPtimestamp_expression + +### Grammar + +```ebnf +insert ::= INSERT INTO table_name ( column_name [ , ... ] ) VALUES ( + expression [ , ... ] ) + [ IF { [ NOT ] EXISTS | if_expression } ] + [ USING using_expression ] + [ RETURNS STATUS AS ROW ] +``` + +Where + +- `table_name` and `column` are identifiers (`table_name` may be qualified with a keyspace name). +- `value` can be any expression although Apache Cassandra requires that `value`s must be literals. +- Restrictions for `if_expression` and `ttl_expression` are covered in the Semantics section. 
+- See [Expressions](..#expressions) for more information on syntax rules. + +## Semantics + +- An error is raised if the specified `table_name` does not exist. +- The columns list must include all primary key columns. +- The `USING TIMESTAMP` clause indicates you would like to perform the INSERT as if it was done at the + timestamp provided by the user. The timestamp is the number of microseconds since epoch. +- By default `INSERT` has `upsert` semantics, that is, if the row already exists, it behaves like an `UPDATE`. If pure + `INSERT` semantics is desired then the `IF NOT EXISTS` clause can be used to make sure an existing row is not overwritten by the `INSERT`. +- **Note**: You should either use the `USING TIMESTAMP` clause in all of your statements or none of + them. Using a mix of statements where some have `USING TIMESTAMP` and others do not will lead to + very confusing results. +- Inserting rows with TTL is not supported on tables with [transactions enabled](./../ddl_create_table#table-properties-1). +- `INSERT` is always done at `QUORUM` consistency level irrespective of setting. + +### `VALUES` clause + +- The values list must have the same length as the columns list. +- Each value must be convertible to its corresponding (by position) column type. +- Each value literal can be an expression that evaluates to a simple value. + +### `IF` clause + +- The `if_expression` can only apply to non-key columns (regular columns). +- The `if_expression` can contain any logical and boolean operators. + +### `USING` clause + +- `ttl_expression` must be an integer value (or a bind variable marker for prepared statements). +- `timestamp_expression` must be an integer value (or a bind variable marker for prepared statements). + +## Examples + +### Insert a row into a table + +```sql +ycqlsh:example> CREATE TABLE employees(department_id INT, + employee_id INT, + name TEXT, + PRIMARY KEY(department_id, employee_id)); +``` + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 1, 'John'); +``` + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 2, 'Jane'); +``` + +```sql +ycqlsh:example> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 1 | 1 | John + 1 | 2 | Jane +``` + +### Conditional insert using the `IF` clause + +Example 1 + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (2, 1, 'Joe') IF name = null; +``` + +```output + [applied] +----------- + True +``` + +Example 2 + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (2, 1, 'Jack') IF NOT EXISTS; +``` + +```output + [applied] +----------- + False +``` + +Example 3 + +```sql +ycqlsh:example> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 2 | 1 | Joe + 1 | 1 | John + 1 | 2 | Jane +``` + +### Insert a row with expiration time using the `USING TTL` clause + +You can do this as follows: + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (2, 2, 'Jack') USING TTL 10; +``` + +Now query the employees table. + +```sql +ycqlsh:example> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 2 | 1 | Joe + 2 | 2 | Jack + 1 | 1 | John + 1 | 2 | Jane +``` + +Again query the employees table after 11 seconds or more. 
+ +```sql +ycqlsh:example> SELECT * FROM employees; -- 11 seconds after the insert. +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 2 | 1 | Joe + 1 | 1 | John + 1 | 2 | Jane +``` + +### Insert a row with `USING TIMESTAMP` clause + +#### Insert a row with a low timestamp + +```sql +ycqlsh:foo> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 3, 'Jeff') USING TIMESTAMP 1000; +``` + +Now query the employees table. + +```sql +ycqlsh:foo> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 1 | 1 | John + 1 | 2 | Jane + 1 | 3 | Jeff + 2 | 1 | Joe + +(4 rows) +``` + +#### Overwrite the row with a higher timestamp + +```sql +ycqlsh:foo> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 3, 'Jerry') USING TIMESTAMP 2000; +``` + +```sql +ycqlsh:foo> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------- + 1 | 1 | John + 1 | 2 | Jane + 1 | 3 | Jerry + 2 | 1 | Joe + +(4 rows) +``` + +#### Try to overwrite the row with a lower timestamp + +```sql +ycqlsh:foo> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 3, 'James') USING TIMESTAMP 1500; +``` + +```sql +ycqlsh:foo> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------- + 1 | 1 | John + 1 | 2 | Jane + 1 | 3 | Jerry + 2 | 1 | Joe + +(4 rows) +``` + +### RETURNS STATUS AS ROW + +When executing a batch in YCQL, the protocol returns only one error or return status. The `RETURNS STATUS AS ROW` feature addresses this limitation and adds a status row for each statement. + +See examples in [batch docs](../batch#row-status). + +## See also + +- [`CREATE TABLE`](../ddl_create_table) +- [`DELETE`](../dml_delete/) +- [`SELECT`](../dml_select/) +- [`UPDATE`](../dml_update/) +- [`Expression`](..#expressions) diff --git a/docs/content/v2.25/api/ycql/dml_select.md b/docs/content/v2.25/api/ycql/dml_select.md new file mode 100644 index 000000000000..fa92929c997c --- /dev/null +++ b/docs/content/v2.25/api/ycql/dml_select.md @@ -0,0 +1,315 @@ +--- +title: SELECT statement [YCQL] +headerTitle: SELECT +linkTitle: SELECT +description: Use the SELECT statement to retrieve (part of) rows of specified columns that meet a given condition from a table. +menu: + preview_api: + parent: api-cassandra + weight: 1310 +aliases: + - /preview/api/cassandra/dml_select + - /preview/api/ycql/dml_select +type: docs +--- + +## Synopsis + +Use the `SELECT` statement to retrieve (part of) rows of specified columns that meet a given condition from a table. It specifies the columns to be retrieved, the name of the table, and the condition each selected row must satisfy. + +## Syntax + +### Diagram + +#### select + +SELECTDISTINCT*,column_nameFROMtable_nameWHEREwhere_expressionALLOW FILTERINGIFif_expressionORDER BYorder_expressionLIMITlimit_expressionOFFSEToffset_expression + +#### order_expression + +(,column_nameASCDESC) + +### Grammar + +```ebnf +select ::= SELECT [ DISTINCT ] { * | column_name [ , column_name ... ] } + FROM table_name + [ WHERE where_expression ] + [ IF where_expression ] + [ ORDER BY order_expression ] + [ LIMIT limit_expression ] [ OFFSET offset_expression ] + +order_expression ::= ( { column_name [ ASC | DESC ] } [ , ... ] ) +``` + +Where + +- `table_name` and `column_name` are identifiers (`table_name` may be qualified with a keyspace name). 
+- `limit_expression` is an integer literal (or a bind variable marker for prepared statements).
+- Restrictions for `where_expression` are discussed in the Semantics section.
+- See [Expressions](..#expressions) for more information on syntax rules.
+
+## Semantics
+
+- An error is raised if the specified `table_name` does not exist.
+- `SELECT DISTINCT` can only be used for partition columns or static columns.
+- `*` means all columns of the table will be retrieved.
+- The `LIMIT` clause sets the maximum number of results (rows) to be returned.
+- The `OFFSET` clause sets the number of rows to be skipped before returning results.
+- `ALLOW FILTERING` is provided for syntax compatibility with Cassandra. You can always filter on all columns.
+- Reads default to the `QUORUM` consistency level and are served from the tablet leader.
+- To read from followers, use the `ONE` consistency level.
+- To benefit from local reads, in addition to specifying a consistency level of `ONE`, set the `region` in the client driver to indicate where the request is coming from; it should match the `--placement_region` argument of the yb-tservers in that region.
+
+### `ORDER BY` clause
+
+- The `ORDER BY` clause sets the order for the returned results.
+- Only clustering columns are allowed in the `order_expression`.
+- For a given column, `DESC` means descending order and `ASC` or omitted means ascending order.
+- Currently, only two overall orderings are allowed: the clustering order from the `CREATE TABLE` statement (forward scan) or its opposite (reverse scan).
+
+### `WHERE` clause
+
+- The `where_expression` must evaluate to boolean values.
+- The `where_expression` can specify conditions on any columns including partition, clustering, and regular columns.
+- The `where_expression` has a restricted list of operators.
+
+  - Only `=`, `!=`, `IN` and `NOT IN` operators can be used for conditions on partition columns.
+  - Only operators `=`, `!=`, `<`, `<=`, `>`, `>=`, `IN` and `NOT IN` can be used for conditions on clustering and regular columns.
+  - Only the `IN` operator can be used for conditions on tuples of clustering columns.
+
+### `IF` clause
+
+- The `if_expression` must evaluate to boolean values.
+- The `if_expression` supports any combination of the available boolean and logical operators.
+- The `if_expression` can only specify conditions for non-primary-key columns, although it can be used on a key column of a secondary index.
+- While the WHERE condition is used to generate an efficient query plan, the IF condition is not. All rows that satisfy the WHERE condition are read from the database before the IF condition is applied to filter out unwanted data. In the following example, although the two queries yield the same result set, the SELECT with the WHERE clause uses an index scan while the SELECT with the IF clause uses a full scan.
+
+```cql
+SELECT * FROM a_table WHERE key = 'my_key';
+SELECT * FROM a_table IF key = 'my_key';
+```
+
+{{< note title="Note" >}}
+While the where clause allows a wide range of operators, the exact conditions used in the where clause have significant performance considerations (especially for large datasets).
+Some best practices are:
+
+- Use equality conditions on all partition columns (to fix the value of the partition key).
+- Use comparison operators on the clustering columns (tighter restrictions are more valuable for left-most clustering columns).
+- Generally, the closer a column is to the beginning of the primary key, the higher the performance gain for setting tighter restrictions on it.
+ +Ideally, these performance considerations should be taken into account when creating the table schema.{{< /note >}} + +## Examples + +### Select all rows from a table + +```sql +ycqlsh:example> CREATE TABLE employees(department_id INT, + employee_id INT, + dept_name TEXT STATIC, + employee_name TEXT, + PRIMARY KEY(department_id, employee_id)); +``` + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, dept_name, employee_name) + VALUES (1, 1, 'Accounting', 'John'); +``` + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, dept_name, employee_name) + VALUES (1, 2, 'Accounting', 'Jane'); +``` + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, dept_name, employee_name) + VALUES (1, 3, 'Accounting', 'John'); +``` + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, dept_name, employee_name) + VALUES (2, 1, 'Marketing', 'Joe'); +``` + +```sql +ycqlsh:example> SELECT * FROM employees; +``` + +```output + department_id | employee_id | dept_name | employee_name +---------------+-------------+------------+--------------- + 1 | 1 | Accounting | John + 1 | 2 | Accounting | Jane + 1 | 3 | Accounting | John + 2 | 1 | Marketing | Joe +``` + +### Select with limit + +```sql +ycqlsh:example> SELECT * FROM employees LIMIT 2; +``` + +```output + department_id | employee_id | dept_name | employee_name +---------------+-------------+------------+--------------- + 1 | 1 | Accounting | John + 1 | 2 | Accounting | Jane +``` + +### Select with offset + +```sql +ycqlsh:example> SELECT * FROM employees LIMIT 2 OFFSET 1; +``` + +```output + department_id | employee_id | dept_name | employee_name +---------------+-------------+------------+--------------- + 1 | 2 | Accounting | Jane + 1 | 3 | Accounting | John +``` + +### Select distinct values + +```sql +ycqlsh:example> SELECT DISTINCT dept_name FROM employees; +``` + +```output + dept_name +------------ + Accounting + Marketing +``` + +### Select with a condition on the partitioning column + +```sql +ycqlsh:example> SELECT * FROM employees WHERE department_id = 2; +``` + +```output + department_id | employee_id | dept_name | employee_name +---------------+-------------+-----------+--------------- + 2 | 1 | Marketing | Joe +``` + +### Select with condition on the clustering column + +```sql +ycqlsh:example> SELECT * FROM employees WHERE department_id = 1 AND employee_id <= 2; +``` + +```output + department_id | employee_id | dept_name | employee_name +---------------+-------------+------------+--------------- + 1 | 1 | Accounting | John + 1 | 2 | Accounting | Jane +``` + +### Select with condition on a regular column, using WHERE clause + +```sql +ycqlsh:example> SELECT * FROM employees WHERE employee_name = 'John'; +``` + +```output + department_id | employee_id | dept_name | employee_name +---------------+-------------+------------+--------------- + 1 | 1 | Accounting | John + 1 | 3 | Accounting | John +``` + +### Select with condition on a regular column, using IF clause + +```sql +ycqlsh:example> SELECT * FROM employees WHERE department_id = 1 IF employee_name != 'John'; +``` + +```output + department_id | employee_id | dept_name | employee_name +---------------+-------------+------------+--------------- + 1 | 2 | Accounting | Jane +``` + +### Select with `ORDER BY` clause + +``` sql +ycqlsh:example> CREATE TABLE sensor_data(device_id INT, + sensor_id INT, + ts TIMESTAMP, + value TEXT, + PRIMARY KEY((device_id), sensor_id, ts)) WITH CLUSTERING ORDER BY (sensor_id 
ASC, ts DESC); +``` + +```sql +ycqlsh:example> INSERT INTO sensor_data(device_id, sensor_id, ts, value) + VALUES (1, 1, '2018-1-1 12:30:30 UTC', 'a'); +``` + +```sql +ycqlsh:example> INSERT INTO sensor_data(device_id, sensor_id, ts, value) + VALUES (1, 1, '2018-1-1 12:30:31 UTC', 'b'); +``` + +```sql +ycqlsh:example> INSERT INTO sensor_data(device_id, sensor_id, ts, value) + VALUES (1, 2, '2018-1-1 12:30:30 UTC', 'x'); +``` + +```sql +ycqlsh:example> INSERT INTO sensor_data(device_id, sensor_id, ts, value) + VALUES (1, 2, '2018-1-1 12:30:31 UTC', 'y'); +``` + +Reverse scan, opposite of the table's clustering order. + +```sql +ycqlsh:example> SELECT * FROM sensor_data WHERE device_id = 1 ORDER BY sensor_id DESC, ts ASC; +``` + +```output + device_id | sensor_id | ts | value +-----------+-----------+---------------------------------+------- + 1 | 2 | 2018-01-01 12:30:30.000000+0000 | x + 1 | 2 | 2018-01-01 12:30:31.000000+0000 | y + 1 | 1 | 2018-01-01 12:30:30.000000+0000 | a + 1 | 1 | 2018-01-01 12:30:31.000000+0000 | b +``` + +Forward scan, same as a SELECT without an ORDER BY clause. + +```sql +ycqlsh:example> SELECT * FROM sensor_data WHERE device_id = 1 ORDER BY sensor_id ASC, ts DESC; +``` + +```output + device_id | sensor_id | ts | value +-----------+-----------+---------------------------------+------- + 1 | 1 | 2018-01-01 12:30:31.000000+0000 | b + 1 | 1 | 2018-01-01 12:30:30.000000+0000 | a + 1 | 2 | 2018-01-01 12:30:31.000000+0000 | y + 1 | 2 | 2018-01-01 12:30:30.000000+0000 | x +``` + +Other orderings are not allowed. + +```sql +ycqlsh:example> SELECT * FROM sensor_data WHERE device_id = 1 ORDER BY sensor_id ASC, ts ASC; +``` + +```output +InvalidRequest: Unsupported order by relation +SELECT * FROM sensor_data WHERE device_id = 1 ORDER BY sensor_id ASC, ts ASC; + ^^^^^^^^^^^^^^^^^^^^^ +``` + +## See also + +- [`CREATE TABLE`](../ddl_create_table) +- [`INSERT`](../dml_insert) +- [`UPDATE`](../dml_update/) +- [`DELETE`](../dml_delete/) +- [`Expression`](..#expressions) diff --git a/docs/content/v2.25/api/ycql/dml_transaction.md b/docs/content/v2.25/api/ycql/dml_transaction.md new file mode 100644 index 000000000000..8dc690fd455b --- /dev/null +++ b/docs/content/v2.25/api/ycql/dml_transaction.md @@ -0,0 +1,156 @@ +--- +title: TRANSACTION statement [YCQL] +headerTitle: TRANSACTION +linkTitle: TRANSACTION +description: Use the TRANSACTION statement block to make changes to multiple rows in one or more tables in a distributed ACID transaction. +menu: + preview_api: + parent: api-cassandra + weight: 1330 +aliases: + - /preview/api/cassandra/dml_transaction + - /preview/api/ycql/dml_transaction +type: docs +--- + +## Synopsis + +Use the TRANSACTION statement block to make changes to multiple rows in one or more tables in a [distributed ACID transaction](../../../architecture/transactions/distributed-txns). + +## Syntax + +### Diagram + +BEGINTRANSACTIONinsertupdatedelete;ENDTRANSACTION; + +### Grammar + +```ebnf +transaction_block ::= BEGIN TRANSACTION + ( insert | update | delete ) ';' + [ ( insert | update | delete ) ';' ...] + END TRANSACTION ';' +``` + +Where `insert`, `update`, and `delete` are [INSERT](../dml_insert), [UPDATE](../dml_update/), and [DELETE](../dml_delete/) statements. + +- When using `BEGIN TRANSACTION`, you don't use a semicolon. End the transaction block with `END TRANSACTION ;` (with a semicolon). +- There is no `COMMIT` for transactions started using `BEGIN`. + +### SQL syntax + +YCQL also supports SQL `START TRANSACTION` and `COMMIT` statements. 
+ +```ebnf +transaction_block ::= START TRANSACTION ';' + ( insert | update | delete ) ';' + [ ( insert | update | delete ) ';' ...] + COMMIT ';' +``` + +- When using `START TRANSACTION`, you must use a semicolon. End the transaction block with `COMMIT ;`. +- You can't use `END TRANSACTION` for transactions started using `START`. + +## Semantics + +- An error is raised if transactions are not enabled in any of the tables inserted, updated, or deleted. +- Currently, an error is raised if any of the `INSERT`, `UPDATE`, or `DELETE` statements contains an `IF` clause. +- If transactions are enabled for a table, its indexes must have them enabled as well, and vice versa. +- There is no explicit rollback. To rollback a transaction, abort, or interrupt the client session. +- DDLs are always executed outside of a transaction block, and like DMLs outside a transaction block, are committed immediately. +- Inside a transaction block only insert, update, and delete statements are allowed. Select statements are not allowed. +- The insert, update, and delete statements inside a transaction block cannot have any [if_expression](../grammar_diagrams/#if-expression). + +## Examples + +### Create a table with transactions enabled + +```sql +ycqlsh:example> CREATE TABLE accounts (account_name TEXT, + account_type TEXT, + balance DOUBLE, + PRIMARY KEY ((account_name), account_type)) + WITH transactions = { 'enabled' : true }; +``` + +### Insert some data + +```sql +ycqlsh:example> INSERT INTO accounts (account_name, account_type, balance) + VALUES ('John', 'savings', 1000); +ycqlsh:example> INSERT INTO accounts (account_name, account_type, balance) + VALUES ('John', 'checking', 100); +ycqlsh:example> INSERT INTO accounts (account_name, account_type, balance) + VALUES ('Smith', 'savings', 2000); +ycqlsh:example> INSERT INTO accounts (account_name, account_type, balance) + VALUES ('Smith', 'checking', 50); +``` + +```sql +ycqlsh:example> SELECT account_name, account_type, balance, writetime(balance) FROM accounts; +``` + +```output + account_name | account_type | balance | writetime(balance) +--------------+--------------+---------+-------------------- + John | checking | 100 | 1523313964356489 + John | savings | 1000 | 1523313964350449 + Smith | checking | 50 | 1523313964371579 + Smith | savings | 2000 | 1523313964363056 +``` + +### Update 2 rows with the same partition key + +You can do this as follows: + +```sql +ycqlsh:example> BEGIN TRANSACTION + UPDATE accounts SET balance = balance - 200 WHERE account_name = 'John' AND account_type = 'savings'; + UPDATE accounts SET balance = balance + 200 WHERE account_name = 'John' AND account_type = 'checking'; + END TRANSACTION; +``` + +```sql +ycqlsh:example> SELECT account_name, account_type, balance, writetime(balance) FROM accounts; +``` + +```output + account_name | account_type | balance | writetime(balance) +--------------+--------------+---------+-------------------- + John | checking | 300 | 1523313983201270 + John | savings | 800 | 1523313983201270 + Smith | checking | 50 | 1523313964371579 + Smith | savings | 2000 | 1523313964363056 +``` + +### Update 2 rows with the different partition keys + +```sql +ycqlsh:example> BEGIN TRANSACTION + UPDATE accounts SET balance = balance - 200 WHERE account_name = 'John' AND account_type = 'checking'; + UPDATE accounts SET balance = balance + 200 WHERE account_name = 'Smith' AND account_type = 'checking'; + END TRANSACTION; +``` + +```sql +ycqlsh:example> SELECT account_name, account_type, balance, writetime(balance) 
FROM accounts; +``` + +```output + account_name | account_type | balance | writetime(balance) +--------------+--------------+---------+-------------------- + John | checking | 100 | 1523314002218558 + John | savings | 800 | 1523313983201270 + Smith | checking | 250 | 1523314002218558 + Smith | savings | 2000 | 1523313964363056 +``` + +{{< note title="Note" >}} +`BEGIN/END TRANSACTION` doesn't currently support `RETURNS STATUS AS ROW`. +{{< /note >}} + +## See also + +- [`INSERT`](../dml_insert) +- [`UPDATE`](../dml_update/) +- [`DELETE`](../dml_delete/) diff --git a/docs/content/v2.25/api/ycql/dml_truncate.md b/docs/content/v2.25/api/ycql/dml_truncate.md new file mode 100644 index 000000000000..4070f24a4649 --- /dev/null +++ b/docs/content/v2.25/api/ycql/dml_truncate.md @@ -0,0 +1,96 @@ +--- +title: TRUNCATE statement [YCQL] +headerTitle: TRUNCATE +linkTitle: TRUNCATE +description: Use the TRUNCATE statement to remove all rows from a specified table. +menu: + preview_api: + parent: api-cassandra + weight: 1330 +aliases: + - /preview/api/cassandra/dml_truncate + - /preview/api/ycql/dml_truncate +type: docs +--- + +## Synopsis + +Use the `TRUNCATE` statement to remove all rows from a specified table. + +## Syntax + +### Diagram + +TRUNCATETABLEtable_name + +### Grammar + +```ebnf +truncate ::= TRUNCATE [ TABLE ] table_name; +``` + +Where + +- `table_name` is an identifier (possibly qualified with a keyspace name). + +## Semantics + +- An error is raised if the specified `table_name` does not exist. + +## Examples + +### Truncate a table + +```sql +ycqlsh:example> CREATE TABLE employees(department_id INT, + employee_id INT, + name TEXT, + PRIMARY KEY(department_id, employee_id)); +``` + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 1, 'John'); +``` + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 2, 'Jane'); +``` + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (2, 1, 'Joe'); +``` + +```sql +ycqlsh:example> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 2 | 1 | Joe + 1 | 1 | John + 1 | 2 | Jane +``` + +Remove all rows from the table. + +```sql +ycqlsh:example> TRUNCATE employees; +``` + +```sql +ycqlsh:example> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ +``` + +## See also + +- [`CREATE TABLE`](../ddl_create_table) +- [`INSERT`](../dml_insert) +- [`SELECT`](../dml_select/) +- [`UPDATE`](../dml_update/) +- [`DELETE`](../dml_delete/) diff --git a/docs/content/v2.25/api/ycql/dml_update.md b/docs/content/v2.25/api/ycql/dml_update.md new file mode 100644 index 000000000000..20b023cc7b19 --- /dev/null +++ b/docs/content/v2.25/api/ycql/dml_update.md @@ -0,0 +1,283 @@ +--- +title: UPDATE statement [YCQL] +headerTitle: UPDATE +linkTitle: UPDATE +description: Use the UPDATE statement to update one or more column values for a row in table. +menu: + preview_api: + parent: api-cassandra + weight: 1320 +aliases: + - /preview/api/cassandra/dml_update/ + - /preview/api/ycql/dml_update/ + - /preview/api/ysql/dml_update/ +type: docs +--- + +## Synopsis + +Use the `UPDATE` statement to update one or more column values for a row in table. + +{{< note title="Note" >}} + +YugabyteDB can only update one row at a time. Updating multiple rows is currently not supported. 
+ +{{< /note >}} + +## Syntax + +### Diagram + +UPDATEtable_nameUSINGusing_expressionSET,assignmentWHEREwhere_expressionIFif_expressionNOTEXISTSif_expressionORNOTEXISTSRETURNS STATUS AS ROW + +### using_expression + +```ebnf +using_expression = ttl_or_timestamp_expression { 'AND' ttl_or_timestamp_expression }; +``` + +ANDttl_or_timestamp_expression + +### ttl_or_timestamp_expression + +```ebnf +ttl_or_timestamp_expression = 'TTL' ttl_expression | 'TIMESTAMP' timestamp_expression; +``` + +TTLttl_expressionTIMESTAMPtimestamp_expression + +```ebnf +update ::= UPDATE table_name [ USING using_expression ] SET assignment + [ , ... ] WHERE where_expression + [ IF { if_expression + | [ NOT ] EXISTS + | if_expression OR [ NOT ] EXISTS } ] + [ RETURNS STATUS AS ROW ] + + +assignment ::= { column_name | column_name'['index_expression']' } '=' expression +``` + +Where + +- `table_name` is an identifier (possibly qualified with a keyspace name). +- Restrictions for `ttl_expression`, `where_expression`, and `if_expression` are covered in the Semantics section. +- See [Expressions](..#expressions) for more information on syntax rules. + +## Semantics + +- An error is raised if the specified `table_name` does not exist. +- Update statement uses _upsert semantics_, meaning it inserts the row being updated if it does not already exists. +- The `USING TIMESTAMP` clause indicates you would like to perform the UPDATE as if it was done at the + timestamp provided by the user. The timestamp is the number of microseconds since epoch. +- **Note**: You should either use the `USING TIMESTAMP` clause in all of your statements or none of + them. Using a mix of statements where some have `USING TIMESTAMP` and others do not will lead to + very confusing results. +- Updating rows `USING TTL` is not supported on tables with [transactions enabled](./../ddl_create_table#table-properties-1). +- You cannot update the columns in the primary key. As a workaround, you have to delete the row and insert a new row. +- `UPDATE` is always done at `QUORUM` consistency level irrespective of setting. + +### `WHERE` clause + +- The `where_expression` and `if_expression` must evaluate to boolean values. +- The `where_expression` must specify conditions for all primary-key columns. +- The `where_expression` must not specify conditions for any regular columns. +- The `where_expression` can only apply `AND` and `=` operators. Other operators are not yet supported. + +### `IF` clause + +- The `if_expression` can only apply to non-key columns (regular columns). +- The `if_expression` can contain any logical and boolean operators. + +### `USING` clause + +- `ttl_expression` must be an integer value (or a bind variable marker for prepared statements). +- `timestamp_expression` must be an integer value (or a bind variable marker for prepared statements). + +## Examples + +### Update a value in a table + +```sql +ycqlsh:example> CREATE TABLE employees(department_id INT, + employee_id INT, + name TEXT, + age INT, + PRIMARY KEY(department_id, employee_id)); +``` + +```sql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name, age) VALUES (1, 1, 'John', 30); +``` + +Update the value of a non primary-key column. + +```sql +ycqlsh:example> UPDATE employees SET name = 'Jack' WHERE department_id = 1 AND employee_id = 1; +``` + +Using upsert semantics to update a non-existent row (that is, insert the row). 
+ +```sql +ycqlsh:example> UPDATE employees SET name = 'Jane', age = 40 WHERE department_id = 1 AND employee_id = 2; +``` + +```sql +ycqlsh:example> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name | age +---------------+-------------+------+----- + 1 | 1 | Jack | 30 + 1 | 2 | Jane | 40 +``` + +### Conditional update using the `IF` clause + +The supported expressions are allowed in the 'SET' assignment targets. + +```sql +ycqlsh:example> UPDATE employees SET age = age + 1 WHERE department_id = 1 AND employee_id = 1 IF name = 'Jack'; +``` + +```output + [applied] +----------- + True +``` + +Using upsert semantics to add a row, age is not set so will be 'null'. + +```sql +ycqlsh:example> UPDATE employees SET name = 'Joe' WHERE department_id = 2 AND employee_id = 1 IF NOT EXISTS; +``` + +```output + [applied] +----------- + True +``` + +```sql +ycqlsh:example> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name | age +---------------+-------------+------+------ + 2 | 1 | Joe | null + 1 | 1 | Jack | 31 + 1 | 2 | Jane | 40 +``` + +### Update with expiration time using the `USING TTL` clause + +The updated values will persist for the TTL duration. + +```sql +ycqlsh:example> UPDATE employees USING TTL 10 SET age = 32 WHERE department_id = 1 AND employee_id = 1; +``` + +```sql +ycqlsh:example> SELECT * FROM employees WHERE department_id = 1 AND employee_id = 1; +``` + +```output + department_id | employee_id | name | age +---------------+-------------+------+------ + 1 | 1 | Jack | 32 +``` + +11 seconds after the update (value will have expired). + +```sql +ycqlsh:example> SELECT * FROM employees WHERE department_id = 1 AND employee_id = 1; +``` + +```output + department_id | employee_id | name | age +---------------+-------------+------+------ + 1 | 1 | Jack | null +``` + +### Update row with the `USING TIMESTAMP` clause + +You can do this as follows: + +```sql +ycqlsh:foo> INSERT INTO employees(department_id, employee_id, name, age) VALUES (1, 4, 'Jeff', 20) USING TIMESTAMP 1000; +``` + +```sql +ycqlsh:foo> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name | age +---------------+-------------+------+------ + 1 | 1 | Jack | null + 1 | 2 | Jane | 40 + 1 | 4 | Jeff | 20 + 2 | 1 | Joe | null + +(4 rows) +``` + +Now update the employees table. + +```sql +ycqlsh:foo> UPDATE employees USING TIMESTAMP 500 SET age = 30 WHERE department_id = 1 AND employee_id = 4; +``` + +Not applied since timestamp is lower than 1000. + +```sql +ycqlsh:foo> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name | age +---------------+-------------+------+------ + 1 | 1 | Jack | null + 1 | 2 | Jane | 40 + 1 | 4 | Jeff | 20 + 2 | 1 | Joe | null + +(4 rows) +``` + +```sql +ycqlsh:foo> UPDATE employees USING TIMESTAMP 1500 SET age = 30 WHERE department_id = 1 AND employee_id = 4; +``` + +Applied since timestamp is higher than 1000. + +```sql +ycqlsh:foo> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name | age +---------------+-------------+------+------ + 1 | 1 | Jack | null + 1 | 2 | Jane | 40 + 1 | 4 | Jeff | 30 + 2 | 1 | Joe | null + +(4 rows) +``` + +### RETURNS STATUS AS ROW + +When executing a batch in YCQL, the protocol returns only one error or return status. The `RETURNS STATUS AS ROW` feature addresses this limitation and adds a status row for each statement. + +See examples in [batch docs](../batch#row-status). 
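
As a minimal sketch based on the grammar above (the clause is most useful when the statement is executed as part of a driver-side batch), `RETURNS STATUS AS ROW` is appended to the end of the statement:

```sql
ycqlsh:example> UPDATE employees SET age = 33
                WHERE department_id = 1 AND employee_id = 1
                RETURNS STATUS AS ROW;
```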
+ +## See also + +- [`CREATE TABLE`](../ddl_create_table) +- [`DELETE`](../dml_delete/) +- [`INSERT`](../dml_insert) +- [`SELECT`](../dml_select/) +- [`Expression`](..#expressions) diff --git a/docs/content/v2.25/api/ycql/explain.md b/docs/content/v2.25/api/ycql/explain.md new file mode 100644 index 000000000000..89e0966c0dd8 --- /dev/null +++ b/docs/content/v2.25/api/ycql/explain.md @@ -0,0 +1,217 @@ +--- +title: EXPLAIN statement [YCQL] +headerTitle: EXPLAIN +linkTitle: EXPLAIN +description: Use the EXPLAIN statement to show the execution plan for a YCQL statement. +menu: + preview_api: + parent: api-cassandra + weight: 1320 +aliases: + - /preview/api/ycql/explain/ +type: docs +--- + +## Synopsis + +Use the `EXPLAIN` statement to show the execution plan for a statement. + +## Syntax + +### Diagram + +EXPLAINselectupdateinsertdelete + +### Grammar + +``` +explain ::= EXPLAIN { select | update | insert | delete } +``` + + +## Semantics + +Where the target statement is one of the following: [SELECT](../dml_select/), [UPDATE](../dml_update/), [INSERT](../dml_insert), or [DELETE](../dml_delete/). + +## Examples +Create the keyspace, tables and indexes. + +### Setup Table and indexes +```CQL +cqlsh> CREATE KEYSPACE IF NOT EXISTS imdb; +cqlsh> CREATE TABLE IF NOT EXISTS imdb.movie_stats ( + movie_name text, + movie_genre text, + user_name text, + user_rank int, + last_watched timestamp, + PRIMARY KEY (movie_genre, movie_name, user_name) + ) WITH transactions = { 'enabled' : true }; +cqlsh> CREATE INDEX IF NOT EXISTS most_watched_by_year + ON imdb.movie_stats((movie_genre, last_watched), movie_name, user_name) + INCLUDE(user_rank); +cqlsh> CREATE INDEX IF NOT EXISTS best_rated + ON imdb.movie_stats((user_rank, movie_genre), movie_name, user_name) + INCLUDE(last_watched); +``` + +Insert some rows. +```CQL +cqlsh> USE imdb; +cqlsh:imdb> INSERT INTO movie_stats(movie_name, movie_genre, user_name, user_rank, last_watched) + VALUES ('m1', 'g1', 'u1', 5, '2019-01-18'); +cqlsh:imdb> INSERT INTO movie_stats(movie_name, movie_genre, user_name, user_rank, last_watched) + VALUES ('m2', 'g2', 'u1', 4, '2019-01-17'); +cqlsh:imdb> INSERT INTO movie_stats(movie_name, movie_genre, user_name, user_rank, last_watched) + VALUES ('m3', 'g1', 'u2', 5, '2019-01-18'); +cqlsh:imdb> INSERT INTO movie_stats(movie_name, movie_genre, user_name, user_rank, last_watched) + VALUES ('m4', 'g1', 'u1', 2, '2019-02-27'); +``` +### Explain query plans +If movie_genre, or movie_genre & movie_name, or movie_genre & movie_name & user_name are specified, the query should be served efficiently from the primary table. + +```CQL +cqlsh:imdb> EXPLAIN SELECT * + FROM movie_stats + WHERE movie_genre = 'g1'; + +QUERY PLAN +---------------------------------------- + Range Scan on imdb.movie_stats + Key Conditions: (movie_genre = 'g1') +``` +If movie_genre & last_watched are specified, then the query should be served efficiently from the `most_watched_by_year` index. + +```CQL +cqlsh:imdb> EXPLAIN SELECT * + FROM movie_stats + WHERE movie_genre = 'g1' and last_watched='2019-02-27'; + +QUERY PLAN +-------------------------------------------------------------------------- + Index Only Scan using imdb.most_watched_by_year on imdb.movie_stats + Key Conditions: (movie_genre = 'g1') AND (last_watched = '2019-02-27') + +``` + +If user_rank and movie_genre are specified then the query should be served efficiently from the `best_rated` index. 
+ +```CQL +cqlsh:imdb> EXPLAIN SELECT * + FROM movie_stats + WHERE movie_genre = 'g2' and user_rank=5; + +QUERY PLAN +-------------------------------------------------------------- + Index Only Scan using imdb.best_rated on imdb.movie_stats + Key Conditions: (user_rank = '5') AND (movie_genre = 'g2') + +``` +Create non-covering index. +```CQL +cqlsh:imdb> DROP INDEX best_rated; +cqlsh:imdb> CREATE INDEX IF NOT EXISTS best_rated + ON imdb.movie_stats((user_rank, movie_genre), movie_name, user_name); +``` +2-Step select. Using Index Scan as opposed to Index Only Scan. +```CQL +cqlsh:imdb> EXPLAIN SELECT * + FROM movie_stats + WHERE movie_genre = 'g2' and user_rank=5; + + QUERY PLAN +-------------------------------------------------------------- + Index Scan using imdb.best_rated on imdb.movie_stats + Key Conditions: (user_rank = '5') AND (movie_genre = 'g2') +``` + +{{< note title="Note" >}} + +**INDEX SCAN**: Filters rows using the index and then fetches the columns from the main table. + +**INDEX ONLY SCAN**: Returns results by only consulting the index. + +{{< /note >}} + + +### Other EXPLAIN SELECT types +`QLName()` for these expressions is not supported. +```CQL +cqlsh:imdb> EXPLAIN SELECT * FROM movie_stats where movie_genre in ('g1', 'g2'); + + QUERY PLAN +------------------------------------------- + Range Scan on imdb.movie_stats + Key Conditions: (movie_genre IN 'expr') +``` + + +```CQL +cqlsh:imdb> EXPLAIN SELECT COUNT(*) FROM movie_stats WHERE movie_genre = 'g2' and user_rank=5; + + QUERY PLAN +-------------------------------------------------------------------- + Aggregate + -> Index Only Scan using imdb.best_rated on imdb.movie_stats + Key Conditions: (user_rank = '5') AND (movie_genre = 'g2') +``` + + +```CQL +cqlsh:imdb> EXPLAIN SELECT * FROM movie_stats WHERE movie_genre = 'g2' and user_rank = 5 LIMIT 5; + + QUERY PLAN +-------------------------------------------------------------------- + Limit + -> Index Only Scan using imdb.best_rated on imdb.movie_stats + Key Conditions: (user_rank = '5') AND (movie_genre = 'g2') +``` +### INSERT example + +```CQL +cqlsh:imdb> EXPLAIN INSERT INTO movie_stats(movie_name, movie_genre, user_name, user_rank, last_watched) + VALUES ('m4', 'g1', 'u1', 2, '2019-02-27'); + + QUERY PLAN +---------------------------- + Insert on imdb.movie_stats +``` +### DELETE examples + +```CQL +cqlsh:imdb> explain delete from movie_stats where movie_genre = 'g1' and movie_name = 'm1'; + + QUERY PLAN +---------------------------------------------- + Delete on imdb.movie_stats + -> Range Scan on imdb.movie_stats + Key Conditions: (movie_genre = 'g1') + Filter: (movie_name = 'm1') +``` +```CQL +cqlsh:imdb> explain delete from movie_stats where movie_genre = 'g1'; + + QUERY PLAN +---------------------------------------------- + Delete on imdb.movie_stats + -> Range Scan on imdb.movie_stats + Key Conditions: (movie_genre = 'g1') +``` +### UPDATE example + +```CQL +cqlsh:imdb> EXPLAIN UPDATE movie_stats SET user_rank = 1 WHERE movie_name = 'm1' and movie_genre = 'g1' and user_name = 'u1'; + + QUERY PLAN +--------------------------------------------------------------------------------------------- + Update on imdb.movie_stats + -> Primary Key Lookup on imdb.movie_stats + Key Conditions: (movie_genre = 'g1') AND (movie_name = 'm1') AND (user_name = 'u1') +``` + +## See also + +- [`INSERT`](../dml_insert) +- [`SELECT`](../dml_select/) +- [`UPDATE`](../dml_update/) +- [`DELETE`](../dml_delete/) diff --git a/docs/content/v2.25/api/ycql/expr_fcall.md 
b/docs/content/v2.25/api/ycql/expr_fcall.md
new file mode 100644
index 000000000000..0d72dd1593ba
--- /dev/null
+++ b/docs/content/v2.25/api/ycql/expr_fcall.md
@@ -0,0 +1,196 @@
+---
+title: Built-in function call [YCQL]
+headerTitle: Built-in function call
+linkTitle: Function call
+description: Use a function call expression to apply the specified function to given arguments between parentheses and return the result of the computation.
+menu:
+  preview_api:
+    parent: api-cassandra
+    weight: 1350
+aliases:
+  - /preview/api/cassandra/expr_fcall
+type: docs
+---
+
+## Synopsis
+
+Use a function call expression to apply the specified function to given arguments between parentheses and return the result of the computation.
+
+## Syntax
+
+```sql
+function_call ::= function_name '(' [ arguments ... ] ')'
+```
+
+## Built-in Functions
+
+| Function | Return Type | Argument Type | Description |
+|----------|-------------|---------------|-------------|
+| BlobAs\<type\> | \<type\> | ([`BLOB`](../type_blob)) | Converts a value from `BLOB` |
+| \<type\>AsBlob | [`BLOB`](../type_blob) | (\<type\>) | Converts a value to `BLOB` |
+| [DateOf](../function_datetime/#dateof) | [`TIMESTAMP`](../type_datetime/) | ([`TIMEUUID`](../type_uuid)) | Conversion |
+| [MaxTimeUuid](../function_datetime/#maxtimeuuid-timestamp) | [`TIMEUUID`](../type_uuid) | ([`TIMESTAMP`](../type_datetime)) | Returns the associated max time UUID |
+| [MinTimeUuid](../function_datetime/#mintimeuuid-timestamp) | [`TIMEUUID`](../type_uuid) | ([`TIMESTAMP`](../type_datetime)) | Returns the associated min time UUID |
+| [CurrentDate](../function_datetime/#currentdate-currenttime-and-currenttimestamp) | [`DATE`](../type_datetime/) | () | Return the system current date |
+| [CurrentTime](../function_datetime/#currentdate-currenttime-and-currenttimestamp) | [`TIME`](../type_datetime/) | () | Return the system current time of day |
+| [CurrentTimestamp](../function_datetime/#currentdate-currenttime-and-currenttimestamp) | [`TIMESTAMP`](../type_datetime/) | () | Return the system current timestamp |
+| [Now](../function_datetime/#now) | [`TIMEUUID`](../type_uuid) | () | Returns the UUID of the current timestamp |
+| [TTL](#ttl-function) | [`BIGINT`](../type_int) | (\<column\>) | Get time-to-live of a column |
+| [ToDate](../function_datetime/#todate) | [`DATE`](../type_datetime/) | ([`TIMESTAMP`](../type_datetime/)) | Conversion |
+| [ToDate](../function_datetime/#todate) | [`DATE`](../type_datetime/) | ([`TIMEUUID`](../type_uuid)) | Conversion |
+| ToTime | [`TIME`](../type_datetime/) | ([`TIMESTAMP`](../type_datetime/)) | Conversion |
+| ToTime | [`TIME`](../type_datetime/) | ([`TIMEUUID`](../type_uuid)) | Conversion |
+| [ToTimestamp](../function_datetime/#totimestamp) | [`TIMESTAMP`](../type_datetime/) | ([`DATE`](../type_datetime/)) | Conversion |
+| [ToTimestamp](../function_datetime/#totimestamp) | [`TIMESTAMP`](../type_datetime/) | (`TIMEUUID`) | Conversion |
+| [ToUnixTimestamp](../function_datetime/#tounixtimestamp) | [`BIGINT`](../type_int) | ([`DATE`](../type_datetime/)) | Conversion |
+| [ToUnixTimestamp](../function_datetime/#tounixtimestamp) | [`BIGINT`](../type_int) | ([`TIMESTAMP`](../type_datetime/)) | Conversion |
+| [ToUnixTimestamp](../function_datetime/#tounixtimestamp) | [`BIGINT`](../type_int) | ([`TIMEUUID`](../type_uuid)) | Conversion |
+| [UnixTimestampOf](../function_datetime/#unixtimestampof) | [`BIGINT`](../type_int) | ([`TIMEUUID`](../type_uuid)) | Conversion |
+| [UUID](../function_datetime/#uuid) | [`UUID`](../type_uuid) | () | Returns a
version 4 UUID |
+| [WriteTime](#writetime-function) | [`BIGINT`](../type_int) | (\<column\>) | Returns the timestamp when the column was written |
+| [partition_hash](#partition-hash-function) | [`BIGINT`](../type_int) | () | Computes the partition hash value (uint16) for the partition key columns of a row |
+
+## Aggregate Functions
+
+| Function | Description |
+|----------|-------------|
+| COUNT | Returns the number of selected rows |
+| SUM | Returns the sum of column values |
+| AVG | Returns the average of column values |
+| MIN | Returns the minimum value of column values |
+| MAX | Returns the maximum value of column values |
+
+## Semantics
+
+- The argument data types must be convertible to the expected argument types specified by the function definition.
+- A function call returns a value of the return type specified by the function definition.
+- YugabyteDB allows function calls to be used anywhere an expression is allowed.
+
+## CAST function
+
+The CAST function converts the value returned from a table column to the specified data type.
+
+### Syntax
+
+```sql
+cast_call ::= CAST '(' column AS type ')'
+```
+
+The following table lists the column data types and the target data types.
+
+| Source column type | Target data type |
+|--------------------|------------------|
+| `BIGINT` | `SMALLINT`, `INT`, `TEXT` |
+| `BOOLEAN` | `TEXT` |
+| `DATE` | `TEXT`, `TIMESTAMP` |
+| `DOUBLE` | `BIGINT`, `INT`, `SMALLINT`, `TEXT` |
+| `FLOAT` | `BIGINT`, `INT`, `SMALLINT`, `TEXT` |
+| `INT` | `BIGINT`, `SMALLINT`, `TEXT` |
+| `SMALLINT` | `BIGINT`, `INT`, `TEXT` |
+| `TIME` | `TEXT` |
+| `TIMESTAMP` | `DATE`, `TEXT` |
+| `TIMEUUID` | `DATE`, `TIMESTAMP` |
+
+### Example
+
+```sql
+ycqlsh:example> CREATE TABLE test_cast (k INT PRIMARY KEY, ts TIMESTAMP);
+```
+
+```sql
+ycqlsh:example> INSERT INTO test_cast (k, ts) VALUES (1, '2018-10-09 12:00:00');
+```
+
+```sql
+ycqlsh:example> SELECT CAST(ts AS DATE) FROM test_cast;
+```
+
+```output
+ cast(ts as date)
+------------------
+       2018-10-09
+```
+
+## partition_hash function
+
+`partition_hash` is a function that takes as arguments the partition key columns of the primary key of a row and
+returns the `uint16` hash value of the row that is used for partitioning the table.
+The hash values used for partitioning fall in the `0-65535` (uint16) range.
+Tables are partitioned into tablets, with each tablet being responsible for a range of partition values.
+The `partition_hash` of the row is used to decide which tablet the row will reside in.
+
+`partition_hash` can be beneficial for querying a subset of the data to get approximate row counts or to break down
+full-table operations into smaller sub-tasks that can be run in parallel.
+
+### Querying a subset of the data
+
+One use of `partition_hash` is to query a subset of the data and get an approximate count of rows in the table.
+For example, suppose you have a table `t` with partitioning columns `(h1,h2)` as follows:
+
+```sql
+create table t (h1 int, h2 int, r1 int, r2 int, v int,
+                primary key ((h1, h2), r1, r2));
+```
+
+You can use this function to query a subset of the data (in this case, 1/128 of the data) as follows:
+
+```sql
+select count(*) from t where partition_hash(h1, h2) >= 0 and
+                             partition_hash(h1, h2) < 512;
+```
+
+The value `512` comes from dividing the full hash partition range by the number of subsets that you want to query (`65536/128=512`).
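
Assuming rows are spread roughly uniformly across the hash space, you can estimate the total row count by scaling the count obtained from a single slice; the scaling is done on the client side because YCQL does not evaluate arithmetic on aggregate results:

```sql
-- Count the rows in one 512-wide slice (1/128 of the 0-65535 hash space).
SELECT COUNT(*) FROM t WHERE partition_hash(h1, h2) >= 0
                         AND partition_hash(h1, h2) < 512;
-- Approximate total rows = (count returned above) * 128, computed by the client.
```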
+ +### Parallel full table scans + +To do a distributed scan, you can issue, in this case, 128 queries each using a different hash range as follows: + +```sql +.. where partition_hash(h1, h2) >= 0 and partition_hash(h1, h2) < 512; +``` + +```sql +.. where partition_hash(h1, h2) >= 512 and partition_hash(h1, h2) <1024 ; +``` + +and so on, till the last segment/range of `512` in the partition space: + +```sql +.. where partition_hash(h1, h2) >= 65024; +``` + +Refer to `partition_hash` in [Python 3](https://github.com/yugabyte/yb-tools/blob/main/ycql_table_row_count.py) and [Go](https://github.com/yugabyte/yb-tools/tree/main/ycrc) for full implementation of a parallel table scan. + +## WriteTime function + +The `WriteTime` function returns the timestamp in microseconds when a column was written. +For example, suppose you have a table `page_views` with a column named `views`: + +```sql + SELECT writetime(views) FROM page_views; + + writetime(views) +------------------ + 1572882871160113 + +(1 rows) +``` + +## TTL function + +The TTL function returns the number of seconds until a column or row expires. +Assuming you have a table `page_views` and a column named `views`: + +```sql +SELECT TTL(views) FROM page_views; + + ttl(views) +------------ + 86367 + +(1 rows) +``` + +## See also + +- [All Expressions](../#expressions) diff --git a/docs/content/v2.25/api/ycql/expr_ocall.md b/docs/content/v2.25/api/ycql/expr_ocall.md new file mode 100644 index 000000000000..1f469bcd26e9 --- /dev/null +++ b/docs/content/v2.25/api/ycql/expr_ocall.md @@ -0,0 +1,51 @@ +--- +title: Operators [YCQL] +headerTitle: YCQL operators +linkTitle: Operators +description: Combine multiple expressions using YCQL operators supported in YugabyteDB. +menu: + preview_api: + parent: api-cassandra + weight: 1360 +aliases: + - /preview/api/cassandra/expr_ocall +type: docs +--- + +An expression with operators is a compound expression that combines multiple expressions using built-in operators. The following sections discuss the YCQL operators in YugabyteDB. + +## Null operators + +| Operator | Description | +|----------|-------------| +| `EXISTS`, `NOT EXISTS` | predicate for existence of a row | + +`EXISTS` and `NOT EXISTS` can only be used in the `IF` clause. + +## Unary operators + +| Operator | Description | +|----------|-------------| +| `-` | numeric negation | +| `+` | no-op | +| `NOT` | Logical (boolean) negation | + +Unary `-` and `+` can only be used with constant expressions such as `-77`. + +## Binary operators + +| Operator | Description | +|----------|-------------| +| `OR`, `AND`| Logical (boolean) expression | +| `=`, `!=`, `<`, `<=`, `>`, `>=` | Comparison expression | +| `+` | Addition, append, or prepend | +| `-` | Subtraction or removal | +| `*` | Multiplication. Not yet supported | +| `/` | Division. Not yet supported | +| `ISNULL`, `IS NOT NULL` | Not yet supported comparison expression. | + +- The first argument of comparison operators must be a column. For example, `column_name = 7`. +- Comparing `NULL` with others always yields a `false` value. Operator `ISNULL` or `IS NULL` must be used when comparing with `NULL`. +- When `+` and `-` are applied to a NULL argument of `COUNTER` data type, the NULL expression is replaced with a zero value before the computation. When these operators are applied to a NULL expression of all other numeric data types, the computed value is always NULL. +- Operator `+` either prepends or appends a value to a LIST while operator `-` removes elements from LIST. 
+- Operator `+` inserts new distinct elements into a MAP or SET while operator `-` removes elements from them.
diff --git a/docs/content/v2.25/api/ycql/expr_simple.md b/docs/content/v2.25/api/ycql/expr_simple.md
new file mode 100644
index 000000000000..722ec0ca1e91
--- /dev/null
+++ b/docs/content/v2.25/api/ycql/expr_simple.md
@@ -0,0 +1,39 @@
+---
+title: Simple expressions [YCQL]
+headerTitle: Simple expressions
+linkTitle: Simple expressions
+description: A simple expression can be a column, constant, or NULL.
+menu:
+  preview_api:
+    parent: api-cassandra
+    weight: 1331
+aliases:
+  - /preview/api/cassandra/expr_simple
+type: docs
+---
+
+A simple expression can be a column, a constant, or NULL.
+
+## Column expression
+
+A column expression refers to a column in a table by using its name, which can be either a fully qualified name or a simple name.
+
+```
+column_expression ::= [keyspace_name.][table_name.][column_name]
+```
+
+## Constant expression
+
+A constant expression represents a simple value by using literals.
+
+```
+constant_expression ::= string | number
+```
+
+## NULL
+
+When an expression, typically a column, does not have a value, it is represented as NULL.
+
+```
+null_expression ::= NULL
+```
diff --git a/docs/content/v2.25/api/ycql/expr_subscript.md b/docs/content/v2.25/api/ycql/expr_subscript.md
new file mode 100644
index 000000000000..691d92d38922
--- /dev/null
+++ b/docs/content/v2.25/api/ycql/expr_subscript.md
@@ -0,0 +1,34 @@
+---
+title: Subscripted expressions [YCQL]
+headerTitle: Subscripted expressions
+linkTitle: Subscripted expressions
+description: Use subscripted expressions to access elements in a multi-element value, such as a map collection by using the [] operator.
+menu:
+  preview_api:
+    parent: api-cassandra
+    weight: 1340
+aliases:
+  - /preview/api/cassandra/expr_subscript
+type: docs
+---
+
+Use subscripted expressions to access elements in a multi-element value, such as a map collection, by using operator `[]`. Subscripted column expressions can be used when writing data, in the same way as a [column expression](../expr_simple/#column-expression). For example, if `ids` refers to a column of type `LIST`, `ids[7]` refers to the element at index 7 of the list `ids`, which can be set in an [UPDATE](../dml_update/) statement.
+
+- Subscripted expressions can only be applied to columns of type `LIST`, `MAP`, or user-defined data types.
+- Subscripting a `LIST` value with a non-positive index will yield NULL.
+- Subscripting a `MAP` value with a non-existing key will yield NULL. Otherwise, it returns the element value that is associated with the given key.
+- Apache Cassandra does not allow subscripted expressions in the select list of the SELECT statement.
+
+## Examples
+
+```sql
+ycqlsh:yugaspace> CREATE TABLE t(id INT PRIMARY KEY, yugamap MAP<TEXT, TEXT>);
+```
+
+```sql
+ycqlsh:yugaspace> UPDATE t SET yugamap['key_value'] = 'yuga_string' WHERE id = 7;
+```
+
+## See also
+
+- [All Expressions](../#expressions)
diff --git a/docs/content/v2.25/api/ycql/function_datetime.md b/docs/content/v2.25/api/ycql/function_datetime.md
new file mode 100644
index 000000000000..7f20a877d333
--- /dev/null
+++ b/docs/content/v2.25/api/ycql/function_datetime.md
@@ -0,0 +1,432 @@
+---
+title: Date and time functions [YCQL]
+headerTitle: Date and time functions
+linkTitle: Date and time
+description: Use date and time functions to work on date and time data types.
+menu: + preview_api: + parent: api-cassandra + weight: 1560 +aliases: + - /preview/api/cassandra/function_datetime + - /preview/api/ycql/function_datetime +type: docs +--- + +This section covers the set of YCQL built-in functions that work on the date and time data types: [`DATE`, `TIME`, `TIMESTAMP`](../type_datetime/), or [`TIMEUUID`](../type_uuid). + +## currentdate(), currenttime(), and currenttimestamp() + +Use these functions to return the current system date and time in UTC time zone. + +- They take no arguments. +- The return value is a `DATE`, `TIME`, or `TIMESTAMP`, respectively. + +### Examples + +#### Insert values using currentdate(), currenttime(), and currenttimestamp() + +```sql +ycqlsh:example> CREATE TABLE test_current (k INT PRIMARY KEY, d DATE, t TIME, ts TIMESTAMP); +``` + +```sql +ycqlsh:example> INSERT INTO test_current (k, d, t, ts) VALUES (1, currentdate(), currenttime(), currenttimestamp()); +``` + +#### Comparison using currentdate() and currenttime() + +```sql +ycqlsh:example> SELECT * FROM test_current WHERE d = currentdate() and t < currenttime(); +``` + +```output + k | d | t | ts +---+------------+--------------------+--------------------------------- + 1 | 2018-10-09 | 18:00:41.688216000 | 2018-10-09 18:00:41.688000+0000 +``` + +## now() + +This function generates a new unique version 1 UUID (`TIMEUUID`). + +- It takes in no arguments. +- The return value is a `TIMEUUID`. + +### Examples + +#### Insert values using now() + +```sql +ycqlsh:example> CREATE TABLE test_now (k INT PRIMARY KEY, v TIMEUUID); +``` + +```sql +ycqlsh:example> INSERT INTO test_now (k, v) VALUES (1, now()); +``` + +#### Select using now() + +```sql +ycqlsh:example> SELECT now() FROM test_now; +``` + +```output + now() +--------------------------------------- + b75bfaf6-4fe9-11e8-8839-6336e659252a +``` + +#### Comparison using now() + +```sql +ycqlsh:example> SELECT v FROM test_now WHERE v < now(); +``` + +```output + v +--------------------------------------- + 71bb5104-4fe9-11e8-8839-6336e659252a +``` + +## todate() + +This function converts a timestamp or TIMEUUID to the corresponding date. + +- It takes in an argument of type `TIMESTAMP` or `TIMEUUID`. +- The return value is a `DATE`. + +```sql +ycqlsh:example> CREATE TABLE test_todate (k INT PRIMARY KEY, ts TIMESTAMP); +``` + +```sql +ycqlsh:example> INSERT INTO test_todate (k, ts) VALUES (1, currenttimestamp()); +``` + +```sql +ycqlsh:example> SELECT todate(ts) FROM test_todate; +``` + +```output + todate(ts) +------------ + 2018-10-09 +``` + +## minTimeUUID() + +This function generates corresponding (`TIMEUUID`) with minimum node/clock component so that it includes all regular +`TIMEUUID` with that timestamp when comparing with another `TIMEUUID`. + +- It takes in an argument of type `TIMESTAMP`. +- The return value is a `TIMEUUID`. 
+ +### Examples + +#### Insert values using now() + +```sql +ycqlsh:example> CREATE TABLE test_min (k INT PRIMARY KEY, v TIMEUUID); +``` + +```sql +ycqlsh:example> INSERT INTO test_min (k, v) VALUES (1, now()); +``` + +```sql +ycqlsh:ybdemo> select k, v, totimestamp(v) from test_min; +``` + +```output + k | v | totimestamp(v) +---+--------------------------------------+--------------------------------- + 1 | dc79344c-cb79-11ec-915e-5219fa422f77 | 2022-05-04 07:14:39.205000+0000 + +(1 rows) +``` + +#### Select using minTimeUUID() + +```sql +ycqlsh:ybdemo> SELECT * FROM test_min WHERE v > minTimeUUID('2022-04-04 13:42:00+0000'); +``` + +```output + k | v +---+-------------------------------------- + 1 | dc79344c-cb79-11ec-915e-5219fa422f77 + +(1 rows) +``` + +## maxTimeUUID() + +This function generates corresponding (`TIMEUUID`) with maximum clock component so that it includes all regular +`TIMEUUID` with that timestamp when comparing with another `TIMEUUID`. + +- It takes in an argument of type `TIMESTAMP`. +- The return value is a `TIMEUUID`. + +### Examples + +#### Insert values using now() + +```sql +ycqlsh:example> CREATE TABLE test_max (k INT PRIMARY KEY, v TIMEUUID); +``` + +```sql +ycqlsh:example> INSERT INTO test_max (k, v) VALUES (1, now()); +``` + +```sql +ycqlsh:ybdemo> SELECT k, v, totimestamp(v) from test_max; +``` + +```output + k | v | totimestamp(v) +---+--------------------------------------+--------------------------------- + 1 | e9261bcc-395a-11eb-9edc-112a0241eb23 | 2020-12-08 13:40:18.636000+0000 + +(1 rows) +``` + +#### Select using maxTimeUUID() + +```sql +ycqlsh:ybdemo> SELECT * FROM test_max WHERE v <= maxTimeUUID('2022-05-05 00:34:32+0000'); +``` + +```output + k | v +---+-------------------------------------- + 1 | dc79344c-cb79-11ec-915e-5219fa422f77 + +(1 rows) +``` + +## totimestamp() + +This function converts a date or TIMEUUID to the corresponding timestamp. + +- It takes in an argument of type `DATE` or `TIMEUUID`. +- The return value is a `TIMESTAMP`. + +### Examples + +#### Insert values using totimestamp() + +```sql +ycqlsh:example> CREATE TABLE test_totimestamp (k INT PRIMARY KEY, v TIMESTAMP); +``` + +```sql +ycqlsh:example> INSERT INTO test_totimestamp (k, v) VALUES (1, totimestamp(now())); +``` + +#### Select using totimestamp() + +```sql +ycqlsh:example> SELECT totimestamp(now()) FROM test_totimestamp; +``` + +```output + totimestamp(now()) +--------------------------------- + 2018-05-04 22:32:56.966000+0000 +``` + +#### Comparison using totimestamp() + +```sql +ycqlsh:example> SELECT v FROM test_totimestamp WHERE v < totimestamp(now()); +``` + +```output + v +--------------------------------- + 2018-05-04 22:32:46.199000+0000 +``` + +## dateof() + +This function converts a TIMEUUID to the corresponding timestamp. + +- It takes in an argument of type `TIMEUUID`. +- The return value is a `TIMESTAMP`. 
+ +### Examples + +#### Insert values using dateof() + +```sql +ycqlsh:example> CREATE TABLE test_dateof (k INT PRIMARY KEY, v TIMESTAMP); +``` + +```sql +ycqlsh:example> INSERT INTO test_dateof (k, v) VALUES (1, dateof(now())); +``` + +#### Select using dateof() + +```sql +ycqlsh:example> SELECT dateof(now()) FROM test_dateof; +``` + +```output + dateof(now()) +--------------------------------- + 2018-05-04 22:43:28.440000+0000 +``` + +#### Comparison using dateof() + +```sql +ycqlsh:example> SELECT v FROM test_dateof WHERE v < dateof(now()); +``` + +```output + v +--------------------------------- + 2018-05-04 22:43:18.626000+0000 +``` + +## tounixtimestamp() + +This function converts TIMEUUID, date, or timestamp to a UNIX timestamp (which is +equal to the number of millisecond since epoch Thursday, 1 January 1970). + +- It takes in an argument of type `TIMEUUID`, `DATE` or `TIMESTAMP`. +- The return value is a `BIGINT`. + +### Examples + +#### Insert values using tounixtimestamp() + +```sql +ycqlsh:example> CREATE TABLE test_tounixtimestamp (k INT PRIMARY KEY, v BIGINT); +``` + +```sql +ycqlsh:example> INSERT INTO test_tounixtimestamp (k, v) VALUES (1, tounixtimestamp(now())); +``` + +#### Select using tounixtimestamp() + +```sql +ycqlsh:example> SELECT tounixtimestamp(now()) FROM test_tounixtimestamp; +``` + +```output + tounixtimestamp(now()) +------------------------ + 1525473993436 +``` + +#### Comparison using tounixtimestamp() + +You can do this as follows: + +```sql +ycqlsh:example> SELECT v from test_tounixtimestamp WHERE v < tounixtimestamp(now()); +``` + +```output + v +--------------- + 1525473942979 +``` + +## unixtimestampof() + +This function converts TIMEUUID or timestamp to a unix timestamp (which is +equal to the number of millisecond since epoch Thursday, 1 January 1970). + +- It takes in an argument of type `TIMEUUID` or type `TIMESTAMP`. +- The return value is a `BIGINT`. + +### Examples + +#### Insert values using unixtimestampof() + +```sql +ycqlsh:example> CREATE TABLE test_unixtimestampof (k INT PRIMARY KEY, v BIGINT); +``` + +```sql +ycqlsh:example> INSERT INTO test_unixtimestampof (k, v) VALUES (1, unixtimestampof(now())); +``` + +#### Select using unixtimestampof() + +```sql +ycqlsh:example> SELECT unixtimestampof(now()) FROM test_unixtimestampof; +``` + +```output + unixtimestampof(now()) +------------------------ + 1525474361676 +``` + +#### Comparison using unixtimestampof() + +```sql +ycqlsh:example> SELECT v from test_unixtimestampof WHERE v < unixtimestampof(now()); +``` + +```output + v +--------------- + 1525474356781 +``` + +## uuid() + +This function generates a new unique version 4 UUID (`UUID`). + +- It takes in no arguments. +- The return value is a `UUID`. 
+ +### Examples + +#### Insert values using uuid() + +```sql +ycqlsh:example> CREATE TABLE test_uuid (k INT PRIMARY KEY, v UUID); +``` + +```sql +ycqlsh:example> INSERT INTO test_uuid (k, v) VALUES (1, uuid()); +``` + +#### Selecting the inserted uuid value + +```sql +ycqlsh:example> SELECT v FROM test_uuid WHERE k = 1; +``` + +```output + v +--------------------------------------- + 71bb5104-4fe9-11e8-8839-6336e659252a +``` + +#### Select using uuid() + +```sql +ycqlsh:example> SELECT uuid() FROM test_uuid; +``` + +```output + uuid() +-------------------------------------- + 12f91a52-ebba-4461-94c5-b73f0914284a +``` + +## See also + +- [`DATE`, `TIME` and `TIMESTAMP`](../type_datetime/) +- [`TIMEUUID`](../type_uuid) +- [`UUID`](../type_uuid) diff --git a/docs/content/v2.25/api/ycql/grammar_diagrams.md b/docs/content/v2.25/api/ycql/grammar_diagrams.md new file mode 100644 index 000000000000..4297a3cc2644 --- /dev/null +++ b/docs/content/v2.25/api/ycql/grammar_diagrams.md @@ -0,0 +1,436 @@ +--- +title: Grammar diagrams YCQL +summary: Diagrams of the grammar rules. +--- + +### alter_table +``` +alter_table ::= ALTER TABLE table_name + { ADD { column_name column_type } [ , ... ] + | DROP { column_name [ , ... ] } + | RENAME { column_name TO column_name } [ , ... ] + | WITH { property_name = property_literal } + [ AND ... ] } [ ... ] +``` +ALTERTABLEtable_nameADD,column_namecolumn_typeDROP,column_nameRENAME,column_nameTOcolumn_nameWITHANDproperty_name=property_literal + +### column_type +``` +column_type ::= '' +``` +<type> + +### create_index +```output.ebnf +create_index ::= CREATE [ UNIQUE ] [ DEFERRED ] INDEX + [ IF NOT EXISTS ] index_name ON table_name ( + partition_key_columns , [ clustering_key_columns ] ) + [ covering_columns ] [ index_properties ] + [ WHERE index_predicate ] +``` +CREATEUNIQUEDEFERREDINDEXIFNOTEXISTSindex_nameONtable_name(partition_key_columns,clustering_key_columns)covering_columnsindex_propertiesWHEREindex_predicate + +### partition_key_columns +``` +partition_key_columns ::= index_column | ( index_column [ , ... ] ) +``` +index_column(,index_column) + +### clustering_key_columns +``` +clustering_key_columns ::= index_column [ , ... ] +``` +,index_column + +### index_properties +``` +index_properties ::= WITH + { property_name = property_literal + | CLUSTERING ORDER BY ( + { index_column [ ASC | DESC ] } [ , ... ] ) } + [ AND ... ] +``` +WITHANDproperty_name=property_literalCLUSTERINGORDERBY(,index_columnASCDESC) + +### index_column +``` +index_column ::= column_name | jsonb_attribute +``` +column_namejsonb_attribute + +### jsonb_attribute +``` +jsonb_attribute ::= column_name [ -> ''attribute_name'' [ ... ] ] ->> + ''attribute_name'' +``` +column_name->'attribute_name'->>'attribute_name' + +### covering_columns +``` +covering_columns ::= { COVERING | INCLUDE } ( column_name [ , ... ] ) +``` +COVERINGINCLUDE(,column_name) + +### index_predicate +``` +index_predicate ::= where_expression +``` +where_expression + +### create_keyspace +``` +create_keyspace ::= CREATE { KEYSPACE | SCHEMA } [ IF NOT EXISTS ] + keyspace_name keyspace_properties +``` +CREATEKEYSPACESCHEMAIFNOTEXISTSkeyspace_namekeyspace_properties + +### keyspace_properties +``` +keyspace_properties ::= [ WITH REPLICATION = { keyspace_property + [ , ... 
] } ] + [ AND DURABLE_WRITES = { 'true' | 'false' } ] +``` +WITHREPLICATION={,keyspace_property}ANDDURABLE_WRITES=truefalse + +### keyspace_property +``` +keyspace_property ::= property_name = property_literal +``` +property_name=property_literal + +### create_table +``` +create_table ::= CREATE TABLE [ IF NOT EXISTS ] table_name ( + table_schema ) [ table_properties ] +``` +CREATETABLEIFNOTEXISTStable_name(table_schema)table_properties + +### table_schema +``` +table_schema ::= { column_name column_type + { PRIMARY KEY | STATIC } [ ... ] + | PRIMARY KEY ( ( column_name [ , ... ] ) + [ , column_name [ ... ] ] ) } [ , ... ] +``` +,column_namecolumn_typePRIMARYKEYSTATICPRIMARYKEY((,column_name),column_name) + +### table_properties +``` +table_properties ::= WITH + { property_name = property_literal + | CLUSTERING ORDER BY ( + { column_name [ ASC | DESC ] } [ , ... ] ) + | COMPACT STORAGE } [ AND ... ] +``` +WITHANDproperty_name=property_literalCLUSTERINGORDERBY(,column_nameASCDESC)COMPACTSTORAGE + +### create_type +``` +create_type ::= CREATE TYPE [ IF NOT EXISTS ] type_name ( + { field_name field_type } [ , ... ] ) +``` +CREATETYPEIFNOTEXISTStype_name(,field_namefield_type) + +### field_type +``` +field_type ::= '' +``` +<type> + +### drop_keyspace +``` +drop_keyspace ::= DROP { KEYSPACE | SCHEMA } [ IF EXISTS ] + keyspace_name +``` +DROPKEYSPACESCHEMAIFEXISTSkeyspace_name + +### drop_table +``` +drop_table ::= DROP TABLE [ IF EXISTS ] table_name +``` +DROPTABLEIFEXISTStable_name + +### drop_type +``` +drop_type ::= DROP TYPE [ IF EXISTS ] type_name +``` +DROPTYPEIFEXISTStype_name + +### use_keyspace +``` +use_keyspace ::= USE keyspace_name +``` +USEkeyspace_name + +### delete +``` +delete ::= DELETE FROM table_name + [ USING TIMESTAMP timestamp_expression ] WHERE + where_expression [ IF { [ NOT ] EXISTS | if_expression } ] + [ RETURNS STATUS AS ROW ] +``` +DELETEFROMtable_nameUSINGTIMESTAMPtimestamp_expressionWHEREwhere_expressionIFNOTEXISTSif_expressionRETURNS STATUS AS ROW + +### where_expression +``` +where_expression ::= { column_name { < | <= + | = | >= + | > | IN + | NOT IN } expression } + [ AND ... ] +``` +ANDcolumn_name<<==>=>INNOT INexpression + +### if_expression +``` +if_expression ::= { column_name { < | <= | = | >= | > | IN | NOT IN } + expression } [ AND ... ] +``` +ANDcolumn_name<<==>=>INNOT INexpression + +### insert +``` +insert ::= INSERT INTO table_name ( column_name [ , ... ] ) VALUES ( + expression [ , ... ] ) + [ IF { [ NOT ] EXISTS | if_expression } ] + [ USING using_expression ] + [ RETURNS STATUS AS ROW ] +``` +INSERTINTOtable_name(,column_name)VALUES(,expression)IFNOTEXISTSif_expressionUSINGusing_expressionRETURNS STATUS AS ROW + +### using_expression +``` +using_expression ::= ttl_or_timestamp_expression [ AND ... ] +``` +ANDttl_or_timestamp_expression + +### ttl_or_timestamp_expression +``` +ttl_or_timestamp_expression ::= TTL ttl_expression + | TIMESTAMP timestamp_expression +``` +TTLttl_expressionTIMESTAMPtimestamp_expression + +### expression +``` +expression ::= '' +``` +<expression> + +### select +``` +select ::= SELECT [ DISTINCT ] { * | column_name [ , ... 
] } FROM + table_name [ WHERE where_expression [ ALLOW FILTERING ] ] + [ IF if_expression ] [ ORDER BY order_expression ] + [ LIMIT limit_expression ] [ OFFSET offset_expression ] +``` +SELECTDISTINCT*,column_nameFROMtable_nameWHEREwhere_expressionALLOW FILTERINGIFif_expressionORDER BYorder_expressionLIMITlimit_expressionOFFSEToffset_expression + +### order_expression +``` +order_expression ::= ( { column_name [ ASC | DESC ] } [ , ... ] ) +``` +(,column_nameASCDESC) + +### update +``` +update ::= UPDATE table_name [ USING using_expression ] SET assignment + [ , ... ] WHERE where_expression + [ IF { if_expression + | [ NOT ] EXISTS + | if_expression OR [ NOT ] EXISTS } ] + [ RETURNS STATUS AS ROW ] +``` +UPDATEtable_nameUSINGusing_expressionSET,assignmentWHEREwhere_expressionIFif_expressionNOTEXISTSif_expressionORNOTEXISTSRETURNS STATUS AS ROW + +### transaction_block +``` +transaction_block ::= BEGIN TRANSACTION + { { insert | update | delete } ; } [ ... ] END + TRANSACTION ; +``` +BEGINTRANSACTIONinsertupdatedelete;ENDTRANSACTION; + +### truncate +``` +truncate ::= TRUNCATE [ TABLE ] table_name +``` +TRUNCATETABLEtable_name + +### assignment +``` +assignment ::= column_name = expression +``` +column_name=expression + +### ttl_expression +``` +ttl_expression ::= '' +``` +<Integer Literal> + +### timestamp_expression +``` +timestamp_expression ::= '' +``` +<Integer Literal> + +### limit_expression +``` +limit_expression ::= '' +``` +<Integer Literal> + +### offset_expression +``` +offset_expression ::= '' +``` +<Integer Literal> + +### keyspace_name +``` +keyspace_name ::= '' +``` +<Text Literal> + +### property_name +``` +property_name ::= '' +``` +<Text Literal> + +### property_literal +``` +property_literal ::= '' +``` +<Text Literal> + +### table_name +``` +table_name ::= [ keyspace_name . ] '' +``` +keyspace_name.<Text Literal> + +### index_name +``` +index_name ::= '' +``` +<Text Literal> + +### column_name +``` +column_name ::= '' +``` +<Text Literal> + +### type_name +``` +type_name ::= [ keyspace_name . ] '' +``` +keyspace_name.<Text Literal> + +### field_name +``` +field_name ::= '' +``` +<Text Literal> + +### alter_role +``` +alter_role ::= ALTER ROLE role_name WITH role_property [ AND ... ] +``` +ALTERROLErole_nameWITHANDrole_property + +### create_role +``` +create_role ::= CREATE ROLE [ IF NOT EXISTS ] role_name + [ WITH role_property [ AND ... 
] ] +``` +CREATEROLEIFNOTEXISTSrole_nameWITHANDrole_property + +### role_property +``` +role_property ::= PASSWORD = '' + | LOGIN = '' + | SUPERUSER = '' +``` +PASSWORD=<Text Literal>LOGIN=<Boolean Literal>SUPERUSER=<Boolean Literal> + +### drop_role +``` +drop_role ::= DROP ROLE [ IF EXISTS ] role_name +``` +DROPROLEIFEXISTSrole_name + +### grant_role +``` +grant_role ::= GRANT role_name TO role_name +``` +GRANTrole_nameTOrole_name + +### revoke_role +``` +revoke_role ::= REVOKE role_name FROM role_name +``` +REVOKErole_nameFROMrole_name + +### grant_permission +``` +grant_permission ::= GRANT { all_permissions | permission } ON + resource TO role_name +``` +GRANTall_permissionspermissionONresourceTOrole_name + +### revoke_permission +``` +revoke_permission ::= REVOKE { all_permissions | permission } ON + resource FROM role_name +``` +REVOKEall_permissionspermissionONresourceFROMrole_name + +### all_permissions +``` +all_permissions ::= ALL [ PERMISSIONS ] +``` +ALLPERMISSIONS + +### role_name +``` +role_name ::= '' +``` +<Text Literal> + +### permission +``` +permission ::= { CREATE + | ALTER + | DROP + | SELECT + | MODIFY + | AUTHORIZE + | DESCRIBE + | EXECUTE } [ PERMISSION ] +``` +CREATEALTERDROPSELECTMODIFYAUTHORIZEDESCRIBEEXECUTEPERMISSION + +### resource +``` +resource ::= ALL { KEYSPACES | ROLES } + | KEYSPACE keyspace_name + | [ TABLE ] table_name + | ROLE role_name +``` +ALLKEYSPACESROLESKEYSPACEkeyspace_nameTABLEtable_nameROLErole_name + +### alter_keyspace +``` +alter_keyspace ::= ALTER { KEYSPACE | SCHEMA } keyspae_name + keyspace_properties +``` +ALTERKEYSPACESCHEMAkeyspae_namekeyspace_properties + +### explain +``` +explain ::= EXPLAIN { select | update | insert | delete } +``` +EXPLAINselectupdateinsertdelete diff --git a/docs/content/v2.25/api/ycql/syntax_resources/ycql_grammar.ebnf b/docs/content/v2.25/api/ycql/syntax_resources/ycql_grammar.ebnf new file mode 100644 index 000000000000..bb0f8fe8fbea --- /dev/null +++ b/docs/content/v2.25/api/ycql/syntax_resources/ycql_grammar.ebnf @@ -0,0 +1,148 @@ +alter_table ::= 'ALTER' 'TABLE' table_name ('ADD' ( column_name column_type ) { ',' ( column_name column_type ) } | 'DROP' ( column_name { ',' column_name } ) | 'RENAME' ( column_name 'TO' column_name ) { ',' ( column_name 'TO' column_name ) } | 'WITH' ( property_name '=' property_literal ) { 'AND' ( property_name '=' property_literal ) } )+ ; + +column_type ::= ''; + +create_index ::= 'CREATE' [ 'UNIQUE' ] [ 'DEFERRED' ] 'INDEX' [ 'IF' 'NOT' 'EXISTS' ] index_name 'ON' \ + table_name '(' partition_key_columns ',' [ clustering_key_columns ] ')' \ + [ covering_columns ] [ index_properties ] [ 'WHERE' index_predicate]; + +partition_key_columns = ( index_column | '(' index_column { ',' index_column } ')' ); + +clustering_key_columns = index_column { ',' index_column }; + +index_properties = 'WITH' ( property_name '=' property_literal | 'CLUSTERING' 'ORDER' 'BY' '(' ( index_column [ 'ASC' | 'DESC' ] ) { ',' ( index_column [ 'ASC' | 'DESC' ] ) } ')' ) { 'AND' ( property_name '=' property_literal | 'CLUSTERING' 'ORDER' 'BY' '(' ( index_column [ 'ASC' | 'DESC' ] ) { ',' ( index_column [ 'ASC' | 'DESC' ] ) } ')') } ; + +index_column = column_name | jsonb_attribute; + +jsonb_attribute = column_name {'->' "'attribute_name'" } '->>' "'attribute_name'"; + +covering_columns = ( 'COVERING' | 'INCLUDE' ) '(' column_name { ',' column_name } ')'; + +index_predicate = where_expression; + +create_keyspace ::= 'CREATE' ( 'KEYSPACE' | 'SCHEMA' ) [ 'IF' 'NOT' 'EXISTS' ] keyspace_name 
keyspace_properties ; + +keyspace_properties ::= [ 'WITH' 'REPLICATION' '=' '{' keyspace_property { ',' keyspace_property } '}' ] [ 'AND' 'DURABLE_WRITES' '=' ( 'true' | 'false' ) ] ; + +keyspace_property ::= property_name '=' property_literal ; + +create_table ::= 'CREATE' 'TABLE' [ 'IF' 'NOT' 'EXISTS' ] table_name '(' table_schema ')' [ table_properties ] ; + +table_schema ::= ( ( column_name column_type ( 'PRIMARY' 'KEY' | 'STATIC' )+ ) | ( 'PRIMARY' 'KEY' '(' '(' column_name {',' column_name } ')' { ',' column_name } ')' ) ) { ',' ( ( column_name column_type ( 'PRIMARY' 'KEY' | 'STATIC' )+ ) | ( 'PRIMARY' 'KEY' '(' '(' column_name {',' column_name } ')' { ',' column_name } ')' ) ) } ; + +table_properties ::= 'WITH' ( property_name '=' property_literal | 'CLUSTERING' 'ORDER' 'BY' '(' ( column_name [ 'ASC' | 'DESC' ] ) { ',' ( column_name [ 'ASC' | 'DESC' ] ) } ')' | 'COMPACT' 'STORAGE' ) { 'AND' ( property_name '=' property_literal | 'CLUSTERING' 'ORDER' 'BY' '(' ( column_name [ 'ASC' | 'DESC' ] ) { ',' ( column_name [ 'ASC' | 'DESC' ] ) } ')' | 'COMPACT' 'STORAGE' ) } ; + +create_type ::= 'CREATE' 'TYPE' [ 'IF' 'NOT' 'EXISTS' ] type_name + '(' (field_name field_type ) { ',' ( field_name field_type ) } ')' ; + +field_type ::= '' ; + +drop_keyspace ::= 'DROP' ( 'KEYSPACE' | 'SCHEMA' ) [ 'IF' 'EXISTS' ] keyspace_name ; + +drop_table ::= 'DROP' 'TABLE' [ 'IF' 'EXISTS' ] table_name ; + +drop_type ::= 'DROP' 'TYPE' [ 'IF' 'EXISTS' ] type_name ; + +use_keyspace ::= 'USE' keyspace_name ; + +delete ::= 'DELETE' 'FROM' table_name + [ 'USING' 'TIMESTAMP' timestamp_expression ] + 'WHERE' where_expression + [ 'IF' ( [ 'NOT' ] 'EXISTS' | if_expression ) ] + [ 'RETURNS STATUS AS ROW' ]; + +where_expression ::= ( column_name ( '<' | '<=' | '=' | '!=' | '>=' | '>' | 'IN' | 'NOT IN' ) expression ) { 'AND' ( column_name ( '<' | '<=' | '=' | '!=' | '>=' | '>' | 'IN' | 'NOT IN' ) expression ) } ; + +if_expression ::= ( column_name ( '<' | '<=' | '=' | '!=' | '>=' | '>' | 'IN' | 'NOT IN' ) expression ) { 'AND' ( column_name ( '<' | '<=' | '=' | '!=' | '>=' | '>' | 'IN' | 'NOT IN' ) expression ) } ; + +insert ::= 'INSERT' 'INTO' table_name '(' column_name { ',' column_name } ')' + 'VALUES' '(' expression { ',' expression } ')' \ + [ 'IF' ( [ 'NOT' ] 'EXISTS' | if_expression ) ] + [ 'USING' using_expression ] + [ 'RETURNS STATUS AS ROW' ]; + +using_expression ::= ( ttl_or_timestamp_expression ) { 'AND' ( ttl_or_timestamp_expression ) } ; + +ttl_or_timestamp_expression ::= ( 'TTL' ttl_expression | 'TIMESTAMP' timestamp_expression ) ; + +expression ::= '' ; + +select ::= 'SELECT' [ 'DISTINCT' ] ( '*' | column_name { ',' column_name } ) 'FROM' table_name \ + [ 'WHERE' where_expression [ 'ALLOW FILTERING' ] ] \ + [ 'IF' if_expression ] \ + [ 'ORDER BY' order_expression ] \ + [ 'LIMIT' limit_expression ] [ 'OFFSET' offset_expression ] ; + +order_expression ::= '(' ( column_name [ 'ASC' | 'DESC' ] ) { ',' ( column_name [ 'ASC' | 'DESC' ] ) } ')'; + +update ::= 'UPDATE' table_name + [ 'USING' using_expression ] + 'SET' assignment { ',' assignment } \ + 'WHERE' where_expression + [ 'IF' ( if_expression | [ 'NOT' ] 'EXISTS' | if_expression 'OR' [ 'NOT' ] 'EXISTS') ] + [ 'RETURNS STATUS AS ROW' ]; + +transaction_block ::= 'BEGIN' 'TRANSACTION' \ + ( ( insert | update | delete ) ';' ) + { ( insert | update | delete ) ';' } \ + 'END' 'TRANSACTION' ';' ; + +truncate ::= 'TRUNCATE' [ 'TABLE' ] table_name; + +assignment ::= column_name '=' expression ; + +ttl_expression ::= '' ; + +timestamp_expression ::= '' ; + 
+limit_expression ::= '' ; + +offset_expression ::= '' ; + +keyspace_name ::= '' ; + +property_name ::= '' ; + +property_literal ::= '' ; + +table_name ::= [ keyspace_name '.' ] '' ; + +index_name ::= '' ; + +column_name ::= '' ; + +type_name ::= [ keyspace_name '.' ] '' ; + +field_name ::= '' ; + +alter_role ::= 'ALTER' 'ROLE' role_name 'WITH' ( role_property ) { 'AND' ( role_property ) } ; + +create_role ::= 'CREATE' 'ROLE' [ 'IF' 'NOT' 'EXISTS' ] role_name [ 'WITH' ( role_property ) { 'AND' ( role_property ) } ] ; + +role_property ::= 'PASSWORD' '=' '' + | 'LOGIN' '=' '' + | 'SUPERUSER' '=' '' ; + +drop_role ::= 'DROP' 'ROLE' [ 'IF' 'EXISTS' ] role_name ; + +grant_role ::= 'GRANT' role_name 'TO' role_name ; + +revoke_role ::= 'REVOKE' role_name 'FROM' role_name ; + +grant_permission ::= 'GRANT' (all_permissions | permission) 'ON' resource 'TO' role_name ; + +revoke_permission ::= 'REVOKE' (all_permissions | permission) 'ON' resource 'FROM' role_name ; + +all_permissions ::= 'ALL' [ 'PERMISSIONS' ] ; + +role_name ::= '' ; + +permission ::= ('CREATE' | 'ALTER' | 'DROP' | 'SELECT' | 'MODIFY' | 'AUTHORIZE' | 'DESCRIBE' | 'EXECUTE') [ 'PERMISSION' ] ; +resource ::= 'ALL' ('KEYSPACES' | 'ROLES') + | 'KEYSPACE' keyspace_name + | [ 'TABLE' ] table_name + | 'ROLE' role_name ; + +alter_keyspace ::= 'ALTER' ('KEYSPACE' | 'SCHEMA') keyspace_name keyspace_properties; + +explain ::= 'EXPLAIN' (select | update | insert | delete); diff --git a/docs/content/v2.25/api/ycql/type_blob.md b/docs/content/v2.25/api/ycql/type_blob.md new file mode 100644 index 000000000000..ac716edc1f32 --- /dev/null +++ b/docs/content/v2.25/api/ycql/type_blob.md @@ -0,0 +1,70 @@ +--- +title: BLOB data type [YCQL] +headerTitle: BLOB data type +linkTitle: BLOB +description: Use the BLOB data type to represent arbitrary binary data of variable length. +menu: + preview_api: + parent: api-cassandra + weight: 1370 +aliases: + - /preview/api/cassandra/type_blob + - /preview/api/ycql/type_blob +type: docs +--- + +## Synopsis + +Use the `BLOB` data type to represent arbitrary binary data of variable length. + +## Syntax + +``` +type_specification ::= BLOB + +blob_literal ::= "0x" [ hex_digit hex_digit ...] +``` + +Where + +- `hex_digit` is a hexadecimal digit (`[0-9a-fA-F]`). + +## Semantics + +- Columns of type `BLOB` can be part of the `PRIMARY KEY`. +- Implicitly, `BLOB` data type is neither convertible nor comparable with other data types. +- Two series of builtin-functions `BlobAs` and `AsBlob` are provided for conversion between `BLOB` and other data types. +- `BLOB` size is virtually unlimited. + +## Examples + +```sql +ycqlsh:example> CREATE TABLE messages(id INT PRIMARY KEY, content BLOB); +``` + +```sql +ycqlsh:example> INSERT INTO messages (id, content) VALUES (1, 0xab00ff); +``` + +```sql +ycqlsh:example> INSERT INTO messages (id, content) VALUES (2, 0x); +``` + +```sql +ycqlsh:example> UPDATE messages SET content = 0x0f0f WHERE id = 2; +``` + +```sql +ycqlsh:example> SELECT * FROM messages; +``` + +``` + id | content +----+---------- + 2 | 0x0f0f + 1 | 0xab00ff +``` + +## See also + +- [Data types](..#data-types) diff --git a/docs/content/v2.25/api/ycql/type_bool.md b/docs/content/v2.25/api/ycql/type_bool.md new file mode 100644 index 000000000000..cd6da66c1972 --- /dev/null +++ b/docs/content/v2.25/api/ycql/type_bool.md @@ -0,0 +1,66 @@ +--- +title: BOOLEAN data type [YCQL] +headerTitle: BOOLEAN data type +linkTitle: BOOLEAN +description: Use the `BOOLEAN` data type to specify values of either "true" or "false". 
+menu: + preview_api: + parent: api-cassandra + weight: 1380 +aliases: + - /preview/api/cassandra/type_bool + - /preview/api/ycql/type_bool +type: docs +--- + +## Synopsis + +Use the `BOOLEAN` data type to specify values of either `true` or `false`. + +## Syntax + +``` +type_specification ::= BOOLEAN + +boolean_literal ::= TRUE | FALSE +``` + +## Semantics + +- Columns of type `BOOLEAN` cannot be part of the `PRIMARY KEY`. +- Columns of type `BOOLEAN` can be set, inserted, and compared. +- In `WHERE` and `IF` clause, `BOOLEAN` columns cannot be used as a standalone expression. They must be compared with either `true` or `false`. For example, `WHERE boolean_column = TRUE` is valid while `WHERE boolean_column` is not. +- Implicitly, `BOOLEAN` is neither comparable nor convertible to any other data types. + +## Examples + +```sql +ycqlsh:example> CREATE TABLE tasks (id INT PRIMARY KEY, finished BOOLEAN); +``` + +```sql +ycqlsh:example> INSERT INTO tasks (id, finished) VALUES (1, false); +``` + +```sql +ycqlsh:example> INSERT INTO tasks (id, finished) VALUES (2, false); +``` + +```sql +ycqlsh:example> UPDATE tasks SET finished = true WHERE id = 2; +``` + +```sql +ycqlsh:example> SELECT * FROM tasks; +``` + +``` +id | finished +----+---------- + 2 | True + 1 | False +``` + +## See also + +- [Data types](..#data-types) diff --git a/docs/content/v2.25/api/ycql/type_collection.md b/docs/content/v2.25/api/ycql/type_collection.md new file mode 100644 index 000000000000..f5732ca68d68 --- /dev/null +++ b/docs/content/v2.25/api/ycql/type_collection.md @@ -0,0 +1,253 @@ +--- +title: Collection data types (MAP, LIST, and SET) [YCQL] +headerTitle: Collection data types (MAP, LIST, and SET) +linkTitle: Collection +description: Use collection data types to specify columns for data objects that can contain more than one value. +menu: + preview_api: + parent: api-cassandra + weight: 1390 +aliases: + - /preview/api/cassandra/type_collection + - /preview/api/ycql/type_collection +type: docs +--- + +## Synopsis + +Use collection data types to specify columns for data objects that can contain more than one value. + +### LIST + +`LIST` is an ordered collection of elements. All elements in a `LIST` must be of the same primitive type. Elements can be prepend or append by `+` operator to a list, removed by `-` operator, and referenced by their indexes of that list by `[]` operator. + +### MAP + +`MAP` is an sorted collection of pairs of elements, a key and a value. The sorting order is based on the key values and is implementation-dependent. With their key values, elements in a `MAP` can be set by the `[]` operator, added by the `+` operator, and removed by the `-` operator. +When queries, the element pairs of a map will be returned in the sorting order. + +### SET + +`SET` is a sorted collection of elements. The sorting order is implementation-dependent. Elements can be added by `+` operator and removed by `-` operator. When queried, the elements of a set will be returned in the sorting order. + +## Syntax + +``` +type_specification ::= { LIST | MAP | SET } + +list_literal ::= '[' [ expression ...] ']' + +map_literal ::= '{' [ { expression ':' expression } ...] '}' + +set_literal ::= '{' [ expression ...] '}' + +``` + +Where + +- Columns of type `LIST`, `MAP`, or `SET` cannot be part of the `PRIMARY KEY`. +- `type` must be a [non-parametric data type](../#data-types) or a [frozen](../type_frozen) data type. 
+- `key_type` must be any data type that is allowed in a primary key (currently `FROZEN` and all non-parametric data types except `BOOL`).
+- For `map_literal`, the left-side `expression` represents the key and the right-side one represents the value.
+- `expression` is any well-formed YCQL expression. See [Expression](..#expressions) for more information on syntax rules.
+
+## Semantics
+
+- Type parameters must be simple types or [frozen types](../type_frozen) (collections and user-defined types must be frozen to be used as collection parameters).
+- Columns of type `LIST`, `MAP`, and `SET` cannot be part of the `PRIMARY KEY`.
+- Implicitly, values of collection data types are neither convertible nor comparable to other data types.
+- Each expression in a collection literal must evaluate to a value convertible to the corresponding parameter data type.
+- Comparisons on collection values are not allowed (for example, in `WHERE` or `IF` clauses).
+- Empty collections are treated as null values.
+
+{{< note title="Note" >}}
+Collections are designed for storing small sets of values that are not expected to grow to arbitrary size (such as phone numbers or addresses for a user rather than posts or messages).
+While collections of larger sizes are allowed, they may have a significant impact on performance for queries involving them.
+In particular, some list operations (insert at an index and remove elements) require a read-before-write.
+{{< /note >}}
+
+## Examples
+
+### `CREATE TABLE` with collections
+
+- Collection types are used like simple types (except they are not allowed in the primary key).
+
+```sql
+ycqlsh:example> CREATE TABLE users(username TEXT PRIMARY KEY,
+                                   emails SET<TEXT>,
+                                   phones MAP<TEXT,TEXT>,
+                                   top_cities LIST<TEXT>);
+```
+
+### `INSERT` collection data
+
+- Collection values are inserted by setting all their elements at once.
+
+```sql
+ycqlsh:example> INSERT INTO users(username, emails, phones, top_cities)
+                VALUES ('foo',
+                        {'c@example.com', 'a@example.com'},
+                        {'home' : '999-9999', 'mobile' : '000-0000'},
+                        ['New York', 'Paris']);
+```
+
+Empty collections are the same as nulls.
+
+```sql
+ycqlsh:example> INSERT INTO users(username, emails, phones, top_cities) VALUES ('bar', { }, { }, [ ]);
+```
+
+```sql
+ycqlsh:example> SELECT * FROM users;
+```
+
+```
+ username | emails | phones | top_cities
+----------+------------------------------------+--------------------------------------------+-----------------------
+ bar | null | null | null
+ foo | {'a@example.com', 'c@example.com'} | {'home': '999-9999', 'mobile': '000-0000'} | ['New York', 'Paris']
+```
+
+### `UPDATE` collection column
+
+- Collection values can be updated by setting all their elements at once.
+
+```sql
+ycqlsh:example> UPDATE users SET emails = {'bar@example.com'} WHERE username = 'bar';
+```
+
+```sql
+ycqlsh:example> UPDATE users SET phones = {'home' : '123-45678'} WHERE username = 'bar';
+```
+
+```sql
+ycqlsh:example> UPDATE users SET top_cities = ['London', 'Tokyo'] WHERE username = 'bar';
+```
+
+```sql
+ycqlsh:example> SELECT * FROM users;
+```
+
+```
+ username | emails | phones | top_cities
+----------+------------------------------------+--------------------------------------------+-----------------------
+ bar | {'bar@example.com'} | {'home': '123-45678'} | ['London', 'Tokyo']
+ foo | {'a@example.com', 'c@example.com'} | {'home': '999-9999', 'mobile': '000-0000'} | ['New York', 'Paris']
+```
+
+### Collection expressions
+
+- Collection elements can be added with `+` or removed with `-`.
+ +```sql +ycqlsh:example> UPDATE users SET emails = emails + {'foo@example.com'} WHERE username = 'foo'; +``` + +```sql +ycqlsh:example> UPDATE users SET emails = emails - {'a@example.com', 'c.example.com'} WHERE username = 'foo'; +``` + +```sql +ycqlsh:example> UPDATE users SET phones = phones + {'office' : '333-3333'} WHERE username = 'foo'; +``` + +```sql +ycqlsh:example> SELECT * FROM users; +``` + +``` + username | emails | phones | top_cities +----------+--------------------------------------+------------------------------------------------------------------+----------------------- + bar | {'bar@example.com'} | {'home': '123-45678'} | ['London', 'Tokyo'] + foo | {'c@example.com', 'foo@example.com'} | {'home': '999-9999', 'mobile': '000-0000', 'office': '333-3333'} | ['New York', 'Paris'] +``` + +- To remove map elements only the relevant keys need to be given (as a set). + +```sql +ycqlsh:example> UPDATE users SET phones = phones - {'home'} WHERE username = 'foo'; +``` + +```sql +ycqlsh:example> SELECT * FROM users; +``` + +``` + username | emails | phones | top_cities +----------+--------------------------------------+----------------------------------------------+----------------------- + bar | {'bar@example.com'} | {'home': '123-45678'} | ['London', 'Tokyo'] + foo | {'c@example.com', 'foo@example.com'} | {'mobile': '000-0000', 'office': '333-3333'} | ['New York', 'Paris'] +``` + +- List elements can be either prepended or appended. + +```sql +ycqlsh:example> UPDATE users SET top_cities = top_cities + ['Delhi'] WHERE username = 'foo'; +``` + +```sql +ycqlsh:example> UPDATE users SET top_cities = ['Sunnyvale'] + top_cities WHERE username = 'foo'; +``` + +```sql +ycqlsh:example> UPDATE users SET top_cities = top_cities - ['Paris', 'New York'] WHERE username = 'foo'; +``` + +```sql +ycqlsh:example> SELECT * FROM users; +``` + +``` + username | emails | phones | top_cities +----------+---------------------+----------------------------------------------+------------------------ + bar | {'bar@example.com'} | {'home': '123-45678'} | ['London', 'Tokyo'] + foo | {'foo@example.com'} | {'mobile': '000-0000', 'office': '333-3333'} | ['Sunnyvale', 'Delhi'] +``` + +### `UPDATE` map and list elements + +- Maps allow referencing elements by key. + +```sql +ycqlsh:example> UPDATE users SET phones['mobile'] = '111-1111' WHERE username = 'foo'; +``` + +```sql +ycqlsh:example> UPDATE users SET phones['mobile'] = '345-6789' WHERE username = 'bar' IF phones['mobile'] = null; +``` + +```sql +ycqlsh:example> SELECT * FROM users; +``` + +``` + username | emails | phones | top_cities +----------+--------------------------------------+----------------------------------------------+----------------------- + bar | {'bar@example.com'} | {'home': '123-45678', 'mobile': '345-6789'} | ['London', 'Tokyo'] + foo | {'c@example.com', 'foo@example.com'} | {'mobile': '111-1111', 'office': '333-3333'} | ['New York', 'Paris'] +``` + +- Lists allow referencing elements by index (numbering starts from 0). 
+ +```sql +ycqlsh:example> UPDATE users SET top_cities[0] = 'San Francisco' WHERE username = 'bar'; +``` + +```sql +ycqlsh:example> UPDATE users SET top_cities[1] = 'Mumbai' WHERE username = 'bar' IF top_cities[1] = 'Tokyo'; +``` + +```sql +ycqlsh:example> SELECT * FROM users; +``` + +``` + username | emails | phones | top_cities +----------+--------------------------------------+----------------------------------------------+----------------------------- + bar | {'bar@example.com'} | {'home': '123-45678', 'mobile': '345-6789'} | ['San Francisco', 'Mumbai'] + foo | {'c@example.com', 'foo@example.com'} | {'mobile': '111-1111', 'office': '333-3333'} | ['New York', 'Paris'] +``` + +## See also + +- [Data types](..#data-types) diff --git a/docs/content/v2.25/api/ycql/type_datetime.md b/docs/content/v2.25/api/ycql/type_datetime.md new file mode 100644 index 000000000000..62ea631fa711 --- /dev/null +++ b/docs/content/v2.25/api/ycql/type_datetime.md @@ -0,0 +1,251 @@ +--- +title: Date and time data types (DATE, TIME, and TIMESTAMP) [YCQL] +headerTitle: Date and time data types (DATE, TIME, and TIMESTAMP) +linkTitle: DATE, TIME, and TIMESTAMP +description: Use the date and time data types (DATE, TIME, and TIMESTAMP) to specify dates and time. +menu: + preview_api: + parent: api-cassandra + weight: 1450 +aliases: + - /preview/api/cassandra/type_datetime + - /preview/api/cassandra/type_timestamp + - /preview/api/ycql/type_datetime + - /preview/api/ycql/type_timestamp +type: docs +--- + +## Synopsis + +Use datetime data types to specify data of date and time at a time zone, `DATE` for a specific date, `TIME` for time of day, and `TIMESTAMP` for the combination of both date and time. + +## Syntax + +```ebnf +type_specification ::= TIMESTAMP | DATE | TIME +``` + +## Semantics + +- Columns of type `DATE`, `TIME` and `TIMESTAMP` can be part of the `PRIMARY KEY`. +- Implicitly, values of datetime types cannot be converted or compared to other data types. +- Values of integer and text data types with the correct format (given above) are convertible to datetime types. +- Supported timestamp range is from year `1900` to year `9999`. +- The default value for hour, minute, second, and millisecond components is `0`. +- The default time zone is `UTC`. + +### DATE + +A date is represented using a 32-bit unsigned integer representing the number of days since epoch (January 1, 1970) with no corresponding time value. +Use [INSERT](../dml_insert) or [UPDATE](../dml_update/) to add values as an integer (days since epoch) or in the string format shown below. + +#### Syntax + +``` +yyyy-mm-dd +``` + +- `yyyy`: four digit year. +- `mm`: two digit month. +- `dd`: two digit day. + +For example, `2020-07-29`. + +### TIME + +Values of the `time` data type are encoded as 64-bit signed integers representing the number of nanoseconds since midnight with no corresponding date value. + +Use [INSERT](../dml_insert) or [UPDATE](../dml_update/) to add values in the following string format, where subseconds (`f`) are optional and if provided, can be less than nanosecond: + +#### Syntax + +``` +hh:mm:ss[.fffffffff] +``` + +- `hh`: two digit hour, using a 24-hour clock. +- `mm`: two digit minutes. +- `ss`: two digit seconds. +- `fffffffff`: (Optional) three digit sub-seconds, or nanoseconds. When excluded, set to `0`. + +For example, `12:34:56` or `12:34:56.789` or `12:34:56.123456789`. + +### TIMESTAMP + +Values of the `timestamp` data type combines date, time, and time zone, in ISO 8601 format. 
+ +Use [INSERT](../dml_insert) or [UPDATE](../dml_update/) to add values in the string format shown below, where milliseconds (`f`) are optional. + +#### Syntax + +``` +yyyy-mm-dd[ (T| )HH:MM[:SS][.fff]][(+|-)NNNN] +``` + +Required date (`yyyy-mm-dd`) where: + +- `yyyy`: four digit year. +- `mm`: two digit month. +- `dd`: two digit day. + +Optional time (HH:MM[:SS][.fff]) where: + +- `HH`: two digit hour, using a 24-hour clock. +- `MM`: two digit minutes. +- `SS`: (Optional) two digit seconds. +- `fff`: (Optional) three digit sub-seconds, or milliseconds. When excluded, set to `0`. + +Optional time zone (`(+|-)NNNN`) where: + +- `+|-`: Add or subtract the NNNN from GMT +- `NNNN`: The 4-digit time zone (RFC 822). For example, `+0000` is GMT and `-0800` is PST. + +NNNN is the RFC-822 4-digit time zone, for example +0000 is GMT and -0800 is PST. + +For example, for July 29, 2020 midnight PST, valid timestamp values include `2020-07-29 12:34:56.789+0000`, `2020-07-29 12:34:56.789`, `2020-07-29 12:34:56`, and `2020-07-29`. + +## Examples + +### Using the date and time types + +```sql +ycqlsh:example> CREATE TABLE orders(customer_id INT, order_date DATE, order_time TIME, amount DECIMAL, PRIMARY KEY ((customer_id), order_date, order_time)); +``` + +Date and time values can be inserted using `currentdate` and `currenttime` standard functions. + +```sql +ycqlsh:example> INSERT INTO orders(customer_id, order_date, order_time, amount) VALUES (1, currentdate(), currenttime(), 85.99); +``` + +```sql +ycqlsh:example> INSERT INTO orders(customer_id, order_date, order_time, amount) VALUES (1, currentdate(), currenttime(), 34.15); +``` + +```sql +ycqlsh:example> INSERT INTO orders(customer_id, order_date, order_time, amount) VALUES (2, currentdate(), currenttime(), 55.45); +``` + +```sql +ycqlsh:example> SELECT * FROM orders; +``` + +```output + customer_id | order_date | order_time | amount +-------------+------------+--------------------+-------- + 1 | 2018-10-09 | 17:12:25.824094000 | 85.99 + 1 | 2018-10-09 | 17:12:56.350031000 | 34.15 + 2 | 2018-10-09 | 17:13:15.203633000 | 55.45 +``` + +Date values can be given using date-time literals. + +```sql +ycqlsh:example> SELECT sum(amount) FROM orders WHERE customer_id = 1 AND order_date = '2018-10-09'; +``` + +```output + system.sum(amount) +-------------------- + 120.14 +``` + +### Using the timestamp type + +You can do this as follows: + +```sql +ycqlsh:example> CREATE TABLE sensor_data(sensor_id INT, ts TIMESTAMP, value FLOAT, PRIMARY KEY(sensor_id, ts)); +``` + +Timestamp values can be given using date-time literals. + +```sql +ycqlsh:example> INSERT INTO sensor_data(sensor_id, ts, value) VALUES (1, '2017-07-04 12:30:30 UTC', 12.5); +``` + +```sql +ycqlsh:example> INSERT INTO sensor_data(sensor_id, ts, value) VALUES (1, '2017-07-04 12:31 UTC', 13.5); +``` + +Timestamp values can also be given as integers (milliseconds from epoch). 
+ +```sql +ycqlsh:example> INSERT INTO sensor_data(sensor_id, ts, value) VALUES (2, 1499171430000, 20); +``` + +```sql +ycqlsh:example> SELECT * FROM sensor_data; +``` + +```output + sensor_id | ts | value +-----------+---------------------------------+------- + 2 | 2017-07-04 12:30:30.000000+0000 | 20 + 1 | 2017-07-04 12:30:30.000000+0000 | 12.5 + 1 | 2017-07-04 12:31:00.000000+0000 | 13.5 +``` + +### Supported timestamp literals + +```output +'1992-06-04 12:30' +'1992-6-4 12:30' +'1992-06-04 12:30+04:00' +'1992-6-4 12:30-04:30' +'1992-06-04 12:30 UTC+04:00' +'1992-6-4 12:30 UTC-04:30' +'1992-06-04 12:30.321' +'1992-6-4 12:30.12' +'1992-06-04 12:30.321+04:00' +'1992-6-4 12:30.12-04:30' +'1992-06-04 12:30.321 UTC+04:00' +'1992-6-4 12:30.12 UTC-04:30' +'1992-06-04 12:30:45' +'1992-6-4 12:30:45' +'1992-06-04 12:30:45+04:00' +'1992-6-4 12:30:45-04:30' +'1992-06-04 12:30:45 UTC+04:00' +'1992-6-4 12:30:45 UTC-04:30' +'1992-06-04 12:30:45.321' +'1992-6-4 12:30:45.12' +'1992-06-04 12:30:45.321+04:00' +'1992-6-4 12:30:45.12-04:30' +'1992-06-04 12:30:45.321 UTC+04:00' +'1992-6-4 12:30:45.12 UTC-04:30' +'1992-06-04T12:30' +'1992-6-4T12:30' +'1992-06-04T12:30+04:00' +'1992-6-4T12:30-04:30' +'1992-06-04T12:30 UTC+04:00' +'1992-6-4T12:30TUTC-04:30' +'1992-06-04T12:30.321' +'1992-6-4T12:30.12' +'1992-06-04T12:30.321+04:00' +'1992-6-4T12:30.12-04:30' +'1992-06-04T12:30.321 UTC+04:00' +'1992-6-4T12:30.12 UTC-04:30' +'1992-06-04T12:30:45' +'1992-6-4T12:30:45' +'1992-06-04T12:30:45+04:00' +'1992-6-4T12:30:45-04:30' +'1992-06-04T12:30:45 UTC+04:00' +'1992-6-4T12:30:45 UTC-04:30' +'1992-06-04T12:30:45.321' +'1992-6-4T12:30:45.12' +'1992-06-04T12:30:45.321+04:00' +'1992-6-4T12:30:45.12-04:30' +'1992-06-04T12:30:45.321 UTC+04:00' +'1992-6-4T12:30:45.12 UTC-04:30' +'1992-06-04' +'1992-6-4' +'1992-06-04+04:00' +'1992-6-4-04:30' +'1992-06-04 UTC+04:00' +'1992-6-4 UTC-04:30' + ``` + +## See also + +- [Date and time functions](../function_datetime) +- [Data types](..#data-types) diff --git a/docs/content/v2.25/api/ycql/type_frozen.md b/docs/content/v2.25/api/ycql/type_frozen.md new file mode 100644 index 000000000000..16210fef5f8a --- /dev/null +++ b/docs/content/v2.25/api/ycql/type_frozen.md @@ -0,0 +1,68 @@ +--- +title: FROZEN data type [YCQL] +headerTitle: FROZEN data type +linkTitle: FROZEN +description: Use the FROZEN data type to specify columns of binary strings that result from serializing collections, tuples, or user-defined types. +menu: + preview_api: + parent: api-cassandra + weight: 1401 +aliases: + - /preview/api/cassandra/type_frozen + - /preview/api/ycql/type_frozen +type: docs +--- + +## Synopsis + +Use the `FROZEN` data type to specify columns of binary strings that result from serializing collections, tuples, or user-defined types. + +## Syntax + +``` +type_specification ::= FROZEN +``` + +Where + +- `type` is a well-formed YCQL data type (additional restrictions for `type` are covered in the Semantics section below). + +## Semantics + +- Columns of type `FROZEN` can be part of the `PRIMARY KEY`. +- Type parameters of `FROZEN` type must be either [collection types](../type_collection) (`LIST`, `MAP`, or `SET`) or [user-defined types](../ddl_create_type). +- `FROZEN` types can be parameters of collection types. +- For any valid frozen type parameter `type`, values of `type` are convertible into `FROZEN`. 
+ +## Examples + +```sql +ycqlsh:example> CREATE TABLE directory(file FROZEN> PRIMARY KEY, value BLOB); +``` + +```sql +ycqlsh:example> INSERT INTO directory(file, value) VALUES([ 'home', 'documents', 'homework.doc' ], 0x); +``` + +```sql +ycqlsh:example> INSERT INTO directory(file, value) VALUES([ 'home', 'downloads', 'textbook.pdf' ], 0x12ab21ef); +``` + +```sql +ycqlsh:example> UPDATE directory SET value = 0xab00ff WHERE file = [ 'home', 'documents', 'homework.doc' ]; +``` + +```sql +ycqlsh:example> SELECT * FROM directory; +``` + +``` + file | value +---------------------------------------+------------ + ['home', 'downloads', 'textbook.pdf'] | 0x12ab21ef + ['home', 'documents', 'homework.doc'] | 0xab00ff + ``` + +## See also + +- [Data Types](..#data-types) diff --git a/docs/content/v2.25/api/ycql/type_inet.md b/docs/content/v2.25/api/ycql/type_inet.md new file mode 100644 index 000000000000..4fdcb63af435 --- /dev/null +++ b/docs/content/v2.25/api/ycql/type_inet.md @@ -0,0 +1,66 @@ +--- +title: INET data type [YCQL] +headerTitle: INET type +linkTitle: INET +description: Use the INET data type to specify columns for data of IP addresses. +menu: + preview_api: + parent: api-cassandra + weight: 1410 +aliases: + - /preview/api/cassandra/type_inet + - /preview/api/ycql/type_inet +type: docs +--- + +## Synopsis + +Use the `INET` data type to specify columns for data of IP addresses. + +## Syntax + +``` +type_specification ::= INET +``` + +## Semantics + +- Columns of type `INET` can be part of the `PRIMARY KEY`. +- Implicitly, values of type `INET` data types are neither convertible nor comparable to other data types. +- Values of text data types with correct format are convertible to `INET`. +- `INET` value format supports text literals for both IPv4 and IPv6 addresses. + +## Examples + +```sql +example> CREATE TABLE dns_table(site_name TEXT PRIMARY KEY, ip_address INET); +``` + +```sql +example> INSERT INTO dns_table(site_name, ip_address) VALUES ('localhost', '127.0.0.1'); +``` + +```sql +example> INSERT INTO dns_table(site_name, ip_address) VALUES ('example.com', '93.184.216.34'); +``` + +`INET` type supports both ipv4 and ipv6 addresses. + +```sql +example> UPDATE dns_table SET ip_address = '2606:2800:220:1:248:1893:25c8:1946' WHERE site_name = 'example.com'; +``` + +```sql +example> SELECT * FROM dns_table; +``` + +``` + site_name | ip_address +-------------+------------------------------------ + example.com | 2606:2800:220:1:248:1893:25c8:1946 + localhost | 127.0.0.1 +``` + +## See also + +- [Data types](..#data-types) diff --git a/docs/content/v2.25/api/ycql/type_int.md b/docs/content/v2.25/api/ycql/type_int.md new file mode 100644 index 000000000000..06412f571e40 --- /dev/null +++ b/docs/content/v2.25/api/ycql/type_int.md @@ -0,0 +1,114 @@ +--- +title: Integer and counter data types [YCQL] +headerTitle: Integer and counter data types +linkTitle: Integer and counter +description: There are several different data types for integers of different value ranges. Integers can be set, inserted, incremented, and decremented. +menu: + preview_api: + parent: api-cassandra + weight: 1420 +aliases: + - /preview/api/cassandra/type_int + - /preview/api/ycql/type_int +type: docs +--- + +## Synopsis + +There are several different data types for integers of different value ranges. Integers can be set, inserted, incremented, and decremented while `COUNTER` can only be incremented or decremented. We've extend Apache Cassandra to support increment and decrement operators for integer data types. 
+ +Datatype | Min | Max | +---------|-----|-----| +`TINYINT` | -128 | 127 | +`SMALLINT` | -32,768 | 32,767 | +`INT` or `INTEGER` | -2,147,483,648 | 2,147,483,647 | +`BIGINT` | -9,223,372,036,854,775,808 | 9,223,372,036,854,775,807 | +`COUNTER` | -9,223,372,036,854,775,808 | 9,223,372,036,854,775,807 | +`VARINT` | unbounded | unbounded | + +## Syntax + +The following keywords are used to specify a column of type integer for different constraints including its value ranges. + +``` +type_specification ::= TINYINT | SMALLINT | INT | INTEGER | BIGINT | VARINT | COUNTER + +integer_literal ::= [ + | - ] digit [ { digit | , } ... ] +``` + +## Semantics + +- Columns of type `TINYINT`, `SMALLINT`, `INT`, `INTEGER`, `BIGINT` or `VARINT` can be part of the `PRIMARY KEY`. +- Values of different integer data types are comparable and convertible to one another. +- Values of integer data types are convertible but not comparable to floating point number. +- Values of floating point data types are not convertible to integers. + +### Counter data type + +`COUNTER` is an alias of `BIGINT` but has additional constraints. + +- Columns of type `COUNTER` cannot be part of the`PRIMARY KEY`. +- If a column is of type `COUNTER`, all non-primary-key columns must also be of type `COUNTER`. +- Column of type `COUNTER` cannot be set or inserted. They must be incremented or decremented. +- If a column of type `COUNTER` is NULL, its value is replaced with zero when incrementing or decrementing. + +## Examples + +### Using integer data types + +```sql +example> CREATE TABLE items(id INT PRIMARY KEY, item_count BIGINT); +``` + +```sql +example> INSERT INTO items(id, item_count) VALUES(1, 1); +``` + +```sql +example> INSERT INTO items(id, item_count) VALUES(2, 2); +``` + +```sql +example> UPDATE items SET item_count = 5 WHERE id = 1; +``` + +```sql +example> UPDATE items SET item_count = item_count + 1 WHERE id = 2; +``` + +```sql +example> SELECT * FROM items; +``` + +``` + id | item_count +----+------------ + 2 | 3 + 1 | 5 +``` + +### Using `COUNTER` data type + +```sql +example> CREATE TABLE item_counters(id INT PRIMARY KEY, item_counter COUNTER); +``` + +For counter type, null values are treated as 0. + +```sql +example> UPDATE item_counters SET item_counter = item_counter + 1 WHERE id = 1; +``` + +```sql +example> SELECT * FROM item_counters; +``` + +``` + id | item_counter +----+-------------- + 1 | 1 +``` + +## See also + +- [Data types](..#data-types) diff --git a/docs/content/v2.25/api/ycql/type_jsonb.md b/docs/content/v2.25/api/ycql/type_jsonb.md new file mode 100644 index 000000000000..d09730f1f391 --- /dev/null +++ b/docs/content/v2.25/api/ycql/type_jsonb.md @@ -0,0 +1,248 @@ +--- +title: JSONB data type [YCQL] +headerTitle: JSONB +linkTitle: JSONB +description: Use the JSONB data type to efficiently model json data. This data type makes it easy to model JSON data which does not have a set schema and might change often. +menu: + preview_api: + parent: api-cassandra + weight: 1470 +aliases: + - /preview/api/ycql/type_jsonb +type: docs +--- + +## Synopsis + +Use the `JSONB` data type to efficiently model JSON data. This data type makes it easy to model +JSON data which does not have a set schema and might change often. This data type is similar to +the [JSONB data type in PostgreSQL](https://www.postgresql.org/docs/15/static/datatype-json.html). +The JSON document is serialized into a format which is easy for search and retrieval. 
+This is achieved by storing all the JSON keys in sorted order, which allows for efficient binary +search of keys. Similarly, arrays are stored such that random access for a particular array index +into the serialized json document is possible. + +Currently, updates to some attributes of a JSONB column require a full read-modify-write operation. +Note that there are plans to enhance the JSONB data type to support efficient incremental updates in +a future version. + +## Syntax + +```output +type_specification ::= { JSONB } +``` + +## Semantics + +- Columns of type `JSONB` cannot be part of the `PRIMARY KEY`. +- Implicitly, values of type `JSONB` are not convertible to other data types. `JSONB` types can be + compared to `TEXT/VARCHAR` data type as long it represents valid json. +- Values of text data types with correct format are convertible to `JSONB`. +- `JSONB` value format supports text literals which are valid json. + +{{< note title="Note" >}} + +Internally, numbers that appear in a JSONB string (used without quotes. e.g `{'a': 3.14}` ) are stored as floating point values. +Due to the inherent imprecision in storing floating-point numbers, one should avoid comparing them for equality. +Users can either use error bounds while querying for these values in order to perform the correct floating-point comparison, or store them as strings (e.g: `{'a': "3.14"}`). +[#996 issue](https://github.com/yugabyte/yugabyte-db/issues/996) + +{{< /note >}} + +## Operators and functions + +We currently support two operators which can be applied to the `JSONB` data type. The `->` operator +returns a result of type `JSONB` and further json operations can be applied to the result. The `->>` +operator converts `JSONB` to its string representation and returns the same. As a result, you can't +apply further `JSONB` operators to the result of the `->>` operator. These operators can either have +a string (for keys in a json object) or integer (for array indices in a json array) as a parameter. + +In some cases, you would like to process JSON attributes as numerics. For this purpose, you can use +the `CAST` function to convert text retrieved from the `->>` operator to the appropriate numeric +type. + +## Examples + +- Create table with a JSONB column. + + ```sql + ycqlsh> CREATE KEYSPACE store; + ``` + + ```sql + ycqlsh> CREATE TABLE store.books ( id int PRIMARY KEY, details jsonb ); + ``` + +- Insert JSONB documents. 
+ + ```sql + INSERT INTO store.books (id, details) VALUES + (1, '{ "name": "Macbeth", "author": { "first_name": "William", "last_name": "Shakespeare" }, "year": 1623, "editors": ["John", "Elizabeth", "Jeff"] }'); + INSERT INTO store.books (id, details) VALUES + (2, '{ "name": "Hamlet", "author": { "first_name": "William", "last_name": "Shakespeare" }, "year": 1603, "editors": ["Lysa", "Mark", "Robert"] }'); + INSERT INTO store.books (id, details) VALUES + (3, '{ "name": "Oliver Twist", "author": { "first_name": "Charles", "last_name": "Dickens" }, "year": 1838, "genre": "novel", "editors": ["Mark", "Tony", "Britney"] }'); + INSERT INTO store.books (id, details) VALUES + (4, '{ "name": "Great Expectations", "author": { "first_name": "Charles", "last_name": "Dickens" }, "year": 1950, "genre": "novel", "editors": ["Robert", "John", "Melisa"] }'); + INSERT INTO store.books (id, details) VALUES + (5, '{ "name": "A Brief History of Time", "author": { "first_name": "Stephen", "last_name": "Hawking" }, "year": 1988, "genre": "science", "editors": ["Melisa", "Mark", "John"] }'); + ``` + +- Select from JSONB column. + + ```sql + ycqlsh> SELECT * FROM store.books; + ``` + + ```output + id | details + ----+------------------------------------------------------------------------------------------------------------------------------------------------------------- + 5 | {"author":{"first_name":"Stephen","last_name":"Hawking"},"editors":["Melisa","Mark","John"],"genre":"science","name":"A Brief History of Time","year":1988} + 1 | {"author":{"first_name":"William","last_name":"Shakespeare"},"editors":["John","Elizabeth","Jeff"],"name":"Macbeth","year":1623} + 4 | {"author":{"first_name":"Charles","last_name":"Dickens"},"editors":["Robert","John","Melisa"],"genre":"novel","name":"Great Expectations","year":1950} + 2 | {"author":{"first_name":"William","last_name":"Shakespeare"},"editors":["Lysa","Mark","Robert"],"name":"Hamlet","year":1603} + 3 | {"author":{"first_name":"Charles","last_name":"Dickens"},"editors":["Mark","Tony","Britney"],"genre":"novel","name":"Oliver Twist","year":1838} + ``` + +- Select with condition on JSONB object value. + + ```sql + ycqlsh> SELECT * FROM store.books WHERE details->'author'->>'first_name' = 'William' AND details->'author'->>'last_name' = 'Shakespeare'; + ``` + + ```output + id | details + ----+---------------------------------------------------------------------------------------------------------------------------------- + 1 | {"author":{"first_name":"William","last_name":"Shakespeare"},"editors":["John","Elizabeth","Jeff"],"name":"Macbeth","year":1623} + 2 | {"author":{"first_name":"William","last_name":"Shakespeare"},"editors":["Lysa","Mark","Robert"],"name":"Hamlet","year":1603} + ``` + +- Select with condition on JSONB array element. + + ```sql + ycqlsh> SELECT * FROM store.books WHERE details->'editors'->>0 = 'Mark'; + ``` + + ```output + id | details + ----+------------------------------------------------------------------------------------------------------------------------------------------------- + 3 | {"author":{"first_name":"Charles","last_name":"Dickens"},"editors":["Mark","Tony","Britney"],"genre":"novel","name":"Oliver Twist","year":1838} + ``` + +- Select with condition using on JSONB element. 
+ + ```sql + ycqlsh> SELECT * FROM store.books WHERE CAST(details->>'year' AS integer) = 1950; + ``` + + ```output + id | details + ----+-------------------------------------------------------------------------------------------------------------------------------------------------------- + 4 | {"author":{"first_name":"Charles","last_name":"Dickens"},"editors":["Robert","John","Melisa"],"genre":"novel","name":"Great Expectations","year":1950} + ``` + +- Update entire JSONB document. + + ```sql + ycqlsh> UPDATE store.books SET details = '{"author":{"first_name":"Carl","last_name":"Sagan"},"editors":["Ann","Rob","Neil"],"genre":"science","name":"Cosmos","year":1980}' WHERE id = 1; + ``` + + ```sql + ycqlsh> SELECT * FROM store.books WHERE id = 1; + ``` + + ```output + id | details + ----+----------------------------------------------------------------------------------------------------------------------------------- + 1 | {"author":{"first_name":"Carl","last_name":"Sagan"},"editors":["Ann","Rob","Neil"],"genre":"science","name":"Cosmos","year":1980} + ``` + +- Update a JSONB object value. + + ```sql + ycqlsh> UPDATE store.books SET details->'author'->>'first_name' = '"Steve"' WHERE id = 4; + ``` + + ```sql + ycqlsh> SELECT * FROM store.books WHERE id = 4; + ``` + + ```output + id | details + ----+------------------------------------------------------------------------------------------------------------------------------------------------------ + 4 | {"author":{"first_name":"Steve","last_name":"Dickens"},"editors":["Robert","John","Melisa"],"genre":"novel","name":"Great Expectations","year":1950} + ``` + +- Update a JSONB array element. + + ```sql + ycqlsh> UPDATE store.books SET details->'editors'->>1 = '"Jack"' WHERE id = 4; + ``` + + ```sql + ycqlsh> SELECT * FROM store.books WHERE id = 4; + ``` + + ```output + id | details + ----+------------------------------------------------------------------------------------------------------------------------------------------------------ + 4 | {"author":{"first_name":"Steve","last_name":"Dickens"},"editors":["Robert","Jack","Melisa"],"genre":"novel","name":"Great Expectations","year":1950} + ``` + +- Update a JSONB subdocument. + + ```sql + ycqlsh> UPDATE store.books SET details->'author' = '{"first_name":"John", "last_name":"Doe"}' WHERE id = 4; + ``` + + ```sql + ycqlsh> SELECT * FROM store.books WHERE id = 4; + ``` + + ```output + id | details + ----+------------------------------------------------------------------------------------------------------------------------------------------------- + 4 | {"author":{"first_name":"John","last_name":"Doe"},"editors":["Robert","Jack","Melisa"],"genre":"novel","name":"Great Expectations","year":1950} + ``` + +- Upsert: Update a missing JSONB document resulting in an insert. + + ```sql + INSERT INTO store.books (id, details) VALUES + (6, '{}'); + ycqlsh> UPDATE store.books SET details->'editors' = '["Adam", "Bryan", "Charles"]' WHERE id = 6; + ``` + + ```sql + ycqlsh> SELECT * FROM store.books WHERE id = 6; + ``` + + ```output + id | details + ----+------------------------------------------------------------------------------------------------------------------------------------------------- + 6 | {"editors":["Adam","Bryan","Charles"]} + ``` + +- Upsert: Update a missing JSONB document resulting in an insert of a subdocument. 
+ + ```sql + ycqlsh> UPDATE store.books SET details->'author' = '{"first_name":"Jack", "last_name":"Kerouac"}' WHERE id = 6; + ``` + + ```sql + ycqlsh> SELECT * FROM store.books WHERE id = 6; + ``` + + ```output + id | details + ----+------------------------------------------------------------------------------------------------------------------------------------------------- + 6 | {"author":{"first_name":"Jack","last_name":"Kerouac"},"editors":["Adam","Bryan","Charles"]} + ``` + +Note that JSONB upsert only works for JSON objects and not for other data types like arrays, integers, strings, and so on. Additionally, only the leaf property of an object will be inserted if it is missing. Upsert on non-leaf properties is not supported. + +## See also + +- [Explore JSON documents](../../../explore/ycql-language/jsonb-ycql) +- [Data types](..#data-types) +- [Secondary indexes with JSONB](../../../explore/ycql-language/indexes-constraints/secondary-indexes-with-jsonb-ycql/) diff --git a/docs/content/v2.25/api/ycql/type_number.md b/docs/content/v2.25/api/ycql/type_number.md new file mode 100644 index 000000000000..901544a50556 --- /dev/null +++ b/docs/content/v2.25/api/ycql/type_number.md @@ -0,0 +1,81 @@ +--- +title: Non-integer data types (FLOAT, DOUBLE, and DECIMAL) [YCQL] +headerTitle: Non-integer +linkTitle: Non-integer +description: Use the non-integer (floating-point and fixed-point) data types to specify non-integer numbers. +menu: + preview_api: + parent: api-cassandra + weight: 1430 +aliases: + - /preview/api/cassandra/type_number + - /preview/api/ycql/type_number +type: docs +--- + +## Synopsis + +Use the non-integer (floating-point and fixed-point) data types to specify non-integer numbers. Different floating point data types represent different precision numbers. + +Data type | Description | Decimal precision | +---------|-----|-----| +`FLOAT` | Inexact 32-bit floating point number | 7 | +`DOUBLE` | Inexact 64-bit floating point number | 15 | +`DECIMAL` | Arbitrary-precision number | no upper-bound | + +## Syntax + +``` +type_specification ::= { FLOAT | DOUBLE | DOUBLE PRECISION | DECIMAL } + +non_integer_floating_point_literal ::= non_integer_fixed_point_literal | "NaN" | "Infinity" | "-Infinity" + +non_integer_fixed_point_literal ::= [ + | - ] { digit [ digit ...] '.' [ digit ...] | '.' digit [ digit ...] } + +``` + +Where + +- Columns of type `FLOAT`, `DOUBLE`, `DOUBLE PRECISION`, or `DECIMAL` can be part of the `PRIMARY KEY`. +- `DOUBLE` and `DOUBLE PRECISION` are aliases. +- `non_integer_floating_point_literal` is used for values of `FLOAT`, `DOUBLE` and `DOUBLE PRECISION` types. +- `non_integer_fixed_point_literal` is used for values of `DECIMAL` type. + +## Semantics + +- Values of different floating-point and fixed-point data types are comparable and convertible to one another. + - Conversion from floating-point types into `DECIMAL` will raise an error for the special values `NaN`, `Infinity`, and `-Infinity`. +- Values of non-integer numeric data types are neither comparable nor convertible to integer although integers are convertible to them. +- The ordering for special floating-point values is defined as (in ascending order): `-Infinity`, all negative values in order, all positive values in order, `Infinity`, and `NaN`. 
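+
+For example, the special-value ordering described in the last point above can be seen by making a `DOUBLE` column the clustering column of a table, so that rows are returned in clustering order. This is only an illustrative sketch (the `special_values` table is not used elsewhere on this page); it assumes the `Infinity`, `-Infinity`, and `NaN` literals shown in the syntax above:
+
+```sql
+ycqlsh:example> CREATE TABLE special_values(sensor_id INT, reading DOUBLE, PRIMARY KEY (sensor_id, reading));
+ycqlsh:example> INSERT INTO special_values(sensor_id, reading) VALUES (1, -Infinity);
+ycqlsh:example> INSERT INTO special_values(sensor_id, reading) VALUES (1, -1.5);
+ycqlsh:example> INSERT INTO special_values(sensor_id, reading) VALUES (1, 2.5);
+ycqlsh:example> INSERT INTO special_values(sensor_id, reading) VALUES (1, Infinity);
+ycqlsh:example> INSERT INTO special_values(sensor_id, reading) VALUES (1, NaN);
+ycqlsh:example> SELECT reading FROM special_values WHERE sensor_id = 1;
+```
+
+Following the ordering rule above, the readings should come back as `-Infinity`, `-1.5`, `2.5`, `Infinity`, and finally `NaN`.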
+ +## Examples + +```sql +ycqlsh:example> CREATE TABLE sensor_data (sensor_id INT PRIMARY KEY, float_val FLOAT, dbl_val DOUBLE, dec_val DECIMAL); +``` + +```sql +ycqlsh:example> INSERT INTO sensor_data(sensor_id, float_val, dbl_val, dec_val) + VALUES (1, 321.0456789, 321.0456789, 321.0456789); +``` + +Integers literals can also be used (Using upsert semantics to update a non-existent row). + +```sql +ycqlsh:example> UPDATE sensor_data SET float_val = 1, dbl_val = 1, dec_val = 1 WHERE sensor_id = 2; +``` + +```sql +ycqlsh:example> SELECT * FROM sensor_data; +``` + +``` + sensor_id | float_val | dbl_val | dec_val +-----------+-----------+-----------+------------- + 2 | 1 | 1 | 1 + 1 | 321.04568 | 321.04568 | 321.0456789 +``` + +## See also + +- [Data types](..#data-types) diff --git a/docs/content/v2.25/api/ycql/type_text.md b/docs/content/v2.25/api/ycql/type_text.md new file mode 100644 index 000000000000..5e9155785a75 --- /dev/null +++ b/docs/content/v2.25/api/ycql/type_text.md @@ -0,0 +1,70 @@ +--- +title: TEXT data type [YCQL] +headerTitle: TEXT type +linkTitle: TEXT +description: Use the TEXT data type to specify data of a string of Unicode characters. +menu: + preview_api: + parent: api-cassandra + weight: 1440 +aliases: + - /preview/api/cassandra/type_text + - /preview/api/ycql/type_text +type: docs +--- + +## Synopsis + +Use the `TEXT` data type to specify data of a string of Unicode characters. + +## Syntax + +``` +type_specification ::= TEXT | VARCHAR + +text_literal ::= "'" [ letter ...] "'" +``` + +Where + +- `TEXT` and `VARCHAR` are aliases. +- `letter` is any character except for single quote (`[^']`) + +## Semantics + +- Columns of type `TEXT` or `VARCHAR` can be part of the `PRIMARY KEY`. +- Implicitly, value of type `TEXT` data type are neither convertible nor comparable to non-text data types. +- The length of `TEXT` string is virtually unlimited. + +## Examples + +```sql +ycqlsh:example> CREATE TABLE users(user_name TEXT PRIMARY KEY, full_name VARCHAR); +``` + +```sql +ycqlsh:example> INSERT INTO users(user_name, full_name) VALUES ('jane', 'Jane Doe'); +``` + +```sql +ycqlsh:example> INSERT INTO users(user_name, full_name) VALUES ('john', 'John Doe'); +``` + +```sql +ycqlsh:example> UPDATE users set full_name = 'Jane Poe' WHERE user_name = 'jane'; +``` + +```sql +ycqlsh:example> SELECT * FROM users; +``` + +``` + user_name | full_name +-----------+----------- + jane | Jane Poe + john | John Doe +``` + +## See also + +- [Data types](..#data-types) diff --git a/docs/content/v2.25/api/ycql/type_uuid.md b/docs/content/v2.25/api/ycql/type_uuid.md new file mode 100644 index 000000000000..d0afa811ff1c --- /dev/null +++ b/docs/content/v2.25/api/ycql/type_uuid.md @@ -0,0 +1,80 @@ +--- +title: UUID and TIMEUUID data types [YCQL] +headerTitle: UUID and TIMEUUID +linkTitle: UUID and TIMEUUID +summary: UUID types +description: Use the UUID data type to specify columns for data of universally unique ids. TIMEUUID is a universal unique identifier variant that includes time information. +menu: + preview_api: + parent: api-cassandra + weight: 1460 +aliases: + - /preview/api/cassandra/type_uuid + - /preview/api/ycql/type_uuid +type: docs +--- + +## Synopsis + +Use the `UUID` data type to specify columns for data of universally unique IDs. `TIMEUUID` is a universal unique identifier variant that includes time information. 
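+
+As a quick sketch of the difference, a `TIMEUUID` can be generated server-side with the `now()` function and its embedded time read back with `totimestamp()` (both are covered in the [date and time functions](../function_datetime) page linked below). The `events` table here is hypothetical and is not used in the examples later on this page:
+
+```sql
+ycqlsh:example> CREATE TABLE events(id TIMEUUID PRIMARY KEY, payload TEXT);
+ycqlsh:example> INSERT INTO events(id, payload) VALUES (now(), 'sensor reading');
+ycqlsh:example> SELECT totimestamp(id), payload FROM events;
+```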
+ +Data type | Description | +----------|-----| +`UUID` | [UUID (all versions)](https://tools.ietf.org/html/rfc4122) | +`TIMEUUID` | [UUID (version 1)](https://tools.ietf.org/html/rfc4122#section-4.2.2) | + +## Syntax + +``` +type_specification ::= { UUID | TIMEUUID } +uuid_literal ::= 4hex_block 4hex_block '-' 4hex_block '-' 4hex_block '-' 4hex_block '-' 4hex_block 4hex_block 4hex_block +4hex_block ::= hex_digit hex_digit hex_digit hex_digit +``` + +Where + +- `hex_digit` is a hexadecimal digit (`[0-9a-fA-F]`). + +## Semantics + +- Columns of type `UUID` or `TIMEUUID` can be part of the `PRIMARY KEY`. +- Implicitly, values of type `UUID` and `TIMEUUID` data types are neither convertible nor comparable to other data types. +- `TIMEUUID`s are version 1 UUIDs: they include the date and time of their generation and a spatially unique node identifier. +- Comparison of `TIMEUUID` values first compares the time component and then (if time is equal) the node identifier. + +## Examples + +```sql +ycqlsh:example> CREATE TABLE devices(id UUID PRIMARY KEY, ordered_id TIMEUUID); +``` + +```sql +ycqlsh:example> INSERT INTO devices (id, ordered_id) + VALUES (123e4567-e89b-12d3-a456-426655440000, 123e4567-e89b-12d3-a456-426655440000); +``` + +```sql +ycqlsh:example> INSERT INTO devices (id, ordered_id) + VALUES (123e4567-e89b-42d3-a456-426655440000, 123e4567-e89b-12d3-a456-426655440000); +``` + +```sql +ycqlsh:example> UPDATE devices SET ordered_id = 00000000-0000-1000-0000-000000000000 + WHERE id = 123e4567-e89b-42d3-a456-426655440000; +``` + +```sql +ycqlsh:example> SELECT * FROM devices; +``` + +``` +id | ordered_id +--------------------------------------+-------------------------------------- + 123e4567-e89b-12d3-a456-426655440000 | 123e4567-e89b-12d3-a456-426655440000 + 123e4567-e89b-42d3-a456-426655440000 | 00000000-0000-1000-0000-000000000000 +``` + +## See also + +- [`Date and time Functions`](../function_datetime) +- [Data types](..#data-types) diff --git a/docs/content/v2.25/api/ycqlsh.md b/docs/content/v2.25/api/ycqlsh.md new file mode 100644 index 000000000000..ac819e123296 --- /dev/null +++ b/docs/content/v2.25/api/ycqlsh.md @@ -0,0 +1,413 @@ +--- +title: ycqlsh - YCQL shell for YugabyteDB +headerTitle: ycqlsh +linkTitle: ycqlsh +description: Shell for interacting with the YugabyteDB YCQL API. +headcontent: Shell for interacting with the YugabyteDB YCQL API +aliases: + - /develop/tools/cqlsh/ + - /preview/develop/tools/cqlsh/ + - /preview/admin/ycqlsh/ +rightNav: + hideH4: true +type: docs +--- + +## Overview + +The YCQL shell (ycqlsh) is a CLI for interacting with YugabyteDB using [YCQL](../../api/ycql/). + +### Installation + +ycqlsh is installed with YugabyteDB and located in the `bin` directory of the YugabyteDB home directory. + +{{}} +To download and install a standalone version of ycqlsh, refer to [YugabyteDB clients](/preview/releases/yugabyte-clients/). +{{}} + +ycqlsh was previously named cqlsh. Although the cqlsh binary is available in the `bin` directory, it is deprecated and will be removed in a future release. + +### Starting ycqlsh + +```sh +./bin/ycqlsh +``` + +```output +Connected to local cluster at 127.0.0.1:9042. +[ycqlsh 5.0.1 | Cassandra 3.9-SNAPSHOT | CQL spec 3.4.2 | Native protocol v4] +Use HELP for help. +ycqlsh> +``` + +### Online help + +Run `ycqlsh --help` to display the online help. + +## Syntax + +```sh +ycqlsh [flags] [host [port]] +``` + +Where + +- `host` is the IP address of the host on which [YB-TServer](../../architecture/yb-tserver) is run. 
The default is local host at `127.0.0.1`. +- `port` is the TCP port at which YB-TServer listens for YCQL connections. The default is `9042`. + +### Example + +```sh +./bin/ycqlsh --execute "select cluster_name, data_center, rack from system.local" 127.0.0.1 +``` + +```output + cluster_name | data_center | rack +---------------+-------------+------- + local cluster | datacenter1 | rack1 +``` + +### Flags + +| Flag | Short Form | Default | Description | +| :------| :--------- | :------ | :-----------| +| `--color` | `-C` | | Force color output. | +| `--no-color` | | | Disable color output. | +| `--browser` | | | Specify the browser to use for displaying ycqlsh help. This can be one of the [supported browser names](https://docs.python.org/2/library/webbrowser.html) (for example, Firefox) or a browser path followed by `%s` (for example, `/usr/bin/google-chrome-stable %s`). | +| `--ssl` | | | Use SSL when connecting to YugabyteDB. | +| `--user` | `-u` | | Username to authenticate against YugabyteDB with. | +| `--password` | `-p` | | Password to authenticate against YugabyteDB with, should be used in conjunction with `--user`. | +| `--keyspace` | `-k` | | Keyspace to authenticate to, should be used in conjunction with `--user`. | +| `--file` | `-f` | | Execute commands from the given file, then exit. | +| `--debug` | | | Print additional debugging information. | +| `--encoding` | | UTF-8 | Specify a non-default encoding for output. | +| `--cqlshrc` | | | Specify the location for the `cqlshrc` file. The `cqlshrc` file holds configuration options for ycqlsh. By default this is in the user home directory at `~/.cassandra/cqlsh`. | +| `--execute` | `-e` | | Execute the given statement, then exit. | +| `--connect-timeout` | | 2 | Specify the connection timeout in seconds. | +| `--request-timeout` | | 10 | Specify the request timeout in seconds. | +| `--tty` | `-t` | | Force tty mode (command prompt). | +| `--refresh_on_describe` | `-r` | | Force a refresh of the schema metadata on [DESCRIBE](#describe). | + +### Save YCQL output to a file + +To save output from a YCQL statement to a file, run the statement using the --execute flag, and redirect the output to a file. + +For example, to save the output of a `SELECT` statement: + +```sh +./bin/ycqlsh -e "SELECT * FROM mytable" > output.txt +``` + +## Special commands + +In addition to supporting regular YCQL statements, ycqlsh also supports the following special commands. + +### CAPTURE + +Captures command output and appends it to the specified file. Output is not shown at the console while it is captured. + +```sh +CAPTURE '' +CAPTURE OFF +CAPTURE +``` + +- The path to the file to be appended to must be given inside a string literal. The path is interpreted relative to the current working directory. The tilde shorthand notation (`~/mydir`) is supported for referring to `$HOME`. +- Captures query result output only. Errors and output from ycqlsh-only commands are still shown in the ycqlsh session. +- To stop capturing output and show it in the ycqlsh session again, use `CAPTURE OFF`. +- To view the current capture configuration, use `CAPTURE` with no arguments. + +### CLEAR + +Clears the console. + +```cql +CLEAR +CLS +``` + +### CONSISTENCY + +```cql +CONSISTENCY +``` + +Sets the consistency level for the read operations that follow. 
 Valid arguments include:
+
+| Consistency Level | Description |
+| ----------------- | ------------------------------------------------------------ |
+| `QUORUM` | Read the strongly consistent results from the tablet's quorum. The read request will be processed by the tablet leader only. This is the default consistency level. |
+| `ONE` | Read from a follower with relaxed consistency guarantees. |
+
+To view the current consistency level, use `CONSISTENCY` with no arguments.
+
+### COPY FROM
+
+Copies data from a CSV file to a table.
+
+```cql
+COPY <table name> [(<column name>, ...)] FROM <file name> WITH <copy option> [AND <copy option> ...]
+```
+
+By default, `COPY FROM` copies all columns from the CSV file to the table. To copy a subset of columns, add a comma-separated list of column names enclosed in parentheses after the table name.
+
+The `<file name>` should be a string literal (with single quotes) representing a path to the source file. Use the special value `STDIN` (without single quotes) to read the CSV data from stdin.
+
+| Flags | Default | Description |
+| ----------------- | ------- | ------------------------------------------------------------ |
+| `INGESTRATE` | 100000 | The maximum number of rows to process per second. |
+| `MAXROWS` | -1 | The maximum number of rows to import. -1 means unlimited. |
+| `SKIPROWS` | 0 | A number of initial rows to skip. |
+| `SKIPCOLS` | | A comma-separated list of column names to ignore. By default, no columns are skipped. |
+| `MAXPARSEERRORS` | -1 | The maximum global number of parsing errors to ignore. -1 means unlimited. |
+| `MAXINSERTERRORS` | 1000 | The maximum global number of insert errors to ignore. -1 means unlimited. |
+| `ERRFILE=<file name>` | | A file to store all rows that could not be imported; by default this is `import_<ks>_<table>.err` where `<ks>` is the keyspace and `<table>` is the table name. |
+| `MAXBATCHSIZE` | 20 | The max number of rows inserted in a single batch. |
+| `MINBATCHSIZE` | 2 | The min number of rows inserted in a single batch. |
+| `CHUNKSIZE` | 1000 | The number of rows that are passed to child worker processes from the main process at a time. |
+
+See `COPY TO` for additional flags common to both `COPY TO` and `COPY FROM`.
+
+### COPY TO
+
+Copies data from a table to a CSV file.
+
+```cql
+COPY <table name> [(<column name>, ...)] TO <file name> WITH <copy option> [AND <copy option> ...]
+```
+
+By default, `COPY TO` copies all columns from the table to the CSV file. To copy a subset of columns, add a comma-separated list of column names enclosed in parentheses after the table name.
+
+The `<file name>` should be a string literal (with single quotes) representing a path to the destination file. You can also use the special value `STDOUT` (without single quotes) to print the CSV to stdout.
+
+| Flags | Default | Description |
+| ------------------------ | ------- | ------------------------------------------------------------ |
+| `MAXREQUESTS` | 6 | The maximum number of token ranges to fetch simultaneously. |
+| `PAGESIZE` | 1000 | The number of rows to fetch in a single page. |
+| `PAGETIMEOUT` | 10 | The timeout in seconds per 1000 entries in the page size or smaller. |
+| `BEGINTOKEN`, `ENDTOKEN` | | Token range to export. Defaults to exporting the full ring. |
+| `MAXOUTPUTSIZE` | -1 | The maximum size of the output file measured in number of lines; beyond this maximum the output file will be split into segments. -1 means unlimited. |
+| `ENCODING` | utf8 | The encoding used for characters. |
+
+The following flags are common to both `COPY TO` and `COPY FROM`.
+
+| Flags | Default | Description |
+| ----------------- | ------------ | ------------------------------------------------------------ |
+| `NULLVAL` | `null` | The string placeholder for null values. |
+| `HEADER` | `false` | For `COPY TO`, controls whether the first line in the CSV output file will contain the column names. For `COPY FROM`, specifies whether the first line in the CSV input file contains column names. |
+| `DELIMITER` | `,` | The character that is used to separate fields (columns). |
+| `DECIMALSEP` | `.` | The character that is used as the decimal point separator. |
+| `THOUSANDSSEP` | | The character that is used to separate thousands. Defaults to the empty string. |
+| `BOOLSTYLE` | `True,False` | The string literal format for boolean values. |
+| `NUMPROCESSES` | | The number of child worker processes to create for `COPY` tasks. Defaults to a max of 4 for `COPY FROM` and 16 for `COPY TO`. However, at most (num_cores - 1) processes will be created. |
+| `MAXATTEMPTS` | 5 | The maximum number of failed attempts to fetch a range of data (when using `COPY TO`) or insert a chunk of data (when using `COPY FROM`) before giving up. |
+| `REPORTFREQUENCY` | 0.25 | How often status updates are refreshed, in seconds. |
+| `RATEFILE` | | An optional file to output rate statistics to. By default, statistics are not output to a file. |
+
+### DESCRIBE
+
+Prints a description (typically a series of DDL statements) of a schema element or the cluster. Use DESCRIBE to dump all or portions of the schema.
+
+```cql
+DESCRIBE CLUSTER
+DESCRIBE SCHEMA
+DESCRIBE KEYSPACES
+DESCRIBE KEYSPACE <keyspace name>
+DESCRIBE TABLES
+DESCRIBE TABLE <table name>
+DESCRIBE INDEX +DESCRIBE TYPES +DESCRIBE TYPE +``` + +In any of the commands, `DESC` may be used in place of `DESCRIBE`. + +The `DESCRIBE CLUSTER` command prints the cluster name: + +```cql +ycqlsh> DESCRIBE CLUSTER +``` + +```output +Cluster: local cluster +``` + +The `DESCRIBE SCHEMA` command prints the DDL statements needed to recreate the entire schema. Use this command to dump the schema; you can then use the resulting file to clone the cluster or restore from a backup. + +### EXIT + +Ends the current session and terminates the ycqlsh process. + +```cql +EXIT +QUIT +``` + +### EXPAND + +Enables or disables vertical printing of rows. Use EXPAND when fetching many columns, or the contents of a single column are large. + +```cql +EXPAND ON +EXPAND OFF +``` + +To view the current expand setting, use `EXPAND` with no arguments. + +### HELP + +Gives information about ycqlsh commands. To see available topics, enter `HELP` without any arguments. To see help on a topic, use `HELP `. Also see the `--browser` argument for controlling what browser is used to display help. + +```cql +HELP +``` + +### LOGIN + +Authenticate as a specified YugabyteDB user for the current session. + +```cql +LOGIN [] +``` + +### PAGING + +Enables paging, disables paging, or sets the page size for read queries. When paging is enabled, only one page of data is fetched at a time and a prompt appears to fetch the next page. Generally, it's a good idea to leave paging enabled in an interactive session to avoid fetching and printing large amounts of data at once. + +```cql +PAGING ON +PAGING OFF +PAGING +``` + +To view the current paging setting, use `PAGING` with no arguments. + +### SHOW HOST + +Prints the IP address and port of the YB-TServer server that ycqlsh is connected to, and the cluster name. Example: + +```cql +ycqlsh> SHOW HOST +``` + +```output +Connected to local cluster at 127.0.0.1:9042. +``` + +### SHOW VERSION + +Prints the ycqlsh, Cassandra, CQL, and native protocol versions in use. Example: + +```cql +ycqlsh> SHOW VERSION +``` + +```output +[ycqlsh 5.0.1 | Cassandra 3.9-SNAPSHOT | CQL spec 3.4.2 | Native protocol v4] +``` + +### SOURCE + +Reads the contents of a file and executes each line as a YCQL statement or special ycqlsh command. + +```sh +SOURCE '' +``` + +Example: + +```cql +ycqlsh> SOURCE '/home/yugabyte/commands.cql' +``` + +### TIMING + +Enables or disables basic request round-trip timing, as measured on a current YCQL shell session. + +```cql +TIMING ON | OFF +``` + +TIMING ON: Enables round-trip timing for all further requests. + +TIMING OFF: Disables timing. + +TIMING (with no arguments): Outputs the current timing status. + +{{< note title = "Feature support" >}} +TIMING for [SELECTs](../../api/ycql/dml_select/) is available starting from YugabyteDB version 2.18.0.0 and later. +TIMING will be available for all DMLs starting from YugabyteDB version 2.19.2.0 and later. +{{< /note >}} + +#### Example + +You can use TIMING and run queries from a YCQL session as follows: + +```cql +ycqlsh> TIMING +``` + +```output +Timing is currently disabled. Use TIMING ON to enable. 
+``` + +```cql +ycqlsh> TIMING ON +``` + +```output +Now Timing is enabled +``` + +```cql +ycqlsh> use example; +``` + +```output +26.18 milliseconds elapsed +``` + +```cql +ycqlsh:example> CREATE TABLE employees(department_id INT, employee_id INT,name TEXT, PRIMARY KEY(department_id, employee_id)); +``` + +```output +1042.67 milliseconds elapsed +``` + +```cql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 1, 'John'); +``` + +```output +16.89 milliseconds elapsed +``` + +```cql +ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 2, 'Jane'); +``` + +```output +11.65 milliseconds elapsed +``` + +```cql +ycqlsh:example> SELECT * FROM employees; +``` + +```output + department_id | employee_id | name +---------------+-------------+------ + 1 | 1 | John + 1 | 2 | Jane +5.76 milliseconds elapsed +(2 rows) +``` + +```cql +ycqlsh:example> TIMING OFF +``` + +```output +Disabled Timing. +``` diff --git a/docs/content/v2.25/api/ysql/_index.md b/docs/content/v2.25/api/ysql/_index.md new file mode 100644 index 000000000000..6642b3c82635 --- /dev/null +++ b/docs/content/v2.25/api/ysql/_index.md @@ -0,0 +1,81 @@ +--- +title: YSQL API reference +headerTitle: YSQL API reference +linkTitle: YSQL +description: Learn about Yugabyte Structured Query Language (YSQL), the distributed SQL API for the PostgreSQL compatible YugabyteDB database. +summary: Reference for the YSQL API +headcontent: PostgreSQL-compatible API +showRightNav: true +type: indexpage +--- + +## Introduction + +Yugabyte Structured Query Language (YSQL) is an ANSI SQL, fully-relational API that is best fit for scale-out RDBMS applications that need ultra resilience, massive write scalability, and geographic data distribution. The YugabyteDB SQL processing layer is built by using the [PostgreSQL](https://www.yugabyte.com/postgresql/) code (version 15) directly. The result of this approach is that [YSQL is fully compatible with PostgreSQL _by construction_](https://www.yugabyte.com/postgresql/postgresql-compatibility/). + +YSQL therefore supports all of the traditional relational modeling features, such as referential integrity (implemented using a foreign key constraint from a child table to a primary key to its parent table), joins, partial indexes, triggers, and stored procedures. It extends the familiar transactional notions into the YugabyteDB Distributed SQL Database architecture. + +If you don't find what you're looking for in the YSQL documentation, you might find answers in the relevant [PostgreSQL documentation](https://www.postgresql.org/docs/15/index.html). Successive YugabyteDB releases honor PostgreSQL syntax and semantics, although some features (for example those that are specific to the PostgreSQL monolithic SQL database architecture) might not be supported for distributed SQL. The YSQL documentation specifies the supported syntax and extensions. + +To find the version of the PostgreSQL processing layer used in YugabyteDB, you can use the `version()` function. The following YSQL query displays only the first part of the returned value: + +```plpgsql +select rpad(version(), 18)||'...' as v; +``` + +```output + v +----------------------- + PostgreSQL 15.2-YB... 
+``` + +## YSQL components + +The main components of YSQL include: + +- Data definition language (DDL) +- Data manipulation language (DML) +- Data control language (DCL) +- Built-in SQL functions +- PL/pgSQL procedural language for stored procedures + +These components depend on underlying features like the data type system (common for both SQL and PL/pgSQL), expressions, database objects with qualified names, and comments. Other components support purposes such as system control, transaction control, and performance tuning. + +### The SQL language + +The section [The SQL language](./the-sql-language/) describes of all of the YugabyteDB SQL statements. Each statement has its own dedicated page. Each page starts with a formal specification of the syntax: both as a _railroad diagram_; and as a _grammar_ using the PostgreSQL convention. Then it explains the semantics and illustrates the explanation with code examples. + +### Supporting language elements + +This section lists the main elements that support the YugabyteDB SQL language subsystem. + +- [Built-in SQL functions](exprs/). +- [Data types](datatypes/). Most PostgreSQL-compatible data types are supported. +- [Keywords](keywords/). +- Names and Qualifiers. Some names are reserved for the system. List of [reserved names](reserved_names/). + +## Quick Start + +You can explore the basics of the YSQL API using the [Quick Start](/preview/quick-start/macos/). + +It always helps to have access to a sandbox YugabyteDB cluster where you can, when you need to, do whatever you want without considering any risk of doing harm. Here are the kinds of things you'll want to do: + +- Connect as the _postgres_ role and create and drop other _superusers_, and regular roles. +- Create and drop databases +- Create and drop extensions +- Create and drop objects of all other kinds + +With these freedoms, you'll be able to set up any regime that you need to help you illustrate, or test, a hypothesis about how things work. + +Moreover, for some experiments, you'll need operating system access so that you can make changes to various configuration files (like the one that determines the default values for session parameters). + +It also helps to have a vanilla PostgreSQL installation on the same server so that you can confirm for yourself that the SQL systems of each (at least for the functionality that application developers use, and in the overwhelming majority of cases) are syntactically and semantically identical. + +To do all this confidently, you need to be sure that nobody else can use your sandbox so that you know that everything that you observe will be explained by what you deliberately did. Occasionally, you'll even want to destroy a cluster at one version and replace it with a cluster at a different version. + +The simplest way to achieve this ideal sandbox regime is to use your own laptop. The [Quick Start](/preview/quick-start/macos/) shows you how to do this. diff --git a/docs/content/v2.25/api/ysql/cursors.md b/docs/content/v2.25/api/ysql/cursors.md new file mode 100644 index 000000000000..a19bd30a46bf --- /dev/null +++ b/docs/content/v2.25/api/ysql/cursors.md @@ -0,0 +1,530 @@ +--- +title: Cursors [YSQL] +headerTitle: Cursors +linkTitle: Cursors +description: Explains what a cursor is and how you create and use a cursor with either SQL or PL/pgSQL. [YSQL]. 
+menu: + preview_api: + identifier: cursors + parent: api-ysql + weight: 60 +type: docs +--- + +{{< warning title="YSQL currently supports only fetching rows from a cursor consecutively in the forward direction." >}} +See the section [Beware Issue #6514](#beware-issue-6514) below. +{{< /warning >}} + +This section explains: + +- what a _cursor_ is +- how you can manipulate a _cursor_ explicitly, when your use case calls for this, using either a SQL API or a PL/pgSQL API + +The SQL API is exposed by the _[declare](../the-sql-language/statements/dml_declare/)_, _[move](../the-sql-language/statements/dml_move/)_, _[fetch](../the-sql-language/statements/dml_fetch/)_, and _[close](../the-sql-language/statements/dml_close/)_ statements. Each of these specifies its _cursor_ using an identifier for the _cursor's_ name. + +The functionally equivalent PL/pgSQL API is exposed by the executable statements _[open](../syntax_resources/grammar_diagrams/#plpgsql-open-cursor-stmt)_, _[move](../syntax_resources/grammar_diagrams/#plpgsql-move-in-cursor-stmt)_, _[fetch](../syntax_resources/grammar_diagrams/#plpgsql-fetch-from-cursor-stmt)_, and _[close](../syntax_resources/grammar_diagrams/#plpgsql-close-cursor-stmt)_. Each of these specifies its _cursor_ using an identifier for the name of a value of the dedicated data type _refcursor_. You can declare a _refcursor_, just as you declare other variables, in the PL/pgSQL source code's _[plpgsql_declaration_section](../syntax_resources/grammar_diagrams/#plpgsql-declaration-section)_. Notice that one flavor of the declaration syntax, the so-called _["bound refcursor variable"](../syntax_resources/grammar_diagrams/#plpgsql-bound-refcursor-declaration)_, lets you specify the defining _subquery_ (see below) for the underlying _cursor_ that it denotes. Alternatively, you can specify the data type of a PL/pgSQL subprogram's formal argument as _refcursor_. The value of a variable or argument whose data type is _refcursor_ is _text_ and is simply the name of the underlying _cursor_ that it denotes. + +The section **[Cursor manipulation in PL/pgSQL—the "open", "fetch", and "close" statements](../user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/basic-statements/cursor-manipulation/)** explains, and provides code examples for, this topic. + +## What is a cursor? + +A _cursor_ is an artifact that you create with the _[declare](../the-sql-language/statements/dml_declare/)_ SQL statement—or with the equivalent PL/pgSQL _open_ statement. A _cursor's_ duration is limited to the lifetime of the session that creates it, it is private to that session, and it is identified by a bare name that must conform to the usual rules for SQL names like those of tables, schemas, and so on. In the sense that it's session-private, that its name isn't schema-qualified, that it has at most session duration, and that it has no explicit owner, it resembles a prepared statement. A session's currently extant _cursors_ are listed in the _[pg_cursors](https://www.postgresql.org/docs/15/view-pg-cursors.html)_ catalog view. + +A _cursor_ is defined by a _subquery_ (typically a _select_ statement, but you can use a _values_ statement) and it lets you fetch the rows from the result set that the _subquery_ defines one-at-a-time without (in favorable cases) needing ever to materialize the entire result set in your application's client backend server process—i.e. 
the process that this query lists: + +```plpgsql +select pid, application_name, backend_type +from pg_stat_activity +where pid = pg_backend_pid(); +``` + +{{< note title="The internal implementation of a cursor." >}} +  + +You don't need to understand this. But it might help you to see how _cursors_ fit into the bigger picture of how the processing of SQL statements like _select_, _values_, _insert_, _update_, and _delete_ is implemented. (These statements can all be used as the argument of the _prepare_ statement—and for this reason, they will be referred to here as _preparable_ statements.) + +At the lowest level, the implementation of all of SQL processing is implemented in PostgreSQL using C. And YSQL uses the same C code. The execution of a preparable statement uses C structure(s) that hold information, in the backend server process, like the SQL statement text, its parsed representation, its execution plan, and so on. Further, when the statement is currently being executed, other information is held, like the values of actual arguments that have been bound to placeholders in the SQL text and the position of the current row in the result set (when the statement produces one). You can manipulate these internal structures, from client side code, using the _[libpq - C Library](https://www.postgresql.org/docs/15/libpq.html)_ API or, at a level of abstraction above that, the _[Embedded SQL in C](https://www.postgresql.org/docs/15/ecpg.html)_ API (a.k.a. the _ECPG_). Moreover, engineers who implement PostgreSQL itself can use the [Server Programming Interface](https://www.postgresql.org/docs/current/spi.html). These APIs have schemes that let the programmer ask for the entire result set from a _subquery_ in a single round trip. And they also have schemes that let you ask for the result set row-by-row, or in batches of a specified size. See, for example, the _libpq_ subsection [Retrieving Query Results Row-By-Row](https://www.postgresql.org/docs/15/libpq-single-row-mode.html). + +In a more abstract, RDBMS-independent, discussion of the SQL processing of the statements that correspond to PostgreSQL's preparable statements, the term "cursor" is used to denote these internal structures. (You'll notice this, for example, with Oracle Database.) + +But in PostgreSQL, and therefore in YSQL, a _cursor_ is a direct exposure into SQL and PL/pgSQL (as _language features_) of just a _subset_ of the internal mechanisms for the processing of preparable statements: the _values_ statement and the _select_ statement when it has no data-modifying side-effects. + +Here's a counter-example where the _select_ statement _does_ modify data. You can begin a _select_ statement using a [_with_ clause](../the-sql-language/with-clause/) that can include a data-modifying statement like _insert_ as long as it has a _returning_ clause. (This ability lets you implement, for example, multi-table insert.) Set the _psql_ variables _db_ and _u_ to, respectively, a convenient sandbox database and a convenient test role that has _connect_ and _create_ on that database. 
Then create two tables and demonstrate the data-modifying _select_ statement ordinarily: + +```plpgsql +\c :db :u +drop schema if exists s cascade; +create schema s; +set search_path = pg_catalog, pg_temp; + +create table s.t(k int primary key, v text not null); +insert into s.t(k, v) values (1, 'cat'), (2, 'dog'), (3, 'mouse'); +create table s.count(k serial primary key, n int not null); + +with + c(n) as ( + insert into s.count(n) + select count(*) from s.t + returning n) +select v from s.t order by v; +``` + +This is the result set from the final _select_, as expected: + +```output + v +------- + cat + dog + mouse +``` + +Now check that _s.count_ was populated as expected: + +```plpgsql +select n from s.count; +``` + +It does indeed show _3_. + +Now try to declare a cursor using the same _subquery_: + +```plpgsql +declare cur cursor for +with + c(n) as ( + insert into pg_temp.count(n) + select count(*) + from pg_class + where relkind = 'v' + returning n) +select relname from pg_class where relkind = 'v'; +``` + +It fails with the _0A000_ error: _DECLARE CURSOR must not contain data-modifying statements in WITH_. +{{< /note >}} + +Tautologically, then, a _cursor_ is an artifact that (in general, in PostgreSQL) is characterized thus: + +- It is created with the _declare_ SQL statement (or the equivalent _open_ PL/pgSQL statement). +- Its maximum duration is _either_ the session in which it's created _or_ the transaction in which it's created, according to a choice that you make when you create it. +- Its lifetime can be terminated deliberately with the _close_ SQL statement or the same-spelled PL/pgSQL statement. +- It is defined by its name, its _subquery_ and some other boolean attributes. +- Its name and its other attributes are listed in the _pg_cursors_ catalog view. +- It lets you fetch consecutive rows, either one at a time, or in batches whose size you choose, from the result set that its _subquery_ defines. +- It supports the _move_ SQL statement, and the same-spelled PL/pgSQL statement, to let you specify any row, by its position, within the result set as the current row.\* +- It supports the _fetch_ SQL statement, and the same-spelled PL/pgSQL statement, that, in one variant, lets you fetch the row at the current position or a row at any other position relative to the current position (_either_ ahead of it _or_ behind\* it). +- It supports another variant of the _fetch_ statement (but here only in SQL) that lets you fetch a specified number of rows _either_ forward from and including the row immediately after the current position _or_ backward\* from and including the row immediately before the current position. + +**\[\*\]** Notice that YSQL doesn't yet support all of all of these operations. See the section [Beware Issue #6514](#beware-issue-6514) below.. + +The "current position" notion is defined by imagining that the cursor's defining _subquery_ always includes this _select_ list item: + +```plpgsql +row_number() over() as current_position +``` + +Here, _over()_ spans the entire result set. If the overall query has no _order by_ clause, then the hypothetical _over()_ has no such clause either. But if the overall query does have an _order by_ clause, then the hypothetical _over()_ has the same _order by_ clause within its parentheses. + +When you execute the _move_[\*](#beware-issue-6514) statement, the current position is left at the result to which you moved. 
And when you execute the _fetch_ statement (fetching one or several rows in either the forward or the backward[\*](#beware-issue-6514) direction) the current position is left at the last-fetched result. + +## Simple demonstration + +Create a trivial helper that will delay the delivery of each row from a _subquery_'s result set: + +```plpgsql +\c :db :u +drop schema if exists s cascade; +create schema s; + +create function s.sleep_one_second() + returns boolean + set search_path = pg_catalog, pg_text + language plpgsql +as $body$ +begin + perform pg_sleep(1.0); + return true; +end; +$body$; +``` + +Use it to define a view whose result set is ten rows: + +```plpgsql +create view s.ten_rows(v) as +select a.v from generate_series(1, 10) as a(v) where s.sleep_one_second(); +``` + +Now execute a query in the obvious way at the _ysqlsh_ prompt: + +```plpgsql +select v from s.ten_rows; +``` + +It takes about ten seconds before you see any results—and then you see all ten rows effectively instantaneously. (Use the \\_timing on_ meta-command to time it.) In other words, all ten rows were first materialized in the backend server process's memory before being passed, in a single round trip, to the application. + +Create another helper function to fetch one row from a _cursor_ and to return its value together with the time taken to fetch it: + +```plpgsql +create function s.next_row(cur in refcursor) + returns text + set search_path = pg_catalog, pg_text + language plpgsql +as $body$ +declare + t0 float8 not null := 0; + t1 float8 not null := 0; + t int not null := 0; + v int; +begin + t0 := extract(epoch from clock_timestamp()); + fetch next from cur into v; + t1 := extract(epoch from clock_timestamp()); + + t := (round(t1 - t0)*1000.0)::int; + return rpad(coalesce(v::text, ''), 9)||'-- '||to_char(t, '9999')||' ms'; +end; +$body$; +``` + +Now use a _cursor_ to fetch the ten rows one by one: + +```plpgsql +\t on +start transaction; + declare "My Cursor" no scroll cursor without hold for + select v from s.ten_rows; + + select s.next_row('My Cursor'); + select s.next_row('My Cursor'); + select s.next_row('My Cursor'); + select s.next_row('My Cursor'); + select s.next_row('My Cursor'); + select s.next_row('My Cursor'); + select s.next_row('My Cursor'); + select s.next_row('My Cursor'); + select s.next_row('My Cursor'); + select s.next_row('My Cursor'); + select s.next_row('My Cursor'); + +rollback; +\t off +``` + +Now you see the rows delivered one by one, every second. This is the result. (Blank lines were removed manually.) + +```output + 1 -- 1000 ms + 2 -- 1000 ms + 3 -- 1000 ms + 4 -- 1000 ms + 5 -- 1000 ms + 6 -- 1000 ms + 7 -- 1000 ms + 8 -- 1000 ms + 9 -- 1000 ms + 10 -- 1000 ms + -- 0 ms +``` + +When you execute _"select v from ten_rows"_ ordinarily using _ysqlsh_, you have to wait until the entire result set has been materialized in the memory of its backend server process before it's delivered to the client application as a unit. This incurs a memory usage cost as well as a time-delay irritation. But when you declare a _cursor_ for that _select_ statement, you materialize the results one row at a time and deliver each to the client as soon as its available. When you use this approach, no more than a single row needs ever to be concurrently materialized in the backend server process's memory. 
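+
+Continuing the same demonstration, you can also fetch the rows in batches rather than strictly one at a time. The following sketch reuses the _s.ten_rows_ view from above and uses the _fetch forward :N_ flavor (which YSQL supports); with one second per row, each three-row batch takes about three seconds to be produced, and the final fetch returns the single remaining row:
+
+```plpgsql
+\t on
+start transaction;
+  declare "My Cursor" no scroll cursor without hold for
+  select v from s.ten_rows;
+
+  fetch forward 3 from "My Cursor";
+  fetch forward 3 from "My Cursor";
+  fetch forward 3 from "My Cursor";
+  fetch forward 3 from "My Cursor";
+rollback;
+\t off
+```
+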
+ +In real applications, you'll use the piecewise result set delivery that a _cursor_ supports only when the result set is vast; and you'll fetch it in batches of a suitable size: small enough that the backend server process's memory isn't over-consumed; but large enough that the round-trip time doesn't dominate the overall cost of fetching a batch. + +## Transactional behavior — holdable and non-holdable cursors + +A cursor can be declared either as so-called _holdable_—or not. See the account of the _with hold_ or _without hold_ choice in the section for the _[declare](../the-sql-language/statements/dml_declare/)_ statement. Try this: + +```plpgsql +\c :db :u +select count(*) from pg_cursors; + +start transaction; + declare "Not Holdable" cursor without hold for select 17; + declare "Is Holdable" cursor with hold for select 42; + select name, is_holdable::text from pg_cursors order by name; +commit; +select name, is_holdable::text from pg_cursors order by name; + +\c :db :u +select count(*) from pg_cursors; +``` + +An invocation of _"select count(*) from pg_cursors"_, immediately after starting a session, will inevitably report that no _cursors_ exist. The first _pg_cursors_ query (within the ongoing transaction) produces this result: + +```output + name | is_holdable +--------------+------------- + Is Holdable | true + Not Holdable | false +``` + +And the _pg_cursors_ query immediately after committing the transaction produces this result: + +```output + name | is_holdable +--------------+------------- + Is Holdable | true +``` + +In other words, a _non-holdable_ _cursor_ will vanish when the transaction within which it was declared ends—even if the transaction is committed. Because a _non-holdable_ _cursor_ cannot exist outside of an ongoing transaction, this stand-alone attempt: + +```plpgsql +declare "Not Holdable" cursor without hold for select 17; +``` + +causes the _25P01_ error: _DECLARE CURSOR can only be used in transaction blocks_. The wording is slightly confusing because this causes no such error: + +```output +declare "Is Holdable" cursor with hold for select 42; +``` + +See the section [The transaction model for top-level SQL statements](../txn-model-for-top-level-sql/). The assumption is that you're running _ysqlsh_ with the default setting of _'on'_ for the _psql_ variable _AUTOCOMMIT_. + +Notice that the transactional behavior of a _cursor_ differs critically from that of a prepared statement: + +```plpgsql +\c :db :u +select count(*) from pg_prepared_statements; + +start transaction; + prepare stmt as select 42; +rollback; +select name from pg_prepared_statements; +``` + +Like is the case for _cursors_, an invocation of _"select count(*) from pg_prepared_statements"_, immediately after starting a session, will inevitably report that no prepared statements exist. But even when a statement is prepared within a transaction that is rolled back, it continues to exist after that until either the session ends or it is _deallocated_. (If you create a _holdable cursor_, within an on going transaction and then roll back the transaction, then it vanishes.) + +{{< tip title="Open a holdable cursor in its own transaction and close it as soon as you have finished using it." >}} +When, as is the normal practice, you don't subvert the behavior that automatically commits a SQL statement that is not executed within an explicitly started transaction, you'll probably _declare_, _move in_[\*](#beware-issue-6514) and _fetch from_ a _holdable cursor_ "ordinarily"—i.e. 
without explicitly starting, and ending, transactions. + +A _holdable cursor_ consumes resources because it always caches its defining _subquery_'s entire result set. Therefore (and especially in a connection-pooling scheme), you should close a _holdable cursor_ as soon as you have finished using it. +{{< /tip >}} + +{{< note title="A holdable cursor is most useful when you intend to move or to fetch in the backward direction — but YSQL does not yet support this." >}} +See the section [Beware Issue #6514](#beware-issue-6514) below. +{{< /note >}} + +## Scrollable cursors\* + +When you choose, at _cursor_ creation time, either the _scroll_ or the _no scroll_ options, the result of your choice is shown in the _is_scrollable_ column in the _pg_cursors_ view. Try this: + +```plpgsql +start transaction; + declare "Not Scrollable" no scroll cursor without hold for + select g.v from generate_series(1, 5) as g(v); + declare "Is Scrollable" scroll cursor without hold for + select g.v from generate_series(1, 5) as g(v); + + select name, is_scrollable::text from pg_cursors order by name; +rollback; +``` + +This is the result: + +```output + name | is_scrollable +----------------+--------------- + Is Scrollable | true + Not Scrollable | false +``` + +The term of art _scrollable_ reflects a rather unusual meaning of scrollability. In, for example, discussions about GUIs, scrolling means moving forwards or backwards within a window or, say, a list. However: + +- When _pg_cursors.is_scrollable_ is _false_, this means that you can change the current position in the _cursor_'s result set (using either _move_[\*](#beware-issue-6514) or as a consequence of _fetch_) only in the _forward_ direction. +- When _pg_cursors.is_scrollable_ is _true_, this means that you can change the current position in the _cursor_'s result set both in the _forward_ direction and in the _backward_ direction. + +In other words: + +- When you create a cursor and specify _no scroll_, you're saying that you will allow changing the current position in the result set in only the _forward_ direction. +- When you create a cursor and specify _scroll_, you're saying that you will allow changing the current position in the result set in both the _forward_ direction and the _backward_ direction. + +Notice that your choice with the _move_[\*](#beware-issue-6514) statement, to change the current position by just a single row or by many rows is an orthogonal choice to the direction in which you move. Similarly, your choice with the _fetch_ statement, to fetch just a single row or many rows is an orthogonal choice to the direction[\*](#beware-issue-6514) in which you fetch. + +- There is no way to create a _cursor_ so that changing the current position in the result set by more than one row, except by consecutive fetches, is prevented. + + +{{< tip title="Always specify either 'no scroll' or 'scroll' explicitly." >}} +If you specify neither _no scroll_ nor _scroll_ when you create a _cursor_, then you don't get an error. However, the outcome is that sometimes _backwards_ movement in the result set is allowed, and sometimes it causes the _55000_ error: _cursor can only scan forward_.[\*](#beware-issue-6514) + +Yugabyte recommends that you always specify your scrollability choice explicitly to honor the requirements that you must meet. Notice that while [Issue #6514](https://github.com/yugabyte/yugabyte-db/issues/6514) remains open, your only viable choice is _no scroll_. 
+{{< /tip >}} + +### "no scroll" cursor demonstration + +Do this: + +```plpgsql +start transaction; + declare cur no scroll cursor without hold for + select g.v from generate_series(1, 10) as g(v); + fetch next from cur; + fetch forward 2 from cur; + fetch forward 3 from cur; +rollback; +``` + +This runs without error and produces these results, as expected: + +```output + 1 + + 2 + 3 + + 4 + 5 + 6 +``` + +## Caching a cursor's result set + +- The result set for a _with hold_ _cursor_ is always cached when the transaction that creates it commits. + +- The result set for a _without hold_ _cursor_ might, or might not, be cached. + + - If the execution plan for the _cursor_'s defining _subquery_ can be executed in either the forward or the backward direction, then the result set will not be cached. + + - But if the plan can be executed only in the forward direction, then the result set must be cached if you specify _scroll_ when you create the cursor. + +PostgreSQL, and therefore YSQL, do not expose metadata to report whether or not a _cursor_'s result set is cached. Nor does the documentation for either RDBMS attempt to specify the rules that determine whether caching will be done. However, it's possible to reason, about certain specific _select_ statements, that their plans cannot be executed backward. For example the plan for a query that includes _row_number()_ in the _select_ list cannot be run backward because the semantics of _row_number()_ is to assign an incrementing rank to each new row in the result set as it is produced when the plan is executed in the forward direction—and the planner cannot predict how many rows will be produced to allow _row_number()_ to be calculated by decrementing from this for each successive row when the plan is run backward. If the _select_ statement has no _order by_, then the rows are produced in _physical order_ (i.e. in an order that's determined by how the table data is stored). + + +{{< note title="The physical order cannot be predicted." >}} +The physical order cannot be predicted; and it might even change between repeat executions of the same _select_ statement. However, if you use a cluster on a developer laptop and ensure that the backend process that supports the session that you use to do the tests is the _only_ process whose type is _client backend_, then it's very likely indeed that successive repeats of the same _select_ statement will produce the same physical order—at least over the timescale of typical _ad hoc_ experiments. +{{< /note >}} + +You can, however, support pedagogy by including a user-defined function in the _where_ clause that always returns _true_ and that uses _raise info_ to report when it's invoked. + +First, do this set-up. All the caching tests will use it: + +```plpgsql +drop schema if exists s cascade; +create schema s; + +create table s.t(k serial primary key, v int not null); +insert into s.t(v) select generate_series(1, 5); +create view s.v(pos, v) as select row_number() over(), v from s.t; + +create function s.f(i in int) + returns boolean + set search_path = pg_catalog, pg_temp + volatile + language plpgsql +as $body$ +begin + raise info 'f() invoked'; + return i = i; +end; +$body$; +``` + +### "with hold", "no scroll" cursor + +Do this: + +```plpgsql +start transaction; + declare cur no scroll cursor with hold for + select pos, v from s.v where s.f(v); +``` + +It completes silently without error. Now do this: + +```plpgsql +commit; +``` + +This is when you see the _raise info_ output—five times in total, i.e. 
once for each row that's tested: + +```output +INFO: f() invoked +INFO: f() invoked +INFO: f() invoked +INFO: f() invoked +INFO: f() invoked +``` + +This demonstrates that the result set has been cached. Now fetch all the rows and close the cursor; + +```plpgsql +fetch all from cur; +close cur; +``` + +The _fetch all_ statement brings this SQL output: + +```output + 1 | 5 + 2 | 1 + 3 | 4 + 4 | 2 + 5 | 3 +``` + +### "without hold", "no scroll" cursor + +Do this: + +```plpgsql +start transaction; + declare cur no scroll cursor without hold for + select pos, v from s.v where s.f(v); + + fetch next from cur; + fetch next from cur; + fetch next from cur; + fetch next from cur; + fetch next from cur; +rollback; +``` + +It's easier to distinguish the _raise info_ output and the SQL output from the statements that bring these if you save this code to, say, _t.sql_, and then execute it at the _ysqlsh_ prompt. This is what you see: + +```output +INFO: f() invoked + 1 | 5 + +INFO: f() invoked + 2 | 1 + +INFO: f() invoked + 3 | 4 + +INFO: f() invoked + 4 | 2 + +INFO: f() invoked + 5 | 3 +``` + +Notice that the same _v_ values are paired with the same _pos_ values as with the _"with hold", "no scroll" cursor_ test. But here, nothing suggests that results are cached—and they don't need to be because the _cursor_ doesn't allow moving backwards in the result set. (However, you can't design a test to demonstrate that results are not cached.) + +## \* Beware Issue #6514 + +{{< warning title="YSQL currently supports only fetching rows from a cursor consecutively in the forward direction." >}} +  + +[Issue 6514](https://github.com/yugabyte/yugabyte-db/issues/6514) tracks the problem that the SQL statements _fetch_ and _move_, together with their PL/pgSQL counterparts, are not yet fully functional. This is reflected by errors that occur under these circumstances: + +- Every _move_ flavor causes the _0A000_ error with messages like _"MOVE not supported yet"_. + +- Many _fetch_ flavors draw the _0A000_ error with messages like _"FETCH FIRST not supported yet"_, _"FETCH LAST not supported yet"_, _"FETCH BACKWARD not supported yet"_, and the like. + +These are the _only_ _fetch_ flavors that do not cause an error: + +- _fetch next_ +- bare _fetch_ +- _fetch :N_ +- bare _fetch forward_ +- _fetch forward :N_ +- _fetch all_ +- and _fetch forward all_ + +_:N_ must be a positive integer. + +Until [Issue 6514](https://github.com/yugabyte/yugabyte-db/issues/6514) is fixed, there is no point in declaring a _cursor_ using _scroll_ (i.e. so that _pg_cursors.is_scrollable_ is _true_). And doing this can harm performance and memory consumption because it can cause a _cursor_'s result set to be cached when the execution plan cannot be run backward. + +While [Issue 6514](https://github.com/yugabyte/yugabyte-db/issues/6514) is open, **you should therefore always declare a _cursor_ with _no scroll_.** +{{< /warning >}} diff --git a/docs/content/v2.25/api/ysql/datatypes/_index.md b/docs/content/v2.25/api/ysql/datatypes/_index.md new file mode 100644 index 000000000000..5d7a132d9cc6 --- /dev/null +++ b/docs/content/v2.25/api/ysql/datatypes/_index.md @@ -0,0 +1,72 @@ +--- +title: Data types [YSQL] +headerTitle: Data types +linkTitle: Data types +description: Data types +summary: YSQL data type overview and specification. 
+image: /images/section_icons/api/subsection.png +menu: + preview_api: + identifier: api-ysql-datatypes + parent: ysql-language-elements + weight: 80 +type: indexpage +--- + +The following table lists the primitive and compound data types in YSQL. + +| Data type | Alias | Description | +|-----------|-------|-------------| +| [array](type_array/) | | One-dimensional or multidimensional rectilinear array of any data type payload | +| [bigint](type_numeric) | [int8](type_numeric) | Signed eight-byte integer | +| [bigserial](type_serial) | [serial8](type_serial) | Autoincrementing eight-byte integer | +| `bit [ (n) ]` 1 | | Fixed-length bit string | +| `bit varying [ (n) ]` 1 | `varbit [ (n) ]` | Variable-length bit string | +| [boolean](type_bool) | [bool](type_bool) | Logical boolean (true/false) | +| `box` 1 | | Rectangular box | +| [bytea](type_binary) | | Binary data | +| [character [ (n) ]](type_character) | [char [ (n) ]](type_character) | Fixed-length character string | +| [character varying [ (n) ]](type_character) | [varchar [ (n) ]](type_character) | Variable-length character string | +| `cidr` 1 | | IPv4 or IPv6 network address | +| `circle` 1 | | Circle on a plane | +| [date](type_datetime/) | | Calendar date (year, month, day) | +| [double precision](type_numeric) | [float8](type_numeric) | Double precision floating-point number (8 bytes) | +| `inet` 1 | | IPv4 or IPv6 host address | +| [integer](type_numeric) | [int, int4](type_numeric) | Signed four-byte integer | +| [interval [ fields ] [ (p) ]](type_datetime/) | | Time span | +| [json](type_json/) 1 | | Textual JSON data | +| [jsonb](type_json/) 1 | | JSON data, stored as decomposed binary | +| `line` 1 | | Infinite line on a plane | +| `lseg` 1 | | Line segment on a plane | +| `macaddr` 1 | | Media Access Control (MAC) address | +| `macaddr8` 1 | | Media Access Control (MAC) address (EUI-64 format) | +| [money](type_money) | | Currency amount | +| [numeric [ (p, s) ]](type_numeric) | [decimal [ (p, s) ]](type_numeric) | Exact fixed-point numeric | +| `path` 1 | | Geometric path on a plane | +| `pg_lsn` 1 | | Log Sequence Number | +| `point` 1 | | Geometric point | +| `polygon` 1 | | Closed geometric path | +| [real](type_numeric) | [float4](type_numeric) | Floating-point number (4 bytes) | +| [smallint](type_numeric) | [int2](type_numeric) | Signed two-byte integer | +| [int4range](type_range#synopsis) | | `integer` range | +| [int8range](type_range#synopsis) | | `bigint` range | +| [numrange](type_range#synopsis) | | `numeric` range | +| [tsrange](type_range#synopsis) | | `timestamp without time zone` range | +| [tstzrange](type_range#synopsis) | | `timestamp with time zone` range | +| [daterange](type_range#synopsis) | | `date` range | +| [smallserial](type_serial) | [serial2](type_serial) | Autoincrementing two-byte integer | +| [serial](type_serial) | [serial4](type_serial) | Autoincrementing four-byte integer | +| [text](type_character) | | Variable-length character string | +| [time [ (p) ] [ without time zone ]](type_datetime/) | | Time of day (no time zone) | +| [time [ (p) ] with time zone](type_datetime/) | [timetz](type_datetime/) | Time of day, including time zone | +| [timestamp [ (p) ] [ without time zone ]](type_datetime/) | | Date and time (no time zone) | +| [timestamp [ (p) ] with time zone](type_datetime/) | [timestamptz](type_datetime/) | Date and time, including time zone | +| `tsquery` 1 | | Text search query | +| `tsvector` 1 | | Text search document | +| `txid_snapshot` 1 | | Transaction ID snapshot | 
+| [uuid](type_uuid) | | Universally unique identifier | +| `xml` 2 | | XML data | + +1 Table columns of this type cannot be part of an `INDEX` `KEY`. + +2 Under development. diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/_index.md b/docs/content/v2.25/api/ysql/datatypes/type_array/_index.md new file mode 100644 index 000000000000..778b5e66793a --- /dev/null +++ b/docs/content/v2.25/api/ysql/datatypes/type_array/_index.md @@ -0,0 +1,373 @@ +--- +title: YSQL array +linkTitle: Array +headerTitle: Array data types and functionality +description: YSQL lets you construct an array data type, of any dimensionality, of any built-in or user-defined data type. You can use this constructed data type for a table column and for a variable or formal parameter in a PL/pgSQL procedure. +image: /images/section_icons/api/subsection.png +menu: + preview_api: + identifier: api-ysql-datatypes-array + parent: api-ysql-datatypes +aliases: + - /preview/api/ysql/datatypes/type_array +type: indexpage +showRightNav: true +--- + +## Synopsis + +A multidimensional array lets you store a large composite value in a single field (row-column intersection) in a table; and it lets you assign such a value to a PL/pgSQL variable, or pass it via a procedure's, or a function's, formal parameter. + +You can see from the declarations below that every value in an array is non-negotiably of the same data type—either a primitive data type like `text` or `numeric`, or a user-defined scalar or composite data type (like a _"row"_ type). + +An array is, by definition, a rectilinear N-dimensional set of "cells". You can picture a one-dimensional array as a line of cells, a two-dimensional array as a rectangle of cells, and a three-dimensional array as a cuboid of cells. The terms "line", "rectangle", and "cuboid" are the only specific ones. The generic term "N-dimensional array" includes these and all others. The meaning of "rectilinear" is sometimes captured by saying that the shape has no ragged edges or surfaces. If you try to create an array value that is not rectilinear, then you get an error whose detail says _"Multidimensional arrays must have sub-arrays with matching dimensions"_. The number of dimensions that an array has is called its _dimensionality_. + +{{< note title="Ragged arrays" >}} +Sometimes, a ragged structure is useful. Here's an example: +- a one-dimensional array of "payload" one-dimensional arrays, each of which might have a different length + +This structure is crucially different from a rectilinear two-dimensional array. A `DOMAIN` lets you create such a structure by providing the means to give the payload array data type a name. [Using an array of `DOMAIN` values](./array-of-domains/) shows how to do this. +{{< /note >}} + +A value within an array is specified by a tuple of _index_ values, like this (for a four-dimensional array): +``` +arr[13][7][5][17] +``` +The index is the cell number along the dimension in question. The index values along each dimension are consecutive—in other words, you cannot delete a cell within a array. This reflects the fact that an array is rectilinear. However, a value in a cell can, of course, be `NULL`. + +The leftmost value (`13` in the example) is the index along the first dimension; the rightmost value (`17` in this example) is the index along the Nth dimension—that is, the fourth dimension in this example. The value of the index of the first cell along a particular dimension is known as the _lower bound_ for that dimension. 
If you take no special steps when you create an array value, then the lower bound of each dimension is `1`. But, if you find it useful, you can specify any positive or negative integer, or zero, as the lower bound of the specified dimension. The lower bounds of an array are fixed at creation time, and so is its dimensionality. + +Correspondingly, each dimension has an upper bound. This, too, is fixed at array creation time. The index values along each dimension are consecutive. The fact that each dimension has a single value for its upper and lower bound reflects the fact that an array is rectilinear. + +If you read a within-array value with a tuple of index values that put it outside of the array bounds, then you silently get `NULL`. But if you attempt to set such an out-of-bounds value, then, because this is an implicit attempt to change the array's bounds, you get the _"array subscript out of range"_ error. + +Notice that you can create an array, using a single assignment, as a so-called "slice" of an array, by specifying desired lower and upper index values along each axis of the source array. The new array cannot have a different dimensionality than its source. You should specify the lower and upper index values for the slice, along each dimension of the source array, to lie within (or, maximally, coincide with) the bounds of that dimension. If you specify the slice with a lower bound less than the corresponding lower bound of the source array, then the new lower bound is silently interpreted as the extant corresponding source lower bound. The same is true for the upper bounds. The syntax of this method means that the lower bounds of the new array inevitably all start at `1`. Here is an example (in PL/pgSQL syntax) using a two-dimensional source array: + +``` +new_arr := source_arr[3:4][7:9]; +``` +**Note:** A one-dimensional array is a special case because, uniquely among N-dimensional shapes, it is tautologically rectilinear. You can increase the length of such an array implicitly, by setting a value in a cell that has a lower index value than the present lower bound or a higher index value than the present upper bound. Once you've done this, there is no way to reduce the length because there is no explicit operation for this and no "unset" operation for a specified cell. You can, however, create a slice so that the new array has the source array's original size. + +The following properties determine the shape of an array. Each can be observed using the listed dedicated function. The first formal parameter (with data type `anyarray`) is the array of interest . When appropriate, there's a second formal parameter (with data type `int`) that specifies the dimension of interest. The return is an `int` value, except in one case where it's a `text` value, as detailed below. + +- [`array_ndims()`](functions-operators/properties/#array-ndims) returns the dimensionality of the specified array. + +- [`array_lower()`](functions-operators/properties/#array-lower) returns the lower bound of the specified array on the specified dimension. + +- [`array_upper()`](functions-operators/properties/#array-upper) returns the upper bound of the specified array on the specified dimension. + +- [`array_length()`](functions-operators/properties/#array-length) returns the length of the specified array on the specified dimension. 
The length, the upper bound, and the lower bound, for a particular dimension, are mutually related, thus: +``` + "length" = "upper bound" - "lower bound" + 1 +``` + +- [`cardinality()`](functions-operators/properties/#cardinality) returns the total number of cells (and therefore values) in the specified array. The cardinality and length along each dimension are mutually related, thus: +``` + "cardinality" = "length 1" * "length 2" * ... * "length N" +``` + +- [`array_dims()`](functions-operators/properties/#array-dims) returns a text representation of the same information as `array_lower()` and `array_length()` return, for all dimension in a single `text` value, showing the upper and lower bounds like this: `[3:4][7:9][2:5]` for a three-dimensional array. Use this for human consumption. Use `array_lower()` and `array_length()` for programmatic consumption. + +Arrays are special because (unlike is the case for, for example, numeric data types like `decimal` and `int`, or character data types like `text` and `varchar`) there are no ready-made array data types. Rather, you construct the array data type that you need using an array _type constructor_. Here's an example: + +```plpgsql +create table t1(k int primary key, arr text array[4]); +``` +This syntax conforms to the SQL Standard. Notice that `array` is a reserved word. (You cannot, for example, create a table with that name.) It appears to let you specify just a one-dimensional array and to specify how many values it holds. But both of these apparent declarations of intent are ignored and act, therefore, only as potentially misleading documentation. + +The following illustrates the PostgreSQL extension to the Standard that YSQL, therefore, inherits.: + +```plpgsql +create table t2( + k int primary key, + one_dimensional_array int[], + two_dimensional_array int[10][10]); +``` +Notice that it appears, optionally, to let you specify how many values each dimension holds. (The Standard syntax allows the specification of the length of just one dimension.) However, these apparent declarations of intent, too, are silently ignored. Moreover, even the _dimensionality_ is ignored. The value, in a particular row, in a table column with an array data type (or its cousin, a variable in a PL/pgSQL program) can hold an array value of _any_ dimensionality. This is demonstrated by example in [Multidimensional array of `int` values](./literals/array-of-primitive-values/#multidimensional-array-of-int-values). This means that declaring an array using the reserved word `array`, which apparently lets you define only a one-dimensional array, and declaring an array using `[]`, which apparently lets you define array of any dimensionality, where one, some, or all of the dimensions are nominally constrained, are entirely equivalent. + +The possibility that different rows in the same table column can hold array values of different dimensionality is explained by picturing the implementation. Array values are held, in an opaque internal representation, as a linear "ribbon" of suitably delimited values of the array's data type. The array's actual dimensionality, and the upper and lower bound of the index along each dimension, is suitably represented in a header. This information is used, in a trivial arithmetic formula, to translate an address specification like `arr[13][7][5][17]` into the position of the value, as a single integer, along the ribbon of values. 
Understanding this explains why, except for the special case of a one-dimensional array, the dimensionality and the bounds of an array value are fixed at creation time. It also explains why a few of the array functions are supported only for one-dimensional arrays. + +Yugabyte recommends that, for uniformity, you choose to declare arrays only with this syntax: + +``` +create table t2( + k int primary key, + one_dimensional_array int[], + two_dimensional_array int[]); +``` + +The `array_ndims()` function lets you define a table constraint to insist that the array dimensionality is fixed for every row in a table column with such a data type. The `array_length()` function lets you insist that each dimension of a multidimensional array has a specified length for every row, or that its length doesn't exceed a specified limit for any row. + +## Atomically null vs having all values null + +Here is a minimal example: +```plpgsql +create table t(k int primary key, v int[]); +insert into t(k) values(1); +insert into t(k, v) values (2, '{null}'::int[]); +\pset null '' +select k, v, array_dims(v) as dims from t order by k; +``` +It shows this: + +``` + k | v | dims +---+-----------+----------- + 1 | | + 2 | {NULL} | [1:1] +``` + +Because _"v"_ has no constraint, it can be `NULL`, just like when its data type is scalar. This is the case for the row with _"k = 1"_. Here, _"v"_ is said to be _atomically null_. (This term is usually used only when the data type is composite to distinguish the outcome from what is seen for the row with _"k = 2"_ where _"v"_ is not atomically null. The array properties of the first row's _"v"_, like its dimensionality, are all `NULL`. But for the second row, they have meaningful, `not null`, values. Now try this: +```plpgsql +update t set v = v||'{null}'::int[] where k = 2; +select k, v, array_dims(v) as dims from t where k = 2; +``` +The `||` operator is explained in [Array concatenation functions and operators](./functions-operators/concatenation/#the-160-160-160-160-operator). The query shows this: + +``` + k | v | dims +---+-------------+------- + 2 | {NULL,NULL} | [1:2] +``` +Here, _"v"_ for the second row, while not atomically null, has all of its values `NULL`. Its dimensionality cannot be changed, but because it is a one dimensional array, its length can be extended, as was explained above. This is allowed: +```plpgsql +update t set v[0] = 17 where k = 2; +select k, v, array_dims(v) as dims from t where k = 2; +``` +It shows this: +``` + k | v | dims +---+---------------------------+------- + 2 | [0:3]={17,NULL,NULL,NULL} | [0:3] +``` + This, too, is allowed: +```plpgsql +update t set v[1] = 42 where k = 1; +select k, v, array_dims(v) as dims from t where k = 1; +``` +It shows this: +``` + k | v | dims +---+------+------- + 1 | {42} | [1:1] +``` + +The dimensionality of _"v"_ for this first row has now been irrevocably established. + + +## Type construction + +Arrays are not the only example of type construction. So, also, are _"row"_ types and `DOMAIN`s: + +```plpgsql +create type rec_t as(f1 int, f2 text); + +create domain medal_t as text +check( + length(value) <= 6 and + value in ('gold', 'silver', 'bronze') +); + +create table t3(k int primary key, rec rec_t, medal medal_t); +``` + +Notice that you must define a _"row"_ type or a `DOMAIN` as a schema object. But you define the data type of an array "in place" when you create a table or write PL/pgSQL code, as was illustrated above. To put this another way, you _cannot_ name a constructed array type. 
Rather, you can use it only "on the fly" to define the data type of a column, a PL/pgSQL variable, or a PL/pgSQL formal parameter. The consequence of this is that while you _can_ define, for example, the data type of a named field in a _"row"_ type as an array of a specified data type, you _cannot_ define an array of a specified array data type. (If you try to write such a declaration, you'll see, as you type it, that you have no way to express what you're trying to say.) + +## Informal sketch of array functionality + +This sections within this "Array data types and functionality" major section carefully describe what is sketched here. + +_First_, create a table with an `int[]` column and populate it with a two-dimensional array by using an array literal. +```plpgsql +create table t( + k int primary key, v int[]); + +insert into t(k, v) values(1, + '{ + {11, 12, 13}, + {21, 22, 23} + } + '::int[]); +``` +_Next_, look at a direct `::text` typecast of the value that was inserted: + +```plpgsql +select v::text from t where k = 1; +``` +It shows this: +``` + v +------------------------- + {{11,12,13},{21,22,23}} +``` +Notice that, apart from the fact that it has no whitespace, this representation is identical to the literal that defined the inserted array. It can therefore be used in this way. + +_Next_ check that the inserted array value has the expected properties: +```plpgsql +select + array_ndims(v), + array_length(v, 1), + array_length(v, 2), + array_dims(v) +from t where k = 1; +``` +It shows this: +``` + array_ndims | array_length | array_length | array_dims +-------------+--------------+--------------+------------ + 2 | 2 | 3 | [1:2][1:3] +``` + +The `array_ndims()` function reports the dimensionality of the array; `array_length()` reports the length of the specified dimension (that is, the number of values that this dimension has); and `array_dims()` presents the same information, as a single `text` value, as using `array_length()` in turn for each dimension does. Notice that `array_length()` returns a _single_ `int` value for the specified dimension. Its design rests upon a rule, exemplified by saying that a two-dimensional array must be a rectangle (it cannot have a ragged edge). In the same way, a three-dimensional array must be a cuboid (it cannot have an uneven surface). This notion, though its harder to visualise, continues to apply as the number of dimensions increases. + +Here's an example that violates the rule: +```plpgsql +insert into t(k, v) values(2, + '{ + {11, 12, 13}, + {21, 22, 23, 24} + } + '::int[]); +``` + +The formatting emphasizes that its edge is ragged. It causes a _"22P02: malformed array literal"_ error whose detail says _"Multidimensional arrays must have sub-arrays with matching dimensions"_. + +Finally, in this sketch, this `DO` block shows how you can visualise the values in a two-dimensional array as a rectangular grid. 
+ +```plpgsql +do $body$ +declare + arr constant int[] not null:= '{ + {11, 12, 13, 14}, + {21, 22, 23, 24}, + {31, 32, 33, 34} + }'::int[]; + + ndims constant int not null := array_ndims(arr); + line text; +begin + if array_ndims(arr) <> 2 then + raise exception 'This code handles only a two-dimensional array.'; + end if; + + declare + len1 constant int not null := array_length(arr, 1); + len2 constant int not null := array_length(arr, 2); + begin + for row in 1..len1 loop + line := ' '; + for col in 1..len2 loop + line := line||lpad(arr[row][col]::text, 5); + end loop; + raise info '%', line; + end loop; + end; +end; +$body$; +``` +It produces this result (after manually stripping the _"INFO:"_ prompts): +``` + 11 12 13 14 + 21 22 23 24 + 31 32 33 34 +``` +This approach isn't practical for an array with higher dimensionality or for a two-dimensional array whose second dimension is large. Rather, this code is included here to show how you can address individual elements. The names of the implicitly declared `FOR` loop variables _"row"_ and _"col"_ correspond intuitively to how the values are laid out in the literal that defines the array value. The nested loops are designed to visit the values in so-called row-major order (the last subscript varies most rapidly). + +The term _"row-major order"_ is explained in [Joint semantics](./functions-operators/properties/#joint-semantics) within the section _"Functions for reporting the geometric properties of an array"_. + +When, for example, the values of same-dimensioned multidimensional arrays are compared, they are visited in this order and compared pairwise in just the same way that scalar values are compared. + +**Note:** The term "_row-major order"_ is explained in [Joint semantics](./functions-operators/properties/#joint-semantics)) within the _"Functions for reporting the geometric properties of an array"_ section. it contains a an example PL/pgSQL procedure that shows how to traverse an arbitrary two-dimensional array's values, where the lower bounds and lengths along each dimension are unknown beforehand, in this order. + +Notice that, in the example above, the first value in each dimension has index value 1. This is the case when an array value is created using a literal and you say nothing about the index values. The next example shows how you can control where the index values for each dimension start and end. +```plpgsql +\pset null '' +with v as ( + select '[2:4][5:8]= + { + {25, 26, 27, 28}, + {35, 36, 37, 38}, + {45, 46, 47, 48} + }'::int[] as arr) +select + arr[0][0] as "[0][0]", + arr[2][5] as "[2][5]", + arr[2][8] as "[2][8]", + arr[4][5] as "[4][5]", + arr[4][8] as "[4][8]", + arr[9][9] as "[9][9]" +from v; +``` +In this syntax, `[2:4]` says that the index runs from 2 through 4 on the first dimension; and `[5:8]` says that runs from 5 through 8 on the second dimension. The values have been chosen to illustrate this. Of course, you must provide the right number of values for each dimension. The query produces this result: +``` + [0][0] | [2][5] | [2][8] | [4][5] | [4][8] | [9][9] +-----------+--------+--------+--------+--------+----------- + | 25 | 28 | 45 | 48 | +``` +Notice that if you access an element whose index values put it outside the ranges of the defined values, then, as mentioned, you silently get `NULL`. + +The values in an array are stored by laying out their internal representations consecutively in row-major order. 
This term is explained in [Joint semantics](./functions-operators/properties/#joint-semantics) within the _"Functions for reporting the geometric properties of an array"_ section. Because every value has the same data type, a value of interest can be addressed quickly, without index support, by calculating its offset. The value itself knows its dimensions. This explains how arrays of different dimensionality can be stored in a single table column. Even when the representations are of variable length (as is the case with, for example, `text` values), each knows its length so that the value boundaries can be calculated.
+
+## Uses of arrays
+
+You can use a one-dimensional array to store a graph, like temperature readings as a function of time. But the time axis is implicit: it's defined by each successive value's index. The application decides how to translate the integral index value to a time value.
+
+You can use a two-dimensional array to store a surface. For example, you could decide to interpret the first index as an increment in latitude, and the second index as an increment in longitude. You might, then, use the array values to represent, say, the average temperature, over some period, at a location measured at points on a rectangular grid.
+
+A trained machine learning model is likely to be either a single array with maybe five or six dimensions and with fixed size, or a collection of such arrays. It's useful, for various practical reasons, to store several such models, corresponding to different stages of training or to different detailed use areas. The large physics applications at the Lawrence Livermore National Laboratory represent, and store, observations as multidimensional arrays.
+
+In these uses, your requirement is to persist the data and then to retrieve it (possibly retrieving just a slice) for programmatic analysis of the kind for which SQL is at best cumbersome or at worst inappropriate. For example, a one-dimensional array might be used to represent a path on a horizontal surface, where each value is a row representing the _(x, y)_ coordinate pair, and you might want to fit a curve through the data points to smooth out measurement inaccuracies. The [GPS trip data](./#example-use-case-gps-trip-data) use case, described below, typifies this use of arrays.
+
+Some use cases call for a multidimensional _ragged_ array-like structure. Such a structure doesn't qualify for the name "array" because it isn't rectilinear. The note above points to [Using an array of `DOMAIN` values](./array-of-domains/), which shows how to implement such a ragged structure.
+
+## Example use case: GPS trip data
+
+Amateur cyclists like to record their trips using a GPS device and then to upload the recorded data to one of no end of Internet sites, dedicated to that purpose, so that they can review their trips, and those of others, whenever they want to into the indefinite future. Such a site might use a SQL database to store all these trips.
+
+The GPS device lets the cyclist split the trip into successive intervals, usually called laps, so that they can later focus their review attention on particular laps of interest like, for example, a notoriously steep hill. So each trip has one or many laps. A lap is typically no more than about 100 km—and often more like 5-10 km. But it could be as large as, say, 300 km. The resolution of modern devices is typically just a few paces under good conditions—say 3m.
So a lap could have as many as 100,000 GPS data points, each of which records the timestamp, position, and no end of other associated instantaneous values of facts like, for example, heart rate.
+
+This sounds like a classic three-table design, with foreign key constraints to capture the notion that a GPS data point belongs to a lap and that a lap belongs to a trip. The array data type allows all of the GPS data points that belong to a lap to be recorded in a single row in the _"laps"_ table—in other words, as a multivalued field, thus:
+
+```plpgsql
+create type gps_data_point_t as (
+  ts          timestamp,
+  lat         numeric,
+  long        numeric,
+  alt         numeric,
+  cadence     int,
+  heart_rate  int
+  ...
+  );
+
+create table laps(
+  lap_start_ts     timestamp,
+  trip_start_ts    timestamp,
+  userid           uuid,
+  gps_data_points  gps_data_point_t[],
+
+  constraint laps_pk primary key (lap_start_ts, trip_start_ts, userid),
+
+  constraint laps_fk foreign key (trip_start_ts, userid)
+    references trips(trip_start_ts, userid)
+    match full on delete cascade on update restrict);
+```
+**Note:** In PostgreSQL, the maximum number of values that an array of any dimensionality can hold is `(2^27 - 1)` (about 137 million). If you exceed this limit, then you get a clear _"54000: array size exceeds the maximum allowed (134217727)"_ error. This maps to the PL/pgSQL exception _"program_limit_exceeded"_. In PostgreSQL, array values are stored out of line. However, in the YugabyteDB YSQL subsystem, they are stored in line, just like, for example, a `json` or `jsonb` value. As a consequence, the maximum number of values that a YSQL array can accommodate is smaller than the PostgreSQL limit. Moreover, the actual YSQL limit depends on circumstances—and when it's exceeded you get a "time out" error. Experiment shows that the limit is about 30 million values. You can test this for yourself using the [`array_fill()`](./functions-operators/array-fill/) function.
+
+With about 100,000 GPS data points, a 300 km trip is easily accommodated.
+
+The design that stores the GPS points in an array certainly breaks one of the time-honored rules of relational design: that column data types should be scalars. It does, however, bring definite advantages without the correctness risk and loss of functionality that it might in other use cases.
+
+For example, in the classic _"orders"_ and _"order_lines"_ design, an order line is for a quantity of a particular item from the vendor's catalog. And order lines for many different users will doubtless refer to the same catalog item. The catalog item has lots of fields; and some of them (especially the price) sometimes must be updated. Moreover, the overall business context implies queries like this: _find the total number of a specified catalog item that was ordered, by any user, during a specified time period_. Clearly, a fully normal Codd-and-Date design is called for here.
+
+It's different with GPS data. The resolution of modern devices is so fine (typically just a few paces, as mentioned) that it's hugely unlikely that two different GPS data points would have the same position. It's even less likely that different points would share the same heart rate and all the other facts that are recorded at each position. In other words, it's inconceivable that a query like the example given for the *"orders"* use case (_find the trips, by any user, that all share a common GPS data point_) would be useful. Moreover, all typical uses require fetching a trip and all its GPS data in a single query. One obvious example is to plot the transit of a lap on a map. Another example is to compute the generous containing envelope for a lap so that the set of coinciding lap envelopes can be discovered and analyzed to generate leader board reports and the like. SQL is not up to this kind of computation. Rather, you need procedural code—either in a stored procedure or in a client-side program.
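+
+For instance, the following sketch (not part of the original design discussion: the key values are placeholders, and it assumes the _"laps"_ table and a completed _"gps_data_point_t"_ type as shown above) fetches one lap's entire track in a single query and expands it, point by point, ready for exactly that kind of procedural processing:
+
+```plpgsql
+-- Sketch only: the key values are placeholders.
+select p.ts, p.lat, p.long, p.alt
+from laps, unnest(gps_data_points) as p
+where lap_start_ts  = '2021-07-01 09:05:00'
+and   trip_start_ts = '2021-07-01 08:30:00'
+and   userid        = 'a81bc81b-dead-4e5d-abff-90865d1e13b1';
+```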
+
+This use case is taken one step further, by using a ragged array-like structure, in [Example use case: GPS trip data (revisited)](./array-of-domains/#example-use-case-gps-trip-data-revisited).
+
+## Organization of the remaining array functionality content
+
+The following sections explain the details about array data types and functionality:
+
+- [The `array[]` value constructor](./array-constructor/)
+- [Creating an array value using a literal](./literals/)
+- [Built-in SQL functions and operators for arrays](./functions-operators/)
+- [Using an array of `DOMAIN` values](./array-of-domains)
diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/array-constructor.md b/docs/content/v2.25/api/ysql/datatypes/type_array/array-constructor.md
new file mode 100644
index 000000000000..5b3defde34ec
--- /dev/null
+++ b/docs/content/v2.25/api/ysql/datatypes/type_array/array-constructor.md
@@ -0,0 +1,191 @@
+---
+title: The array[] value constructor
+linkTitle: array[] constructor
+headerTitle: The array[] value constructor
+description: The array[] value constructor
+menu:
+  preview_api:
+    identifier: array-constructor
+    parent: api-ysql-datatypes-array
+    weight: 10
+type: docs
+---
+
+The `array[]` value constructor is a special variadic function. Uniquely among all the functions described in this _"Array data types and functionality"_ major section, it uses square brackets (`[]`) to surround its list of actual arguments.
+
+## Purpose and signature
+
+**Purpose:** Create an array value from scratch using an expression for each of the array's values. Such an expression can itself use the `array[]` constructor or an [array literal](../literals/).
+
+**Signature**
+```
+input value:   [anyarray | [ anyelement, [anyelement]* ]
+return value:  anyarray
+```
+**Note:** You can meet the goal of creating an array from directly specified values, instead, by using an [array literal](../literals/).
+
+These three ordinary functions also create an array value from scratch:
+
+- [`array_fill()`](../functions-operators/array-fill/) creates a "blank canvas" array of the specified shape with every value set to the same specified value.
+- [`array_agg()`](../functions-operators/array-agg-unnest/#array-agg) creates an array (of, in general, an implied _"row"_ type) from a SQL subquery.
+- [`string_to_array()`](../functions-operators/string-to-array/) creates a `text[]` array from a single `text` value that uses a specifiable delimiter to break it into individual values.
+
+**Example:**
+```plpgsql
+create type rt as (f1 int, f2 text);
+select array[(1, 'a')::rt, (2, 'b')::rt, (3, 'dog \ house')::rt]::rt[] as arr;
+```
+This is the result:
+```
+                    arr
+--------------------------------------------
+ {"(1,a)","(2,b)","(3,\"dog \\\\ house\")"}
+```
+Whenever an array value is shown in `ysqlsh`, it is implicitly `::text` typecast. This `text` value can be used immediately by enquoting it and typecasting it to the appropriate array data type to recreate the starting value. The YSQL documentation refers to this form of the literal as its _canonical form_.
It is characterized by its complete lack of whitespace except within `text` scalar values and within date-time scalar values. This term is defined formally in [Defining the canonical form of a literal](../literals/text-typecasting-and-literals/#defining-the-canonical-form-of-a-literal). + +To learn why you see four consecutive backslashes, see [Statement of the rules](../literals/array-of-rows/#statement-of-the-rules). + +Users who are familiar with the rules that are described in that section often find it expedient, for example when prototyping code that builds an array literal, to create an example value first, _ad hoc_, using the `array[]` constructor, like the code above does, to see an example of the syntax that their code must create programmatically. + +## Using the array[] constructor in PL/pgSQL code + +The example below attempts to make many teaching points in one piece of code. + +- The actual syntax, when the expressions that the `array[]` constructor uses are all literals, is far simpler than the syntax that governs how to construct an array literal. +- You can use all of the YSQL array functionality in PL/pgSQL code, just as you can in SQL statements. The code creates and invokes a table function, and not just a `DO` block, to emphasize this interoperability point. +- Array-like functionality is essential in any programming language. +- The `array[]` constructor is most valuable when the expressions that it uses are composed using declared variables, and especially formal parameters, that are used to build whatever values are intended. In this example, the values have the user-defined data type _"rt"_. In other words, the `array[]` constructor is particularly valuable when you build an array programmatically from scalar values that you know first at run time. +- It vividly demonstrates the semantic effect of the `array[]` constructor like this: +``` +declare + r rt[]; + two_d rt[]; +begin + ... + assert (array_dims(r) = '[1:3]'), 'assert failed'; + one_d_1 := array[r[1], r[2], r[3]]; + assert (one_d_1 = r), 'assert failed'; +``` +[`array_dims()`](../functions-operators/properties/#array-dims) is documented in the _"Functions for reporting the geometric properties of an array"_ section. + +Run this to create the required user-defined _"row"_ type and the table function and then to invoke it. + +```plpgsql +-- Don't create "type rt" if it's still there following the previous example. +create type rt as (f1 int, f2 text); + +create function some_arrays() + returns table(arr text) + language plpgsql +as $body$ +declare + i1 constant int := 1; + t1 constant text := 'a'; + r1 constant rt := (i1, t1); + + i2 constant int := 2; + t2 constant text := 'b'; + r2 constant rt := (i2, t2); + + i3 constant int := 3; + t3 constant text := 'dog \ house'; + r3 constant rt := (i3, t3); + + a1 constant rt[] := array[r1, r2, r3]; +begin + arr := a1::text; + return next; + + declare + r rt[]; + one_d_1 rt[]; + one_d_2 rt[]; + one_d_3 rt[]; + two_d rt[]; + n int not null := 0; + begin + ---------------------------------------------- + -- Show how arrays are useful, in the classic + -- sense, as what EVERY programming language + -- needs to handle a number of items when the + -- number isn't known until run time. + for j in 1..3 loop + n := j + 100; + r[j] := (n, chr(n)); + end loop; + + -- This further demonstrates the semantics + -- of the array[] constructor. 
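+    -- (Added explanatory note: after the loop, "r" holds the three "rt" values
+    -- (101,e), (102,f), and (103,g), with the default lower bound of 1. So
+    -- array_dims(r) is the text value '[1:3]'; and re-building those same three
+    -- values with the array[] constructor yields a value equal to "r", which
+    -- the asserts below confirm.)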
+ assert (array_dims(r) = '[1:3]'), 'assert failed'; + one_d_1 := array[r[1], r[2], r[3]]; + assert (one_d_1 = r), 'assert failed'; + ---------------------------------------------- + + one_d_2 := array[(104, chr(104)), (105, chr(105)), (106, chr(106))]; + one_d_3 := array[(107, chr(107)), (108, chr(108)), (109, chr(109))]; + + -- Show how the expressions that define the outcome + -- of the array[] constructor can themselves be arrays. + two_d := array[one_d_1, one_d_2, one_d_3]; + arr := two_d::text; + return next; + end; + +end; +$body$; + +select arr from some_arrays(); +``` +It produces two rows. This is the first: + +``` + arr +-------------------------------------------- + {"(1,a)","(2,b)","(3,\"dog \\\\ house\")"} +``` + +And this is the second row. The readability was improved by adding some whitespace manually: + +``` +{ + {"(101,e)","(102,f)","(103,g)"}, + {"(104,h)","(105,i)","(106,j)"}, + {"(107,k)","(108,l)","(109,m)"} +} +``` + +## Using the array[] constructor in a prepared statement + +This example emphasizes the value of using the `array[]` constructor over using an array literal because it lets you use expressions like `chr()` within it. +```plpgsql +-- Don't create "type rt" if it's still there followng the previous examples. +create type rt as (f1 int, f2 text); +create table t(k serial primary key, arr rt[]); + +prepare stmt(rt[]) as insert into t(arr) values($1); + +-- It's essential to typecast the individual "rt" values. +execute stmt(array[(104, chr(104))::rt, (105, chr(105))::rt, (106, chr(106))::rt]); +``` +This execution of the prepared statement, using an array literal as the actual argument, is semantically equivalent: +```plpgsql +execute stmt('{"(104,h)","(105,i)","(106,j)"}'); +``` +But here, of course, you just have to know in advance that `chr(104)` is `h`, and so on. Prove that the results of the two executions of the prepared statement are identical thus: + +```plpgsql +select + ( + (select arr from t where k = 1) + = + (select arr from t where k = 2) + )::text as result; +``` + +It shows this: + +``` + result +-------- + true +``` diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/array-of-domains.md b/docs/content/v2.25/api/ysql/datatypes/type_array/array-of-domains.md new file mode 100644 index 000000000000..69d1b831fec2 --- /dev/null +++ b/docs/content/v2.25/api/ysql/datatypes/type_array/array-of-domains.md @@ -0,0 +1,656 @@ +--- +title: Using an array of DOMAIN values +linkTitle: array of DOMAINs +headerTitle: Using an array of DOMAIN values +description: Using an array of DOMAIN values +menu: + preview_api: + identifier: array-of-domains + parent: api-ysql-datatypes-array + weight: 40 +type: docs +--- +An array of `DOMAIN` values lets you create, for example, a one-dimensional array whose values are themselves one-dimensional arrays of _different_ lengths. Stating this generally, it lets you implement a ragged multidimensional array, thereby overcoming the restriction that an array must normally be rectilinear. It meets other use cases too. + +**Note:** The understanding of this section depends on the principles that are established in these sections: + +- [Array data types and functionality](../../type_array/) + +- [The `array[]` value constructor](../array-constructor/) + +- [Creating an array value using a literal](../literals/) + +## Example use case: GPS trip data (revisited) + +There are use cases for which a ragged structure is essential. Most programming languages, therefore, have constructs that support this. 
+ +Look at [Example use case: GPS trip data](../../type_array/#example-use-case-gps-trip-data). It considers the representation of the GPS trips whose recording is broken up into laps, thus: + +- Each trip is made up of one or many laps. +- Each lap is typically made up of a large number GPS data points. + +The representation that was explained in that section met a modest ambition level: + +- Each lap was represented as a row in the _"laps"_ table" that had a multivalued field implemented as an array of GPS data points. +- But each trip was represented classically by a row in the _"trips"_ table whose set of laps were child rows in the _"laps"_ table. The master-child relationship was supported ordinarily by a foreign key constraint. Notice that each lap has a different number of GPS points from other laps. + +The next level of ambition dispenses with the separate _"laps"_ table by representing the entire trip as a ragged array of arrays in the _"trips"_ table. This scheme requires the ability to represent a trip as an array of the _"GPS data point array"_ data type—in other words, it depends on the ability to create such a named data type. The `DOMAIN` brings this ability. + +## Creating a ragged array of arrays + +### The apparent paradox + +The syntax for defining a table column, or a PL/pgSQL variable or formal parameter, seems at first glance to present a paradox that thwarts the goal. For example, this PL/pgSQL declaration defines _"v"_ as, apparently, a one-dimensional array. + +``` +declare + v int[]; +``` +However, this executable section first assigns a two-dimensional array value (with size three-by-three) to _"v"_, setting each of the array's values initially to `17`. Then it assigns a three-dimensional array value (with size two-by-two-by-two) to _"v"_, setting each of the array's values initially to `42`: +``` +begin + v := array_fill(17, '{3, 3}'); + ... + v := array_fill(42, '{2, 2, 2}'); +``` +See [`array_fill()`](../functions-operators/array-fill/). + +The property of the declaration of an array variable that it cannot fix the dimensionality of a value that is subsequently assigned to the variable was pointed out in [Array data types and functionality](../../type_array/). A column in a table with an array data type shares this property so that the column can hold arrays of different dimensionality in different rows. This goes hand-in-hand with the fact that the following declarations of _"v1"_ and "_v2"_, though apparently different, define identical semantics. + +``` +declare + v1 int[]; + v2 int[][]; +``` + +This syntactical quirk pinpoints the paradox. Is _"v2"_ a two-dimensional array or an array of the type `int[]`? The answer is it's neither. Rather, it's an array of _any_ dimensionality, and as such it is _rectilinear_. + +How then _would_ you write the declaration of an array of arrays You might guess that this would work: +``` +declare + v (int[])[]; +``` + +But this causes a compilation error. The paradox is exactly that `int[]` is anonymous. + +### The solution + +The `DOMAIN` brings the functionality that overcomes the apparent restriction. First, do this: + +```plpgsql +set client_min_messages = warning; +drop domain if exists int_arr_t cascade; +create domain int_arr_t as int[]; +create table t(k serial primary key, v1 int_arr_t[], typecast text, v2 int_arr_t[]); +``` +Notice that the use of the `CREATE DOMAIN` statement is an example of _type construction_. And the user-defined data type _"int_arr_t"_ is an example of such a constructed data type. 
+ +The columns _"v1"_ and _"v2"_ are now ready to store ragged arrays of arrays. Prove it like this: + +```plpgsql +do $body$ +declare + arr_1 constant int_arr_t := array[1, 2]; + arr_2 constant int_arr_t := array[3, 4, 5]; + ragged_arr constant int_arr_t[] := array[arr_1, arr_2]; +begin + insert into t(v1) values(ragged_arr); +end; +$body$; +``` +By using a `DO` block to set the value of _"ragged_arr"_ by building it bottom-up, you emphasize the fact that it really is a one-dimensional array of one-dimensional arrays of different lengths. It is, then, clearly _not_ a rectilinear two-dimensional array. + +Now use the technique that [The non-lossy round trip: value to text typecast and back to value](../literals/text-typecasting-and-literals/#the-non-lossy-round-trip-value-to-text-typecast-and-back-to-value) explained to inspect the `::text` typecast of the ragged array and then to show that, by typecasting this back to a value of the original data type, it can serve as the literal for the original value. First, do this: +```plpgsql +update t +set typecast = v1::text +where k = 1; + +select typecast from t where k = 1; +``` +This is the result: + +``` + typecast +--------------------- + {"{1,2}","{3,4,5}"} +``` + +This sentence is copied from [The non-lossy round trip: value to text typecast and back to value](../literals/text-typecasting-and-literals/#the-non-lossy-round-trip-value-to-text-typecast-and-back-to-value): + +> Notice how the syntax for the _array of arrays_ `text` value compares with the syntax for the _2-d array_ `text` value. Because the _array of arrays_ is ragged, the two inner `{}` pairs contain respectively two and three values. To distinguish between this case and the ordinary rectilinear case, the inner `{}` pairs are surrounded by double quotes. + +Now do this: + +```plpgsql +update t +set v2 = typecast::int_arr_t[] +where k = 1; + +select (v1 = v2)::text as "v1 = v2" from t where k = 1; +``` +This is the result: + +``` + v1 = v2 +--------- + true +``` +The original value has been recreated. + +### Addressing values in a ragged array of arrays + +First, consider this counter example: +```plpgsql +\pset null ' +with v as ( + select '{{1,2},{3,4}}'::int[] as two_d_arr) +select + two_d_arr[2][1] as "[2][1] -- meaningful", + two_d_arr[2] as "[2] -- meaningless" +from v; +``` +This is the result: + +``` + [2][1] -- meaningful | [2] -- meaningless +----------------------+-------------------- + 3 | +``` + +This reminds you how to address a single value in a rectilinear multidimensional array: you use this general scheme: + +``` +[idx_1][idx_2]...[idx_n] +``` +And it reminds you that you must supply exactly as many index values as the array has dimensions. Further, it shows you that if you do this wrong, and supply too many or too few index values, then you don't see an error but, rather, silently get `NULL`. This reminder prompts the obvious question: + +- How do you address, for example, the _first_ value in the array that is itself the _second_ array in the ragged array of arrays? + +You know before typing it that this can't be right: +```plpgsql +select v1[2][1] as "v1[2][1]" from t where k = 1; +``` +Sure enough, it shows this: +``` + v1[2][1] +----------- + +``` +You don't get an error. But neither do you get what you want. Try this instead: +```plpgsql +select v1[2] as "v1[2]" from t where k = 1; +``` +This is the result: +``` + v1[2] +--------- + {3,4,5} +``` +This is the clue. You have identified the leaf array, in the array of arrays, that you want to. 
Now you have to identify the desired value in _that_ array. Try this: +```plpgsql +select (v1[2])[1] as "(v1[2])[1]" from t where k = 1; +``` +This is the result: +``` + (v1[2])[1] +------------ + 3 +``` +In the same way, you can inspect the geometric properties of the leaf array like this: +```plpgsql +select + array_lower(v1[1], 1) as v1_lb, + array_upper(v1[1], 1) as v1_ub, + array_lower(v1[2], 1) as v2_lb, + array_upper(v1[2], 1) as v2_ub +from t where k = 1; +``` +This is the result: +``` + v1_lb | v1_ub | v2_lb | v2_ub +-------+-------+-------+------- + 1 | 2 | 1 | 3 +``` +Finally, try this counter example: + +```plpgsql +with v as ( + select '{{1,2},{3,4}}'::int[] as two_d_arr) +select + (two_d_arr[2])[1] +from v; +``` +It causes a SQL compilation error. You must know whether the value at hand, within which you want to address a value, is a rectilinear multidimensional array or a ragged array of arrays. + +### Using FOREACH with an array of DOMAINs + +This example demonstrates the problem. + +```plpgsql +set client_min_messages = warning; +drop domain if exists array_t cascade; +drop domain if exists arrays_t cascade; + +create domain array_t as int[]; +create domain arrays_t as array_t[]; + +\set VERBOSITY verbose +do $body$ +declare + arrays arrays_t := array[ + array[1, 2]::array_t, array[3, 4, 5]::array_t]; + + runner array_t not null := '{}'; +begin + -- Error 42804 here. + foreach runner in array arrays loop + raise info '%', runner::text; + end loop; +end; +$body$; +``` +It causes this syntax error: + +``` +42804: FOREACH expression must yield an array, not type arrays_t +``` +The error text might confuse you. It really means that the argument of the `ARRAY` keyword in the loop header must, literally, be an explicit array—and _not_ a domain that names such an array. + +A simple workaround is to declare _"arrays"_ explicitly as +_"array_t[]"_ rather that use _"arrays_t"_ as a shorthand for this. + +```plpgsql +\set VERBOSITY default +do $body$ +declare + arrays array_t[] := array[ + array[1, 2]::array_t, array[3, 4, 5]::array_t]; + + runner array_t not null := '{}'; +begin + foreach runner in array arrays loop + raise info '%', runner::text; + end loop; +end; +$body$; +``` + +It shows this: + +``` +INFO: {1,2} +INFO: {3,4,5} +``` + +What if you really _do_ need to use a `DOMAIN`? For example, you might want to define a constraint like this: + +```plpgsql +set client_min_messages = warning; +drop domain if exists arrays_t cascade; +create domain arrays_t as array_t[] +check ((cardinality(value) = 2)); +``` + +The workaround is to typecast the argument of the `ARRAY` keyword _in situ_ to an array of the appropriate element data type. + +```plpgsql +do $body$ +declare + arrays arrays_t := array[ + array[1, 2]::array_t, array[3, 4, 5]::array_t]; + + runner array_t not null := '{}'; +begin + foreach runner in array arrays::array_t[] loop + raise info '%', runner::text; + end loop; +end; +$body$; +``` +Once again, it shows this: + +``` +INFO: {1,2} +INFO: {3,4,5} +``` + +### Using array_agg() to produce an array of DOMAIN values + +See [array_agg()](../functions-operators/array-agg-unnest/#array-agg-first-overload). It turns out that directly aggregating `DOMAIN` values that represent a ragged array is not supported. But a simple PL/pgSQL function provides the workaround. 
This sets up to demonstrate the problem: + +```plpgsql +set client_min_messages = warning; +drop table if exists t cascade; +drop domain if exists array_t cascade; +drop domain if exists arrays_t cascade; + +create domain array_t as int[]; +create domain arrays_t as array_t[]; + +create table t(k serial primary key, v array_t); + +insert into t(v) values + ('{2,6}'), + ('{1,4,5,6}'), + ('{4,5}'), + ('{2,3}'), + ('{4,5}'), + ('{3,5,7}'); + +select v from t order by k; +``` +It shows the raggedness thus: + +``` + v +----------- + {2,6} + {1,4,5,6} + {4,5} + {2,3} + {4,5} + {3,5,7} +``` + +And this demonstrates the problem: + +```plpgsql +\set VERBOSITY verbose +select + array_agg(v order by k) +from t; +``` + +It causes this error: + +``` +2202E: cannot accumulate arrays of different dimensionality +``` + +Typecasting cannot come to the rescue here. But this function produces the required result: + +```plpgsql +\set VERBOSITY default +create or replace function array_agg_v() + returns arrays_t + language plpgsql +as $body$ +<<b>>declare + v array_t not null := '{}'; + n int not null := 0; + r array_t[] not null := '{}'; +begin + for b.v in (select t.v from t order by k) loop + n := n + 1; + r[n] := b.v; + end loop; + return r; +end b; +$body$; +``` +Do this: + +```plpgsql +select array_agg_v(); +``` + +This is the result: + +``` + array_agg_v +--------------------------------------------------------- + {"{2,6}","{1,4,5,6}","{4,5}","{2,3}","{4,5}","{3,5,7}"} +``` + +## Creating a matrix of matrices + +The Wikipedia article entitled [Matrix (mathematics)](https://en.wikipedia.org/wiki/Matrix_(mathematics)) defines the term _"matrix"_ like this: + +> a matrix... is a rectangular array... of numbers, symbols, or expressions, arranged in rows and columns. + +Look for the heading [Matrices with more general entries](https://en.wikipedia.org/wiki/Matrix_%28mathematics%29#Matrices_with_more_general_entries) and in particular for this sentence: + +> One special but common case is block matrices, which may be considered as matrices whose entries themselves are matrices. + +Various disciplines in mathematics, physics, and the like, use block matrices. [Uses of arrays](../../type_array/#uses-of-arrays) explains how such cases generate various kinds of arrays in client-side programs and need to use these values later, again in client-side programs. This brings the requirement, in the present use case, to persist and to retrieve block matrices. + +Though this use case is relatively exotic, the techniques that are used to implement the required structures (and in particular, the dependency of a viable solution upon user-defined `DOMAIN` data types) are of general utility. It's for this reason that the approach is explained here. + +### Defining the required data types + +First, define the data type for the "payload" matrix. To make it interesting, assume that the following rules need to be enforced: +- The payload matrix must not be atomically null. +- By definition, it must be two-dimensional. +- It must be exactly three-by-three. +- The lower bound along each dimension must be `1`. +- Each of the matrix's values must be `NOT NULL`.
+ +The motivating requirement for the `DOMAIN` type constructor is that it must allow arbitrary constraints to be defined on values of the constructed type, like this: +```plpgsql +create domain matrix_t as text[] +check ( + (value is not null) and + (array_ndims(value) = 2) and + (array_lower(value, 1) = 1) and + (array_lower(value, 2) = 1) and + (array_upper(value, 1) = 3) and + (array_upper(value, 2) = 3) and + (value[1][1] is not null ) and + (value[1][2] is not null ) and + (value[1][3] is not null ) and + (value[2][1] is not null ) and + (value[2][2] is not null ) and + (value[2][3] is not null ) and + (value[3][1] is not null ) and + (value[3][2] is not null ) and + (value[3][3] is not null ) +); +``` +Next, define the block matrix as a matrix of _"matrix_t"_. Assume that similar following rules need to be enforced: + +- The block matrix must not be atomically null. +- By definition, it must be two-dimensional. +- It must be exactly two-by-two. +- The lower bound along each dimension must be `1`. +- Each of the matrix's values must be `NOT NULL`. + +The `CREATE DOMAIN` statement for _"block_matrix_t"_ is therefore similar to that for _"matrix_t"_: +```plpgsql +create domain block_matrix_t as matrix_t[] +check ( + (value is not null) and + (array_ndims(value) = 2) and + (array_lower(value, 1) = 1) and + (array_lower(value, 2) = 1) and + (array_upper(value, 1) = 2) and + (array_upper(value, 2) = 2) and + (value[1][1] is not null ) and + (value[1][2] is not null ) and + (value[2][1] is not null ) and + (value[2][2] is not null ) +); +``` +These two `CREATE DOMAIN` statements are uncomfortably verbose and repetitive. But they are sufficient to illustrate the basis of the approach. It would be better, for use in a real application, to encapsulate all the `CHECK` rules in a PL/pgSQL function that takes the `DOMAIN` value as input and that returns a `boolean`, and to use this as a single `CHECK` predicate. The function could use the `array_lower()` and `array_length()` functions to compute the ranges of two nested `FOR` loops to check that the array's individual values all satisfy the `NOT NULL` rule. + +### Using the "block_matrix_t" DOMAIN + +Next, create a block matrix value, insert it, and its `::text` typecast, into a table, and inspect the typecast's value. + +```plpgsql +create table block_matrices_1(k int primary key, v block_matrix_t, text_typecast text); + +do $body$ +declare + -- The definitions of the two domains imply "not null" constraints + -- on each of the variables "matrix_t" and "block_matrix_t". + m matrix_t := array_fill('00'::text, array[3, 3], array[1, 1]); + b block_matrix_t := array_fill(m, array[2, 2], array[1, 1]); + + n int not null := 0; + ms matrix_t[]; +begin + -- Define four matrix_t values so that, for readability of the result, + -- the in-total 24 values are taken from an increasing dense series. 
+ for i in 1..4 loop + for j in 1..3 loop + for k in 1..3 loop + n := n + 1; + m[j][k] := ltrim(to_char(n, '09')); + end loop; + end loop; + ms[i] := m; + end loop; + + n := 0; + for j in 1..2 loop + for k in 1..2 loop + n := n + 1; + b[j][k] := ms[n]; + end loop; + end loop; + + insert into block_matrices_1(k, v, text_typecast) + values(1, b, b::text); +end; +$body$; + +select text_typecast +from block_matrices_1 +where k = 1; +``` +This is the result (after manual whitespace formatting): +``` +{ + { + "{ + {01,02,03}, block_matrix[1][1] + {04,05,06}, + {07,08,09} + }", + "{ + {10,11,12}, block_matrix[1][2] + {13,14,15}, + {16,17,18} + }" + }, + { + "{ + {19,20,21}, block_matrix[2][1] + {22,23,24}, + {25,26,27} + }", + "{ + {28,29,30}, block_matrix[2][2] + {31,32,33}, + {34,35,36} + }" + } +} +``` +**Note:** The annotations _"block_matrix"_, and so on, are just that. Because they are _within_ the `text` value, they are part of that value and therefore render it illegal. They were added manually just to highlight the meaning of the overall `text` value. + +Finally, check that even this exotic structure conforms to the universal rule, copied from [The non-lossy round trip: value to text typecast and back to value](../literals/text-typecasting-and-literals/#the-non-lossy-round-trip-value-to-text-typecast-and-back-to-value): + +> - Any value of any data type, primitive or composite, can be `::text` typecast. Similarly, there always exists a `text` value that, when properly spelled, can be typecast to a value of any desired data type, primitive or composite. +> - If you `::text` typecast a value of any data type and then typecast that `text` value to the original value's data type, then the value that you get is identical to the original value. + +```plpgsql +create table block_matrices_2(k int primary key, v block_matrix_t); + +insert into block_matrices_2(k, v) +select k, text_typecast::block_matrix_t +from block_matrices_1 +where k = 1; + +with a as ( + select k, t1.v as v1, t2.v as v2 + from + block_matrices_1 as t1 + inner join + block_matrices_2 as t2 + using (k) + ) +select (v1 = v2)::text as "v1 = v2" +from a +where k = 1; +``` +This is the result: +``` + v1 = v2 +--------- + true +``` +The rule holds. + +### Using unnest() on an array of arrays +First, produce the list of _"matrix_t"_ values, in row-major order: +```plpgsql +with matrices as ( + select unnest(v) as m + from block_matrices_1 + where k = 1) +select + row_number() over(order by m) as r, + m +from matrices order by m; +``` +See [`unnest()`](../functions-operators/array-agg-unnest/#unnest). + +The term _"row-major order"_ is explained in [Joint semantics](../functions-operators/properties/#joint-semantics) within the section _"Functions for reporting the geometric properties of an array"_.. + +This is the result: + +``` + r | m +---+------------------------------------ + 1 | {{01,02,03},{04,05,06},{07,08,09}} + 2 | {{10,11,12},{13,14,15},{16,17,18}} + 3 | {{19,20,21},{22,23,24},{25,26,27}} + 4 | {{28,29,30},{31,32,33},{34,35,36}} +``` +Now unnest a _"matrix_t"_ value of interest: +```plpgsql +select unnest(v[2][1]) as val +from block_matrices_1 +where k = 1 +order by val; +``` +This is the result: +``` + val +----- + 19 + 20 + ... 
+ 26 + 27 +``` +Use this query if you want to see _all_ of the leaf values in row-major order: +```plpgsql +with + matrixes as ( + select unnest(v) as m + from block_matrices_1 + where k = 1), + vals as ( + select unnest(m) as val + from matrixes) +select + row_number() over(order by val) as r, + val +from vals +order by 1; +``` +This is the result: +``` + r | val +----+----- + 1 | 01 + 2 | 02 + 3 | 03 + ... + 34 | 34 + 35 | 35 + 36 | 36 +``` diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/_index.md b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/_index.md new file mode 100644 index 000000000000..9197f6573aec --- /dev/null +++ b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/_index.md @@ -0,0 +1,117 @@ +--- +title: Array functions and operators +linkTitle: Functions and operators +headerTitle: Array functions and operators +description: Array functions and operators +image: /images/section_icons/api/subsection.png +menu: + preview_api: + identifier: array-functions-operators + parent: api-ysql-datatypes-array + weight: 90 +type: indexpage +showRightNav: true +--- + +**Note:** For an alphabetical listing of the array functions and operators, see the listing in the navigation bar. + +Most of the functions and operators listed here can use an array of any dimensionality, but four of the functions accept, or produce, only a one-dimensional array. This property is called out by the second column _"1-d only?"_ in the tables that follow. The restricted status is indicated by _"1-d"_ in that function's row. When the field is blank, there is no dimensionality restriction. + +## Functions for creating arrays from scratch + +The `array[]` constructor, and the three functions, create an array from scratch. + +| Function or operator | 1-d only? | Description | +| ---- | ---- | ---- | +| [`array[]`](./../array-constructor/) | | The array[] value constructor is a special variadic function that creates an array value from scratch using an expression for each of the array's values. Such an expression can itself use the `array[]` constructor or an [array literal](../literals/). | +| [`array_fill()`](./array-fill/) | | Returns a new "blank canvas" array of the specified shape with all cells set to the same specified value. | +| [`array_agg()`](./array-agg-unnest/#array-agg) | | Returns an array (of an implied _"row"_ type) from a SQL subquery. | +| [`string_to_array()`](./string-to-array/) | 1-d | Returns a one-dimensional `text[]` array by splitting the input `text` value into subvalues using the specified `text` value as the delimiter. Optionally, allows a specified `text` value to be interpreted as `NULL`. | + +## Functions for reporting the geometric properties of an array + +| Function | 1-d only? | Description | +| ---- | ---- | ---- | +| [`array_ndims()`](./properties/#array-ndims) | | Returns the dimensionality of the specified array. | +| [`array_lower()`](./properties/#array-lower) | | Returns the lower bound of the specified array along the specified dimension. | +| [`array_upper()`](./properties/#array-upper) | | Returns the upper bound of the specified array along the specified dimension. | +| [`array_length()`](./properties/#array-length) | | Returns the length of the specified array along the specified dimension. | +| [`cardinality()`](./properties/#cardinality) | | Returns the total number of values in the specified array. 
| +| [`array_dims()`](./properties/#array-dims) | | Returns a text representation of the same information as `array_lower()` and `array_length()`, for all dimensions, in a single text value. | + +## Functions to find a value in an array + +| Function | 1-d only? | Description | +| ---- | ---- | ---- | +| [`array_position()`](./array-position/#array-position) | 1-d | Returns the index, in the supplied array, of the specified value. Optionally starts searching at the specified index. | +| [`array_positions()`](./array-position/#array-positions) | 1-d | Returns the indexes, in the supplied array, of all occurrences the specified value. | + +## Operators to test whether a value is in an array + +These operators require that the [LHS](https://en.wikipedia.org/wiki/Sides_of_an_equation) is a scalar and that +the [RHS](https://en.wikipedia.org/wiki/Sides_of_an_equation) is an array of that LHS's data type. + +| Operator | 1-d only? | Description | +| ---- | ---- | ---- | +| [`ANY`](./any-all/) | | Returns `TRUE` if _at least one_ of the specified inequality tests between the LHS element and each of the RHS array's elements evaluates to `TRUE`. | +| [`ALL`](./any-all/) | | Returns `TRUE` if _every one_ of the specified inequality tests between the LHS element and each of the RHS array's elements evaluates to `TRUE`. | + +## Operators for comparing two arrays + +These operators require that the [LHS and RHS](https://en.wikipedia.org/wiki/Sides_of_an_equation) arrays have the same data type. + +| Operator | 1-d only? | Description | +| ---- | ---- | ---- | +| [`=`](./comparison/#the-160-160-160-160-and-160-160-160-160-operators) | | Returns `TRUE` if the LHS and RHS arrays are equal. | +| [`<>`](./comparison/#the-160-160-160-160-and-160-160-160-160-operators) | | Returns `TRUE` if the LHS and RHS arrays are not equal. | +| [`>`](./comparison/#the-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-operators) | | Returns `TRUE` if the LHS array is greater than the RHS array. | +| [`>=`](./comparison/#the-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-operators) | | Returns `TRUE` if the LHS array is greater than or equal to the RHS array. | +| [`<=`](./comparison/#the-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-operators) | | Returns `TRUE` if the LHS array is less than or equal to the RHS array. | +| [`<`](./comparison/#the-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-operators) | | Returns `TRUE` if the LHS array is less than the RHS array. | +| [`@>`](./comparison/#the-160-160-160-160-and-160-160-160-160-operators-1) | | Returns `TRUE` if the LHS array contains the RHS array—that is, if every distinct value in the RHS array is found among the LHS array's distinct values. | +| [`<@`](./comparison/#the-160-160-160-160-and-160-160-160-160-operators-1) | | Returns `TRUE` if the LHS array is contained by the RHS array—that is, if every distinct value in the LHS array is found among the RHS array's distinct values. | +| [`&&`](./comparison/#the-160-160-160-160-operator) | | Returns `TRUE` if the LHS and RHS arrays overlap—that is, if they have at least one value in common. | + + +## The slice operator + +| Operator | 1-d only? 
| Description | +| ---- | ---- | ---- | +|[`[lb1:ub1]...[lbN:ubN]`](./slice-operator/) | | Returns a new array whose length is defined by specifying the slice's lower and upper bound along each dimension. These specified slicing bounds must not exceed the source array's bounds. The new array has the same dimensionality as the source array and its lower bound is `1` on each axis. | + +## Functions and operators for concatenating an array with an array or an element + +These functions require that the two arrays have the same data type and compatible dimensionality. + +| Function or operator | 1-d only? | Description | +| ---- | ---- | ---- | +| [`||`](./concatenation/#the-160-160-160-160-operator) | | Returns the concatenation of any number of compatible `anyarray` and `anyelement` values. | +| [`array_cat()`](./concatenation/#array-cat) | | Returns the concatenation of two compatible `anyarray` values. | +| [`array_append()`](./concatenation/#array-append) | | Returns an array that results from appending a scalar value to (that is, _after_) an array value. | +| [`array_prepend()`](./concatenation/#array-prepend) | | Returns an array that results from prepending a scalar value to (that is, _before_) an array value. | + +## Functions and operators to change values in an array + +| Function or operator | 1-d only? | Description | +| ---- | ---- | ---- | +| [`array_replace()`](./replace-a-value/#array-replace) | | Returns a new array where every occurrence of the specified value in the input array has been replaced by the specified new value. | +| [`arr[idx_1]...[idx_N] := val`](./replace-a-value/#setting-an-array-value-explicitly-and-in-place) | | Update a value in an array "in place". | +| [`array_remove()`](./array-remove) | 1-d | Returns a new array where _every_ occurrence of the specified value has been removed from the specified input array. | + +## Function to convert an array to a text value + +| Function | 1-d only? | Description | +| ---- | ---- | ---- | +| [`array_to_string()`](./array-to-string) | | Returns a `text` value computed by representing each array value, traversing these in row-major order, by its `::text` typecast, using the supplied delimiter between each such representation. (The result, therefore, loses all information about the arrays geometric properties.) Optionally, represent `NULL` by the supplied `text` value. | + +## Table function to transform an array into a SETOF anyelement + +| Function | 1-d only? | Description | +| ---- | ---- | ---- | +| [`unnest()`](./array-agg-unnest/#unnest) | | Use in the `FROM` clause of a `SELECT` statement. The simple overload accepts a single `anyarray` value and returns a `SETOF anyelement`. The exotic overload accepts a variadic list of `anyarray` values and returns a `SETOF` with many columns where each, in turn, has the output of the corresponding simple overload. | + +## Table function to transform an array into a SETOF index values + +| Function | 1-d only? | Description | +| ---- | ---- | ---- | +| [`generate_subscripts()`](./array-agg-unnest/#generate-subscripts) | | Use in the `FROM` clause of a `SELECT` statement. Returns the values of the indexes along the specified dimension of the specified array. 
| diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/any-all.md b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/any-all.md new file mode 100644 index 000000000000..7e67c554a410 --- /dev/null +++ b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/any-all.md @@ -0,0 +1,402 @@ +--- +title: ANY and ALL—test if an element is in an array +linkTitle: ANY and ALL +headerTitle: ANY and ALL — test if an element is in an array +description: The ANY and ALL operators compare an element with each of the elements in an array. +menu: + preview_api: + identifier: any-all + parent: array-functions-operators + weight: 10 +type: docs +--- + +## Overview + +**Signature** + +Each of the `ANY` and `ALL` operators has the same signature, thus: + +``` +input value: anyelement, anyarray +return value: boolean +``` +`SOME` is a synonym for `ANY`. Therefore this section will make no further mention of `SOME`. + +**Note:** The term of art _element_ is used in this section for what the pseudo-type name `anyelement` represents. Each of these operators requires that the data type of the _element_ value to which the LHS expression evaluates corresponds to the data type of the array value to which the RHS expression evaluates. For example, when the data type of the LHS _element_ is, say _"t"_, then the RHS array's data type must be _"t[]"_. The RHS array can have any dimensionality. The operators are sensitive only to the actual _elements_ in the array. + +The abbreviations [LHS](https://en.wikipedia.org/wiki/Sides_of_an_equation) and [RHS](https://en.wikipedia.org/wiki/Sides_of_an_equation) have their usual meanings. + +**Purpose:** Return `TRUE`, `FALSE`, or `NULL` according to the outcome of the specified set of comparisons. This is the general form of the invocation: + +``` +any_all_boolean_expression ::= + lhs_element { = | <> | < | <= | >= | > } { ANY | ALL} rhs_array +``` + +Notice that `!=` was omitted from the list of equality and inequality operators because it's just an alternative spelling of the `<>` inequality operator. + +Because _"any_all_boolean_expression"_ is just that, a `boolean` expression, it can optionally be preceded with the `NOT` unary operator and it can be conjoined with other `boolean` expressions using `AND`, and `OR` in the normal way. + +The evaluation of _"any_all_boolean_expression"_ visits each of the array's elements in turn (as mentioned, the array's dimensionality doesn't matter) and it performs the specified equality or inequality comparison between the LHS _element_ and the current array _element_. + +## Semantics + +The accounts of `ANY` and `ALL` are symmetrical and complementary. It's therefore most effective to describe the semantics jointly in a single account. + +Notice that the use of _element_ and _array_ (in this overall section on `ANY` and `AND`) acknowledges the fact that the LHS and the RHS are each, in general, expressions. So _element_ acts as shorthand for the value to which the LHS expression evaluates. and _array_ acts as shorthand for the value to which the RHS evaluates. + +### Simple scenario + +The simple scenario is restricted to the case that the LHS _element_ `IS NOT NULL`, the RHS array `IS NOT NULL`, and the RHS array's cardinality is at least one. + +When the LHS _element_ is compared with each of the RHS array's _elements_, the comparisons use the appropriate data type overload of the particular equality or inequality operator that is used in the _"any_all_boolean_expression"_. 
+ +- If `ANY` is used, then the result is `TRUE` only if at least one of the successive comparisons evaluates to `TRUE`. (It doesn't matter if zero or more of the other comparisons evaluates to `NULL`.) If every one of the comparisons evaluates to `FALSE`, then the result is `FALSE`. If every one of the comparisons evaluates to `NULL`, then the result is `NULL`. + +- If `ALL` is used, then the result is `TRUE` only if every one of the successive comparisons evaluates to `TRUE`. If at least one of the comparisons evaluates to `FALSE` and not one evaluates to `NULL`, then the result is `FALSE`. If at least one of the comparisons evaluates to `NULL`, then the result is `NULL`. + +Notice that `ANY` is comparable to `OR` and that `ALL` is comparable to `AND` in this way: + +- If `ANY` is used, then the result is the `OR` combination of the successive individual comparisons. + +- If `ALL` is used, then the result is the `AND` combination of the successive individual comparisons. + +This `DO` block demonstrates the semantics of `OR` and `AND`: + +```plpgsql +do $body$ +begin + -- OR + assert (true or false or null), 'assert failed'; + assert not (false or false or false), 'assert failed'; + assert (false or false or null) is null, 'assert failed'; + + -- AND + assert (true and true and true), 'assert failed'; + assert not (true and true and false), 'assert failed'; + assert (true and true and null) is null, 'assert failed'; +end; +$body$; +``` + +The next `DO` block is a mechanical (manual) re-write of the block that demonstrates the semantics of `OR` and `AND`. It correspondingly demonstrates the semantics of `ANY` and `ALL`. + +```plpgsql +do $body$ +begin + -- ANY + assert (true = any (array[true, false, null ]::boolean[])), 'assert failed'; + assert not (true = any (array[false, false, false]::boolean[])), 'assert failed'; + assert (true = any (array[false, false, null ]::boolean[])) is null, 'assert failed'; + + -- ALL + assert (true = all (array[true, true, true ]::boolean[])), 'assert failed'; + assert not (true = all (array[true, true, false]::boolean[])), 'assert failed'; + assert (true = all (array[true, true, null ]::boolean[])) is null, 'assert failed'; +end; +$body$; +``` + +The combination `= ANY` is functionally equivalent to `IN` (but `IN` is illegal syntax when the RHS is an array). + +The combination `= ALL` has no functional equivalent in the way that `= ANY` is functionally equivalent to `IN`. + +The following small test emphasizes the symmetry between `ANY` and `IN`. + +```plpgsql +select ( + 42 = any (array[17, 42, 53]) + and + 42 in (17, 42, 53) + )::text as b; +``` +See [The `array[]` value constructor](../../array-constructor/). This is the result: +``` + b +------ + true +``` +Notice that here, _"(17, 42, 53)"_ is _not_ a constructed record value because of the semantic effect of preceding it by `IN`. In contrast, in the following example, _"(17, 42, 53)"_ _is_ a constructed record: + +```plpgsql +with v as (select (17, 42, 53) as r) +select pg_typeof(r)::text from v; +``` + +This is the result: + +``` + pg_typeof +----------- + record +``` + +This outcome is due to the semantic effect of using _"(17, 42, 53)"_ directly as a `SELECT` list item. This, therefore, causes a syntax error: + +``` +with v as (select (1, 2, 3, 4) as r) +select 1 in r from v; +``` + +### Exotic scenario + +This section explains the semantics: when one, or both, of the LHS _element_ and the RHS array `IS NULL`; and when neither the LHS nor the RHS `IS NULL` and the RHS array's cardinality is zero.
+ +- If either the LHS _element_ or the RHS array `IS NULL`, then both the `ANY` result `IS NULL` and the `ALL` result `IS NULL`. + +- If both the LHS _element_ `IS NOT NULL`and the RHS array `IS NOT NULL` and the RHS array has zero _elements_, then the `ANY` result is `FALSE`. +- If both the LHS _element_ `IS NOT NULL`and the RHS array `IS NOT NULL` and the RHS array has zero _elements_, then the `ALL` result is `TRUE`. You might think that this is a counter-intuitive rule definition. But the [PostgreSQL documentation for Version 11.2](https://www.postgresql.org/docs/11/functions-comparisons.html#id-1.5.8.28.17) states this clearly. And the first example in [Semantics demonstration](./#semantics-demonstration) is consistent with this definition. Moreover, and as is required to be the case, the behavior of the example is identical using YugabyteDB and PostgreSQL Version 11.2. + +## Semantics demonstration + +The following semantics demonstrations show the use of an array of _atomic elements_, an array of _composite elements_, and an array of _elements_ that are values of a `DOMAIN`. + +### Using an array of atomic elements + +```plpgsql +do $body$ +declare + v1 constant int := 1; + v2 constant int := 2; + v3 constant int := 3; + v4 constant int := 4; + v5 constant int := 5; + v6 constant int := null; + + arr1 constant int[] := array[v1, v1, v1, v1]; + arr2 constant int[] := array[v1, v1, v1, v2]; + arr3 constant int[] := array[v1, v2, v3, v4]; + arr4 constant int[] := array[v1, v1, v1, null]; + arr5 constant int[] := null; + + -- Notice that an array with zero elements is nevertheless NOT NULL. + arr6 constant int[] not null := '{}'; + + b01 constant boolean not null := v1 = all (arr1); + b02 constant boolean not null := not v1 = all (arr3); + b03 constant boolean not null := not v1 = all (arr2); + b04 constant boolean not null := v1 = any (arr3); + b05 constant boolean not null := not v5 = any (arr3); + + b06 constant boolean not null := v1 = any (arr4); + b07 constant boolean not null := (v5 = any (arr4)) is null; + b08 constant boolean not null := (v1 = all (arr4)) is null; + + b09 constant boolean not null := (v1 = any (arr5)) is null; + b10 constant boolean not null := (v6 = any (arr1)) is null; + b11 constant boolean not null := (v1 = all (arr5)) is null; + b12 constant boolean not null := (v6 = all (arr1)) is null; + + b13 constant boolean not null := not (v1 = any (arr6)); + b14 constant boolean not null := (v1 = all (arr6)); +begin + assert + (b01 and b02 and b03 and b04 and b05 and b06 and b07 and b08 + and b09 and b10 and b11 and b12 and b13 and b14), + 'assert failed'; +end; +$body$; +``` +Here is a mechanically derived example from the code above that uses two-dimensional arrays in place of one-dimensional arrays. But some of the tests were removed to help readability. The outcomes of the remaining tests are unchanged because these depend only upon the array's actual elements and not upon its geometry. 
+ +```plpgsql +do $body$ +declare + v1 constant int := 1; + v2 constant int := 2; + v3 constant int := 3; + v4 constant int := 4; + v5 constant int := 5; + + arr1 constant int[] := array[array[v1, v1], array[v1, v1]]; + arr2 constant int[] := array[array[v1, v2], array[v3, v4]]; + arr3 constant int[] := array[array[v1, v1], array[v1, null]]; + + b1 constant boolean not null := v1 = all (arr1); + b2 constant boolean not null := not v1 = all (arr2); + b3 constant boolean not null := v1 = any (arr2); + b4 constant boolean not null := not v5 = any (arr2); + + b5 constant boolean not null := v1 = any (arr3); + b6 constant boolean not null := (v5 = any (arr3)) is null; + b7 constant boolean not null := (v1 = all (arr3)) is null; +begin + assert + (b1 and b2 and b3 and b4 and b5 and b6 and b7), + 'assert failed'; +end; +$body$; +``` + +### Using an array of composite "row" type value elements + +This code was produced by mechanically replacing `int` with _"rt"_ and by changing the spelling of the values that are assigned to _"v1"_ through _"v5_" accordingly. But, again, some of the tests were removed to help readability. The rest of the remaining code is identical to its counterpart in the first example. + +```plpgsql +drop type if exists rt; +create type rt as (a int, b text); + +do $body$ +declare + v1 constant rt := (0, 1); + v2 constant rt := (2, 3); + v3 constant rt := (4, 5); + v4 constant rt := (6, 7); + v5 constant rt := (8, 9); + + arr1 constant rt[] := array[v1, v1, v1, v1]; + arr2 constant rt[] := array[v1, v2, v3, v4]; + arr3 constant rt[] := array[v1, v1, v1, null::rt]; + + b1 constant boolean not null := v1 = all (arr1); + b2 constant boolean not null := not v1 = all (arr2); + b3 constant boolean not null := v1 = any (arr2); + b4 constant boolean not null := not v5 = any (arr2); + + b5 constant boolean not null := v1 = any (arr3); + b6 constant boolean not null := (v5 = any (arr3)) is null; + b7 constant boolean not null := (v1 = all (arr3)) is null; +begin + assert + (b1 and b2 and b3 and b4 and b5 and b6 and b7), + 'assert failed'; +end; +$body$; +``` + +### Using an array of elements that are values of a DOMAIN + +First, consider these two examples: + +```plpgsql +drop domain if exists d1_t; +create domain d1_t as int +default 42 constraint d1_t_chk check(value >= 17); + +drop domain if exists d2_t; +create domain d2_t as int[]; +``` + +It's clear that the values of _"d1_t"_, as a specialized kind of `int`, are _elements_. But what about the values of _"d2_t"_ which is a specialized kind of _array_? Critically, but somewhat counter-intuitively, _"d2_t"_ does _not_ qualify as `anyarray`. Rather, it qualifies as `anyelement`. This code example underlines the point: + +```plpgsql +create or replace procedure p(i in anyelement) + language plpgsql +as $body$ +begin + raise info '%', pg_typeof(i); +end; +$body$; + +call p(53::d1_t); + +call p('{1, 2}'::d2_t); +``` + +The first `CALL` produces this result: + +``` +INFO: d1_t +``` + +and the second `CALL` produces this result: + +``` +INFO: d2_t +``` + +Once again, the following code was produced by mechanically replacing `int` — this time with _"int_arr_t"_ and by changing the spelling of the values that are assigned to _"v1"_ through _"v5"_ accordingly. But, again, some of the tests were removed to help readability. The rest of the remaining code is identical to its counterpart in the first example. 
+ +```plpgsql +drop domain if exists int_arr_t; +create domain int_arr_t as int[]; + +do $body$ +declare + v1 constant int_arr_t := array[1, 1]; + v2 constant int_arr_t := array[2, 3]; + v3 constant int_arr_t := array[4, 5]; + v4 constant int_arr_t := array[5, 7]; + v5 constant int_arr_t := array[8, 9]; + + arr1 constant int_arr_t[] := array[v1, v1, v1, v1]; + arr2 constant int_arr_t[] := array[v1, v2, v3, v4]; + arr3 constant int_arr_t[] := array[v1, v1, v1, null::int_arr_t]; + + b1 constant boolean not null := v1 = all (arr1); + b2 constant boolean not null := not v1 = all (arr2); + b3 constant boolean not null := v1 = any (arr2); + b4 constant boolean not null := not v5 = any (arr2); + + b5 constant boolean not null := v1 = any (arr3); + b6 constant boolean not null := (v5 = any (arr3)) is null; + b7 constant boolean not null := (v1 = all (arr3)) is null; +begin + assert + (b1 and b2 and b3 and b4 and b5 and b6 and b7), + 'assert failed'; +end; +$body$; +``` + +### Test to show that an array's _elements_ are all the same as each other + +This demonstration shows you that you can test whether an array's _elements_ are all the same as each other without needing to know their common value or anything about the array's dimensionality. + +```plpgsql +do $body$ +declare + arr constant int[] not null := ' + [2:3][4:6][7:10]={ + { + {42,42,42,42},{42,42,42,42},{42,42,42,42} + }, + { + {42,42,42,42},{42,42,42,42},{42,42,42,42} + } + }'::int[]; + + val constant int not null := ( + select unnest(arr) limit 1); + + b99 boolean not null := val = all (arr); +begin + assert b99, 'assert failed'; +end; +$body$; +``` +The general syntax for the literal for a multidimensional array that specifies the lower and upper index bounds along each dimension is described in [Multidimensional array of `int` values](../../literals/array-of-primitive-values/#multidimensional-array-of-int-values). + +For the specification of the behavior `unnest()` when its actual argument is a multidimensional array see [Multidimensional array_agg() and unnest() — first overloads](../array-agg-unnest/#multidimensional-array-agg-and-unnest-first-overloads). 
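+ +If you want to reuse this test, you can package it as a function. The following is just a sketch: the name _"elements_all_equal"_ is arbitrary, and it assumes that the input array has at least one _element_ and no `NULL` _elements_. + +```plpgsql +create or replace function elements_all_equal(arr in anyarray) + returns boolean + language sql +as $body$ + -- Pick an arbitrary element and compare it with every element in the array. + select (select unnest(arr) limit 1) = all (arr); +$body$; + +select + elements_all_equal('{{42,42},{42,42}}'::int[])::text as "all the same", + elements_all_equal('{42,43}'::int[])::text as "not all the same"; +``` + +The first expression evaluates to _true_ and the second to _false_.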
+ +### Using inequality comparisons + +```plpgsql +do $body$ +declare + v1 constant int := 1; + v2 constant int := 2; + v3 constant int := 3; + v4 constant int := 4; + v5 constant int := 5; + + arr1 constant int[] := array[v1, v2, v3, v4]; + + b01 constant boolean not null := not(v5 = any (arr1)); + b02 constant boolean not null := v5 <> all (arr1); + b03 constant boolean not null := v2 > any (arr1); + b04 constant boolean not null := not(v2 > all (arr1)); + b05 constant boolean not null := not(v4 < any (arr1)); + b06 constant boolean not null := v2 < any (arr1); + b07 constant boolean not null := not(v5 <= any (arr1)); + b08 constant boolean not null := v4 >= all (arr1); + b09 constant boolean not null := v5 > all (arr1); + +begin + assert + (b01 and b02 and b03 and b04 and b05 and b06 and b07 and b08 and b09), + 'assert failed'; +end; +$body$; +``` diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-agg-unnest.md b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-agg-unnest.md new file mode 100644 index 000000000000..72b5c43301b7 --- /dev/null +++ b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-agg-unnest.md @@ -0,0 +1,1033 @@ +--- +title: array_agg(), unnest(), and generate_subscripts() +linkTitle: array_agg(), unnest(), generate_subscripts() +headerTitle: array_agg(), unnest(), and generate_subscripts() +description: array_agg(), unnest(), and generate_subscripts() +menu: + preview_api: + identifier: array-agg-unnest + parent: array-functions-operators +type: docs +--- + +For one-dimensional arrays, but _only for these_ (see [Multidimensional `array_agg()` and `unnest()`](./#multidimensional-array-agg-and-unnest-first-overloads)), these two functions have mutually complementary effects in the following sense. After this sequence (the notation is informal): + +```output +array_agg of "SETOF tuples #1" => "result array" +unnest of "result array" => "SETOF tuples #3" +``` + +The _"SETOF tuples #3"_ has identical shape and content to that of "_SETOF tuples #1"_. And the data type of _"result array"_ is an array of the data type of the tuples. + +Moreover, and again for the special case of one-dimensional arrays, the function `generate_subscripts()` can be used to produce the same result as `unnest()`. + +For this reason, the three functions, `array_agg()`, `unnest()`, and `generate_subscripts()` are described in the same section. + +## array_agg() + +This function has two overloads. + +### array_agg() — first overload + +**Purpose:** Return a one-dimensional array from a SQL subquery. Its rows might be scalars (that is, the `SELECT` list might be a single column). But, in typical use, they are likely to be of _"row"_ type values. + +**Signature:** + +```output +input value: SETOF anyelement +return value: anyarray +``` + +In normal use, `array_agg()` is applied to the `SELECT` list from a physical table, or maybe from a view that encapsulates the query. This is shown in the _"[Realistic use case](./#realistic-use-case)"_ example below. But first, you can demonstrate the functionality without creating and populating a table by using, instead, the `VALUES` statement. Try this: + +```plpgsql +values + (1::int, 'dog'::text), + (2::int, 'cat'::text), + (3::int, 'ant'::text); +``` + +It produces this result: + +```output + column1 | column2 +---------+--------- + 1 | dog + 2 | cat + 3 | ant +``` + +Notice that YSQL has named the `SELECT` list items _"column1"_ and _"column2_". 
The result is a so-called `SETOF`. It means a set of rows, just as is produced by a `SELECT` statement. (You'll see the term if you describe the `generate_series()` built-in table function with the `\df` meta-command.) To use the rows that the `VALUES` statement produces as the input for `array_agg()`, you need to use a named `type`, thus: + +```plpgsql +create type rt as (f1 int, f2 text); + +with tab as ( + values + (1::int, 'dog'::text), + (2::int, 'cat'::text), + (3::int, 'ant'::text)) +select array_agg((column1, column2)::rt order by column1) as arr +from tab; +``` + +It produces this result: + +```output + arr +--------------------------------- + {"(1,dog)","(2,cat)","(3,ant)"} +``` + +You recognize this as the text of the literal that represents an array of tuples that are shape-compatible with _"type rt"_. The underlying notions that explain what is seen here are explained in [The non-lossy round trip: value to `text` typecast and back to value](../../literals/text-typecasting-and-literals/#the-non-lossy-round-trip-value-to-text-typecast-and-back-to-value). + +Recall from [`array[]` constructor](../../array-constructor/) that this value doesn't encode the type name. In fact, you could typecast it to any shape compatible type. + +You can understand the effect of `array_agg()` thus: + +- Treat each row as a _"rt[]"_ array with a single-value. +- Concatenate (see the [`||` operator](../concatenation/#the-160-160-160-160-operator)) the values from all the rows in the specified order into a new _"rt[]"_ array. + +This code illustrates this point: + +```plpgsql +-- Consider this SELECT: +with tab as ( + values + ((1, 'dog')::rt), + ((2, 'cat')::rt), + ((3, 'ant')::rt)) +select array_agg(column1 order by column1) as arr +from tab; + +-- It can be seen as equivalent this SELECT: +select + array[(1, 'dog')::rt] || + array[(2, 'cat')::rt] || + array[(3, 'ant')::rt] +as arr; +``` + +Each of the three _"select... as arr"_ queries above produces the same result, as was shown after the first of them. This demonstrates their semantic equivalence. + +To prepare for the demonstration of `unnest()`, save the single-valued result from the most recent of the three queries (but any one of them would do) into a `ysqlsh` variable by using the `\gset` meta-command. This takes a single argument, conventionally spelled with a trailing underscore (for example, _"result_"_) and re-runs the `SELECT` statement that, as the last submitted `ysqlsh` command, is still in the command buffer. (If the `SELECT` doesn't return a single row, then you get a clear error.) In general, when the `SELECT` list has _N_ members, called _"c1"_ through _"cN"_, each of these values is stored in automatically-created variables called _"result_c1"_ through _"result_cN"_. + +if you aren't already familiar with the `\gset` meta-command, you can read a brief account of how it works in [Meta-commands](../../../../../ysqlsh-meta-commands/) within the major section on `ysqlsh`. + +Immediately after running the _"with... select array_agg(...) as arr..."_ query above, do this: + +```plpgsql +\gset result_ +\echo :result_arr +``` + +The `\gset` meta-command is silent. The `\echo` meta-command shows this: + +```output +{"(1,dog)","(2,cat)","(3,ant)"} +``` + +The text of the literal is now available for re-use, as was intended. + +Before considering `unnest()`, look at `array_agg()`'s second overload: + +### array_agg() — second overload + +**Purpose:** Return a (N+1)-dimensional array from a SQL subquery whose rows are N-dimensional arrays. 
The aggregated arrays must all have the same dimensionality. + +**Signature:** + +```output +input value: SETOF anyarray +return value: anyarray +``` + +Here is a positive example: + +```plpgsql +with tab as ( + values + ('{a, b, c}'::text[]), + ('{d, e, f}'::text[])) +select array_agg((column1)::text[] order by column1) as arr +from tab; +``` + +It produces this result: + +```output + arr +------------------- + {{a,b,c},{d,e,f}} +``` + +And here is a negative example: + +```plpgsql +with tab as ( + values + ('{a, b, c}'::text[]), + ('{d, e }'::text[])) +select array_agg((column1)::text[] order by column1) as arr +from tab; +``` + +It causes this error: + +```output +2202E: cannot accumulate arrays of different dimensionality +``` + +## unnest() + +This function has two overloads. The first is straightforward and has an obvious usefulness. The second is rather exotic. + +### unnest() — simple overload + +**Purpose:** Transform the values in a single array into a SQL table (that is, a `SETOF`) these values. + +**Signature:** + +```output +input value: anyarray +return value: SETOF anyelement +``` + +As the sketch at the start of this page indicated, the input to unnest is an array. To use what the code example in the account of array_agg() set in the `ysqlsh` variable _"result_arr"_ in a SQL statement, you must quote it and typecast it to _"rt[]"_. This can be done with the \set meta-command, thus: + +```plpgsql +\set unnest_arg '\'':result_arr'\'::rt[]' +\echo :unnest_arg +``` + +The `\set` meta-command uses the backslash character to escape the single quote character that it also uses to surround the string that it assigns to the target `ysqlsh` variable. The `\echo` meta-command shows this: + +```output +'{"(1,dog)","(2,cat)","(3,ant)"}'::rt[] +``` + +Now use it as the actual argument for `unnest()` thus: + +```plpgsql +with + rows as ( + select unnest(:unnest_arg) as rec) +select + (rec).f1, + (rec).f2 +from rows +order by 1; +``` + +The parentheses around the column alias _"rec"_ are required to remove what the SQL compiler would otherwise see as an ambiguity, and would report as a _"42P01 undefined_table"_ error. This is the result: + +```output + f1 | f2 +---+----- + 1 | dog + 2 | cat + 3 | ant +``` + +As promised, the original `SETOF` tuples has been recovered. + +### unnest() — exotic overload + +**Purpose:** Transform the values in a variadic list of arrays into a SQL table whose columns each are a `SETOF` the corresponding input array's values. This overload can be used only in the `FROM` clause of a subquery. Each input array might have a different type and a different cardinality. The input array with the greatest cardinality determines the number of output rows. The rows of those input arrays that have smaller cardinalities are filled at the end with `NULL`s. The optional `WITH ORDINALITY` clause adds a column that numbers the rows. 
+ +**Signature:** + +```output +input value: anyarray +return value: many coordinated columns of SETOF anyelement +``` + +```plpgsql +create type rt as (a int, b text); + +\pset null '' +select * +from unnest( + array[1, 2], + array[10, 20, 30, 45, 50], + array['a', 'b', 'c', 'd'], + array[(1, 'p')::rt, (2, 'q')::rt, (3, 'r')::rt, (4, 's')::rt] +) +with ordinality +as result(arr1, arr2, arr3, arr4_a, arr4_n, n); +``` + +It produces this result: + +```output + arr1 | arr2 | arr3 | arr4_a | arr4_n | n +-----------+------+-----------+-----------+-----------+--- + 1 | 10 | a | 1 | p | 1 + 2 | 20 | b | 2 | q | 2 + | 30 | c | 3 | r | 3 + | 45 | d | 4 | s | 4 + | 50 | | | | 5 +``` + +## Multidimensional array_agg() and unnest() — first overloads + +Start by aggregating three `int[]` array instances and by preparing the result as an `int[]` literal for the next step using the same `\gset` technique that was used above: + +```plpgsql +with tab as ( + values + ('{1, 2, 3}'::int[]), + ('{4, 5, 6}'::int[]), + ('{7, 8, 9}'::int[])) +select array_agg(column1 order by column1) as arr +from tab + +\gset result_ +\set unnest_arg '\'':result_arr'\'::int[]' +\echo :unnest_arg +``` + +Notice that the SQL statement, this time, is _not_ terminated with a semicolon. Rather, the `\gset` meta-command acts as the terminator. This makes the `ysqlsh` output less noisy. This is the result: + +```output +'{{1,2,3},{4,5,6},{7,8,9}}'::int[] +``` + +You recognize this as the literal for a two-dimensional array. Now use this as the actual argument for `unnest()`: + +```plpgsql +select unnest(:unnest_arg) as val +order by 1; +``` + +It produces this result: + +```output + val +----- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +``` + +This `SETOF` result lists all of the input array's "leaf" values in row-major order. This term is explained in [Joint semantics](../properties/#joint-semantics)) within the _"Functions for reporting the geometric properties of an array"_ section. + +Notice that, for the multidimensional case, the original input to `array_agg()` was _not_, therefore, regained. This point is emphasized by aggregating the result: + +```plpgsql +with a as + (select unnest(:unnest_arg) as val) +select array_agg(val order by val) from a; +``` + +It produces this result: + +```output + array_agg +--------------------- + {1,2,3,4,5,6,7,8,9} +``` + +You started with a two-dimensional array. But now you have a one-dimensional array with the same values as the input array in the same row-major order. + +This result has the same semantic content that the `array_to_string()` function produces: + +```plpgsql +select array_to_string(:unnest_arg, ','); +``` + +It produces this result: + +```output + array_to_string +------------------- + 1,2,3,4,5,6,7,8,9 +``` + +See [Looping through arrays in PL/pgSQL](../../looping-through-arrays/). This shows how you can use the `FOREACH` loop in procedural code, with an appropriate value for the `SLICE` operand, to unnest an array into a set of subarrays whose dimensionality you can choose. At one end of the range, you can mimmic `unnest()` and produce scalar values. At the other end of the range, you can produce a set of arrays with dimensionality `n - 1` where `n` is the dimensionality of the input array. + +## Realistic use case + +The basic illustration of the functionality of `array_agg()` showed how it can convert the entire contents of a table (or, by extension, the `SETOF` rows defined by a `SELECT` execution) into a single array value. 
This can be useful to return a large `SELECT` result in its entirety (in other words, in a single round trip) to a client program. + +Another use is to populate a single newly-created _"masters_with_details"_ table from the fully projected and unrestricted `INNER JOIN` of a classic _"masters"_ and _"details"_ pair of tables. The new table has all the columns that the source _"masters"_ table has and all of its rows. And it has an additional _"details"_ column that holds, for each _"masters"_ row, a _"details_t[]"_ array that represents all of the child rows that it has in the source _"details"_ table. The type _"details_t"_ has all of the columns of the _"details"_ table except the _"details.masters_pk"_ foreign key column. This column vanishes because, as the _join_ column, it vanishes in the `INNER JOIN`. The _"details"_ table's "payload" is now held in place in a single multivalued field in the new _"masters_with_details"_ table. + +Start by creating and populating the _"masters"_ and _"details"_ tables: + +```plpgsql +create table masters( + master_pk int primary key, + master_name text not null); + +insert into masters(master_pk, master_name) +values + (1, 'John'), + (2, 'Mary'), + (3, 'Joze'); + +create table details( + master_pk int not null, + seq int not null, + detail_name text not null, + + constraint details_pk primary key(master_pk, seq), + + constraint master_pk_fk foreign key(master_pk) + references masters(master_pk) + match full + on delete cascade + on update restrict); + +insert into details(master_pk, seq, detail_name) +values + (1, 1, 'cat'), (1, 2, 'dog'), + (2, 1, 'rabbit'), (2, 2, 'hare'), (2, 3, 'squirrel'), (2, 4, 'horse'), + (3, 1, 'swan'), (3, 2, 'duck'), (3, 3, 'turkey'); +``` + +Next, create a view that encodes the fully projected, unrestricted _inner join_ of the original data, and inspect the result set that it represents: + +```plpgsql +create or replace view original_data as +select + master_pk, + m.master_name, + d.seq, + d.detail_name +from masters m inner join details d using (master_pk); + +select + master_pk, + master_name, + seq, + detail_name +from original_data +order by +master_pk, seq; +``` + +This is the result: + +```output + master_pk | master_name | seq | detail_name +-----------+-------------+-----+------------- + 1 | John | 1 | cat + 1 | John | 2 | dog + 2 | Mary | 1 | rabbit + 2 | Mary | 2 | hare + 2 | Mary | 3 | squirrel + 2 | Mary | 4 | horse + 3 | Joze | 1 | swan + 3 | Joze | 2 | duck + 3 | Joze | 3 | turkey +``` + +Next, create the type _"details_t"_ and the new table: + +```plpgsql +create type details_t as (seq int, detail_name text); + +create table masters_with_details ( + master_pk int primary key, + master_name text not null, + details details_t[] not null); +``` + +Notice that you made the _"details"_ column `not null`. This was a choice. It adds semantics that are notoriously difficult to capture in the original two table design without tricky, and therefore error-prone, programming of triggers and the like. You have implemented the so-called _"mandatory one-to-many"_ rule. In the present example, the rule says (in the context of the entity-relationship model that specifies the requirements) that an occurrence of a _"Master"_ entity type cannot exist unless it has at least one, but possibly many, child occurrences of a _"Detail"_ entity type. 
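+ +Notice that the `not null` constraint rules out an atomically null _"details"_ value but still allows the empty array `'{}'`. If you want the table itself to insist on at least one detail per master, you could also add a check constraint. The following is only a sketch (the constraint name is arbitrary); the rest of the example doesn't depend on it: + +```plpgsql +alter table masters_with_details + add constraint details_not_empty + -- Reject the empty array '{}'; the "not null" constraint already rejects NULL. + check (cardinality(details) >= 1); +```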
+ +Next, populate the new table and inspect its contents: + +```plpgsql +insert into masters_with_details +select + master_pk, + master_name, + array_agg((seq, detail_name)::details_t order by seq) as agg +from original_data +group by master_pk, master_name; + +select master_pk, master_name, details +from masters_with_details +order by 1; +``` + +This is the result: + +```output + master_pk | master_name | details +-----------+-------------+------------------------------------------------------ + 1 | John | {"(1,cat)","(2,dog)"} + 2 | Mary | {"(1,rabbit)","(2,hare)","(3,squirrel)","(4,horse)"} + 3 | Joze | {"(1,swan)","(2,duck)","(3,turkey)"} +``` + +Here's a helper function to show the primitive values that the _"details_t[]"_ array encodes without the clutter of the array literal syntax: + +```plpgsql +create function pretty_details(arr in details_t[]) + returns text + language plpgsql +as $body$ +declare + arr_type constant text := pg_typeof(arr); + ndims constant int := array_ndims(arr); + lb constant int := array_lower(arr, 1); + ub constant int := array_upper(arr, 1); +begin + assert arr_type = 'details_t[]', 'assert failed: ndims = %', arr_type; + assert ndims = 1, 'assert failed: ndims = %', ndims; + declare + line text not null := + rpad(arr[lb].seq::text||': '||arr[lb].detail_name::text, 12)|| + ' | '; + begin + for j in (lb + 1)..ub loop + line := line|| + rpad(arr[j].seq::text||': '||arr[j].detail_name::text, 12)|| + ' | '; + end loop; + return line; + end; +end; +$body$; +``` + +Notice that this is not a general purpose function. Rather, it expects that the input is a _"details_t[]"_ array. So it first checks that this pre-condition is met. It then discovers the lower and upper bounds of the array so that it can loop over its values. It uses these functions for reporting the geometric properties of the input array: [`array_ndims()`](../properties/#array-ndims); [`array_lower()`](../properties/#array-lower); and [`array_upper()`](../properties/#array-upper). + +Invoke it like this: + +```plpgsql +select master_pk, master_name, pretty_details(details) +from masters_with_details +order by 1; +``` + +It produces this result: + +```output + master_pk | master_name | pretty_details +-----------+-------------+-------------------------------------------------------------- + 1 | John | 1: cat | 2: dog | + 2 | Mary | 1: rabbit | 2: hare | 3: squirrel | 4: horse | + 3 | Joze | 1: swan | 2: duck | 3: turkey | +``` + +Next, create a view that uses `unnest()` to re-create the effect of the fully projected, unrestricted _inner join_ of the original data, and inspect the result set that it represents: + +```plpgsql +create or replace view new_data as +with v as ( + select + master_pk, + master_name, + unnest(details) as details + from masters_with_details) +select + master_pk, + master_name, + (details).seq, + (details).detail_name +from v; + +select + master_pk, + master_name, + seq, + detail_name +from new_data +order by +master_pk, seq; +``` + +The result is identical to what the _"original_data"_ view represents. 
But rather than relying on visual inspection, can check that the _"new_data"_ view and the _"original_data"_ view represent the identical result by using SQL thus: + +```plpgsql +with + original_except_new as ( + select master_pk, master_name, seq, detail_name + from original_data + except + select master_pk, master_name, seq, detail_name + from new_data), + + new_except_original as ( + select master_pk, master_name, seq, detail_name + from new_data + except + select master_pk, master_name, seq, detail_name + from original_data), + + original_except_new_union_new_except_original as ( + select master_pk, master_name, seq, detail_name + from original_except_new + union + select master_pk, master_name, seq, detail_name + from new_except_original) + +select + case count(*) + when 0 then '"new_data" is identical to "original_data."' + else '"new_data" differs from "original_data".' + end as result +from original_except_new_union_new_except_original; +``` + +This is the result: + +```output + result +--------------------------------------------- + "new_data" is identical to "original_data." +``` + +Notice that if you choose the _"masters_with_details"_ approach (either as a migration from a two-table approach in an extant application, or as an initial choice in a new application) you must appreciate the trade-offs. + +**Prerequisite:** + +- You must be confident that the _"details"_ rows are genuinely private each to its own master and do not implement a many-to-many relationship in the way that the _"order_items"_ table does between the _"customers"_ table and the _"items"_ table in the classic sales order entry model that is frequently used to teach table design according to the relational model. + +**Pros:** + +- You can enforce the mandatory one-to-many requirement declaratively and effortlessly. +- Changing and querying the data will be faster because you use single table, single-row access rather than two-table, multi-row access. +- You can trivially recapture the query functionality of the two-table approach by implementing a _"new_data"_ unnesting view as has been shown. So you can still find, for example, rows in the _"masters_with_details"_ table where the _"details"_ array has the specified values like this: + + ```plpgsql + with v as ( + select master_pk, master_name, seq, detail_name + from new_data + where detail_name in ('rabbit', 'horse', 'duck', 'turkey')) + select + master_pk, + master_name, + array_agg((seq, detail_name)::details_t order by seq) as agg + from v + group by master_pk, master_name + order by 1; + ``` + + This is the result: + + ```output + master_pk | master_name | agg + -----------+-------------+---------------------------- + 2 | Mary | {"(1,rabbit)","(4,horse)"} + 3 | Joze | {"(2,duck)","(3,turkey)"} + ``` + +**Cons:** +- Changing the data in the _"details"_ array is rather difficult. Try this (in the two-table regime): + + ```plpgsql + update details + set detail_name = 'bobcat' + where master_pk = 2 + and detail_name = 'squirrel'; + + select + master_pk, + master_name, + seq, + detail_name + from original_data + where master_pk = 2 + order by + master_pk, seq; + ``` + + This is the result: + + ```output + master_pk | master_name | seq | detail_name + -----------+-------------+-----+------------- + 2 | Mary | 1 | rabbit + 2 | Mary | 2 | hare + 2 | Mary | 3 | bobcat + 2 | Mary | 4 | horse + ``` + +- Here's how you achieve the same effect, and check that it worked as intended, in the new regime. 
Notice that you need to know the value of _"seq"_ for the _"rt"_ object that has the _"detail_name"_ value of interest. This can be done by implementing a dedicated PL/pgSQL function that encapsulates `array_replace()`, or that replaces a value directly by addressing it using its index; it's hard to do without such a function. (These methods are described in [`array_replace()` and setting an array value explicitly](../../functions-operators/replace-a-value/).)
+
+  ```plpgsql
+  update masters_with_details
+  set details = array_replace(details, '(3,squirrel)', '(3,bobcat)')
+  where master_pk = 2;
+
+  select
+    master_pk,
+    master_name,
+    seq,
+    detail_name
+  from new_data
+  where master_pk = 2
+  order by
+  master_pk, seq;
+  ```
+
+  The result is identical to the result shown for querying _"original_data"_ above.
+
+- Implementing the requirement that the values of _"detail_name"_ must be unique for a given _"masters"_ row is trivial in the old regime:
+
+  ```plpgsql
+  create unique index on details(master_pk, detail_name);
+  ```
+
+To achieve the effect in the new regime, you'd need to write a PL/pgSQL function, with return type `boolean`, that scans the values in the _"details"_ array and returns `TRUE` when there are no duplicates among the values of the _"detail_name"_ field and that otherwise returns `FALSE`. Then you'd use this function as the basis for a check constraint in the definition of the _"masters_with_details"_ table. This is a straightforward programming task, but it does take more effort than the declarative implementation of the business rule that the two-table regime allows.
+
+## generate_subscripts()
+
+**Purpose:** Return the index values of an array, along the specified dimension, as a SQL table (that is, a `SETOF`) of these `int` values.
+
+**Signature:**
+
+```output
+input value:       anyarray, integer, boolean
+return value:      SETOF integer
+```
+
+### Semantics
+
+The second input parameter specifies the dimension along which the index values should be generated. The third, optional, input parameter controls the ordering of the values. The default value `FALSE` means generate the index values in ascending order from the lower index bound to the upper index bound; and the value `TRUE` means generate the index values in descending order from the upper index bound to the lower index bound.
+
+It's useful to use the same array in each of several examples. Make it available thus:
+
+```plpgsql
+drop function if exists arr() cascade;
+create function arr()
+  returns int[]
+  language sql
+as $body$
+  select array[17, 42, 53, 67]::int[];
+$body$;
+```
+
+Now demonstrate the basic behavior of _generate_subscripts()_:
+
+```plpgsql
+select generate_subscripts(arr(), 1) as subscripts;
+```
+
+This is the result:
+
+```output
+ subscripts
+------------
+          1
+          2
+          3
+          4
+```
+
+This asks for the subscripts to be generated in reverse order:
+
+```plpgsql
+select generate_subscripts(arr(), 1, true) as subscripts;
+```
+
+This is the result:
+
+```output
+ subscripts
+------------
+          4
+          3
+          2
+          1
+```
+
+`generate_series()` can be used to produce the same result as `generate_subscripts()`. Notice that `generate_series()` doesn't have a _"reverse"_ option.
This means that, especially when you want the results in reverse order, the syntax is significantly more cumbersome, as this example shows: + +```plpgsql +select array_upper(arr(), 1) + 1 - generate_series( + array_lower(arr(), 1), + array_upper(arr(), 1) + ) +as subscripts; +``` + +The following example creates a procedure that compares the results of `generate_subscripts()` and `generate_series()`, when the latter is invoked in a way that will produce the same results as the former. The procedure's input parameter lets you specify along which dimension you want to generate the index values. To emphasize how much easier it is to write the `generate_subscripts()` invocation, the test uses the reverse index order option. The array is constructed using the array literal notation (see [Multidimensional array of `int` values](../../literals/array-of-primitive-values/#multidimensional-array-of-int-values)) that explicitly sets the lower index bound along each of the array's three dimensions. [`array_agg()`](./#array-agg-first-overload) is used to aggregate the results from each approach so that they can be compared simply by using the [`=` operator](../comparison/#the-160-160-160-160-and-160-160-160-160-operators). + +```plpgsql +create or replace procedure p(dim in int) + language plpgsql +as $body$ +declare + arr constant int[] not null := ' + [2:3][4:6][7:10]={ + { + { 1, 2, 3, 4},{ 5, 6, 7, 8},{ 9,10,11,12} + }, + { + {13,14,15,16},{17,18,19,20},{21,22,23,24} + } + }'::int[]; + + subscripts_1 constant int[] := ( + with v as ( + select generate_subscripts(arr, dim) as s) + select array_agg(s) from v + ); + + lb constant int := array_lower(arr, dim); + ub constant int := array_upper(arr, dim); + subscripts_2 constant int[] := ( + with v as ( + select generate_series(lb, ub) as s) + select array_agg(s) from v + ); + +begin + assert + subscripts_1 = subscripts_2, + 'assert failed'; +end; +$body$; + +do $body$ +begin + call p(1); + call p(2); + call p(3); +end; +$body$; +``` + +Each of the calls finishes silently, showing that the _asserts_ hold. + +### The g(i) table(column) aliasing locution + +Both of the built-ins, `generate_series()` and `generate_subscripts()` are table functions. For this reason, they are amenable to this aliasing locution: + +```plpgsql +select my_table_alias.my_column_alias +from generate_series(1, 3) as my_table_alias(my_column_alias); +``` + +This is the result: + +```output + my_column_alias +----------------- + 1 + 2 + 3 +``` + +The convention among PostgreSQL users is to use `g(i)` with these two built-ins, where _"g"_ stands for _"generate"_ and _"i"_ is the common favorite for a loop iterand in procedural programming. You are very likely, therefore, to see something like this: + +```plpgsql +select g.i +from generate_subscripts('[5:7]={17, 42, 53}'::int[], 1) as g(i); +``` + +with this result: + +```output + i +--- + 5 + 6 + 7 +``` + +This is useful because without the locution, the result of each of these table functions is anonymous. 
The more verbose alternative is to define the aliases in a `WITH` clause, as was done above:
+
+```plpgsql
+with g(i) as (
+  select generate_subscripts('[5:7]={17, 42, 53}'::int[], 1))
+select g.i from g;
+```
+
+### Some example uses
+
+The most obvious use is to tabulate the array values alongside the index values, extending the immediately preceding example:
+
+```plpgsql
+drop table if exists t cascade;
+create table t(k text primary key, arr int[]);
+insert into t(k, arr) values
+  ('Array One', '{17, 42, 53, 67}'),
+  ('Array Two', '[5:7]={19, 47, 59}');
+
+select i, (select arr from t where k = 'Array One')[i]
+from generate_subscripts((select arr from t where k = 'Array One'), 1) as g(i);
+```
+
+It produces this result:
+
+```output
+ i | arr
+---+-----
+ 1 |  17
+ 2 |  42
+ 3 |  53
+ 4 |  67
+```
+
+Notice that this:
+
+```output
+(select arr from t where k = 'Array One')[i]
+```
+
+has the same effect as this:
+
+```output
+(select arr[i] from t where k = 'Array One')
+```
+
+It was written the first way to emphasize the annoying textual repetition of _"(select arr from t where k = 'Array One')"_.
+This highlights a critical difference between SQL and a procedural language like PL/pgSQL. The latter allows you to initialize a variable with an arbitrarily complex and verbose expression and then just to use the variable's name thereafter. But SQL has no such notion.
+
+Notice that the table _t_ has two rows. You can't generalize the SQL shown immediately above to list the indexes with their array values for both rows. This is where the _cross join lateral_ syntax comes to the rescue:
+
+```plpgsql
+with
+  c(k, a, idx) as (
+    select k, arr, indexes.idx
+    from t
+    cross join lateral
+    generate_subscripts(t.arr, 1) as indexes(idx))
+select k, idx, a[idx]
+from c
+order by k;
+```
+
+It produces this result:
+
+```output
+     k     | idx | a
+-----------+-----+----
+ Array One |   1 | 17
+ Array One |   2 | 42
+ Array One |   3 | 53
+ Array One |   4 | 67
+ Array Two |   5 | 19
+ Array Two |   6 | 47
+ Array Two |   7 | 59
+```
+
+Here is the PL/pgSQL re-write.
+
+```plpgsql
+do $body$
+<<b>>declare
+  arr constant int[] := (select arr from t where k = 'Array Two');
+  i int;
+begin
+  for b.i in (
+    select g.i from generate_subscripts(arr, 1) as g(i))
+  loop
+    raise info '% | % ', i, arr[i];
+  end loop;
+end b;
+$body$;
+```
+
+The result (after manually stripping the _"INFO:"_ prompts) is the same as the _'Array Two'_ portion of the result produced by the SQL approach that uses `generate_subscripts()` with _cross join lateral_, shown above:
+
+```output
+ 5 | 19
+ 6 | 47
+ 7 | 59
+```
+
+Notice that having made the transition to a procedural approach, there is no longer any need to use
+`generate_subscripts()`. Rather, [`array_lower()`](../properties/#array-lower) and [`array_upper()`](../properties/#array-upper) can be used in the
+ordinary way to set the bounds of the integer variant of a `FOR` loop:
+
+```plpgsql
+do $body$
+declare
+  arr constant int[] := (select arr from t where k = 'Array Two');
+begin
+  for i in reverse array_upper(arr, 1)..array_lower(arr, 1) loop
+    raise info '% | % ', i, arr[i];
+  end loop;
+end;
+$body$;
+```
+
+It produces the same rows but, because of the `reverse` keyword, in the opposite index order.
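+
+For completeness, here is a minimal sketch (an addition, not part of the original set of examples) that reproduces the _cross join lateral_ result for both rows procedurally, by nesting an integer `FOR` loop over the array bounds inside a query `FOR` loop over the table's rows:
+
+```plpgsql
+do $body$
+declare
+  t_row record;
+begin
+  -- Visit the table's rows, and then each row's array values, in index order.
+  for t_row in (select k, arr from t order by k) loop
+    for i in array_lower(t_row.arr, 1)..array_upper(t_row.arr, 1) loop
+      raise info '% | % | %', t_row.k, i, t_row.arr[i];
+    end loop;
+  end loop;
+end;
+$body$;
+```
+
+After manually stripping the _"INFO:"_ prompts, it lists the same seven rows as the _cross join lateral_ query above.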
+
+### Comparing the functionality brought by generate_subscripts() with that brought by unnest()
+
+Try these two examples:
+
+```plpgsql
+with v as (
+  select array[17, 42, 53]::int[] as arr)
+select
+(select arr[idx] from v) as val
+from generate_subscripts((select arr from v), 1) as subscripts(idx);
+```
+
+and:
+
+```plpgsql
+with v as (
+  select array[17, 42, 53]::int[] as arr)
+select unnest((select arr from v)) as val;
+```
+
+Each uses the same array, _"array[17, 42, 53]::int[]"_, and each produces the same result, thus:
+
+```output
+ val
+-----
+  17
+  42
+  53
+```
+
+One-dimensional arrays are by far the most common use of the array data type. This is probably because a one-dimensional array of _"row"_ type values naturally models a schema-level table—albeit that an array brings an unavoidable ordering of elements while the rows in a schema-level table have no intrinsic order. In the same way, an array of scalar _elements_ models the values in a column of a schema-level table. Certainly, almost all the array examples in the [PostgreSQL 11.2 documentation](https://www.postgresql.org/docs/11/) use one-dimensional arrays. Further, it is common to want to present an array's _elements_ as a `SETOF` these values. For this use case, and as the two code examples above show, `unnest()` is simpler to use than `generate_subscripts()`. It is far less common to care about the actual dense sequence of index values that address an array's _elements_—for which purpose you would need `generate_subscripts()`.
+
+Moreover, `unnest()` (as has already been shown in this section) "flattens" an array of any dimensionality into the sequence of its _elements_ in row-major order—but `generate_subscripts()` brings no intrinsic functionality to do this. You can certainly achieve the result, as these two examples show for a two-dimensional array.
+
+Compare this:
+
+```plpgsql
+select unnest('{{17, 42, 53},{57, 67, 73}}'::int[]) as element;
+```
+
+with this:
+
+```plpgsql
+with
+  a as (
+    select '{{17, 42, 53},{57, 67, 73}}'::int[] as arr),
+  s1 as (
+    select generate_subscripts((select arr from a), 1) as i),
+  s2 as (
+    select generate_subscripts((select arr from a), 2) as j)
+select (select arr from a)[s1.i][s2.j] element
+from s1,s2
+order by s1.i, s2.j;
+```
+
+Again, each uses the same array (this time _'{{17, 42, 53},{57, 67, 73}}'::int[]_) and each produces the same result, thus:
+
+```output
+ element
+---------
+      17
+      42
+      53
+      57
+      67
+      73
+```
+
+You could generalize this approach for an array of any dimensionality. However, the `generate_subscripts()` approach is more verbose, and therefore more error-prone, than the `unnest()` approach. On the other hand, because _"order by s1.i, s2.j"_ makes your ordering rule explicit, you could define any ordering that suits your purpose.
+
+### Comparing the functionality brought by generate_subscripts() with that brought by the FOREACH loop
+
+See [Looping through arrays in PL/pgSQL](../../looping-through-arrays/).
+
+The `FOREACH` loop brings dedicated syntax for looping over the contents of an array. The loop construct uses the `SLICE` keyword to specify the subset of the array's elements over which you want to iterate. Typically you specify that the iterand is an array with fewer dimensions than the array over which you iterate. Because this functionality is intrinsic to the `FOREACH` loop, and because it would be very hard to write the SQL statement that produces this kind of slicing, you should use the `FOREACH` loop when you have this kind of requirement.
If you want to consume the output in a surrounding SQL statement, you can use `FOREACH` in a PL/pgSQL table function that returns a `SETOF` the sub-array that you need. You specify the `RETURNS` clause of such a table function using the `TABLE` keyword. diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-fill.md b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-fill.md new file mode 100644 index 000000000000..819df336a3d7 --- /dev/null +++ b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-fill.md @@ -0,0 +1,122 @@ +--- +title: array_fill() +linkTitle: array_fill() +headerTitle: array_fill() +description: array_fill() +menu: + preview_api: + identifier: array-fill + parent: array-functions-operators +type: docs +--- +**Signature:** +``` +input value: anyelement, int[] [, int[]] +return value: anyarray +``` +**Purpose:** Return a new "blank canvas" array of the specified shape with all cells set to the same specified value. + +- The first parameter determines the value and data type for every cell, and therefore the data type of the new array as a whole. It can be a value of a primitive data type, or, for example, a _"row"_ type value. It can also be written `NULL::some_type` if this suits your purpose. You would presumably set a `NOT NULL` value if, for example, you wanted to insert the array into a table column on which you have created a constraint, based upon a PL/pgSQL function, that explicitly tests the array's geometric properties and the `NOT NULL` status of each of its values. Try this: + ```plpgsql + select pg_typeof(array_fill(null::text, '{1}')) as "type of the new array"; + ``` +    This is the result: + ``` + type of the new array + ----------------------- + text[] + ``` +- The second parameter is an `int[]` array. Each of its values specifies the value that `array_length(new_arr, n)` returns—where _"n"_ is the dimension number, starting with the major dimension. So the cardinality of the array that you supply here specifies the value returned by `array_ndims(new_arr)`. +- The third parameter is optional. When supplied, it must be an `int[]` array with the same cardinality as the second parameter. Each of its values specifies the value that `array_lower(new_arr, n)` returns. + +The shape of the new array is, therefore, fully specified by the second and third parameters. + +**Note:** Why does `array_fill()` exist? In other words, why not just set the values that you want by directly indexing each cell and assigning the value you want to it? Recall that, as described in [Synopsis](../../#synopsis), an array value is rectilinear. This means that its shape, when its number of dimensions exceeds one, is non-negotiably fixed at creation time. This `DO` block emphasizes the point. + +```plpgsql +do $body$ +declare + a int[]; + b int[] := array_fill(null::int, '{3, 4}'); +begin + a[1][1] := 42; + begin + -- Causes ERROR: array subscript out of range + a[2][2] := 17; + exception + when array_subscript_error then null; + end; + raise info + 'cardinality(a), cardinality(b): %, %', cardinality(a), cardinality(b); +end; +$body$; +``` + +It shows this (after manually stripping the _"INFO:"_ prompt): + +``` +cardinality(a), cardinality(b): 1, 12 +``` + +So the array _"a"_ is stuck as one dimensional, one-by-one value. 
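+
+By way of contrast, here is a minimal sketch (an addition to the example above) in which the array is created with `array_fill()` up front. Because the three-by-four shape is then fixed from the start, both assignments succeed:
+
+```plpgsql
+do $body$
+declare
+  b int[] := array_fill(null::int, '{3, 4}');
+begin
+  -- Both cells lie within the pre-allocated 3-by-4 shape.
+  b[1][1] := 42;
+  b[2][2] := 17;
+  raise info 'cardinality(b): %', cardinality(b);
+end;
+$body$;
+```
+
+It shows _"cardinality(b): 12"_ (after manually stripping the _"INFO:"_ prompt).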
+
+**Example:**
+
+Run this:
+```plpgsql
+create table t(k int primary key, arr text[]);
+
+insert into t(k, arr)
+values(1, array_fill('-----'::text, '{3, 4}', '{2, 7}')::text[]);
+
+select
+  array_length(arr, 1) as len_1,
+  array_length(arr, 2) as len_2,
+  array_lower(arr, 1)  as lb_1,
+  array_lower(arr, 2)  as lb_2,
+  array_ndims(arr)     as ndims,
+  cardinality(arr)     as cardinality
+from t
+where k = 1;
+```
+It shows this:
+```
+ len_1 | len_2 | lb_1 | lb_2 | ndims | cardinality
+-------+-------+------+------+-------+-------------
+     3 |     4 |    2 |    7 |     2 |          12
+```
+
+Now run this:
+```plpgsql
+update t
+set
+  arr[2][ 7] = '2---7',
+  arr[2][10] = '2--10',
+  arr[4][ 7] = '4---7',
+  arr[4][10] = '4--10'
+where k = 1;
+
+select arr::text from t where k = 1;
+```
+It shows this (after some manual white-space formatting for readability):
+```
+[2:4][7:10]=
+  {
+    {2---7,-----,-----,2--10},
+    {-----,-----,-----,-----},
+    {4---7,-----,-----,4--10}
+  }
+```
+
+Finally, run this:
+```plpgsql
+\set VERBOSITY verbose
+update t
+set
+  arr[1][17] = 'Hmm...'
+where k = 1;
+```
+It reports this error, as expected:
+```
+2202E: array subscript out of range
+```
diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-position.md b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-position.md
new file mode 100644
index 000000000000..5a65825efcb0
--- /dev/null
+++ b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-position.md
@@ -0,0 +1,129 @@
+---
+title: array_position() and array_positions()
+linkTitle: array_position(), array_positions()
+headerTitle: array_position() and array_positions()
+description: array_position() and array_positions()
+menu:
+  preview_api:
+    identifier: array-position
+    parent: array-functions-operators
+type: docs
+---
+These functions require that the to-be-searched array is one-dimensional. They return the index values of the specified to-be-searched-for value in the specified to-be-searched array.
+
+Create a function to return the test array:
+
+```plpgsql
+drop function if exists arr() cascade;
+create function arr()
+  returns text[]
+  language sql
+as $body$
+  select array[
+    'mon', 'tue', 'wed', 'thu','fri', 'sat', 'sun',
+    'mon', 'tue', 'wed', 'thu','fri', 'sat', 'sun',
+    'mon', 'tue', 'wed'
+  ];
+$body$;
+```
+
+List the elements of the array that _arr()_ returns, by their index number. Use the built-in function _[generate_subscripts()](../array-agg-unnest/#generate-subscripts)_.
+
+```plpgsql
+with c(days, pos) as (
+  select a, subscripts.pos
+  from arr() as a
+  cross join lateral
+  generate_subscripts(arr(), 1) as subscripts(pos))
+select pos, days[pos] as day from c order by pos;
+```
+
+This is the result:
+
+```output
+ pos | day
+-----+-----
+   1 | mon
+   2 | tue
+   3 | wed
+   4 | thu
+   5 | fri
+   6 | sat
+   7 | sun
+   8 | mon
+   9 | tue
+  10 | wed
+  11 | thu
+  12 | fri
+  13 | sat
+  14 | sun
+  15 | mon
+  16 | tue
+  17 | wed
+```
+
+The examples below use the _arr()_ function value.
+
+## array_position()
+
+**Purpose:** Return the index, in the supplied array, of the specified value. Optionally, start searching at the specified index.
+
+**Signature:**
+
+```output
+input value:       anyarray, anyelement [, int]
+return value:      int
+```
+**Note:** The optional third parameter specifies the _inclusive_ index value at which to start the search.
+
+**Example:**
+
+```plpgsql
+select array_position(
+    arr(),          -- The to-be-searched array.
+    'tue'::text,    -- The to-be-searched-for value.
+    3::int          -- The (inclusive) position
+                    -- at which to start searching. [optional]
+  ) as position;
+```
+
+This is the result:
+
+```output
+ position
+----------
+        9
+```
+
+## array_positions()
+
+**Purpose:** Return the indexes, in the supplied array, of all occurrences of the specified value.
+
+**Signature:**
+
+```output
+input value:       anyarray, anyelement
+return value:      integer[]
+```
+**Example:**
+
+This example uses the _[unnest()](../array-agg-unnest/#unnest)_ built-in function to present the elements of the array that _array_positions()_ returns as a table:
+
+```plpgsql
+select unnest(
+    array_positions(
+        arr(),          -- The to-be-searched array.
+        'tue'::text     -- The to-be-searched-for value.
+      )
+  ) as position;
+```
+
+This is the result:
+
+```output
+ position
+----------
+        2
+        9
+       16
+```
diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-remove.md b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-remove.md
new file mode 100644
index 000000000000..f5471c872f73
--- /dev/null
+++ b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-remove.md
@@ -0,0 +1,45 @@
+---
+title: array_remove()
+linkTitle: array_remove()
+headerTitle: array_remove()
+description: array_remove()
+menu:
+  preview_api:
+    identifier: array-remove
+    parent: array-functions-operators
+type: docs
+---
+
+**Purpose:** Return a new array where _every_ occurrence of the specified value has been removed from the specified input array.
+
+**Signature:**
+```
+input value:       anyarray, anyelement
+return value:      anyarray
+```
+**Note:** This function requires that the array from which values are to be removed is one-dimensional. This restriction is understood in light of the fact that arrays are rectilinear—in other words, the geometry of an array whose dimensionality is two or more is fixed at creation time. For examples illustrating this rule, see [array_fill()](../array-fill).
+
+**Example:**
+```plpgsql
+create table t(k int primary key, arr int[]);
+insert into t(k, arr)
+values (1, '{1, 2, 2, 2, 5, 6}'::int[]);
+
+select arr as "old value of arr" from t where k = 1;
+
+update t
+set arr = array_remove(arr, 2)
+where k = 1;
+
+select arr as "new value of arr" from t where k = 1;
+```
+This is the result of the two queries:
+```
+ old value of arr
+------------------
+ {1,2,2,2,5,6}
+
+ new value of arr
+------------------
+ {1,5,6}
+```
diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-to-string.md b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-to-string.md
new file mode 100644
index 000000000000..655aa9686814
--- /dev/null
+++ b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/array-to-string.md
@@ -0,0 +1,77 @@
+---
+title: array_to_string()
+linkTitle: array_to_string()
+headerTitle: array_to_string()
+description: array_to_string()
+menu:
+  preview_api:
+    identifier: array-to-string
+    parent: array-functions-operators
+type: docs
+---
+
+**Purpose:** Return a `text` value computed by representing each array value, traversing these in row-major order, by its `::text` typecast, using the supplied delimiter between each such representation. (The result, therefore, loses all information about the array's geometric properties.) Optionally, represent `NULL` by the supplied `text` value.
The term _"row-major order"_ is explained in [Joint semantics](../properties/#joint-semantics) within the section _"Functions for reporting the geometric properties of an array"_.
+
+**Signature:**
+```
+input value:       anyarray, text [, text]
+return value:      text
+```
+
+**Example:**
+```plpgsql
+create type rt as (f1 int, f2 text);
+create table t(k int primary key, arr rt[]);
+insert into t(k, arr) values(1,
+  array[
+    array[
+      array[(1, 'a')::rt, (2, null)::rt, null, (3, 'c')::rt]
+    ]
+  ]::rt[]
+);
+
+select arr::text from t where k = 1;
+```
+It shows this:
+```
+                arr
+-----------------------------------
+ {{{"(1,a)","(2,)",NULL,"(3,c)"}}}
+```
+To understand the syntax of the text of this literal, especially when a field is `NULL`, see [The literal for a _"row"_ type value](../../literals/row/).
+
+Now do this:
+```plpgsql
+select
+  array_to_string(
+    arr,     -- the input array
+    ' | ')   -- the delimiter
+from t
+where k = 1;
+```
+It shows this:
+```
+   array_to_string
+----------------------
+ (1,a) | (2,) | (3,c)
+```
+Notice that the third, `NULL`, array value is not represented at all: without a null indicator, `NULL` values are simply skipped. (The `NULL` value of the _"f2"_ field within the second array value is implied only by the _absence_ of any characters between the comma and the closing parenthesis in _"(2,)"_.)
+
+Now do this:
+```plpgsql
+select
+  array_to_string(
+    arr,     -- the input array
+    ' | ',   -- the delimiter
+    '?')     -- the null indicator
+from t
+where k = 1;
+```
+It shows this:
+```
+     array_to_string
+--------------------------
+ (1,a) | (2,) | ? | (3,c)
+```
+
+The third array value is now represented by `?`. But the fact that _"f2"_ `IS NULL` within the second array value is _not_ represented by `?`. In other words, this technique for visualizing `NULL` is applied only at the granularity of top-level array values and not within such values when they are composite.
diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/comparison.md b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/comparison.md
new file mode 100644
index 000000000000..c81bfd1f2edb
--- /dev/null
+++ b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/comparison.md
@@ -0,0 +1,454 @@
+---
+title: Array comparison
+linkTitle: Array comparison
+headerTitle: Operators for comparing two arrays
+description: Operators for comparing two arrays
+aliases:
+  - /preview/compare/ysql/
+menu:
+  preview_api:
+    identifier: array-comparison
+    parent: array-functions-operators
+    weight: 20
+type: docs
+---
+
+## Comparison operators overview
+
+**Purpose:** Each of the comparison operators returns `TRUE` or `FALSE` according to the outcome of the particular comparison test between the input [LHS and RHS](https://en.wikipedia.org/wiki/Sides_of_an_equation) arrays.
+
+**Signature**
+
+These operators all have the same signature, thus:
+
+```
+input value:       anyarray, anyarray
+return value:      boolean
+```
+
+**Note:** These operators require that the LHS and RHS arrays have the same data type. (It's the same rule for the comparison of scalars.) However, they do _not_ require that the arrays have identical geometric properties. Rules are defined so that a difference between one or more of these properties does not mean that comparison is disallowed. Rather, the LHS array might be deemed to be less than, or greater than, the RHS array. It's essential, therefore, to understand the comparison algorithm.
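+
+Before looking at the detailed rules, here is a minimal sketch (an addition, not part of the original account) showing that arrays whose values are pairwise identical, as far as pairwise comparison is feasible, but whose geometric properties differ, can still be compared and are ordered deterministically:
+
+```plpgsql
+select
+  ('{1, 2}'::int[] < '[3:4]={1, 2}'::int[])::text     as "same values, lower bounds differ",
+  ('{1, 2}'::int[] < '{{1, 2}, {3, 4}}'::int[])::text as "same leading values, cardinalities differ";
+```
+
+Both comparisons return _true_: when every feasible pairwise value comparison results in equality, the geometric properties decide the outcome, as the sections below explain.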
+ +### Comparison criteria + +These are the unique characteristics of an array with respect to the algorithm that compares two array values: + +- the actual values, compared pairwise in row-major order +- the cardinality +- the number of dimensions +- the lower bound on each dimension. + +The term _"row-major order"_ is explained in [Joint semantics](../properties/#joint-semantics) within the section _"Functions for reporting the geometric properties of an array"_. + +The other geometric properties (the length and upper bound along each dimension) can be derived from the properties that the bullets list.. + +There is, of course, a well-defined priority among the comparisons. Briefly, value comparison is done first. Then, but only if no difference is detected, are the geometric properties compared. + +### Pairwise comparison of values + +The first comparison test scans the values in each of the LHS and RHS arrays in row-major order (see [Joint semantics](../properties/#joint-semantics)) and does a pairwise comparison. Notably, the comparison rule non-negotiably uses `IS NOT DISTINCT FROM` semantics. Moreover, when a `not null` array value is pairwise compared with a `NULL` value, the `not null` value is deemed to be _less than_ the `NULL` value. + +Notice the contrast with the `=` operator comparison rule for free-standing scalar values. This comparison uses `NULL` semantics but, of course, lets you use `IS NOT DISTINCT FROM` comparison if this better suits your purpose. + +Otherwise, the comparison rules are the same as those for scalar values and, by extension, with those for, for example, _"row"_ type values. + +If a pairwise comparison results in inequality, then the LHS and RHS arrays are immediately deemed to be unequal with no regard to the geometric properties. The outcome of the _first_ pairwise comparison, when these values differ, determines the outcome of the array comparison. Remaining pairwise comparisons are not considered. + +Notice that the two arrays might not have the same cardinality. If all possible pairwise comparisons result in equality, then the array with the greater cardinality is deemed to be greater than the other array, and the other geometric properties are not considered. + +### The priority of differences among the geometric properties + +The previous section stated the rule that the cardinality comparison has the highest priority among the geometric property comparisons. And that this rule kicks in only of all possible value comparisons result in equality. + +When _both_ all possible value comparisons _and_ the cardinality comparison result in equality, then the comparison between the number of dimensions has a higher priority than the comparison between the lower bound on each dimension. Of course, the array with the greater number of dimensions is deemed to be the greater array. + +This means that the lower bounds are significant when two arrays are compared _only_ when they are identical in pairwise value comparison, cardinality, and the number of dimensions. Then the array with the greater lower bound, in dimension order, is deemed to be the greater array. + +[Equality and inequality semantics](./#equality-and-inequality-semantics) demonstrates each of the rules that this _"Comparison operators overview"_ section has stated. + +## Containment and overlap operators overview + +These three operators are insensitive to the geometric properties of the two to-be-compared arrays. 
+- The two containment operators test if the distinct set of values in one array contains, or is contained by, the distinct set of values in the other array. +- The overlap operator tests if the distinct set of values in one array and the distinct set of values in the other array have at least one value in common. + +[Containment and overlap operators semantics](./#containment-and-overlap-operators-semantics) below demonstrates each of the rules that this section has stated. + +## Examples for each operator + +### The   =   and   <>   operators + +- The `=` operator returns `TRUE` if the LHS and RHS arrays are equal. +- The `<>` operator is the natural complement: it returns `TRUE` if the LHS and RHS arrays are not equal. + +```plpgsql +with + v as ( + select + (select array['a', 'b', null, 'd']::text[]) as a1, + (select '{a, b, null, d}'::text[]) as a2 + ) +select (a1 = a2)::text as "EQUALITY comparison result" +from v; +``` +This is the result: +``` + EQUALITY comparison result +---------------------------- + true +``` + +```plpgsql +with + v as ( + select + (select array['a', 'b', 'c', 'd']::text[]) as a1, + (select '{a, b, null, d}'::text[]) as a2 + ) +select (a1 <> a2)::text as "INEQUALITY comparison result" +from v; +``` +This is the result: +``` + INEQUALITY comparison result +------------------------------ + true +``` + +### The   >   and   >=   and   <=   and   <   and   <>   operators + +These four operators implement the familiar inequality comparisons. +- The `>` operator returns `TRUE` if the LHS array is greater than the RHS array. +- The `>=` operator returns `TRUE` if the LHS array is greater than or equal to the RHS array. +- The `<=` operator returns `TRUE` if the LHS array is less than or equal to the RHS array. +- The `<` operator returns `TRUE` if the LHS array is less than the RHS array. + +It's sufficient, therefore, to provide an example for just the `<` operator. +```plpgsql +with + v as ( + select + (select array['a', 'b', 'c', 'd']::text[]) as a1, + (select array['a', 'b', 'e', 'd']::text[]) as a2, + (select '{a, b, null, d}'::text[]) as a3 + ) +select + (a1 < a2)::text as "'LESS THAN' comparison result 1", + (a1 < a3)::text as "'LESS THAN' comparison result 2" +from v; +``` +This is the result: +``` + 'LESS THAN' comparison result 1 | 'LESS THAN' comparison result 2 +---------------------------------+--------------------------------- + true | true +``` + +### The   @>   and   <@   operators + +- The `@>` operator returns `TRUE` if the LHS array contains the RHS array—that is, if every distinct value in the RHS array is found among the LHS array's distinct values. +- The `<@` operator is the natural complement: it returns `TRUE` if every distinct value in the LHS array is found among the RHS array's distinct values. + +```plpgsql +with + v as ( + select + (select array['a', 'b', 'c', 'd']::text[]) as a1, + (select array['a', 'c' ]::text[]) as a2 + ) +select + (a1 @> a2)::text as "CONTAINS comparison result", + (a2 <@ a1)::text as "'IS CONTAINED BY' comparison result" +from v; +``` +This is the result: +``` + CONTAINS comparison result | 'IS CONTAINED BY' comparison result +----------------------------+------------------------------------- + true | true +``` + +### The   &&   operator + +The `&&` operator returns `TRUE` if the LHS and RHS arrays overlap—that is, if they have at least one value in common. The definition of this operator makes it insensitive to which of the two to-be-compared is used on the LHS and which is used on the RHS. 
+ +```plpgsql +with + v as ( + select + (select array['a', 'b', 'c', 'd']::text[]) as a1, + (select array['d', 'e', 'f', 'g']::text[]) as a2 + ) +select + (a1 && a2)::text as "'a1 OVERLAPS a2' comparison result", + (a2 && a1)::text as "'a2 OVERLAPS a1' comparison result" +from v; +``` +This is the result: +``` + 'a1 OVERLAPS a2' comparison result | 'a2 OVERLAPS a1' comparison result +------------------------------------+------------------------------------ + true | true +``` + +## Equality and inequality semantics + +This section demonstrates each of the rules that [Comparison operators overview](./#comparison-operators-overview) above stated. + +```plpgsql +-- Any two arrays can be compared without error if they have the same data type. +do $body$ +begin + ------------------------------------------------------------------------------ + -- Illustrate "IS NOT DISTINCT FROM" semantics. + declare + v1 constant int := 1; + v2 constant int := 1; + n1 constant int := null; + n2 constant int := null; + begin + assert + (v1 = v2) and + (v1 is not distinct from v2) and + + ((n1 = n2) is null) and + (n1 is not distinct from n2), + 'unexpected'; + end; + + ------------------------------------------------------------------------------ + -- Basic demonstration of equaliy when the geom. properties of + -- the two arrays are identical. + -- Shows that pairwise comparison uses "IS NOT DISTINCT FROM" semantics and NOT + -- the conventional NULL semantics used when scalars are compared. + declare + a constant int[] := '{10, null, 30}'; + b constant int[] := '{10, null, 30}'; -- Identical to a. + begin + assert + (a = b), + '"a = b" assert failed'; + + -- Because of this, there's no need ever to write this. + assert + (a is not distinct from b), + '"a is not distinct from b" assert failed'; + end; + + ------------------------------------------------------------------------------ + -- Basic demonstration of inequality when the geometric properties of + -- the two arrays are identical. + -- When the first difference is encountered in row-major order, the comparison + -- is made. Other differences are irrelevant. + declare + a constant int[] := '{10, 20, 30}'; + b constant int[] := '{10, 19, 31}'; + begin + assert + (a <> b) and + (a > b) and + (a >= b) and + (b <= a) and + (b < a) , + '"a > b" assert failed'; + end; + + ------------------------------------------------------------------------------ + -- Demonstration of inequality when the geometric properties of + -- the two arrays are identical. + -- Here, the first pairwise difference is NOT NULL versus NULL. + declare + a constant int[] := '{10, 20, 30}'; + b constant int[] := '{10, null, 29}'; + begin + -- Bizarrely, a NOT NULL value counts as LESS THAN a NULL value in the + -- pairwise comparison. + assert + (a <> b) and + (a < b), + '"a < b" assert failed'; + + -- Again, because of this, there's no need ever to write this. + assert + (a is distinct from b) , + '"a is distinct from b" assert failed'; + end; + + ------------------------------------------------------------------------------ + -- Extreme demonstration of priority. + -- c has just a single value and d has several. + -- c has one dimension and d has two. + -- c's first lower bound is less than d's first lower. + -- d's second lower bound is greater than one, but is presumably irrelevant. + -- But c's first value is GREATER THAN d's first value, + -- scanning in row-major order. + -- + -- Pairwise value comparison has the hoghest priority. + -- therefore c is deemed to be GREATER THAN d. 
+ + declare + c constant int[] := '{2}'; + + -- Notice that d's first value is at [2][3]. + d constant int[] := '[2:3][3:4]={{1, 2}, {3, 3}}'; + + begin + assert + cardinality(c) < cardinality(d), + '"cardinality(c) < cardinality(d)" assert failed'; + + assert + array_ndims(c) < array_ndims(d), + '"ndims(c) < ndims(d)" assert failed'; + assert + array_lower(c, 1) < array_lower(d, 1), + '"lower(c, 1) < lower(d, 1)" assert failed'; + + assert + c[1] > d[2][3], + '"c[1] > d[2][3]" assert failed'; + + assert + c > d, + '"c > d" assert failed'; + end; + + ------------------------------------------------------------------------------ + -- Pairwise comparison is equal are far as it is feasible. + -- e's ndims < f's. + -- e's lb-1 < f's. + -- BUT e's cardinality > f's. + -- Cardinality has highest priority among the geom. propoerties, + -- so e is deemed to be GREATER THAN f. + declare + e constant int[] := '{10, 20, 30, 40, 50, 60, 70}'; + f constant int[] := '[2:3][3:5]={{10, 20, 30}, {40, 50, 60}}'; + begin + assert + e[1] = f[2][3] and + e[2] = f[2][4] and + e[3] = f[2][5] and + e[4] = f[3][3] and + e[5] = f[3][4] and + e[6] = f[3][5] , + '"e-to-f" eqality test, as far as feasible, assert failed'; + + assert + array_ndims(e) < array_ndims(f), + '"ndims(e) < ndims(f)" assert failed'; + + assert + array_lower(e, 1) < array_lower(f, 1), + '"lower(e, 1) < lower(f, 1)" assert failed'; + + assert + cardinality(e) > cardinality(f), + '"cardinality(e) > cardinality(f)" assert failed'; + + assert + (e > f) , + 'e > f assert failed'; + end; + + ------------------------------------------------------------------------------ + -- g's cardinality = h's. + -- So pairwise comparison is feasible for all values, and is equal. + -- g's ndims > h's. + -- g's lb-1 < h's. + -- Ndims has higher priority among ndims and lower bounds, + -- so g is deemed to be GREATER THAN h. 
+ declare + g constant int[] := '{{10, 20, 30}, {40, 50, 60}}'; + h constant int[] := '[2:7]={10, 20, 30, 40, 50, 60}'; + begin + assert + cardinality(g) = cardinality(h), + '"cardinality(g) = cardinality(h)" assert failed'; + + assert + g[1][1] = h[2] and + g[1][2] = h[3] and + g[1][3] = h[4] and + g[2][1] = h[5] and + g[2][2] = h[6] and + g[2][3] = h[7] , + '"g-to-h" eqality test assert failed'; + + assert + array_ndims(g) > array_ndims(h), + '"ndims(g) > ndims(h)" assert failed'; + + assert + array_lower(g, 1) < array_lower(h, 1), + '"lower(g, 1) < lower(h, 1)" assert failed'; + + assert + (g > h) , + '"g > h" assert failed'; + end; + + ------------------------------------------------------------------------------ + declare + i constant int[] := '[5:6][4:6]={{10, 20, 30}, {40, 50, 60}}'; + j constant int[] := '[3:4][6:8]={{10, 20, 30}, {40, 50, 60}}'; + begin + assert + cardinality(i) = cardinality(j), + '"cardinality(i) = cardinality(j)" assert failed'; + + assert + i[5][4] = j[3][6] and + i[5][5] = j[3][7] and + i[5][6] = j[3][8] and + i[6][4] = j[4][6] and + i[6][5] = j[4][7] and + i[6][6] = j[4][8] , + '"i-to-j" eqality test assert failed'; + + assert + array_ndims(i) = array_ndims(j), + '"ndims(i) = ndims(j)" assert failed'; + + assert + array_lower(i, 1) > array_lower(j, 1), + '"lower(i, 1) > lower(j, 1)" assert failed'; + + assert + (i > j) , + '"i > j" assert failed'; + end; + + ------------------------------------------------------------------------------ +end; +$body$; +``` + +## Containment and overlap operators semantics + +This section demonstrates each of the rules that [Containment and overlap operators overview](./#containment-and-overlap-operators-overview) stated. + +```plpgsql +-- Any two arrays can be compared without error if they have the same data type. +-- Insensitive to the geometric properties. +do $body$ +declare + a constant int[] := '[2:3][4:5]={{10, 20}, {30, 40}}'; + b constant int[] := '[5:6]={20, 30}'; + c constant int[] := '[6:9]={40, 50, 70, 70}'; + d constant int[] := '[2:4]={50, 60, 70}'; +begin + assert + -- Containment + (b @> b) and + (b <@ a) and + + -- Overlap. + -- The definition of the semantics makes the LHS, RHS order immaterial. + (a && c) and + (c && a) and + + -- a and d have NO values in common. + not (a && d), + 'unexpected'; +end; +$body$; +``` diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/concatenation.md b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/concatenation.md new file mode 100644 index 000000000000..29e7e9b086d6 --- /dev/null +++ b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/concatenation.md @@ -0,0 +1,231 @@ +--- +title: Array concatenation functions and operators +linkTitle: Array concatenation +headerTitle: Array concatenation functions and operators +description: Array concatenation functions and operators +menu: + preview_api: + identifier: array-concatenation + parent: array-functions-operators + weight: 40 +type: docs +--- + +The `||` operator implements, by itself, all of the functionality that each of the `array_cat()`, `array_append()`, and `array_prepend()` functions individually implement. Yugabyte recommends that you use the `||` operator and avoid the functions. They are documented here for completeness—especially in case you find them in inherited code. + +## The   ||   operator + +**Purpose:** Return the concatenation of any number of compatible `anyarray` and `anyelement` values. 
+ +**Signature** + +``` +LHS and RHS input value: [anyarray | anyelement] [anyarray | anyelement]* +return value: anyarray +``` +**Note:** "_Compatible"_ is used here to denote two requirements: + +- The values within the array, or the value of the scalar, must be of the same data type, for example, an `int[]` array and an `int` scalar. +- The LHS and the RHS must be _dimensionally_ compatible. For example, you can produce a one-dimensional array: _either_ by concatenating two scalars; _or_ by concatenating a scalar and a one-dimensional array; _or_ by concatenating two one-dimensional arrays. This notion extends to multidimensional arrays. The next bullet gives the rules. +- When you concatenate two N-dimensional arrays, the lengths along the major (that is, the first dimension) may be different but the lengths along the other dimensions must be identical. And when (as the analogy of concatenating a one-dimensional array and a scalar) you concatenate an N-dimensional and an (N-1)-dimensional array, the lengths along the dimensions of the (N-1)-dimensional array must all be identical to the corresponding lengths along the dimensions that follow the major dimension in the N-dimensional array. + +These rules follow directly from the fact that arrays are rectilinear. For examples, see [|| operator semantics](./#operator-semantics) below. + +**Example:** + +```plpgsql +create table t(k int primary key, arr int[]); +insert into t(k, arr) +values (1, '{3, 4, 5}'::int[]); + +select arr as "old value of arr" from t where k = 1; + +update t +set arr = '{1, 2}'::int[]||arr||6::int +where k = 1; + +select arr as "new value of arr" from t where k = 1; +``` +It shows this: +``` + old value of arr +------------------ + {3,4,5} +``` +and then this: +``` + new value of arr +------------------ + {1,2,3,4,5,6} +``` + +## array_cat() + +**Purpose:** Return the concatenation of two compatible `anyarray` values. + +**Signature** +``` +input value: anyarray, anyarray +return value: anyarray +``` +**Note:** The `DO` block shows that the `||` operator is able to implement the full functionality of the `array_cat()` function. + +```plpgsql +do $body$ +declare + arr_1 constant int[] := '{1, 2, 3}'::int[]; + arr_2 constant int[] := '{4, 5, 6}'::int[]; + val constant int := 5; + workaround constant int[] := array[val]; +begin + assert + array_cat(arr_1, arr_2) = arr_1||arr_2 and + array_cat(arr_1, workaround) = arr_1||val , + 'unexpected'; +end; +$body$; +``` +## array_append() +**Purpose:** Return an array that results from appending a scalar value to (that is, _after_) an array value. + +**Signature** +``` +input value: anyarray, anyelement +return value: anyarray +``` +**Note:** The `DO` block shows that the `||` operator is able to implement the full functionality of the `array_append()` function. The values must be compatible. + +```plpgsql +do $body$ +declare + arr constant int[] := '{1, 2, 3, 4}'::int[]; + val constant int := 5; + workaround constant int[] := array[val]; +begin + assert + array_append(arr, val) = arr||val and + array_append(arr, val) = array_cat(arr, workaround) , + 'unexpected'; +end; +$body$; +``` +## array_prepend() +**Purpose:** Return an array that results from prepending a scalar value to (that is, _before_) an array value. + +**Signature** +``` +input value: anyelement, anyarray +return value: anyarray +``` +**Note:** The `DO` block shows that the `||` operator is able to implement the full functionality of the `array_prepend()` function. The values must be compatible. 
+
+```plpgsql
+do $body$
+declare
+  arr constant int[] := '{1, 2, 3, 4}'::int[];
+  val constant int := 5;
+  workaround constant int[] := array[val];
+begin
+  assert
+    array_prepend(val, arr) = val||arr and
+    array_prepend(val, arr) = array_cat(workaround, arr) ,
+  'unexpected';
+end;
+$body$;
+```
+## Concatenation semantics
+
+**Semantics for one-dimensional arrays**
+
+```plpgsql
+create type rt as (f1 int, f2 text);
+
+do $body$
+declare
+  arr constant rt[] := array[(3, 'c')::rt, (4, 'd')::rt, (5, 'e')::rt];
+
+  prepend_row constant rt   := (0, 'z')::rt;
+  prepend_arr constant rt[] := array[(1, 'a')::rt, (2, 'b')::rt];
+  append_row  constant rt   := (6, 'f')::rt;
+
+  cat_result constant rt[] := prepend_row||prepend_arr||arr||append_row;
+
+  expected_result constant rt[] :=
+    array[(0, 'z')::rt, (1, 'a')::rt, (2, 'b')::rt, (3, 'c')::rt,
+          (4, 'd')::rt, (5, 'e')::rt, (6, 'f')::rt];
+
+begin
+  assert
+    (cat_result = expected_result),
+  'unexpected';
+end;
+$body$;
+```
+
+**Semantics for multidimensional arrays**
+
+```plpgsql
+do $body$
+declare
+  -- arr_1 and arr_2 are dimensionally compatible.
+  -- It's OK for array_length(*, 1) to differ.
+  -- But array_length(*, 2) must be the same.
+  arr_1 constant int[] :=
+    array[
+      array[11, 12, 13],
+      array[21, 22, 23]
+    ];
+
+  arr_2 constant int[] :=
+    array[
+      array[31, 32, 33],
+      array[41, 42, 43],
+      array[51, 52, 53]
+    ];
+
+  -- Notice that this is a 1-d array.
+  -- Its length is the same as that of arr_1
+  -- along arr_1's SECOND dimension.
+  arr_3 constant int[] := array[31, 32, 33];
+
+  -- Notice that bad_arr is dimensionally INCOMPATIBLE with arr_1:
+  -- they have different lengths along their SECOND dimension.
+  bad_arr constant int[] :=
+    array[
+      array[61, 62, 63, 64],
+      array[71, 72, 73, 74],
+      array[81, 82, 83, 84]
+    ];
+
+  expected_cat_1 constant int[] :=
+    array[
+      array[11, 12, 13],
+      array[21, 22, 23],
+      array[31, 32, 33],
+      array[41, 42, 43],
+      array[51, 52, 53]
+    ];
+
+  expected_cat_2 constant int[] :=
+    array[
+      array[11, 12, 13],
+      array[21, 22, 23],
+      array[31, 32, 33]
+    ];
+begin
+  assert
+    arr_1||arr_2 = expected_cat_1 and
+    arr_1||arr_3 = expected_cat_2,
+  'unexpected';
+
+  declare
+    a int[];
+  begin
+    -- ERROR: cannot concatenate incompatible arrays.
+    a := arr_1||bad_arr;
+  exception
+    when array_subscript_error then null;
+  end;
+end;
+$body$;
+```
diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/properties.md b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/properties.md
new file mode 100644
index 000000000000..e7a708ba3528
--- /dev/null
+++ b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/properties.md
@@ -0,0 +1,417 @@
+---
+title: Array properties
+linkTitle: Array properties
+headerTitle: Functions for reporting the geometric properties of an array
+description: Functions for reporting the geometric properties of an array
+menu:
+  preview_api:
+    identifier: array-properties
+    parent: array-functions-operators
+type: docs
+---
+
+These functions return the various dimensional properties that jointly characterize an array. The following three properties together completely specify the shape and size:
+
+- its dimensionality (`int`)
+- the lower bound along each dimension (`int`)
+- the upper bound along each dimension (`int`).
+
+There are functions for returning two other properties: the length along each dimension (`int`); and its cardinality (`int`). But each of these can be derived from the set of the lower and upper bounds for all the dimensions.
There is also a function that returns the values for the lower and upper bounds for all the dimensions as a single `text` value.
+
+## Overview
+
+The behavior of each of the functions for reporting the geometric properties of an array is illustrated by supplying the same two arrays as the first actual argument to two invocations of the function. The return value, in all cases, is insensitive to the array's data type and the values within the array.
+
+Create and populate the table _"t"_ thus:
+
+```plpgsql
+create table t(k int primary key, arr_1 text[], arr_2 text[]);
+insert into t(k, arr_1, arr_2) values(1,
+  '[3:10]={ 1, 2, 3, 4, 5, 6, 7, 8}',
+  '[2:3][4:7]={{1, 2, 3, 4}, {5, 6, 7, 8}}'
+  );
+```
+
+You will use it in the example for each of the functions. (The optional syntax items `[3:10]` and `[2:3][4:7]` specify the lower and upper bounds along the one dimension of the first array and along both dimensions of the second array. This syntax is explained in [Multidimensional array of numeric values](../../literals/array-of-primitive-values/#multidimensional-array-of-numeric-values).)
+
+Run the `SELECT` statement for each function to illustrate what it produces for the same pair of input arrays.
+
+### array_ndims()
+
+**Purpose:** Return the number of dimensions (that is, the _dimensionality_) of the specified array.
+
+**Signature:**
+```
+input value:       anyarray
+return value:      int
+```
+**Example:**
+
+```plpgsql
+select
+  array_ndims(arr_1) as ndims_1,
+  array_ndims(arr_2) as ndims_2
+from t where k = 1;
+```
+It produces this result:
+```
+ ndims_1 | ndims_2
+---------+---------
+       1 |       2
+```
+### array_lower()
+
+**Purpose:** Return the lower bound of the specified array along the specified dimension.
+
+**Signature:**
+```
+input value:       anyarray, int
+return value:      int
+```
+**Example:**
+
+```plpgsql
+select
+  array_lower(arr_1, 1) as arr_1_lb,
+  array_lower(arr_2, 1) as arr_2_lb_1,
+  array_lower(arr_2, 2) as arr_2_lb_2
+from t where k = 1;
+```
+It produces this result:
+```
+ arr_1_lb | arr_2_lb_1 | arr_2_lb_2
+----------+------------+------------
+        3 |          2 |          4
+```
+### array_upper()
+
+**Purpose:** Return the upper bound of the specified array along the specified dimension.
+
+**Signature:**
+```
+input value:       anyarray, int
+return value:      int
+```
+**Example:**
+The use of `array_upper()` is exactly symmetrical with the use of `array_lower()`.
+```plpgsql
+select
+  array_upper(arr_1, 1) as arr_1_ub,
+  array_upper(arr_2, 1) as arr_2_ub_1,
+  array_upper(arr_2, 2) as arr_2_ub_2
+from t where k = 1;
+```
+It produces this result:
+```
+ arr_1_ub | arr_2_ub_1 | arr_2_ub_2
+----------+------------+------------
+       10 |          3 |          7
+```
+### array_length()
+
+**Purpose:** Return the length of the specified array along the specified dimension. Notice that, among `array_lower()` and `array_upper()` and `array_length()`, the result of any two of them determines the result of the third. You could therefore decide, for example, only to use `array_lower()` and `array_upper()` and to determine the length, when you need it, by subtraction. But, for code clarity and brevity, you may as well use exactly the one that matches your present purpose.
+
+**Signature:**
+```
+input value:       anyarray, int
+return value:      int
+```
+**Example:**
+The use of `array_length()` is exactly symmetrical with the use of `array_lower()` and `array_upper()`.
+ +```plpgsql +select + array_length(arr_1, 1) as arr_1_len, + array_length(arr_2, 1) as arr_2_len_1, + array_length(arr_2, 2) as arr_2_len_2 +from t where k = 1; +``` +It produces this result: +``` + arr_1_len | arr_2_len_1 | arr_2_len_2 +-----------+-------------+------------- + 8 | 2 | 4 +``` +### cardinality() + +**Purpose:** Return the total number of values in the specified array. Notice that the value that this function returns can be computed as the product of values returned by the `array_length()` function along each direction. + +**Signature:** +``` +input value: anyarray +return value: int +``` +**Example:** + +```plpgsql +select + cardinality(arr_1) as card_1, + cardinality(arr_2) as card_2 +from t where k = 1; +``` +It produces this result: +``` + card_1 | card_2 +--------+-------- + 8 | 8 +``` +### array_dims() + +**Purpose:** Return a text representation of the same information as `array_lower()` and `array_length()` return, for all dimensions, in a single text value. + +**Signature:** +``` +input value: anyarray +return value: text +``` +**Example:** +The `array_dims()` function is useful to produce a result that is easily humanly readable. If you want to use the information that it returns programmatically, then you should use `array_lower()`, `array_upper()`, or `array_length()`. + +```plpgsql +select + array_dims(arr_1) as arr_1_dims, + array_dims(arr_2) as arr_2_dims +from t where k = 1; +``` +It produces this result: +``` + arr_1_dims | arr_2_dims +------------+------------ + [3:10] | [2:3][4:7] +``` + +## Joint semantics + +Create the procedure _"assert_semantics_and_traverse_values()"_ and then invoke it for each of the three provided data sets. You supply it with a one-dimensional array and a two dimensional array and, for each, your humanly determined estimates of these values: + +- what `array_ndims()` returns +- what `array_lower()` returns for each dimension +- what `array_upper()` returns for each dimension. + +The procedure obtains the actual values, programmatically, for all of the values that you supply and asserts that they agree. + +Notice that the cardinality and the length along each dimension are omitted, by design, from this list. The procedure also obtains these values, programmatically, and checks that these agree with the values that are computed from, respectively, the length along each dimension and the upper and lower bounds along each dimension. + +Notice, too, that the value that `array_dims()` returns can be computed from the upper and lower bounds along each dimension. So the procedure does this too and checks that the value returned by `array_dims()` is consistent with the values returned by `array_lower()` and `array_upper()`. + +The procedure has some particular requirements: + +- The cardinality of each of the two supplied arrays must be the same. +- The actual array values, in row-major order, must be the same, pairwise. + +Briefly, _"row-major"_ order is the order in which the last subscript varies most rapidly. + +Meeting these requirements are allows the procedure to deliver two bonus benefits. _First_, it demonstrates how to traverse array values in row-major order using the values returned by the functions that this section describes, thereby demonstrating what the term "row-major order" means. _Second_. it compares the values, pairwise, for equality. This comparison rule is the basis of the semantics of the comparison operations described in [Operators for comparing two arrays](../comparison). 
+ +**Note:** There are no built-in functions for computing, for example, the product of two matrices or the product of a vector and a matrix. (A vector is a one-dimensional array, and a matrix is a two-dimensional array.) But, as long as you know how to traverse the values in a matrix in row-major order, you can implement the missing vector and matrix multiplication functionality for yourself. + +```plpgsql +create procedure assert_semantics_and_traverse_values( + a in int[], b in int[], + + a_ndims in int, a_lb in int, a_ub in int, + + b_ndims in int, b_lb_1 in int, b_ub_1 in int, + b_lb_2 in int, b_ub_2 in int) + language plpgsql +as $body$ +declare + -- Get the facts that are implied by the user-supplied facts. + a_len constant int := array_length(a, 1); + b_len_1 constant int := array_length(b, 1); + b_len_2 constant int := array_length(b, 2); + + a_dims constant text := '['||a_lb::text||':'||a_ub::text||']'; + b_dims constant text := '['||b_lb_1::text||':'||b_ub_1::text||']'|| + '['||b_lb_2::text||':'||b_ub_2::text||']'; +begin + -- Confirm that the supplied arrays meet the basic + -- dimensionalities requirement. + assert + a_ndims = 1 and b_ndims = 2, + 'ndims assert failed'; + + -- Confirm the user-supplied facts about the shape and size of "a". + assert + array_ndims(a) = a_ndims and + array_lower(a, 1) = a_lb and + array_upper(a, 1) = a_ub , + '"a" dimensions assert failed'; + + -- Confirm the user-supplied facts about the shape and size of "b". + assert + array_ndims(b) = b_ndims and + array_lower(b, 1) = b_lb_1 and + array_upper(b, 1) = b_ub_1 and + array_lower(b, 2) = b_lb_2 and + array_upper(b, 2) = b_ub_2 , + '"b" dimensions assert failed'; + + -- Confirm the length overspecification rule. + assert + (a_ub - a_lb + 1 ) = a_len and + (b_ub_1 - b_lb_1 + 1) = b_len_1 and + (b_ub_2 - b_lb_2 + 1) = b_len_2 , + 'Length overspecification rule assert failed.'; + + -- Confirm the cardinality overspecification rule. + assert + cardinality(a) = a_len and + cardinality(b) = b_len_1 * b_len_2 , + 'Cardinality overspecification rule assert failed.'; + + -- Confirm the "dims" overspecification rule. + assert + array_dims(a) = a_dims and + array_dims(b) = b_dims , + '"dims" overspecification rule assert failed.'; + + -- Do the row-major order traversal and + -- check that the values are pairwise-identical. + for j in 0..(a_len - 1) loop + declare + -- Traversing a 1-d array is trivial. + a_idx constant int := j + a_lb; + + -- Traversing a 2-d array is need a bit more thought. 
+ b_idx_1 constant int := floor(j/b_len_2) + b_lb_1; + b_idx_2 constant int := ((j + b_len_2) % b_len_2) + b_lb_2; + + a_txt constant text := lpad(a_idx::text, 2); + b_txt_1 constant text := lpad(b_idx_1::text, 2); + b_txt_2 constant text := lpad(b_idx_2::text, 2); + val constant text := lpad(a[a_idx]::text, 2); + + line constant text := + 'a['||a_txt||'] = '|| + 'b['||b_txt_1||']['||b_txt_2||'] = '|| + val; + begin + assert + a[a_idx] = b[b_idx_1][b_idx_2], + 'Row-major order pairwise equality assert failed'; + raise info '%', line; + end; + end loop; +end; +$body$; +``` + +Try it on the first data set: +```plpgsql +do $body$ +declare + a constant int[] := '{ 1, 2, 3, 4, 5, 6}'; + b constant int[] := '{{1, 2}, {3, 4}, {5, 6}}'; + + a_ndims constant int := 1; + a_lb constant int := 1; + a_ub constant int := 6; + + b_ndims constant int := 2; + b_lb_1 constant int := 1; + b_ub_1 constant int := 3; + b_lb_2 constant int := 1; + b_ub_2 constant int := 2; +begin + call assert_semantics_and_traverse_values( + a, b, + a_ndims, a_lb, a_ub, + b_ndims, b_lb_1, b_ub_1, + b_lb_2, b_ub_2); +end; +$body$; +``` +It produces this result (after manually removing the _"INFO:"_ prompts): +``` +a[ 1] = b[ 1][ 1] = 1 +a[ 2] = b[ 1][ 2] = 2 +a[ 3] = b[ 2][ 1] = 3 +a[ 4] = b[ 2][ 2] = 4 +a[ 5] = b[ 3][ 1] = 5 +a[ 6] = b[ 3][ 2] = 6 +``` +Try it on the second data set: +```plpgsql +do $body$ +declare + a constant int[] := '[3:14]={ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}'; + b constant int[] := '[3:6][6:8]={{1, 2, 3}, {4, 5, 6}, {7, 8, 9}, {10, 11, 12}}'; + + a_ndims constant int := 1; + a_lb constant int := 3; + a_ub constant int := 14; + + b_ndims constant int := 2; + b_lb_1 constant int := 3; + b_ub_1 constant int := 6; + b_lb_2 constant int := 6; + b_ub_2 constant int := 8; +begin + call assert_semantics_and_traverse_values( + a, b, + a_ndims, a_lb, a_ub, + b_ndims, b_lb_1, b_ub_1, + b_lb_2, b_ub_2); +end; +$body$; +``` +It produces this result: +``` +a[ 3] = b[ 3][ 6] = 1 +a[ 4] = b[ 3][ 7] = 2 +a[ 5] = b[ 3][ 8] = 3 +a[ 6] = b[ 4][ 6] = 4 +a[ 7] = b[ 4][ 7] = 5 +a[ 8] = b[ 4][ 8] = 6 +a[ 9] = b[ 5][ 6] = 7 +a[10] = b[ 5][ 7] = 8 +a[11] = b[ 5][ 8] = 9 +a[12] = b[ 6][ 6] = 10 +a[13] = b[ 6][ 7] = 11 +a[14] = b[ 6][ 8] = 12 +``` +Try it on the third data set: +```plpgsql +do $body$ +declare + a constant int[] := '[3:18]={ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}'; + b constant int[] := '[3:6][2:5]={{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}'; + + a_ndims constant int := 1; + a_lb constant int := 3; + a_ub constant int := 18; + + b_ndims constant int := 2; + b_lb_1 constant int := 3; + b_ub_1 constant int := 6; + b_lb_2 constant int := 2; + b_ub_2 constant int := 5; +begin + call assert_semantics_and_traverse_values( + a, b, + a_ndims, a_lb, a_ub, + b_ndims, b_lb_1, b_ub_1, + b_lb_2, b_ub_2); +end; +$body$; +``` +It produces this result: +``` +a[ 3] = b[ 3][ 2] = 1 +a[ 4] = b[ 3][ 3] = 2 +a[ 5] = b[ 3][ 4] = 3 +a[ 6] = b[ 3][ 5] = 4 +a[ 7] = b[ 4][ 2] = 5 +a[ 8] = b[ 4][ 3] = 6 +a[ 9] = b[ 4][ 4] = 7 +a[10] = b[ 4][ 5] = 8 +a[11] = b[ 5][ 2] = 9 +a[12] = b[ 5][ 3] = 10 +a[13] = b[ 5][ 4] = 11 +a[14] = b[ 5][ 5] = 12 +a[15] = b[ 6][ 2] = 13 +a[16] = b[ 6][ 3] = 14 +a[17] = b[ 6][ 4] = 15 +a[18] = b[ 6][ 5] = 16 +``` diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/replace-a-value.md b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/replace-a-value.md new file mode 100644 index 000000000000..39027156db71 --- /dev/null +++ 
b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/replace-a-value.md @@ -0,0 +1,318 @@ +--- +title: array_replace() +linkTitle: array_replace() / set value +headerTitle: array_replace() and setting an array value explicitly +description: array_replace() and setting an array value explicitly +menu: + preview_api: + identifier: array-replace-a-value + parent: array-functions-operators +type: docs +--- +Each of the approaches described in this section, using the `array_replace()` function and setting an addressed array value explicitly and in place, can be used to change values in an array. But the two approaches differ importantly: + +- `array_replace()` changes all values that match the specified value to the same new value, insensitively to their address in the array. + +- Setting an addressed array value changes that one value insensitively to its present value. + +## array_replace() + +**Purpose:** Return a new array that is derived from the input array by replacing _every_ array value that is equal to the specified value with the specified new value. + +**Signature** + +``` +input value: anyarray, anyelement, anyelement +return value: anyarray +``` +**Example:** + +```plpgsql +create type rt as (f1 int, f2 text); +create table t(k int primary key, arr rt[]); +insert into t(k, arr) +values (1, '{"(1,rabbit)","(2,hare)","(3,squirrel)","(4,horse)"}'::rt[]); + +select arr as "old value of arr" from t where k = 1; + +update t +set arr = array_replace(arr, '(3,squirrel)', '(3,bobcat)') +where k = 1; + +select arr as "new value of arr" from t where k = 1; +``` +This is the result of the two queries: +``` + old value of arr +------------------------------------------------------ + {"(1,rabbit)","(2,hare)","(3,squirrel)","(4,horse)"} + + new value of arr +---------------------------------------------------- + {"(1,rabbit)","(2,hare)","(3,bobcat)","(4,horse)"} +``` + +**Semantics:** + +_One-dimensional array of primitive scalar values_. + +```plpgsql +do $body$ +declare + old_val constant int := 42; + new_val constant int := 17; + + arr constant int[] := + array[1, old_val, 3, 4, 5, old_val, 6, 7]; + + expected_modified_arr constant int[] := + array[1, new_val, 3, 4, 5, new_val, 6, 7]; +begin + assert + array_replace(arr, old_val, new_val) = expected_modified_arr, + 'unexpected'; +end; +$body$; +``` + +_One-dimensional array of _"row"_ type values_. + +The definition of _"rt"_ used here is the same as the example above used. Don't create again if it already exists. + +```plpgsql +create type rt as (f1 int, f2 text); + +do $body$ +declare + old_val constant rt := (42, 'x'); + new_val constant rt := (17, 'y'); + + arr constant rt[] := + array[(1, 'a')::rt, old_val, (1, 'a')::rt, (2, 'b')::rt, (3, 'c')::rt, + old_val, (4, 'd')::rt, (5, 'e')::rt]; + + expected_modified_arr constant rt[] := + array[(1, 'a')::rt, new_val, (1, 'a')::rt, (2, 'b')::rt, (3, 'c')::rt, + new_val, (4, 'd')::rt, (5, 'e')::rt]; +begin + assert + array_replace(arr, old_val, new_val) = expected_modified_arr, + 'unexpected'; +end; +$body$; +``` + +_Two-dimensional array of primitive scalar values_. This is sufficient to illustrate the semantics of the general multidimensional case. The function's signature (at the start of this section) shows that the to-be-replaced value and the replacement value are instances of `anyelement`. There is no overload where these two parameters accept instances of `anyarray`. 
This restriction is understood by picturing the internal representation as a linear ribbon of values, as was explained in [Synopsis](../../#synopsis). The replacement works by scanning along the ribbon, finding each occurrence in turn of the to-be-replaced value, and replacing it. + +Here is a postive illustration: +```plpgsql +do $body$ +declare + old_val constant int := 22; + new_val constant int := 97; + + arr int[] := + array[ + array[11, 12], + array[11, old_val], + array[32, 33] + ]; + + expected_modified_arr constant int[] := + array[ + array[11, 12], + array[11, new_val], + array[32, 33] + ]; +begin + arr := array_replace(arr, old_val, new_val); + + assert + arr = expected_modified_arr, + 'unexpected'; +end; +$body$; +``` +And here is a negative illustration: +```plpgsql +do $body$ +declare + old_val constant int[] := array[22, 23]; + new_val constant int[] := array[87, 97]; + + arr int[] := + array[ + array[11, 12], + old_val, + array[32, 33] + ]; + expected_modified_arr constant int[] := + array[ + array[11, 12], + new_val, + array[32, 33] + ]; +begin + begin + -- Causes: 42883: function array_replace(integer[], integer[], integer[]) does not exist. + arr := array_replace(arr, old_val, new_val); + exception + when undefined_function then null; + end; + + -- The goal is met by replacing the scalar values one by one. + arr := array_replace(arr, old_val[1], new_val[1]); + arr := array_replace(arr, old_val[2], new_val[2]); + + assert + arr = expected_modified_arr, + 'unexpected'; +end; +$body$; +``` + +## Setting an array value explicitly and in place + +**Purpose:** Change an array in place by changing an explicitly addressed value. + +**Signature** + +``` +-- Uses the notation +-- arr[idx_1][idx_2]...[idx_N] +-- for an N-dimensional array. + +input/output value: anyarray, "vector of index values" +``` +**Example:** +```plpgsql +create table t(k int primary key, arr int[]); + +insert into t(k, arr) values (1, + '{1, 2, 3, 4}'); + +update t set arr[2] = 42 where k = 1; + +select arr from t where k = 1; +``` +This is the result: +``` + arr +------------ + {1,42,3,4} +``` +**Semantics:** + +_Array of primitive scalar values_. Notice that the starting value is "snapshotted" as `old_arr` and that this is marked `constant`. Notice too that _"expected_modified_arr"_ is marked `constant`. This proves that the modification was done in place within the only array value that is _not_ marked `constant`. + +```plpgsql +do $body$ +declare + old_val constant int := 42; + new_val constant int := 17; + + arr int[] := array[1, 2, old_val, 4]; + expected_modified_arr constant int[] := array[1, 2, new_val, 4]; + old_arr constant int[] := arr; +begin + arr[3] := new_val; + assert + old_arr = '{1, 2, 42, 4}' and + expected_modified_arr = '{1, 2, 17, 4}' and + arr = expected_modified_arr, + 'unexpected'; +end; +$body$; +``` +_Array of "record" type values_. + +The definition of _"rt"_ used here is the same as the example above used. Don't create again if it already exists. 
+```plpgsql +create type rt as (f1 int, f2 text); + +do $body$ +declare + old_val constant rt := (42, 'x'); + new_val constant rt := (17, 'y'); + + arr rt[] := + array[(1, 'a')::rt, old_val, (1, 'a')::rt, (2, 'b')::rt, (3, 'c')::rt, + old_val, (4, 'd')::rt, (5, 'e')::rt]; + + expected_modified_arr constant rt[] := + array[(1, 'a')::rt, new_val, (1, 'a')::rt, (2, 'b')::rt, (3, 'c')::rt, + new_val, (4, 'd')::rt, (5, 'e')::rt]; + + old_arr constant rt[] := arr; +begin + arr[2] := new_val; + arr[6] := new_val; + + assert + old_arr = + '{"(1,a)","(42,x)","(1,a)","(2,b)","(3,c)","(42,x)","(4,d)","(5,e)"}' and + expected_modified_arr = + '{"(1,a)","(17,y)","(1,a)","(2,b)","(3,c)","(17,y)","(4,d)","(5,e)"}' and + arr = expected_modified_arr, + 'unexpected'; +end; +$body$; +``` +_Two-dimensional array of primitive scalar values_. This is sufficient to illustrate the semantics of the general multidimensional case. The approach is just the same as when `array_replace()` is used to meet the same goal. You have no choice but to target the values explicitly. + +```plpgsql +do $body$ +declare + old_val constant int[] := array[21, 22, 23, 24]; + new_val constant int[] := array[81, 82, 83, 84]; + + arr int[] := + array[ + array[11, 12, 13, 14], + old_val, + array[31, 32, 33, 34] + ]; + + expected_modified_arr constant int[] := + array[ + array[11, 12, 13, 14], + new_val, + array[31, 32, 33, 34] + ]; + + len_1 constant int := array_length(arr, 1); + len_2 constant int := array_length(arr, 2); +begin + assert + (len_1 = 3) and (len_2 = 4), + 'unexpected'; + + -- OK to extract a slice. But, even though it's tempting to picture this as one row, + -- it is nevertheless a 2-d array with "array_length(arr, 1)" equal to 1. + assert + arr[2:2][1:4] = array[old_val], + 'unexpected'; + + -- You cannot use the slice notation to specify the target of an assignment. + -- So this + -- arr[2:2][1:4] = array[new_val]; + -- causes a compilation error. + + -- Similarly, this is meaningless. (But it doesn't cause a compilation error.) + -- Because it's a 2-d array, its values (individual values or slices) must be + -- addressed using two indexes or two slice ranges. + assert + arr[2] is null, + 'unexpected'; + + -- Change the individual, addressable, values one by one. + for j in array_lower(arr, 2)..array_upper(arr, 2) loop + arr[2][j] := new_val[j]; + end loop; + + assert + arr = expected_modified_arr, + 'unexpected'; +end; +$body$; +``` diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/slice-operator.md b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/slice-operator.md new file mode 100644 index 000000000000..c750498d46ab --- /dev/null +++ b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/slice-operator.md @@ -0,0 +1,99 @@ +--- +title: The array slice operator +linkTitle: Array slice operator +headerTitle: The array slice operator +description: The array slice operator +menu: + preview_api: + identifier: array-slice-operator + parent: array-functions-operators + weight: 30 +type: docs +--- + +**Purpose:** Return a new array whose length is defined by specifying the slice's lower and upper bound along each dimension. + +**Signature:** +``` +input value: [lb_1:ub_1] ... [lb_N:ub_N]anyarray +return value: anyarray +``` +**Note:** +- You must specify the lower and upper slicing bounds, as `int` values for each of the input array's N dimensions. +- The specified slicing bounds must not exceed the source array's bounds. 
+- The new array has the same dimensionality as the source array and its lower bound is `1` on each axis.
+
+**Example:**
+```plpgsql
+create table t(k int primary key, arr text[]);
+
+insert into t(k, arr)
+values (1, '
+  [2:4][3:6][4:5]=
+  {
+    {
+      {a,b}, {c,d}, {e,f}, {g,h}
+    },
+    {
+      {i,j}, {k,l}, {m,n}, {o,p}
+    },
+    {
+      {q,r}, {s,t}, {u,v}, {w,x}
+    }
+  }
+  '::text[]);
+
+select arr as "old value of arr" from t where k = 1;
+
+select
+  array_lower(arr, 1) as "lb-1",
+  array_upper(arr, 1) as "ub-1",
+  array_lower(arr, 2) as "lb-2",
+  array_upper(arr, 2) as "ub-2",
+  array_lower(arr, 3) as "lb-3",
+  array_upper(arr, 3) as "ub-3"
+from t where k = 1;
+```
+It produces these results:
+```
+                                         old value of arr
+-------------------------------------------------------------------------------------------------
+ [2:4][3:6][4:5]={{{a,b},{c,d},{e,f},{g,h}},{{i,j},{k,l},{m,n},{o,p}},{{q,r},{s,t},{u,v},{w,x}}}
+```
+and:
+```
+ lb-1 | ub-1 | lb-2 | ub-2 | lb-3 | ub-3
+------+------+------+------+------+------
+    2 |    4 |    3 |    6 |    4 |    5
+```
+Now do the slicing:
+```plpgsql
+update t
+set arr = arr[2:3][4:5][3:4]
+where k = 1;
+
+select arr as "new value of arr" from t where k = 1;
+
+select
+  array_lower(arr, 1) as "lb-1",
+  array_upper(arr, 1) as "ub-1",
+  array_lower(arr, 2) as "lb-2",
+  array_upper(arr, 2) as "ub-2",
+  array_lower(arr, 3) as "lb-3",
+  array_upper(arr, 3) as "ub-3"
+from t where k = 1;
+```
+It produces these results:
+```
+   new value of arr
+-----------------------
+ {{{c},{e}},{{k},{m}}}
+```
+and:
+```
+ lb-1 | ub-1 | lb-2 | ub-2 | lb-3 | ub-3
+------+------+------+------+------+------
+    1 |    2 |    1 |    2 |    1 |    1
+```
+
+Notice that, as promised, all the lower bounds are equal to `1`.
diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/string-to-array.md b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/string-to-array.md
new file mode 100644
index 000000000000..02a3664c58e3
--- /dev/null
+++ b/docs/content/v2.25/api/ysql/datatypes/type_array/functions-operators/string-to-array.md
@@ -0,0 +1,116 @@
+---
+title: string_to_array()
+linkTitle: string_to_array()
+headerTitle: string_to_array()
+description: string_to_array()
+menu:
+  preview_api:
+    identifier: string-to-array
+    parent: array-functions-operators
+type: docs
+---
+**Purpose:** Return a one-dimensional `text[]` array by splitting the input `text` value into subvalues using the specified `text` value as the _"delimiter"_. Optionally, allows a specified `text` value to be interpreted as `NULL`.
+
+**Signature:**
+
+```
+input value: text, text [, text]
+return value: text[]
+```
+**Example:**
+
+```plpgsql
+select string_to_array(
+  'a|b|?|c',  -- the to-be-split string
+  '|',        -- the character(s) to be taken as the delimiter
+  '?'         -- the character(s) to be taken as the null indicator
+) as "string_to_array result";
+```
+
+It produces this result:
+
+```
+ string_to_array result
+------------------------
+ {a,b,NULL,c}
+```
+
+**Semantics:**
+
+The interpretation of the _"delimiter"_ `text` value and the _"null indicator"_ `text` value uses this priority rule:
+
+- _First_, the _"delimiter"_ `text` value is consumed
+
+- _and only then_ is the _"null indicator"_ `text` value consumed.
+
+However, this rule matters only when these two critical values are defined by more than one character and when one starts with a sequence that the other ends with.
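+
+Here is a minimal sketch of such a case. The values are invented just for this illustration: the delimiter `' !'` starts with the very space that the null indicator `'~ '` ends with, and the input `'x~ !y'` contains both, overlapping at that space:
+
+```plpgsql
+select string_to_array(
+  'x~ !y',  -- contains both '~ ' and ' !', overlapping at the space
+  ' !',     -- the delimiter
+  '~ '      -- the null indicator
+) as "overlap result";
+```
+
+It produces this result:
+
+```
+ overlap result
+----------------
+ {x~,y}
+```
+
+Because the delimiter is consumed first, the shared space belongs to the delimiter. The left-over field is `x~`, which no longer matches the null indicator, and so no `NULL` is produced.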
+ +Yugabyte recommends, therefore, that when you can choose the `text` values for the _"delimiter"_ and for the _"null indicator"_, you choose two different single characters. This is what the example, above, does. Of course, you must be sure that neither occurs in any of the `text` values that you want to convert into `text[]` arrays. (There is no escaping mechanism.) + +Predicting the outcome, when unfortunate choices for these two values were made, will require some mental effort. The `DO` block below demonstrates an example of this: + +- The _"delimiter"_ is `' !'::text`. + +- The _"null indicator"_ is `'~ '::text`. + +And the input contains this sequence: + +    <tilda><space><exclamationPoint> + +The troublesome sequence is shown in typewriter font here: + +    dog house !~ !x! `~ !` cat flap ! ! + +These considerations, together with the fact that it can produce only a `text[]` output, mean that the `string_to_array()` function has limited usefulness. + +```plpgsql +do $body$ +declare + delim_text constant text := ' !'; + null_text constant text := '~ '; + + input_text constant text := 'dog house !~ !x! ~ ! cat flap ! !'; + + result constant text[] := + string_to_array(input_text, delim_text, null_text); + + good_delim_text constant text := '|'; + good_null_text constant text := '?'; + + delim_first_text constant text := + replace(replace( + input_text, + delim_text, good_delim_text), + null_text, good_null_text); + + null_first_text constant text := + replace(replace( + input_text, + null_text, good_null_text), + delim_text, good_delim_text); + + delim_first_result constant text[] := + string_to_array(delim_first_text, good_delim_text, good_null_text); + + null_first_result constant text[] := + string_to_array(null_first_text, good_delim_text, good_null_text); + + -- Notice that one of the special characters, "!", remains in + -- both expected_result and unexpected_result. + -- If + expected_result constant text[] := + '{"dog house",NULL,"x! ~"," cat flap"," ",""}'; + unexpected_result constant text[] := + '{"dog house",NULL,"x! ?! cat flap"," ",""}'; + +begin + assert + (result = expected_result) and + (delim_first_result = expected_result) and + (null_first_result <> delim_first_result) and + (null_first_result = unexpected_result) and + true, + 'unexpected'; +end; +$body$; +``` diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/literals/_index.md b/docs/content/v2.25/api/ysql/datatypes/type_array/literals/_index.md new file mode 100644 index 000000000000..a6a2e4dec5fc --- /dev/null +++ b/docs/content/v2.25/api/ysql/datatypes/type_array/literals/_index.md @@ -0,0 +1,97 @@ +--- +title: Creating an array value using a literal +linkTitle: Literals +headerTitle: Creating an array value using a literal +description: Creating an array value using a literal +image: /images/section_icons/api/subsection.png +menu: + preview_api: + identifier: array-literals + parent: api-ysql-datatypes-array + weight: 20 +type: indexpage +--- + +This section introduces array literals informally with a few examples. Its subsections, listed below, explain formally how you construct syntactically correct array literals that establish the values that you intend. + +An array literal starts with a left curly brace. This is followed by some number of comma-separated literal representations for the array's values. Sometimes, the value representations need not be double-quoted—but _may_ be. And sometimes the value representations must be double-quoted. 
The array literal then ends with a right curly brace. Depending on the array's data type, its values might be scalar, or they might be composite. For example, they might be _"row"_ type values; or they might be arrays. The literal for a multidimensional array is written as an array of arrays of arrays... and so on. They might even be values of a user-defined `DOMAIN` which is based on an array data type. This powerful notion is discussed in the dedicated section [Using an array of `DOMAIN` values](../array-of-domains/). + +To use such a literal in SQL or in PL/pgSQL it must be enquoted in the same way as is an ordinary `text` literal. You can enquote an array literal using dollar quotes, if this suits your purpose, just as you can for a `text` literal. You sometimes need to follow the closing quote with a suitable typecast operator for the array data type that you intend. And sometimes the context of use uniquely determines the literal's data type. It's never wrong to write the typecast explicitly—and it's a good practice always to do this. + +Here, in use in a SQL `SELECT` statement, is the literal for a one-dimensional array of primitive `int` values: + +```plpgsql +\t on +select '{1, 2, 3}'::int[]; +``` + +The `\t on` meta-command suppresses column headers and the rule-off under these. Unless the headers are important for understanding, query output from `ysqlsh` will be shown, throughout the present "arrays" major section, without these. + +This is the output that the first example produces: + +```output + {1,2,3} +``` + +The second example surrounds the values that the array literal defines with double quotes: + +```plpgsql +select '{"1", "2", "3"}'::int[]; +``` + +It produces the identical output to the first example, where no double quotes were used. + +The third example defines a two-dimensional array of `int` values: + +```plpgsql +select ' + { + {11, 12, 13}, + {21, 22, 23} + } + '::int[]; +``` + +It produces this result: + +```output + {{11,12,13},{21,22,23}} +``` + +The fourth example defines an array whose values are instances of a _"row"_ type: + +```plpgsql +create type rt as (f1 int, f2 text); + +select ' + { + "(1,a1 a2)", + "(2,b1 b2)", + "(3,c1 v2)" + } +'::rt[]; +``` + +It produces this output: + +```output + {"(1,\"a1 a2\")","(2,\"b1 b2\")","(3,\"c1 v2\")"} +``` + +All whitespace (except, of course, within the text values) has been removed. The double quotes around the representation of each _"row"_ type value are retained. This suggests that they are significant. (Test this by removing them. It causes the _"22P02: malformed row literal"_ error.) Most noticeably, there are clearly rules at work in connection with the representation of each `text` value within the representation of each _"row"_ type value. + +The following sections present the rules carefully and, when the rules allow some freedom, give recommendations. + +[The text typecast of a value, the literal for that value, and how they are related](./text-typecasting-and-literals/) establishes the important notions that allow you to distinguish between a _literal_ and the _text of the literal_. It's the _text_ of an array literal that, by following specific grammar rules for this class of literal, actually defines the intended value. The literal, as a whole, enquotes this bare text and typecasts it to the desired target array data type. 
+
+[The literal for an array of primitive values](./array-of-primitive-values/) gives the rules for array literals whose values are scalars (for example, are of primitive data types).
+
+[The literal for a _"row"_ type value](./row/) gives the rules for the literal for a value of a _"row"_ type. These rules are essential to the understanding of the next section.
+
+[The literal for an array of _"row"_ type values](./array-of-rows/) gives the rules for array literals whose values are composite (that is, a _"row"_ type).
+
+These rules are covered in the following sections of the PostgreSQL documentation:
+
+- [8.15. Arrays](https://www.postgresql.org/docs/15/arrays.html)
+
+- [8.16. Composite Types](https://www.postgresql.org/docs/15/rowtypes.html)
diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/literals/array-of-primitive-values.md b/docs/content/v2.25/api/ysql/datatypes/type_array/literals/array-of-primitive-values.md
new file mode 100644
index 000000000000..96669be1db26
--- /dev/null
+++ b/docs/content/v2.25/api/ysql/datatypes/type_array/literals/array-of-primitive-values.md
@@ -0,0 +1,389 @@
+---
+title: The literal for an array of primitive values
+linkTitle: Array of primitive values
+headerTitle: The literal for an array of primitive values
+description: The literal for an array of primitive values
+menu:
+  preview_api:
+    identifier: array-of-primitive-values
+    parent: array-literals
+    weight: 10
+type: docs
+---
+
+This section states a sufficient subset of the rules that allow you to write a syntactically correct array literal that expresses any set of values, for arrays of any scalar data type, that you could want to create. The full set of rules allows more flexibility than do just those that are stated here. But because these are sufficient, the full, and rather complex, set is not documented here. The explanations in this section will certainly allow you to interpret the `::text` typecast of any array value that you might see, for example in `ysqlsh`.
+
+Then Yugabyte's recommendation in this space is stated. And then the rules are illustrated with examples.
+
+## Statement of the rules
+
+The statement of these rules depends on understanding the notion of the canonical form of a literal. [Defining the canonical form of a literal](../text-typecasting-and-literals/#defining-the-canonical-form-of-a-literal) explained that the `::text` typecast of any kind of array shows you that this form of the literal (more carefully stated, the _text_ of this literal) can be used to recreate the value.
+
+In fact, this definition, and the property that the canonical form of the literal is sufficient to recreate the value, hold for values of _all_ data types.
+
+Recall that every value within an array necessarily has the same data type. If you follow the rules that are stated here, and illustrated in the demonstrations below, you will always produce a syntactically valid literal which expresses the semantics that you intend. It turns out that many other variants, especially for `text[]` arrays, are legal and can produce the values that you intend. However, the rules that govern these exotic uses will not be documented because it is always sufficient to create your literals in canonical form.
+
+Here is the sufficient set of rules.
+
+- The commas that delimit successive values, the curly braces that enclose the entire literal, and the inner curly braces that are used in the literals for multidimensional arrays, can be surrounded by arbitrary amounts of whitespace.
If you want strictly to adhere to canonical form, then you ought not to do this. But doing so can improve the readability of _ad hoc_ manually typed literals. It can also make it easier to read trace output in a program that constructs array literals programmatically. +- In numeric and `boolean` array literals, do _not_ surround the individual values with double quotes. +- In the literal for a `timestamp[]` array, _do_ surround the individual values with double quotes—even though this is not strictly necessary. +- In the literal for a `text[]` array, _do_ surround every individual value with double quotes, even though this is not always necessary. It _is_ necessary for any value that itself contains, as ordinary text, any whitespace or any of the characters that have syntactic significance within the outermost curly brace pair. This is the list: +``` + { } , " \ +``` +- It's sufficient to write the curly braces and the comma ordinarily within the enclosing double quotes. But each of the double quote character and the backslash character must be escaped with an immediately preceding single backslash. + + +## Always write array literals in canonical form + +Bear in mind that you will rarely manually type literals in the way that this section does to demonstrate the rules. You'll do this only when teaching yourself, when prototyping new code, or when debugging. Rather, you'll typically create the literals programmatically—often in a client-side program that parses out the data values from, for example, an XML text file or, these days, probably a JSON text file. In these scenarios, the target array is likely to have the data type _"some_user_defined_row_type[]"_. And when you create literals programmatically, you want to use the simplest rules that work and you have no need at all to omit arguably unnecessary double quote characters. + +**Yugabyte recommends that the array literals that you generate programmatically are always spelled using the canonical representations** + +You can relax this recommendation, to make tracing or debugging your code easier (as mentioned above), by using a newline between each successive encoded value in the array—at least when the values themselves use a lot of characters, as they might for _"row"_ type values. + +**Note:** You can hope that the client side programming language that you use, together with the driver that you use to issue SQL to YugabyteDB and to retrieve results, will allow the direct use of data types that your language defines that map directly to the YSQL array and _"row"_ type, just as they have scalar data types that map to `int`, `text`, `timestamp`, and `boolean`. For example Python has _"list"_ that maps to array and _"tuple"_ that maps to _"row"_ type. And the _"psycopg2"_ driver that you use for YugabyteDB can map values of these data types to, for example, a `PREPARE` statement like the one shown below. + +**Note**: YSQL has support for converting a JSON array (and this includes a JSON array of JSON objects) directly into the corresponding YSQL array values. + +The rules for constructing literals for arrays of _"row"_ type values are described in [literal for an array of "row" type values](../array-of-rows/) section. 
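+
+As a minimal sketch of the JSON route that the second note above mentions (it assumes nothing beyond the `jsonb_array_elements_text()` built-in), a JSON array can be converted into a YSQL array value without writing any array literal by hand:
+
+```plpgsql
+select array_agg(elt::int) as "int[] value from a JSON array"
+from jsonb_array_elements_text('[17, 42, 53]'::jsonb) as t(elt);
+```
+
+It produces the `int[]` value `{17,42,53}`.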
+
+Your program will parse the input and create the required literals as ordinary text strings that you'll then provide as the actual argument to a `PREPARE` statement execution, leaving the typecast of the `text` actual argument, to the appropriate array data type, to the prepared `INSERT` or `UPDATE` statement like this:
+```
+prepare stmt(text) as insert into t(rs) values($1::rt[]);
+```
+Assume that, in this example, _"rt"_ is some particular user-defined _"row"_ type.
+
+## Examples to illustrate the rules
+
+Here are some examples of kinds of arrays of primitive values:
+
+- array of numeric values (like `int` and `numeric`)
+- array of stringy values (like `text`, `varchar`, and `char`)
+- array of date-time values (like `timestamp`)
+- array of `boolean` values.
+
+In order to illustrate the rules that govern the construction of an array literal, it is sufficient to consider only these.
+
+You'll use the `array[]` constructor to create representative values of each kind and inspect its `::text` typecast.
+
+### One-dimensional array of int values
+
+This example demonstrates the principle:
+
+```plpgsql
+create table t(k serial primary key, v1 int[], v2 int[]);
+insert into t(v1) values (array[1, 2, 3]);
+select v1::text as text_typecast from t where k = 1
+\gset result_
+\echo :result_text_typecast
+```
+The `\gset` meta-command was used first in this _"Array data types and functionality"_ major section in [`array_agg()` and `unnest()`](../../functions-operators/array-agg-unnest).
+
+Notice that, in this example, the `SELECT` statement is terminated by the `\gset` meta-command on the next line rather than by the usual semicolon. The `\gset` meta-command is silent. The `\echo` meta-command shows this:
+
+```
+{1,2,3}
+```
+You can see the general form already:
+
+- The (_text_ of an) array literal starts with the left curly brace and ends with the right curly brace.
+
+- The items within the braces are delimited by commas, and there is no space between one item, the comma, and the next item. Nor is there any space between the left curly brace and the first item or between the last item and the right curly brace.
+
+[One-dimensional array of `text` values](./#one-dimensional-array-of-text-values) shows that more needs to be said. But the two rules that you've already noticed always hold.
+
+To use the literal that you produced to create a value, you must enquote it and typecast it. Do this with the `\set` meta-command:
+
+```plpgsql
+\set canonical_literal '\'':result_text_typecast'\'::int[]'
+\echo :canonical_literal
+```
+The `\echo` meta-command now shows this:
+```
+'{1,2,3}'::int[]
+```
+Next, use the canonical literal that was produced to update _"t.v2"_ to confirm that the value that the row constructor created was recreated:
+```plpgsql
+update t set v2 = :canonical_literal where k = 1;
+select (v1 = v2)::text as "v1 = v2" from t where k = 1;
+```
+It shows this:
+```
+ v1 = v2
+---------
+ true
+```
+As promised, the canonical form of the array literal does indeed recreate the identical value that the `array[]` constructor created.
+
+**Note:**
+
+Try this:
+```plpgsql
+select 12512454.872::text;
+```
+The result is the canonical form, `12512454.872`. So this (though you rarely see it):
+```plpgsql
+select 12512454.872::numeric;
+```
+runs without error. Now try this:
+
+```plpgsql
+select to_number('12,512,454.872', '999G999G999D999999')::text;
+```
+This, too, runs without error because it uses the `to_number()` built-in function.
The result here, too, is the canonical form, `12512454.872`—with no commas. Now try this: + +```plpgsql +select '12,512,454.872'::numeric; +``` +This causes the _"22P02: invalid input syntax for type numeric"_ error. In other words, _only_ a `numeric` value in canonical form can be directly typecast using `::numeric`. + +Here, using an array literal, is an informal first look at what follows. For now, take its syntax to mean what you'd intuitively expect. You must spell the representations for the values in a `numeric[]` array in canonical form. Try this: + +```plpgsql +select ('{123.456, -456.789}'::numeric[])::text; +``` +It shows this: + +``` + {123.456,-456.789} +``` + +Now try this: +```plpgsql +select ('{9,123.456, -8,456.789}'::numeric[])::text; +``` +It silently produces this presumably unintended result (an array of _four_ numeric values) because the commas are taken as delimiters and not as part of the representation of a single `numeric` value: +``` + {9,123.456,-8,456.789} +``` +In an array literal (or in a _"row"_ type value literal), there is no way to accommodate forms that cannot be directly typecast. (The same holds for `timestamp` values as for `numeric` values.) YSQL inherits this limitation from PostgreSQL. It is the user's responsibility to work around this when preparing the literal because, of course, functions like _"to_number()"_ cannot be used within literals. Functions can, however, be used in a value constructor as [`array[]` value constructor](../../array-constructor/) shows. + +### One-dimensional array of text values + +Use [One-dimensional array of `int` values](./#one-dimensional-array-of-int-values) as a template for this and the subsequent sections. The example sets array values each of which, apart from the single character `a`, needs some discussion. These are the characters (or, in one case, character sequence), listed here "bare" and with ten spaces between each: + +``` + a a b () , ' " \ +``` + +```plpgsql +create table t(k serial primary key, v1 text[], v2 text[]); +insert into t(v1) values (array['a', 'a b', '()', ',', '{}', $$'$$, '"', '\']); +select v1::text as text_typecast from t where k = 1 +\gset result_ +\echo :result_text_typecast +``` +For ordinary reasons, something special is needed to establish the single quote within the surrounding array literal which itself must be enquoted for using in SQL. Dollar quotes are a convenient choice. The `\echo` meta-command shows this: + +``` +{a,"a b",(),",","{}",',"\"","\\"} +``` +This is rather hard (for the human) to parse. To make the rules easier to see, this list doesn't show the left and right curly braces. And the syntactically significant commas are surrounded with four spaces on each side: +``` + a , "a b" , () , "," , "{}" ' , "\"" , "\\" +``` +In addition to the first two rules, notice the following. + +- Double quotes are used to surround a value that includes any spaces. (Though the example doesn't show it, this applies to leading and trailing spaces too.) +- The left and right parentheses are _not_ surrounded with double quotes. Though these have syntactic significance in other parsing contexts, they are insignificant within the curly braces of an array literal. +- The comma _has_ been surrounded by double quotes. This is because it _does_ have syntactic significance, as the value delimiter, within the curly braces of an array literal. +- The curly braces _have_ been surrounded by double quotes. 
This is because interior curly braces _do_ have syntactic significance, as you'll see below, in the array literal for a multidimensional array. +- The single quote is _not_ surrounded with double quotes. Though it has syntactic significance in other parsing contexts, it is insignificant within the curly braces of an array literal. This holds, also, for all sorts of other punctuation characters like `;` and `:` and `[` and `]` and so on. +- The double quote has been escaped with a single backslash and this has been then surrounded with double quotes. This is because it _does_ have syntactic significance, as the (one and only) quoting mechanism, within the curly braces of an array literal. +- The backslash has also been escaped with another single backslash and this has been then surrounded with double quotes. This is because it _does_ have syntactic significance, as the escape character, within the curly braces of an array literal. + +There's another rule that the present example does not show. Though not every comma-separated value was surrounded by double quotes, it's _never harmful_ to do this. You can confirm this with your own test, Yugabyte recommends that, for consistency, you always surround every `text` value within the curly braces for a `text[]` array literal with double quotes. + +To use the text of the literal that was produced above to recreate the value, you must enquote it and typecast it. Do this, as you did for the `int[]` example above, with the `\set` meta-command. But you must use dollar quotes because the literal itself has an interior single quote. + +```plpgsql +\set canonical_literal '$$':result_text_typecast'$$'::text[] +\echo :canonical_literal +``` +The `\echo` meta-command now shows this: +``` +$${a,"a b",(),",",',"\"","\\"}$$::text[] +``` +Next, use the canonical literal to update _"t.v2"_ to confirm that the value that the row constructor created was recreated: +```plpgsql +update t set v2 = :canonical_literal where k = 1; +select (v1 = v2)::text as "v1 = v2" from t where k = 1; +``` +Again, it shows this: +``` + v1 = v2 +--------- + true +``` +So, again as promised, the canonical form of the array literal does indeed recreate the identical value that the `array[]` constructor created. + +### One-dimensional array of timestamp values + +This example demonstrates the principle: + +```plpgsql +create table t(k serial primary key, v1 timestamp[], v2 timestamp[]); +insert into t(v1) values (array[ + '2019-01-27 11:48:33'::timestamp, + '2020-03-30 14:19:21'::timestamp + ]); +select v1::text as text_typecast from t where k = 1 +\gset result_ +\echo :result_text_typecast +``` +The `\echo` meta-command shows this: + +``` +{"2019-01-27 11:48:33","2020-03-30 14:19:21"} +``` +You learn one further rule from this: + +- The `::timestamp` typecastable strings within the curly braces are tightly surrounded with double quotes. + +To use the text of the literal that was produced to create a value, you must enquote it and typecast it. Do this with the `\set` meta-command: + +```plpgsql +\set canonical_literal '\'':result_text_typecast'\'::timestamp[]' +\echo :canonical_literal +``` +. 
The `\echo` meta-command now shows this: +``` +'{"2019-01-27 11:48:33","2020-03-30 14:19:21"}'::timestamp[] +``` +Next, use the canonical literal to update _"t.v2"_ to confirm that the value that the row constructor created was recreated: +```plpgsql +update t set v2 = :canonical_literal where k = 1; +select (v1 = v2)::text as "v1 = v2" from t where k = 1; +``` +It shows this: +``` + v1 = v2 +--------- + true +``` +Once again, as promised, the canonical form of the array literal does indeed recreate the identical value that the `array[]` constructor created. + +### One-dimensional array of boolean values (and NULL in general) + +This example demonstrates the principle: + +```plpgsql +create table t(k serial primary key, v1 boolean[], v2 boolean[]); +insert into t(v1) values (array[ + true, + false, + null + ]); +select v1::text as text_typecast from t where k = 1 +\gset result_ +\echo :result_text_typecast +``` +The `\echo` meta-command shows this: + +``` +{t,f,NULL} +``` +You learn two further rules from this: + +- The canonical representations of `TRUE` and `FALSE` within the curly braces for a `boolean[]` array are `t` and `f`. They are not surrounded by double quotes. +- To specify `NULL`, the canonical form uses upper case `NULL` and does not surround this with double quotes. + +Though the example doesn't show this, `NULL` is not case-sensitive. But to compose a literal that adheres to canonical form, you ought to spell it using upper case. And this is how you specify `NULL` within the array literal for _any_ data type. (A different rule applies for fields within the literal for _"row"_ type value). + +**Note:** If you surrounded `NULL` within a literal for a `text[]` array, then it would be silently interpreted as an ordinary `text` value that just happens to be spelled that way. + +To use the literal that was produced to create a value, you must enquote it and typecast it. Do this with the `\set` meta-command: + +```plpgsql +\set canonical_literal '\'':result_text_typecast'\'::boolean[]' +\echo :canonical_literal +``` +. The `\echo` meta-command now shows this: +``` +'{t,f,NULL}'::boolean[] +``` +Next use the canonical literal to update _"t.v2"_ to can confirm that the value that the row constructor created has been recreated : +```plpgsql +update t set v2 = :canonical_literal where k = 1; +select (v1 = v2)::text as "v1 = v2" from t where k = 1; +``` +It shows this: +``` + v1 = v2 +--------- + true +``` +Yet again, as promised, the canonical form of the array literal does indeed recreate the identical value that the `array[]` constructor created. + +### Multidimensional array of int values + +```plpgsql +create table t(k serial primary key, v int[]); + +-- Insert a 1-dimensional int[] value. +insert into t(v) values(' + {1, 2} + '::int[]); + +-- Insert a 2-dimensional int[] value. +insert into t(v) values(' + { + {1, 2}, + {3, 4} + } + '::int[]); + +-- Insert a 3-dimensional int[] value. +insert into t(v) values(' + { + { + {1, 2}, {3, 4} + }, + { + {5, 6}, {7, 8} + } + } + '::int[]); + +-- Insert a 3-dimensional int[] value, specifying +-- the lower and upper bounds along each dimension. +insert into t(v) values(' + [3:4][5:6][7:8]= + { + { + {1, 2}, {3, 4} + }, + { + {5, 6}, {7, 8} + } + } + '::int[]); + +select k, array_ndims(v) as "ndims", v::text as "v::text" from t order by k; +``` +Notice that the three different `INSERT` statements define arrays with different dimensionality, as the comments state. 
This illustrates what was explained in [Synopsis](../#synopsis): the column _"t.v"_ can hold array values of _any_ dimensionality.
+
+Here is the `SELECT` result:
+
+```
+ k | ndims |                    v::text
+---+-------+-----------------------------------------------
+ 1 |     1 | {1,2}
+ 2 |     2 | {{1,2},{3,4}}
+ 3 |     3 | {{{1,2},{3,4}},{{5,6},{7,8}}}
+ 4 |     3 | [3:4][5:6][7:8]={{{1,2},{3,4}},{{5,6},{7,8}}}
+```
+Again, whitespace in the inserted literals for numeric values is insignificant, and the `text` typecasts use whitespace (actually, the lack thereof) conventionally.
+
+The literal for a multidimensional array has nested `{}` pairs, according to the dimensionality, and the innermost pair contains the literals for the primitive values.
+
+Notice the spelling of the array literal for the row with _"k = 4"_. The optional syntax `[3:4][5:6][7:8]` specifies the lower and upper bounds, respectively, for the first, the second, and the third dimension. This is the same syntax that you use to specify a slice of an array. (The [array slice operator](../../functions-operators/slice-operator) is described in its own section.) When the freedom to specify the bounds is not exercised, then they are assumed all to start at `1`, and then the canonical form of the literal does not show the bounds.
+
+When the freedom is exercised, the bounds for _every_ dimension must be specified. Specifying the bounds gives you, of course, an opportunity for error. If the length along each axis that you (implicitly) specify doesn't agree with the lengths that emerge from the actual values listed between the surrounding outer `{}` pair, then you get the _"22P02 invalid_text_representation"_ error with this prose explanation:
+
+```
+malformed array literal...
+Specified array dimensions do not match array contents.
+```
diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/literals/array-of-rows.md b/docs/content/v2.25/api/ysql/datatypes/type_array/literals/array-of-rows.md
new file mode 100644
index 000000000000..ad00dc4dd214
--- /dev/null
+++ b/docs/content/v2.25/api/ysql/datatypes/type_array/literals/array-of-rows.md
@@ -0,0 +1,241 @@
+---
+title: The literal for an array of rows
+linkTitle: Array of rows
+headerTitle: The literal for an array of "row" type values
+description: The literal for an array of "row" type values
+menu:
+  preview_api:
+    identifier: array-of-rows
+    parent: array-literals
+    weight: 40
+type: docs
+---
+
+You now combine the understanding of how to write the literal for an array of primitive values with that of how to write the literal for a _"row"_ type value.
+
+This section uses the same approach as these sections: [The literal for an array of primitive values](../array-of-primitive-values/) and [The literal for a _"row"_ type value](../row/). First, it states the rules, and then it illustrates these with examples.
+
+## Statement of the rules
+
+Just as in [Statement of the rules](../array-of-primitive-values/#statement-of-the-rules) that stated the rules for literals for an array of primitive values, the statement of these rules depends on understanding the notion of the canonical form of a literal.
+
+If you follow the rules that are stated here and illustrated in the demonstration below, then you will always produce a syntactically valid literal which expresses the semantics that you intend. There are many other legal variants—especially because of the freedoms for `text[]` values. These can also produce the result that you intend.
However, these rules will not be documented because it is always sufficient to create your literals in canonical form.
+
+The sufficient set of rules can be stated tersely:
+
+- Start off with the opening left curly brace.
+
+- First, prepare the literal for each _"row"_ type value according to the rules set out in [The literal for a _"row"_ type value](../row/).
+
+- Then, understand that when these are used within the literal for a _"row"_ type value within the literal for an array, the _"row"_ must itself be surrounded with double quotes, just as is the rule for, say, `timestamp` values or `text` values that include spaces or other troublesome characters.
+
+- Then understand that this implies that any occurrences of double quotes and backslashes within the surrounding parentheses of the _"row"_ type literal must be escaped a second time: _double-quote_ becomes _backslash-double-quote_; and _backslash_ becomes _backslash-backslash_.
+
+- Therefore, to avoid wrongly escaping the double quotes that will surround the parentheses,
+
+  - _first_, do the inner escaping
+
+  - _and only then_, surround the complete representation for the _"row"_ type value with unescaped double quotes.
+
+- Finish off with the closing right curly brace.
+
+These rules are presented in [Pseudocode for generating the literal for a one-dimensional array of "row" type values](./#pseudocode-for-generating-the-literal-for-a-one-dimensional-array-of-row-type-values).
+
+## Example to illustrate the rules
+
+The example uses a _"row"_ type with four fields: an `int` field; a `text` field; a `timestamp` field; and a `boolean` field. This is enough to illustrate all of the rules. These "challenging" characters need particular care:
+```
+ ,   (   )   "   \
+```
+First, create the _"row"_ type:
+```plpgsql
+create type rt as (n int, s text, t timestamp, b boolean);
+```
+Next, you create a table with a column with data type _"rt"_ so that you can populate it with six rows that jointly, in their `text` fields, use all of the "challenging" characters listed above:
+```plpgsql
+create table t1(k int primary key, v rt);
+```
+Finally, you populate the table by building the _"row"_ type values bottom-up using appropriately typed PL/pgSQL variables in a `DO` block and inspect the result. This technique allows the actual primitive values that were chosen for this demonstration to be seen individually as the ordinary SQL literals that each data type requires. This makes the code more readable and more understandable than any other approach. In other words, it shows that, for humanly written code, the usability of a value constructor for any composite value is much greater than that of the literal that produces the same value. Of course, this benefit is of no consequence for a programmatically constructed literal.
+```plpgsql +do $body$ +declare + n1 constant int := 1; + s1 constant text := ' '; + t1 constant timestamp := '2091-01-20 12:10:05'; + b1 constant boolean := true; + + n2 constant int := 2; + s2 constant text := ','; + t2 constant timestamp := '2002-01-20 12:10:05'; + b2 constant boolean := false; + + n3 constant int := 3; + s3 constant text := '('; + t3 constant timestamp := '2003-01-20 12:10:05'; + b3 constant boolean := null; + + n4 constant int:= 4; + s4 constant text := ')'; + t4 constant timestamp := '2004-01-20 12:10:05'; + b4 constant boolean := true; + + n5 constant int:= 5; + s5 constant text := '"'; + t5 constant timestamp := '2005-01-20 12:10:05'; + b5 constant boolean := false; + + n6 constant int:= 6; + s6 constant text := '\'; + t6 constant timestamp := '2006-01-20 12:10:05'; + b6 constant boolean := null; +begin + insert into t1(k, v) values + (1, (n1, s1, t1, b1)), + (2, (n2, s2, t2, b2)), + (3, (n3, s3, t3, b3)), + (4, (n4, s4, t4, b4)), + (5, (n5, s5, t5, b5)), + (6, (n6, s6, t6, b6)); +end; +$body$; + +select v::text as lit from t1 order by k; +``` + +This is the result: +``` + lit +---------------------------------- + (1," ","2091-01-20 12:10:05",t) + (2,",","2002-01-20 12:10:05",f) + (3,"(","2003-01-20 12:10:05",) + (4,")","2004-01-20 12:10:05",t) + (5,"""","2005-01-20 12:10:05",f) + (6,"\\","2006-01-20 12:10:05",) +``` + +The `int` field and the `timestamp` field are unremarkable given only that you understand that the representation of the `timestamp` values, in order to meet the canonical form requirement, must be double-quoted. The `boolean` fields are unremarkable, too, as long as you remember that `NULL` is represented by leaving no space between the delimiters that surround that field. This leaves just the `text` fields for consideration. Here are the field representations themselves, without the clutter of the delimiters: +``` + " " "," "(" ")" """" "\\" +``` + +The first four are unremarkable, as long as you remember that each of these four single characters, as shown at the start, must be ordinarily surrounded by double quotes. That leaves just the last two: + +- The single double quote occurrence, in the source data, must be doubled up and then surrounded by double quotes. +- The single backslash occurrence, in the source data, must be doubled up and then surrounded by double quotes. + +Next, you concatenate these six _"row"_ type values into an array value by using the `array_agg()` function (described in [`array_agg()`](../../functions-operators/array-agg-unnest/#array-agg)), like this: +```plpgsql +select array_agg(v order by k) from t1; +``` + +The demonstration is best served by inserting this value into a new table, like this: +```plpgsql +create table t2(k int primary key, v1 rt[], v1_text_typecast text, v2 rt[]); +insert into t2(k, v1) +select 1, array_agg(v order by k) from t1; +``` + +The `\get` technique that you used in the earlier sections is not viable here because there's an upper limit on its size. 
So, instead insert the literal that you produce by `text` typecasting _"t2.v1"_ into the companion _"v1_text_typecast"_ field in the same table, like this: + + +```plpgsql +update t2 set v1_text_typecast = +(select v1::text from t2 where k = 1); +``` +Finally, use this array literal to recreate the original value and check that it's identical to what you started with, thus: +```plpgsql +update t2 set v2 = +(select v1_text_typecast from t2 where k = 1)::rt[]; + +select (v1 = v2)::text as "v1 = v2" from t2 where k = 1; +``` +As promised, the canonical form of the array literal does indeed recreate the identical value that the `array_agg()` function created: + +``` + v1 = v2 +--------- + true +``` + +You haven't yet looked at the literal for the array of _"row"_ type values. Now is the moment to do so, thus: +```plpgsql +select v1_text_typecast from t2 where k = 1; +``` +The result that's produced is too hard to read without some manual introduction of whitespace. But this is allowed around the commas that delimit successive values within an array literal, thus: + +``` +{ + "(1,\"a \",\"2091-01-20 12:10:05\",t)", + "(2,\", \",\"2002-01-20 12:10:05\",f)", + "(3,\"( \",\"2003-01-20 12:10:05\",)", + "(4,\" )\",\"2004-01-20 12:10:05\",t)", + "(5,\"\"\"\",\"2005-01-20 12:10:05\",f)", + "(6,\"\\\\\",\"2006-01-20 12:10:05\",)" +} +``` + +With some effort, you'll see that this is indeed the properly formed canonical representation for the literal for an array of _"row"_ type values that the rules set out above specify. + +## Multidimensional array of "row" type values + +You can work out the rules for a multidimensional array of _"row"_ type values, should you need these, by straightforward induction from what has already been explained this enclosing section. + +## Pseudocode for generating the literal for a one-dimensional array of "row" type values + +This pseudocode shows how to create an array literal of _"row"_ type values that have the same shape as _"type rt"_ in the example above. The input is a succession of an arbitrary number of _"(n, s, t, b)"_ tuples. The text below was derived by straightforward manual massage from actual working, and tested, Python code. The code was written as an exercise to verify the correctness of the algorithm. + +The pseudocode does retain Python locutions, but don't be distracted by this. The meaning is clear enough to allow the algorithm to be described. The various special characters were all set up as manifest constants with self-describing names. + +Notice that the algorithm inserts a newline after the opening curly brace, between the pairs of representations of each _"row"_ type value, and before the closing curly brace. While, strictly speaking, this means that the literal it produces is not in canonical form, this has no effect (as has been shown many times by example throughout this _"Array data types and functionality"_ major section). + +``` +"Start a new array literal": + wip_literal = lft_crly_brace + nl + +"For each next (n, s, t, b) tuple that defines a "row" type value": + curr_rec = dbl_quote + lft_parens + + # Field "n" maps to a SQL numeric + if n is None: + curr_rec += comma + else: + curr_rec += (str(n) + comma) + + # Field "s" maps to a SQL text. + if s is None: + curr_rec += comma + else: + # First, do the escaping needed for any stringy value + # as field in record literal value. 
+    s = s.replace(bk_slash, two_bk_slashes)
+    s = s.replace(dbl_quote, two_dbl_quotes)
+    s = dbl_quote + s + dbl_quote
+
+    # Next, do the escaping to fix the bare record representation
+    # for use as an array element.
+    s = s.replace(bk_slash, two_bk_slashes)
+    s = s.replace(dbl_quote, bk_slash_dbl_quote)
+    curr_rec += (s + comma)
+
+  # Field "t" maps to a SQL timestamp.
+  if t is None:
+    curr_rec += comma
+  else:
+    curr_rec += (bk_slash_dbl_quote + t + bk_slash_dbl_quote + comma)
+
+  # Field "b" maps to a SQL boolean.
+  # It's the last field; do nothing if it's neither "t" nor "f".
+  if (b == "t" or b == "f"):
+    curr_rec += b
+
+  # Now there are no more fields: close this record's representation
+  # and append it to the work-in-progress literal.
+  curr_rec = curr_rec + rgt_parens + dbl_quote
+  wip_literal = wip_literal + curr_rec + comma + nl
+
+# Now there are no more input tuples.
+"Finish off":
+  # Remove the final (comma + nl), put the nl back,
+  # and add the closing curly brace.
+  wip_literal = wip_literal[:-2] + nl + rgt_crly_brace
+```
diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/literals/row.md b/docs/content/v2.25/api/ysql/datatypes/type_array/literals/row.md
new file mode 100644
index 000000000000..4a35e8f6ad51
--- /dev/null
+++ b/docs/content/v2.25/api/ysql/datatypes/type_array/literals/row.md
@@ -0,0 +1,350 @@
+---
+title: The literal for a row
+linkTitle: Row
+headerTitle: The literal for a "row" type value
+description: The literal for a "row" type value
+menu:
+  preview_api:
+    identifier: row
+    parent: array-literals
+    weight: 30
+type: docs
+---
+
+The word "row" has two different uses; but these uses are really different sides of the same coin. A row in a schema-level table is actually an occurrence of a _"row"_ type—in other words, a _"row"_ type value. In this case, the schema-level _"row"_ type is created automatically as a side effect of executing the `CREATE TABLE` statement. It has the same name as the table. (This is allowed because tables and types are in different namespaces.) Further, a column in a schema-level table can have a user-defined _"row"_ type as its data type, and in this case the _"row"_ type need not be partnered with a table.
+
+You might see the term _"record"_ when you use the `\df` meta-command to show the signature of a function. Briefly, it's an anonymous _"row"_ type. You produce a record instance when you use a literal that has the correct form of a _"row"_ type but omit the typecast operator. If you adhere to recommended practice, and always explicitly typecast such literals, then you needn't try to understand what a record is.
+
+You can read more about these notions in the PostgreSQL documentation here:
+
+- Section [43.3.4. Row Types](https://www.postgresql.org/docs/15/plpgsql-declarations.html#PLPGSQL-DECLARATION-ROWTYPES)
+
+- Section [43.3.5. Record Types](https://www.postgresql.org/docs/15/plpgsql-declarations.html#PLPGSQL-DECLARATION-RECORDS)
+
+You need first to understand how to write a literal for a _"row"_ type value before you can understand, as [The literal for an array of "row" type values](../array-of-rows/) explains, how to write the literal for an array of such values.
+
+This section uses the same approach as [The literal for an array of primitive values](../array-of-primitive-values/): first it states the rules; and then it illustrates these with examples.
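+
+As a quick, minimal illustration of the difference between a value of a named _"row"_ type and an anonymous record (the type name _"two_int_rt"_ is invented just for this sketch, and nothing else depends on it), compare the reported data types of a typecast literal and a bare, untypecast constructor:
+
+```plpgsql
+create type two_int_rt as (a int, b int);
+
+-- The typecast literal has the named "row" type; the bare constructor is an anonymous record.
+select
+  pg_typeof('(1,2)'::two_int_rt) as "typecast literal",
+  pg_typeof(row(1, 2))           as "bare constructor";
+```
+
+The first column shows _two_int_rt_ and the second column shows _record_.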
+ +## Statement of the rules + +Just as in [Statement of the rules](../array-of-primitive-values/#statement-of-the-rules) in the _"The literal for an array of primitive values"_ section, the statement of these rules depends on understanding the notion of the canonical form of a literal. + +If you follow the rules that are stated here, and illustrated in the demonstrations below, then you will always produce a syntactically valid literal which expresses the semantics that you intend. It turns out that many other variants, especially for `text[]` values, are legal and can produce the result that you intend. However, the rules that govern these exotic uses will not be documented because it is always sufficient to create your literals in canonical form. + +Here is the sufficient set of rules. + +- The commas that delimit successive values, and opening and closing parentheses, must not be surrounded by whitespace. +- Do _not_ surround the individual representations of numeric and `boolean` primitive values with double quotes. +- _Do_ surround the individual representations of `timestamp` values with double quotes, even though this is not strictly necessary. +- _Do_ surround every individual representation of a `text` value with double quotes, even though this is not always necessary. It _is_ necessary for any value that itself contains, as ordinary text, any whitespace or any of the characters that have syntactic significance within the outermost curly brace pair. This is the list: + +```output + ( ) , " \ +``` + +- It's sufficient then to write all special characters ordinarily within the enclosing double quotes except for each of the double quote character itself and the backslash character. These must be escaped. The double quote character is escaped by doubling it up. And the backslash character is escaped with an immediately preceding single backslash. + +- To specify that the value for a field is `NULL` , you must leave no whitespace between the pair of delimiters (Left parenthesis, comma, or right parenthesis) that surround its position. (This is the only choice.) + +## Always write array literals in canonical form + +Exactly the same considerations apply here as were explained in [Always write array literals in canonical form](../array-of-primitive-values/#always-write-array-literals-in-canonical-form) in the section that explained the rules for literals for an array of primitive values. + +## Examples to illustrate the rules + +It will be sufficient to consider _"row"_ types with fields of just these data types: + +- numeric data types (like `int` and `numeric`) +- stringy data types (like `text`, `varchar`, and `char`) +- date-time data types (like `timestamp`) +- the `boolean` data type. + +Use the _"row"_ type constructor to create representative values of each kind and inspect its `::text` typecast. + +### "Row" type with int fields + +This example demonstrates the principle: + +```plpgsql +create type row_t as (f1 int, f2 int, f3 int); +create table t(k serial primary key, v1 row_t, v2 row_t); +insert into t(v1) values (row(1, 2, 3)::row_t); +select v1::text as text_typecast from t where k = 1 +\gset result_ +\echo :result_text_typecast +``` + +The keyword `ROW` names the _"row"_ type constructor function. It is optional, but is used here for emphasis. + +The `\gset` meta-command was used first in this _"Array data types and functionality"_ major section in [`array_agg()` and `unnest()`](../../functions-operators/array-agg-unnest). 
+
+Notice that, in this example, the `SELECT` statement is terminated by the `\gset` meta-command on the next line rather than by the usual semicolon. The `\gset` meta-command is silent. The `\echo` meta-command shows this:
+
+```output
+(1,2,3)
+```
+
+In this case, the value of the `::text` typecast has the identical form to that of the _"row"_ type constructor. But, as is seen below, this is not generally the case.
+
+You can see the general form already:
+
+- The (_text_ of) a _"row"_ type literal starts with the left parenthesis and ends with the right parenthesis.
+
+- The items within the parentheses are delimited by commas, and there is no space between one item, the comma, and the next item. Nor is there any space between the left parenthesis and the first item or between the last item and the right parenthesis.
+
+The next section, [_"Row"_ type with `text` fields](./#row-type-with-text-fields), shows that more needs to be said. But the two rules that you have already noticed always hold.
+
+To use the text of the literal that was produced to create a value, you must enquote it and typecast it. Do this with the `\set` meta-command:
+
+```plpgsql
+\set canonical_literal '\'':result_text_typecast'\''::row_t
+\echo :canonical_literal
+```
+
+The `\echo` meta-command now shows this:
+
+```output
+'(1,2,3)'::row_t
+```
+
+Next, use the canonical literal that you produced to update _"t.v2"_ to confirm that the value that the row constructor created was recreated:
+
+```plpgsql
+update t set v2 = :canonical_literal where k = 1;
+select (v1 = v2)::text as "v1 = v2" from t where k = 1;
+```
+
+It shows this:
+
+```output
+ v1 = v2
+---------
+ true
+```
+
+As promised, the canonical form of the _"row"_ type literal does indeed recreate the identical value that the _"row"_ type constructor created.
+
+### "Row" type with text fields
+
+Use [_"Row"_ type with `int` fields](./#row-type-with-int-fields) as a template for this and the subsequent sections. The example sets the `text` field values, each of which, apart from the single character `a`, needs some discussion. These are the characters (or, in one case, character sequence), listed here "bare" and with ten spaces between each:
+
+```output
+  a          '          a b          ()          ,          "          \          null
+```
+
+```plpgsql
+create type row_t as (f1 text, f2 text, f3 text, f4 text, f5 text, f6 text, f7 text, f8 text);
+create table t(k serial primary key, v1 row_t, v2 row_t);
+insert into t(v1) values (
+  ('a', $$'$$, 'a b', '()', ',', '"', '\', null)::row_t);
+
+select v1::text as text_typecast from t where k = 1
+\gset result_
+\echo :result_text_typecast
+```
+
+Here, the `ROW` keyword in the _"row"_ type constructor function is omitted to emphasize its optional status.
+
+The `\echo` meta-command shows this:
+
+```output
+(a,',"a b","()",",","""","\\",)
+```
+
+This is rather hard (for the human) to parse. To make the rules easier to see, the syntactically significant commas are surrounded with three spaces on each side:
+
+```output
+(   a   ,   '   ,   "a b"   ,   "()"   ,   ","   ,   """"   ,   "\\"   ,   )
+```
+
+**Note:** The introduction of spaces here, to help readability, is done _only_ for that purpose. Unlike the case of an array literal, doing this actually affects the value that the literal produces. You will demonstrate this at the end of this section.
+
+In addition to the first two rules, you notice the following.
+
+- Double quotes are used to surround a value that includes any spaces. (Though the example doesn't show it, this applies to leading and trailing spaces too.)
+- The comma _has_ been surrounded by double quotes. This is because it _does_ have syntactic significance, as the value delimiter, within the parentheses of a _"row"_ type literal.
+- The parentheses _have_ been surrounded by double quotes. This is because these _do_ have syntactic significance.
+- The single quote is _not_ surrounded with double quotes. Though it has syntactic significance in other parsing contexts, it is insignificant within the parentheses of a _"row"_ type literal. This holds, also, for all sorts of other punctuation characters like `;` and `:` and `[` and `]` and so on.
+- The double quote has been escaped by doubling it up, and this has then been surrounded with double quotes. This is because it _does_ have syntactic significance, as the (one and only) quoting mechanism, within the parentheses of a _"row"_ type literal.
+- The backslash has also been escaped with another single backslash, and this has then been surrounded with double quotes. This is because it _does_ have syntactic significance, as the escape character, within the parentheses of a _"row"_ type literal.
+- `NULL` is represented in a _"row"_ type literal by the _absence_ of any characters between two successive delimiters: between the left parenthesis and the first comma, between two successive commas, or between the last comma and the right parenthesis.
+
+There's another rule that the present example does not show. Though not every comma-separated value was surrounded by double quotes, it's _never harmful_ to do this. You can confirm this with your own test. Yugabyte recommends that, for consistency, you always surround every `text` value within the parentheses of a _"row"_ type literal with double quotes.
+
+To use the text of the literal that was produced to create a value, you must enquote it and typecast it. Do this, as you did for the `int` example above, with the `\set` meta-command. But you must use dollar quotes because the literal itself has an interior single quote.
+
+```plpgsql
+\set canonical_literal '$$':result_text_typecast'$$'::row_t
+\echo :canonical_literal
+```
+
+The `\echo` meta-command now shows this:
+
+```output
+$$(a,',"a b","()",",","""","\\",)$$::row_t
+```
+
+Next, use the canonical literal that you produced to update _"t.v2"_ to confirm that the value that the row constructor created was recreated:
+
+```plpgsql
+update t set v2 = :canonical_literal where k = 1;
+select (v1 = v2)::text as "v1 = v2" from t where k = 1;
+```
+
+It shows this:
+
+```output
+ v1 = v2
+---------
+ true
+```
+
+So, again as promised, the canonical form of the array literal does indeed recreate the identical value that the _"row"_ type constructor created.
+
+Finally in this section, consider the meaning-changing effect of surrounding the comma delimiters with whitespace. Try this:
+
+```plpgsql
+create type row_t as (f1 text, f2 text, f3 text);
+select '( a , "(a b)" , c )'::row_t;
+```
+
+It shows this:
+
+```output
+ (" a "," (a b) "," c ")
+```
+
+You understand this by realizing that the entire run of characters between a pair of delimiters is taken as the value. And double quotes act as an _interior_ escaping mechanism. This model holds when, _but only when_, the value between a pair of delimiters is interpreted as a `text` value (because this is the data type of the declared _"row"_ type field at this position).
+
+This rule is different from the rule for an array literal. It's also different from the rules for JSON documents.
In these cases, the value is entirely _within_ the double quotes, and whitespace around punctuation characters outside of the double-quoted values is insignificant. + +**Note:** There is absolutely no need to take advantage of this understanding. Yugabyte recommends that you always use the "almost-canonical" form of the literal—in other words, you surround every single `text` value with double quotes, even when these are not needed, and you allow no whitespace between these double-quoted values and the delimiter at the start an end of each such value. + +### "Row" type with timestamp fields + +This example demonstrates the principle: + +```plpgsql +create type row_t as (f1 timestamp, f2 timestamp); +create table t(k serial primary key, v1 row_t, v2 row_t); +insert into t(v1) values (('2019-01-27 11:48:33', '2020-03-30 14:19:21')::row_t); +select v1::text as text_typecast from t where k = 1 +\gset result_ +\echo :result_text_typecast +``` + +The `\echo` meta-command shows this: + +```output +("2019-01-27 11:48:33","2020-03-30 14:19:21") +``` + +To use the text of the literal that was produced to create a value, you must enquote it and typecast it. Do this with the `\set` meta-command: + +```plpgsql +\set canonical_literal '\'':result_text_typecast'\''::row_t +\echo :canonical_literal +``` + +The `\echo` meta-command now shows this: + +```output +'("2019-01-27 11:48:33","2020-03-30 14:19:21")'::row_t +``` + +Next, use the canonical literal that you produced to update _"t.v2"_ to confirm that you have recreated the value that the row constructor created: + +```plpgsql +update t set v2 = :canonical_literal where k = 1; +select (v1 = v2)::text as "v1 = v2" from t where k = 1; +``` + +It shows this: + +```output + v1 = v2 +--------- + true +``` + +Once again, as promised, the canonical form of the array literal does indeed recreate the identical value that the _"row"_ type constructor created. + +### "Row" type with boolean fields + +This example demonstrates the principle: + +```plpgsql +create type row_t as (f1 boolean, f2 boolean, f3 boolean); +create table t(k serial primary key, v1 row_t, v2 row_t); +insert into t(v1) values ((true, false, null)::row_t); +select v1::text as text_typecast from t where k = 1 +\gset result_ +\echo :result_text_typecast +``` + +The `\echo` meta-command shows this: + +```output + (t,f,) +``` + +To use the text of the literal that was produced to create a value, you must enquote it and typecast it. Do this with the `\set` meta-command: + +```plpgsql +\set canonical_literal '\'':result_text_typecast'\''::row_t +\echo :canonical_literal +``` + +The `\echo` meta-command now shows this: + +```output +'(t,f,)'::row_t +``` + +Next, use the canonical literal that you produced to update _"t.v2"_ to confirm that the value that the row constructor created was recreated: + +```plpgsql +update t set v2 = :canonical_literal where k = 1; +select (v1 = v2)::text as "v1 = v2" from t where k = 1; +``` + +It shows this: + +```output + v1 = v2 +--------- + true +``` + +Yet again, as promised, the canonical form of the array literal does indeed recreate the identical value that the _"row"_ type constructor created. + +## Further examples + +There are other cases of interest like this: + +- a _"row"_ type whose definition include one or more fields whose data types are other user-defined _"row"_ types. + +The rules for such cases can be determined by induction from the rules that this section has stated and illustrated. 
+
+## "Row" type literal versus "row" type constructor
+
+The two notions, _type constructor_ and _literal_, are functionally critically different. You can demonstrate the difference using a `DO` block, because this lets you use a declared variable. It's more effort to do this using a SQL statement because you'd have to use a scalar subquery in place of the PL/pgSQL variable. The `ROW` keyword is deliberately omitted here to emphasize its optional status.
+
+```plpgsql
+create type rt as (n numeric, s text, t timestamp, b boolean);
+
+do $body$
+declare
+  n constant numeric := 42.17;
+  s constant text := 'dog house';
+  t constant timestamp := '2020-04-01 23:44:13';
+  b constant boolean := true;
+  r1 constant rt := (n, s, t, b)::rt;
+  r2 constant rt := '(42.17,"dog house","2020-04-01 23:44:13",t)'::rt;
+begin
+  assert r1 = r2, 'unexpected';
+end;
+$body$;
+```
+
+You can use the _"row"_ type constructor as an expression in the [`array[]` value constructor](../../array-constructor). But, of course, you can use only the literal for a _"row"_ type value within the _literal_ for an array. [The literal for an array of _"row"_ type values](../array-of-rows/) explains this.
diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/literals/text-typecasting-and-literals.md b/docs/content/v2.25/api/ysql/datatypes/type_array/literals/text-typecasting-and-literals.md
new file mode 100644
index 000000000000..0599fcee74ec
--- /dev/null
+++ b/docs/content/v2.25/api/ysql/datatypes/type_array/literals/text-typecasting-and-literals.md
@@ -0,0 +1,269 @@
+---
+title: text typecast of a value, literal for that value, and how they are related
+linkTitle: Text typecasting and literals
+headerTitle: The text typecast of a value, the literal for that value, and how they are related
+description: The text typecast of a value, the literal for that value, and how they are related
+menu:
+  preview_api:
+    identifier: text-typecasting-and-literals
+    parent: array-literals
+    weight: 5
+type: docs
+---
+
+This section establishes some basic notions that have a much broader scope of applicability than just arrays. But, because using array literals rests on these notions, they are summarized here.
+
+## The non-lossy round trip: value to text typecast and back to value
+
+Consider this pattern:
+```
+do $body$
+declare
+  original   constant <data type>  not null := <value>;
+  text_cast  constant text         not null := original::text;
+  recreated  constant <data type>  not null := text_cast::<data type>;
+begin
+  assert
+    (recreated = original),
+  'assert failed';
+end;
+$body$;
+```
+
+It demonstrates a universal rule that YSQL inherits from PostgreSQL:
+
+- Any value of any data type, primitive or composite, can be `::text` typecast. Similarly, there always exists a `text` value that, when properly spelled, can be typecast to a value of any desired data type, primitive or composite.
+- If you `::text` typecast a value of any data type and then typecast that `text` value to the original value's data type, then the value that you get is identical to the original value.
+
+The following `DO` block applies the pattern using a representative range of both primitive and composite data types. (The data type `text`, as the degenerate case, is not included.) It also displays the value of the `::text` typecast for each data type.
+
+Notice that the last test uses an array whose data type is the user-created `DOMAIN` _"int_arr_t"_. [Using an array of `DOMAIN` values](../../array-of-domains/) explains this notion. This is a real stress-test of the rule.
+ +```plpgsql +-- Needed by the '1-d array of "row" type values' test. +create type rt as (n numeric, s text, t timestamp, b boolean); + +-- Needed by the 'Ragged array' test. +create domain int_arr_t as int[]; + +do $body$ +begin + -- numeric + declare + original constant numeric not null := 42.1763; + text_cast constant text not null := original::text; + recreated constant numeric not null := text_cast::numeric; + begin + assert + (recreated = original), + 'assert failed'; + raise info 'numeric: %', text_cast; + end; + + -- timestamp + declare + original constant timestamp not null := now()::timestamp; + text_cast constant text not null := original::text; + recreated constant timestamp not null := text_cast::timestamp; + begin + assert + (recreated = original), + 'assert failed'; + raise info 'timestamp: %', text_cast; + end; + + -- timestamp with timezone + declare + original constant timestamptz not null := now()::timestamptz; + text_cast constant text not null := original::text; + recreated constant timestamptz not null := text_cast::timestamp; + begin + assert + (recreated = original), + 'assert failed'; + raise info 'timestamptz: %', text_cast; + end; + + -- boolean + declare + original constant boolean not null := true; + text_cast constant text not null := original::text; + recreated constant boolean not null := text_cast::boolean; + begin + assert + (recreated = original), + 'assert failed'; + raise info 'boolean: %', text_cast; + end; + + -- "row" type + declare + original constant rt not null := row(42.1763, 'dog house', now(), true); + text_cast constant text not null := original::text; + recreated constant rt not null := text_cast::rt; + begin + assert + (recreated = original), + 'assert failed'; + raise info '"row" type: %', text_cast; + end; + + -- 2-d array + declare + original constant int[] not null := array[array[1, 2], array[3, 4]]; + text_cast constant text not null := original::text; + recreated constant int[] not null := text_cast::int[]; + begin + assert + (recreated = original), + 'assert failed'; + raise info '2-d array %', text_cast; + end; + + -- 1-d array of "row" type values + declare + original constant rt[] not null := + array[ + row(42.1763, 'dog house', now(), true), + row(19.8651, 'cat flap', now() + interval '1' day, false) + ]; + text_cast constant text not null := original::text; + recreated constant rt[] not null := text_cast::rt[]; + begin + assert + (recreated = original), + 'assert failed'; + raise info 'array of "row" type: %', text_cast; + end; + + -- Ragged array: 1-d array of 1-da arrays of different lengths. + declare + arr_1 constant int_arr_t not null := array[1, 2]; + arr_2 constant int_arr_t not null := array[3, 4, 5]; + original constant int_arr_t[] not null := array[arr_1, arr_2]; + text_cast constant text not null := original::text; + recreated constant int_arr_t[] not null := text_cast::int_arr_t[]; + begin + assert + (recreated = original), + 'assert failed'; + raise info 'array of arrays: %', text_cast; + end; +end; +$body$; +``` +It produces this result (after manually removing the _"INFO:"_ prompt on each output line. 
+
+```
+numeric: 42.1763
+timestamp: 2020-05-03 22:25:42.932771
+timestamptz: 2020-05-03 22:25:42.932771-07
+boolean: true
+"row" type: (42.1763,"dog house","2020-05-03 22:25:42.932771",t)
+2-d array {{1,2},{3,4}}
+array of "row" type: {"(42.1763,\"dog house\",\"2020-05-03 22:25:42.932771\",t)","(19.8651,\"cat flap\",\"2020-05-04 22:25:42.932771\",f)"}
+array of arrays: {"{1,2}","{3,4,5}"}
+```
+[Multidimensional array of `int` values](../array-of-primitive-values/#multidimensional-array-of-int-values) explains the syntax of the _2-d array_ `text` value.
+
+[The literal for a _"row"_ type value](../row/) explains the syntax of the _"row" type_ `text` value.
+
+And [The literal for an array of "row" type values](../array-of-rows/) explains the syntax of the value: array of _"row" type_ `text` value.
+
+Notice how the syntax for the _array of arrays_ `text` value compares with the syntax for the _2-d array_ `text` value. Because the _array of arrays_ is ragged, the two inner `{}` pairs contain respectively two and three values. To distinguish between this case and the ordinary rectilinear case, the inner `{}` pairs are surrounded by double quotes.
+
+## boolean values show special text forms in ysqlsh
+
+Try this:
+```plpgsql
+select true as "bare true", true::text as "true::text";
+```
+This is the result:
+```
+ bare true | true::text
+-----------+------------
+ t         | true
+```
+For all but `boolean` values, the string of characters that `ysqlsh` uses to display any value is the `::text` typecast of that value. (After all, the only feasible means of display is strings of characters.) But uniquely for the two `boolean` values denoted by the keywords `TRUE` and `FALSE` it uses the single characters `t` and `f` rather than their `::text` typecasts—unless you explicitly write the typecast.
+
+This behavior is inherited from `psql`.
+
+You saw above that even when you explicitly `::text` typecast a composite value, `TRUE` and `FALSE` are represented as `t` and `f`. You can't influence this outcome because it has to do with the rules for deriving the `text` of the typecast and _not_ with the convention that `ysqlsh` uses. This asymmetry was established many years ago, and it will not change.
+
+## The relationship between the text typecast of a value and the literal that creates that value
+
+Try this in `ysqlsh`:
+```plpgsql
+select
+  42.932771::numeric as n,
+  'cat'::text as t1,
+  $$dog's breakfast$$::text as t2,
+  array[1, 2, 3]::int[] as "int array";
+```
+It shows the result:
+```
+     n     | t1  |       t2        | int array
+-----------+-----+-----------------+-----------
+ 42.932771 | cat | dog's breakfast | {1,2,3}
+```
+You won't be surprised by this. But you need to establish the proper terms of art that allow you to describe what's going on precisely and correctly. The remaining sections in [Creating an array value using a literal](../../literals/) rely on this.
+
+Consider this first:
+
+```
+42.932771::numeric
+```
+
+This is the literal that the SQL language (at least in the YSQL and PostgreSQL dialects) uses to establish the corresponding strongly-typed `numeric` value. (PL/pgSQL uses the same form for the same purpose.) But, to state the obvious, a SQL statement and a PL/pgSQL source are nothing but strings of characters. That means that, in the present context, this:
+
+```
+42.932771
+```
+
+is the _text_ of the literal.
+ +Now consider these two: + +``` +'cat'::text $$dog's breakfast$$::text +``` + +The parsing rules of both SQL and PL/pgSQL (or more properly stated, the definitions of the grammars of these two languages) require that `text` literals are enquoted. Moreover, there are two syntactic mechanisms for doing this: the ordinary single quote; and so-called dollar quotes, where `$$` is a special case of the more general `$anything_you_want$`. You might think that the `::text` typecast is redundant here. But don't forget that the text of these literals might be used to establish `varchar` or `char` values. + +You see already, then, that the rules for composing a `numeric` literal and a `text` literal are different: + +- You compose a `numeric` literal by following the bare text that specifies the intended value with the `::numeric` typecast operator. + +- You compose a `text` literal by enquoting the bare text that specifies the intended value (however you choose to do the quoting) and by then following this with the `::text` typecast operator. + +(If you did enquote the bare text in a `numeric` literal, then you would _not_ see an error. Rather, you would get implicit but undesirable behavior: first, a genuine `text` value would be generated internally, and then, this, in turn, would be typecast to the `numeric` value.) + +You've already seen, informally, some examples of array literals. Here is the rule: + +- You compose the bare text that specifies the intended value by writing an utterance in a dedicated grammar that starts with the left curly brace and ends with the right curly brace. (This grammar is the focus of the remainder of [Creating an array value using a literal](../../literals/).) Then you enquote this bare text (however you choose to do the quoting) and then typecast it to the desired target array data type. + +These are three special cases of a more general rule. In some cases (for example in the literal for a _"row"_ type value) the enquoting mechanism might be optional (depending on the intended value) and, when written uses _double quote_ as the enquoting character. But here, too, the general rule is the same. The bare text that specifies the intended value can always be correctly written as the `::text` typecast of that value. + +## Stating the general rule + +Here is the general rule. + +- The literal for a value of any data type is the possibly enquoted bare text that specifies the intended value, followed by the typecast operator to the desired target data type. +- This rule is applied recursively, for the literal for a composite value, but with different actual rules at different levels of nesting. For example, the literal for an array value as a whole must be typecast. But, because the data type of every value in the array is already determined, the bare text that specifies these values is _not_ typecast. +- The `::text` typecast of any value can always be used as the bare text of the literal that will recreate that value. + +You can see examples of the text of the literal that creates an array value by creating the value using the constructor and then inspecting its `::text` typecast. But the safe way to create the text of a literal for an intended value is to understand the syntax and semantics that govern its composition. 
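+
+For instance, here is a minimal sketch of the round trip for an `int[]` value, following the general rule (it depends on no previously created objects):
+
+```plpgsql
+-- Inspect the text of the literal by ::text typecasting a value created with the array[] constructor.
+select (array[10, 20, 30])::text as "text of the literal";
+
+-- Enquote that bare text and typecast it to recreate the identical value.
+select ('{10,20,30}'::int[] = array[10, 20, 30])::text as "round trip ok";
+```
+
+The first query shows _{10,20,30}_ and the second query shows _true_.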
+ +When this difference is important, the _"Array data types and functionality"_ major section distinguishes between: + +- (1) the _literal_ for the intended value (that is, the whole thing with the enquoting and the typecast, when one or both of these are needed); +- and (2) the _text of the literal_ for the intended value (that is, the bare text that specifies this value). + +**Note:** Often, the data type of the assignment target (for example, a field in a column in a schema-level table or a variable in a PL/pgSQL program) is sufficient implicitly to specify the intended typecast without writing the operator. But it's never harmful to write it. Moreover, in some cases, omitting the explicit typecast can bring performance costs. For this reason, Yugabyte recommends that you always write the explicit typecast unless you are certain beyond all doubt that omitting it brings no penalty. + +## Defining the "canonical form of a literal" + +The term _"canonical form"_ applies specifically to the _text of a literal_ rather than to the _literal as a whole_. But when the text of a literal is in canonical form, the literal as a whole, too, is in canonical form. + +The canonical form of the text of a literal that produces a specific value, of any data type, is the `::text` typecast of that value. + +Many of the examples in this _"Array data types and functionality"_ major section show that many spellings of the text of an array literal, in addition to the canonical form, will produce a particular intended target value. The differences are due to how whitespace, punctuation, and escape characters are used. diff --git a/docs/content/v2.25/api/ysql/datatypes/type_array/looping-through-arrays.md b/docs/content/v2.25/api/ysql/datatypes/type_array/looping-through-arrays.md new file mode 100644 index 000000000000..df32dad764fc --- /dev/null +++ b/docs/content/v2.25/api/ysql/datatypes/type_array/looping-through-arrays.md @@ -0,0 +1,697 @@ +--- +title: Looping through arrays in PL/pgSQL +linkTitle: FOREACH loop (PL/pgSQL) +headerTitle: Looping through arrays in PL/pgSQL +description: Looping through arrays in PL/pgSQL +menu: + preview_api: + identifier: looping-through-arrays + parent: api-ysql-datatypes-array + weight: 30 +type: docs +--- +The PL/pgSQL `FOREACH` loop brings dedicated syntax for looping over the contents of an array. + +## Overview + +**Note:** See [array_lower()](../functions-operators/properties/#array-lower), [array_upper()](../functions-operators/properties/#array-upper), [array_ndims()](../functions-operators/properties/#array-ndims) and [cardinality()](../functions-operators/properties/#cardinality) for descriptions of the functions that the following account mentions. It also mentions _"row-major order"_. See [Joint semantics](../functions-operators/properties/#joint-semantics), within the section _"Functions for reporting the geometric properties of an array"_, for an explanation of this term. [Syntax and semantics](./#syntax-and-semantics) shows where, in the `FOREACH` loop header, the `SLICE` keyword is used. + +- When the operand of the `SLICE` clause is `0`, and for the general case where the iterand array has any number of dimensions, YSQL assigns its successive values, in row-major order, to the loop iterator. Here, its effect is functionally analogous to that of [`unnest()`](../functions-operators/array-agg-unnest/#unnest). + +- For the special case where the iterand array is one-dimensional, the `FOREACH` loop is useful only when the operand of the `SLICE` clause is `0`. 
In this use, it is a syntactically more compact way to achieve the effect that is achieved with a `FOR var IN` loop like this: + + ``` + for var in array_lower(iterand_arr, 1)..array_upper(iterand_arr, 1) loop + ... iterand_arr[var] ... + end loop; + ``` + +- When the operand of the `SLICE` clause is greater than `0`, and when the dimensionality of the iterand array is greater than `1`, the `FOREACH` loop provides functionality that `unnest()` cannot provide. Briefly, when the iterand array has `n` dimensions and the operand of the `SLICE` clause is `s`, YSQL assigns _slices_ (that is, subarrays) of dimensionality `s` to the iterator. The values in such a slice are those from the iterand array that remain when the distinct values of the first `(n - s)` indexes are used to drive the iteration. These two pseudocode blocks illustrate the idea: + + ``` + -- In this example, the SLICE operand is 1. + -- As a consequence, array_ndims(iterator_array) is 1. + -- Assume that array_ndims(iterand_arr) is 4. + -- There are therefore (4 - 1) = 3 nested loops in this pseudocode. + for i in array_lower(iterand_arr, 1)..array_upper(iterand_arr, 1) loop + for j in array_lower(iterand_arr, 2)..array_upper(iterand_arr, 2) loop + for k in array_lower(iterand_arr, 3)..array_upper(iterand_arr, 3) loop + + the (i, j, k)th iterator_array is set to + iterand_arr[i][j][k][ for all values the remaining 4th index ] + + end loop; + end loop; + end loop; + ``` + + ``` + -- In this example, the SLICE operand is 3. + -- As a consequence, array_ndims(iterator_array) is 3. + -- Assume that array_ndims(iterand_arr) is 4. + -- There is therefore (4 - 3) = 1 nested loop in this pseudocode. + for i in array_lower(iterand_arr, 1)..array_upper(iterand_arr, 1) loop + + the (i)th iterator_array is set to + iterand_arr[i][ for all values the remaining 2nd, 3rd, and 4th indexes ] + + end loop; + ``` + +The examples below clarify the behavior of `FOREACH`. + +## Syntax and semantics + +``` +[ <
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ + +```sql +SET yb_transaction_priority_upper_bound = 0.4; +``` + +
+ +
+ +```sql +SET yb_transaction_priority_lower_bound = 0.6; +``` + +
+ +
+
+ + +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+ + +```sql +select * from test where k=1 for update; +``` + +
+ +```output + k | v +---+--- + 1 | 1 +(1 row) +``` + +
+ +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+
+ +```sql +select * from test where k=1 for update; +``` + +
+ +```output + k | v +---+--- + 1 | 1 +(1 row) +``` + +
+
+ + +```sql +select * from test; +``` + +```output +ERROR: Operation expired: Heartbeat: +Transaction 13fb5a0a-012d-4821-ae1d-5f7780636dd4 expired +or aborted by a conflict: 40001 +``` + +```sql +rollback; +``` + +
+ +```sql +commit; +``` + + +
+ +### Die example + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ + +```sql +SET yb_transaction_priority_lower_bound = 0.6; +``` + +
+ +
+ +```sql +SET yb_transaction_priority_upper_bound = 0.4; +``` + +
+ +
+
+ + +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+ + +```sql +select * from test where k=1 for update; +``` + +
+ +```output + k | v +---+--- + 1 | 1 +(1 row) +``` + +
+ +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+
+ +```sql +select * from test where k=1 for update; +``` + +
+ +```output +ERROR: All transparent retries exhausted. could not serialize +access due to concurrent update +``` + +```sql +rollback; +``` + +
+
+ + +```sql +commit; +``` + +
+
+### Best-effort internal retries for first statement in a transaction
+
+You see the error message `All transparent retries exhausted` in the preceding example because, if the transaction T1, when executing its first statement, finds another concurrent conflicting transaction with equal or higher priority, T1 performs a few retries with exponential backoff before giving up, in anticipation that the other transaction will finish in some time. The number of retries is configurable using the `yb_max_query_layer_retries` YSQL configuration parameter, and the exponential backoff parameters are the same as the ones described in [Performance tuning](../read-committed/#performance-tuning).
+
+Each retry uses a newer snapshot of the database in anticipation that the conflicts might not occur. This is done because, if the read time of the new snapshot is higher than the commit time of the earlier conflicting transaction T2, the conflicts with T2 would essentially be voided as T1 and T2 would no longer be "concurrent".
+
+Note that the retries are not performed if the amount of data to be sent from YSQL to the client proxy exceeds the TServer flag `ysql_output_buffer_size`.
+
+## Wait-on-Conflict
+
+This mode of concurrency control is applicable only for YSQL (where it is the default) and provides the same semantics as PostgreSQL.
+
+In this mode, transactions are not assigned priorities. If a conflict occurs when a transaction T1 tries to read, write, or lock a row in a conflicting mode with a few other concurrent transactions, T1 will **wait** until all conflicting transactions finish by either committing or rolling back. Once all conflicting transactions have finished, T1 will:
+
+1. Make progress if the conflicting transactions didn't commit any permanent modifications that conflict with T1.
+2. Abort otherwise.
+
+`Wait-on-Conflict` behavior can be enabled by setting the YB-TServer flag `enable_wait_queues=true`, which will enable use of in-memory wait queues that provide waiting semantics when conflicts are detected between transactions. A rolling restart is needed for the flag to take effect. Without this flag set, transactions operate in the priority-based `Fail-on-Conflict` mode by default.
+
+Because T1 can make progress only if the conflicting transactions didn't commit any conflicting permanent modifications, there are some intricacies in the behavior. The exhaustive list of possible cases is detailed in the Examples section below.
+
+{{< note title="Best-effort internal retries also apply to Wait-on-Conflict policy" >}}
+
+The best-effort internal retries described in Fail-on-Conflict apply to the Wait-on-Conflict policy as well. YugabyteDB provides this additional enhancement which is not supported by PostgreSQL.
+
+After a transaction T1 (that was waiting for other transactions) unblocks, it could be the case that some conflicting modifications were committed to the database. In this case, T1 has to abort. However, if it's still the first statement that was being executed in T1, best-effort internal retries using a later snapshot of the database will be performed to possibly make progress.
+
+{{< /note >}}
+
+### Examples
+
+The following examples describe different use cases detailing the Wait-on-Conflict behavior. To run the examples, you need to do the following:
+
+1. Set the YB-TServer flag `enable_wait_queues=true`.
+1. Set the per-session `yb_max_query_layer_retries=0` YSQL configuration parameter to disable internal query layer retries on conflict.
This is done to illustrate the `Wait-on-Conflict` concurrency control semantics separately without query layer retries. It is not recommended to disable these retries in production. To set it at the cluster level, use the `ysql_pg_conf_csv` YB-TServer flag. + +A restart is necessary for the flags to take effect. + +Start by setting up the table you'll use in all of the examples in this section. + +```sql +create table test (k int primary key, v int); +insert into test values (1, 1); +insert into test values (2, 2); +``` + +#### Conflict between two explicit row-level locks + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+
+ + +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+ +```sql +select * from test where k=1 for update; +``` + +
+ +```output + k | v +---+--- + 1 | 1 +(1 row) +``` + +
+
+ + +```sql +select * from test where k=1 for update; +``` + +
+ +```output +(waits) +``` + +
+ +```sql +commit; +``` + +(OR) + +```sql +rollback; +``` + + +
+ + +```output + k | v +---+--- + 1 | 1 +(1 row) +``` + +```sql +commit; +``` + +
+ +#### Explicit row-level lock followed by a conflicting write + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+
+ + +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+ +```sql +select * from test where k=1 for share; +``` + +
+ +```output + k | v +---+--- + 1 | 1 +(1 row) +``` + +
+
+ + +```sql +update test set v=1 where k=1; +``` + +```output +(waits) +``` + +
+ +```sql +commit; +``` + +(OR) + +```sql +rollback; +``` + + +
+ + +```sql +UPDATE 1 +``` + +
+ + +```sql +commit; +``` + +
+ +#### Write followed by a conflicting explicit row-level lock + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+
+ + +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+ +```sql +update test set v=1 where k=1; +``` + +```output +UPDATE 1 +``` + + +
+ + +```sql +select * from test where k=1 for share; +``` + +
+ +```output +(waits) +``` + +
+ +```sql +rollback; +``` + +(OR) + +```sql +commit; +``` + + +
+ + +```output + k | v +---+--- + 1 | 1 +(1 row) +``` + +```sql +commit; +``` + +(OR) + +```output +ERROR: All transparent retries exhausted. could not serialize +access due to concurrent update +``` + +```sql +rollback; +``` + +
+ +#### Write followed by a conflicting write + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+
+ + +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+ +```sql +update test set v=1 where k=1; +``` + +```output +UPDATE 1 +``` + + +
+ + +```sql +update test set v=1 where k=1; +``` + +```output +(waits) +``` + +
+ +```sql +rollback; +``` + +(OR) + +```sql +commit; +``` + + +
+ + +```output +UPDATE 1 +``` + +```sql +commit; +``` + +(OR) + +```output +ERROR: All transparent retries exhausted. Operation failed. +Try again: Value write after transaction start: { days: 19299 +time: 17:07:42.577762 } >= { days: 19299 time: 17:07:40.561842 }: +kConflict +``` + +```sql +rollback; +``` + +
+ +#### Wait queue jumping is allowed + +A transaction can jump the queue even if it does conflict with waiting transactions but doesn't conflict with any active transactions. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+ +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+ +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+ +```sql +select * from test where k=1 for share; +``` + +
+ +```output + k | v +---+--- + 1 | 1 +(1 row) +``` + +
+ +
+ + +```sql +select * from test where k=1 for update; +``` + +
+ +```output +(waits for T1 to end...) +``` + +
+
+ + + +```sql +select * from test where k=1 for share; +``` + +
+ +```output + k | v +---+--- + 1 | 1 +(1 row) +``` + +(Doesn't wait for T2 even though it conflicts with the explicit row-level lock T2 is waiting for) +
+ +```sql +commit; +``` + + + +
+ + + +```sql +commit; +``` + +
+ + +```output + k | v +---+--- + 1 | 1 +(1 row) +``` + + +
+ + +```sql +commit; +``` + + +
+ +#### Rollback of sub-transaction with conflicting write + +Suppose a transaction T1 is blocked on some operation of another transaction T2. If that blocking operation was part of a sub-transaction which is later rolled back, then T1 may proceed: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+
+ + +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+ +```sql +savepoint a; +``` + +```sql +update test set v=1 where k=1; +``` + +```output +UPDATE 1 +``` + + +
+ + +```sql +update test set v=1 where k=1; +``` + +```output +(waits) +``` + +
+ +```sql +rollback to savepoint a; +``` + + +
+ + +```output +UPDATE 1 +``` + +```sql +commit; +``` + +
+ +```sql +commit; +``` + + +
+ +### Distributed deadlock detection + +In the Wait-on-Conflict mode, transactions can wait for each other and result in a deadlock. By default, any cluster with wait queues enabled will be running a distributed deadlock detection algorithm in the background to detect and break deadlocks. It's possible to explicitly disable deadlock detection by setting the YB-TServer flag `disable_deadlock_detection=true`, but this is generally not recommended unless it is absolutely certain that the application or workload behavior makes deadlocks impossible. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+
+ + +```sql +begin transaction isolation level repeatable read; +``` + +
+ +
+ +```sql +update test set v=2 where k=1; +``` + +```output +UPDATE 1 +``` + + +
+ + +```sql +update test set v=4 where k=2; +``` + +```output +UPDATE 1 +``` + +
+ +```sql +update test set v=6 where k=2; +``` + +```output +(waits) +``` + + +
+ + +```sql +update test set v=6 where k=1; +``` + +```output +ERROR: Internal error: Transaction 00da00cd-87fa-431b-9521-253582fb23fe +was aborted while waiting for locks +``` + +
+ +```sql +commit; +``` + + +
+
+### Versioning and upgrades
+
+When you turn `enable_wait_queues` on or off, or during a rolling restart or upgrade where the flag is enabled only on nodes running a more recent version, some nodes can have wait-on-conflict behavior enabled while others don't. In this situation, you will experience mixed (but still correct) behavior.
+
+A mix of both fail-on-conflict and wait-on-conflict traffic results in the following additional YSQL-specific semantics:
+
+- If a transaction using fail-on-conflict encounters transactions that have conflicting writes:
+  - If there is even a single conflicting transaction that uses wait-on-conflict, the transaction aborts.
+  - Otherwise, YugabyteDB uses the regular [fail-on-conflict semantics](#fail-on-conflict), which is to abort the lower-priority transaction.
+- If a transaction using wait-on-conflict encounters transactions that have conflicting writes, it waits for all conflicting transactions to end (including any using fail-on-conflict semantics).
+
+### Fairness
+
+When multiple requests are waiting on the same resource in the wait queue, and that resource becomes available, YugabyteDB generally uses the following process to decide in which order those waiting requests should get access to the contentious resource:
+
+1. Sort all waiting requests based on the _transaction start time_, with requests from the oldest transactions first.
+2. Resume requests in order:
+   1. Re-run conflict resolution and acquire locks on the requested resource.
+   2. If the resource is no longer available because another waiting request acquired it, re-enter the wait queue.
+
+YugabyteDB has two mechanisms to detect that a resource has become available:
+
+1. Direct signal from the transaction coordinator
+   - Signals are sent with best effort and may not always arrive immediately or in-order
+2. Polling from the wait queue to the transaction coordinator
+   - Ensures guaranteed/bounded detection of resource availability
+
+Polling from the wait queue is controlled by the flag `wait_queue_poll_interval_ms`, which is set to 100ms by default. Setting this higher can result in slightly lower overhead, but empirically 100ms seems to offer good performance.
+
+In highly contentious workloads, a low polling interval (around the default 100ms) is required to ensure starvation does not occur. Setting this polling interval higher in contentious settings can cause high tail latency and is not recommended.
+
+### Metrics
+
+All metrics are per tablet.
+
+#### Histograms
+
+1. `wait_queue_pending_time_waiting`: the amount of time in microseconds a still-waiting transaction has been in the wait queue
+2. `wait_queue_finished_waiting_latency`: the amount of time in microseconds an unblocked transaction spent in the wait queue
+3. `wait_queue_blockers_per_waiter`: the number of blockers a waiter is stuck on in the wait queue
+
+#### Counters
+
+1. `wait_queue_waiters_per_blocker`: the number of waiters stuck on a particular blocker in the wait queue
+2. `wait_queue_num_waiters`: the number of waiters stuck on a blocker in the wait queue
+3. `wait_queue_num_blockers`: the number of unique blockers tracked in a wait queue
+
+### Limitations
+
+Refer to [#5680](https://github.com/yugabyte/yugabyte-db/issues/5680) for limitations.
+
+## Row-level explicit locking clauses
+
+The `NOWAIT` clause for row-level explicit locking doesn't apply to the `Fail-on-Conflict` mode as there is no waiting. It does apply to the `Wait-on-Conflict` policy but is currently supported only for Read Committed isolation.
[#12166](https://github.com/yugabyte/yugabyte-db/issues/12166) will extend support for this in the `Wait-on-Conflict` mode to the other isolation levels.
+
+The `SKIP LOCKED` clause is supported in both concurrency control policies and provides a transaction with the capability to skip locking without any error when a conflict is detected. However, it isn't supported for Serializable isolation. [#11761](https://github.com/yugabyte/yugabyte-db/issues/5683) tracks support for `SKIP LOCKED` in Serializable isolation.
diff --git a/docs/content/v2.25/architecture/transactions/distributed-txns.md b/docs/content/v2.25/architecture/transactions/distributed-txns.md
new file mode 100644
index 000000000000..cd36ae2fd967
--- /dev/null
+++ b/docs/content/v2.25/architecture/transactions/distributed-txns.md
@@ -0,0 +1,97 @@
+---
+title: Distributed transactions
+headerTitle: Distributed transactions
+linkTitle: Distributed transactions
+description: Distributed ACID transactions modify multiple rows spread across multiple shards.
+menu:
+  preview:
+    identifier: architecture-distributed-acid-transactions
+    parent: architecture-acid-transactions
+    weight: 200
+aliases:
+  - /architecture/concepts/transactions/
+type: docs
+---
+
+YugabyteDB supports distributed transactions based on principles of atomicity, consistency, isolation, and durability (ACID) that modify multiple rows in more than one shard. This enables strongly consistent secondary indexes, as well as multi-table and multi-row ACID operations in both YCQL and YSQL contexts.
+
+After you are familiar with the preceding concepts, refer to [Transactional I/O path](../transactional-io-path/) for an overview of a distributed transaction's lifecycle.
+
+## Provisional records
+
+Just as YugabyteDB stores values written by single-shard ACID transactions into [DocDB](../../docdb/data-model/), it needs to store uncommitted values written by distributed transactions in a similar persistent data structure. However, they cannot be written to DocDB as regular values, because they would then become visible at different times to clients reading through different tablet servers, allowing a client to see a partially applied transaction and thus breaking atomicity. YugabyteDB therefore writes provisional records to all tablets responsible for the keys the transaction is trying to modify. These records are called provisional, as opposed to regular (permanent) records, because they are invisible to readers until the transaction commits.
+
+Provisional records are stored in a separate RocksDB instance in the same tablet peer (referred to as IntentsDB, as opposed to RegularDB, for regular records). Compared to other possible design options, such as storing provisional records inline with the regular records, or putting them in the same RocksDB instance together with regular records, the chosen approach has the following benefits:
+
+- Scanning all provisional records is straightforward, which is helpful in cleaning up aborted or abandoned transactions.
+- During the read path, there is a need to handle provisional records very differently from the regular records, and putting them in a separate section of the RocksDB key space makes it possible to simplify the read path.
+ +## Encoding of provisional records + +There are three types of RocksDB key-value pairs corresponding to provisional records, omitting the one-byte prefix that puts these records before all regular records in RocksDB, as per the following diagram: + +![DocDB storage, including provisional records](/images/architecture/txn/provisional_record_storage.svg) + +### Primary provisional records + +```output +DocumentKey, SubKey1, ..., SubKeyN, LockType, ProvisionalRecordHybridTime -> TxnId, Value +``` + +The `DocumentKey`, `SubKey1`, ..., `SubKey` components exactly match those in DocDB's [encoding](../../docdb/data-model) of paths to a particular subdocument (for example, a row, a column, or an element in a collection-type column) to RocksDB keys. + +Each of these primary provisional records also acts as a persistent revocable lock. There are some similarities as well as differences when compared to [blocking in-memory locks](../isolation-levels/) maintained by every tablet's lock manager. These persistent locks can be of any of the same types as for in-memory leader-only locks (SI write, serializable write and read, and a separate strong and weak classification for handling nested document changes). However, unlike the leader-side in-memory locks, the locks represented by provisional records can be revoked by another conflicting transaction. The conflict resolution subsystem makes sure that for any two conflicting transactions, at least one of them is aborted. + +As an example, suppose a snapshot isolation transaction is setting column `col1` in row `row1` to `value1`. Then `DocumentKey` is `row1` and `SubKey1` is `col1`. Suppose the provisional record was written into the tablet with a hybrid timestamp `1516847525206000`, and the transaction ID is `7c98406e-2373-499d-88c2-25d72a4a178c`. There will be the following provisional record values in RocksDB: + + ```output + row1, WeakSIWrite, 1516847525206000 -> 7c98406e-2373-499d-88c2-25d72a4a178c + row1, col1, StrongSIWrite, 1516847525206000 -> 7c98406e-2373-499d-88c2-25d72a4a178c, value1 + ``` + +The `WeakSIWrite` lock type is used for the row (the parent of the column being written), and `StrongSIWrite` is used for the column itself. The provisional record for the column is also where the column's value being written by the transaction is stored. + +### Transaction metadata records + +```output +TxnId -> StatusTabletId, IsolationLevel, Priority +``` + +- `StatusTabletId` is the ID of the tablet that keeps track of this transaction's status. Unlike the case of tables and tablets holding user data, where a [hash-based mapping](../../docdb-sharding/sharding/) is used from keys to tablets, there is no deterministic way to compute the transaction status tablet ID by transaction ID, so this information must be explicitly passed to all components handling a particular transaction. +- `Isolation Level` [snapshot isolation](https://en.wikipedia.org/wiki/Snapshot_isolation) or [serializable isolation](https://en.wikipedia.org/wiki/Serializability). +- `Priority` This priority is assigned randomly during transaction creation, when the [Fail-on-Conflict](../concurrency-control/#fail-on-conflict) concurrency control policy is used. + +### Provisional record keys indexed by transaction ID + +```output +TxnId, HybridTime -> primary provisional record key +``` + +This mapping enables finding all provisional RocksDB records belonging to a particular transaction. This is used when cleaning up committed or aborted transactions. 
Note that because multiple RocksDB key-value pairs belonging to primary provisional records can be written for the same transaction with the same hybrid timestamp, an increasing counter (called write ID) is used at the end of the encoded representation of hybrid time in order to obtain unique RocksDB keys for this reverse index. This write ID is shown as `.0`, `.1`, and so on, in `T130.0`, `T130.1` shown in the diagram in [Encoding details of provisional records](#encoding-of-provisional-records). + +## Transaction status tracking + +Atomicity means that either all values written by a transaction are visible or none are visible. YugabyteDB already provides atomicity of single-shard updates by replicating them via Raft and applying them as one write batch to the underlying DocDB storage engine. The same approach could be reused to make transaction status changes atomic. The status of transactions is tracked in a so-called transaction status table. This table, under the covers, is just another sharded table in the system, although it does not use RocksDB and instead stores all its data in memory, backed by the Raft WAL. The transaction ID (a globally unique ID) serves as the key in the table, and updates to a transaction's status are basic single-shard ACID operations. By setting the status to `committed` in that transaction's status record in the table, all values written as part of that transaction become atomically visible. + +A transaction status record contains the following fields for a particular transaction ID: + +- Status: pending, committed, or aborted. All transactions start in the pending status and then progress to committed or aborted, in which they remain permanently until cleaned up. + +After a transaction is committed, the following two fields are set: + +- Commit hybrid timestamp. This timestamp is chosen as the current hybrid time at the transaction status tablet at the moment of appending the transaction committed entry to its Raft log. It is then used as the final MVCC timestamp for regular records that replace the transaction's provisional records when provisional records are being applied and cleaned up. +- List of IDs of participating tablets. After a transaction commits, the final set of tablets to which the transaction has written is known. The tablet server managing the transaction sends this list to the transaction status tablet as part of the commit message, and the transaction status tablet makes sure that all participating tablets are notified of the transaction's committed status. This process might take multiple retries, and the transaction record can only be cleaned up after this is done. + +## Impact of failures + +Provisional records are written to all the replicas of the tablets responsible for the keys being modified in a transaction. When a node with the tablet that has received or is about to receive the provisional records fails, a new leader is elected for the tablet in a few seconds(`~2s`) as described in [Leader Election](../../docdb-replication/raft/#leader-election). The query layer waits for leader election to occur and then the transaction proceeds further with the newly elected leader. In this case, the time taken for the transaction to complete increases by the time taken for the leader election. + +The transaction manager (typically, the node the client is connected to) sends heartbeats to the transaction status tablet that maintains information about the transaction. 
When the manager fails, these heartbeats stop and the provisional records expire after a certain time. At this point, the status tablet automatically cancels this transaction, so the related provisional records no longer block conflicting transactions waiting on the same keys. Clients connected to the failed manager receive an error message similar to the following: + +```output.sql +FATAL: 57P01: terminating connection due to unexpected postmaster exit +FATAL: XX000: Network error: recvmsg error: Connection refused +``` + +Because the client-to-transaction-ID mapping cannot be regenerated, the client does not know the transaction ID, and it is the client's responsibility to restart the transaction. Other clients with transactions that were blocked on the provisional records written by the failed manager will have to wait for the transaction to expire due to the heartbeat timeout, and then proceed normally. diff --git a/docs/content/v2.25/architecture/transactions/isolation-levels.md b/docs/content/v2.25/architecture/transactions/isolation-levels.md new file mode 100644 index 000000000000..c5b25a084490 --- /dev/null +++ b/docs/content/v2.25/architecture/transactions/isolation-levels.md @@ -0,0 +1,187 @@ +--- +title: Transaction isolation levels +headerTitle: Transaction isolation levels +linkTitle: Isolation levels +description: Learn how YugabyteDB supports the Serializable, Snapshot, and Read Committed transaction isolation levels. +menu: + preview: + identifier: architecture-isolation-levels + parent: architecture-acid-transactions + weight: 500 +type: docs +--- + +Transaction isolation is foundational to handling concurrent transactions in databases. The SQL-92 standard defines four levels of transaction isolation (in decreasing order of strictness): Serializable, Repeatable Read, Read Committed, and Read Uncommitted. + +YugabyteDB supports the following three strictest transaction isolation levels: + +1. Read Committed, which maps to the SQL isolation level of the same name. This isolation level guarantees that each statement sees all data that has been committed before it is issued (this implicitly also means that the statement sees a consistent snapshot). In addition, this isolation level internally handles read restart and conflict errors. In other words, the client does not see read restart and conflict errors (barring an exception). +2. Serializable, which maps to the SQL isolation level of the same name. This isolation level guarantees that transactions run in a way equivalent to a serial (sequential) schedule. +3. Snapshot, which maps to the SQL Repeatable Read isolation level. This isolation level guarantees that all reads made in a transaction see a consistent snapshot of the database, and the transaction itself can successfully commit only if no updates it has made conflict with any concurrent updates made by transactions that committed after that snapshot. + +Transaction isolation level support differs between the YSQL and YCQL APIs: + +- [YSQL](../../../api/ysql/) supports Serializable, Snapshot, and Read Committed isolation levels. +- [YCQL](../../../api/ycql/dml_transaction/) supports only Snapshot isolation using the `BEGIN TRANSACTION` syntax. + +Similarly to PostgreSQL, you can specify Read Uncommitted for YSQL, but it behaves the same as Read Committed. + +Read Committed is supported only if the YB-TServer flag `yb_enable_read_committed_isolation` is set to `true`.
By default, this flag is `false`, in which case the Read Committed isolation level of YugabyteDB's transactional layer falls back to the stricter Snapshot isolation. The default isolation level for the YSQL API is essentially Snapshot because Read Committed, which is the YSQL API and PostgreSQL syntactic default, maps to Snapshot isolation. + +## Internal locking in DocDB + +In order to support the three isolation levels, the lock manager internally supports the following three types of locks: + +- Serializable read lock is taken by serializable transactions on values that they read in order to guarantee they are not modified until the transaction commits. + +- Serializable write lock is taken by serializable transactions on values they write. + +- Snapshot isolation write lock is taken by a snapshot isolation (and also read committed) transaction on values that it modifies. + +The following matrix shows conflicts between these types of locks at a high level: + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Snapshot isolation writeSerializable writeSerializable read
Snapshot isolation write✘ Conflict✘ Conflict✘ Conflict
Serializable write✘ Conflict✔ No conflict✘ Conflict
Serializable read✘ Conflict✘ Conflict✔ No conflict
+ +That is, serializable read locks block writers but allow other simultaneous readers. Serializable write locks block readers as expected but not other serializable writers. Finally, snapshot isolation write locks block all other readers and writers. + +Because serializable write locks do not block other serializable writers, concurrent blind writes are allowed at the serializable isolation level. A blind write is a write to a location that has not been previously read by that transaction. Two serializable transactions blindly writing to the same location can proceed in parallel assuming there are no other conflicts; the value of the location afterwards will be the value written by the transaction that committed last. + +Although described here as a separate lock type for simplicity, the snapshot isolation write lock type is actually implemented internally as a combination of the other two lock types. That is, taking a single snapshot isolation write lock is equivalent to taking both a serializable read lock and a serializable write lock. + +## Locking granularities + +Locks can be taken at many levels of granularity. For example, a serializable read lock could be taken at the level of an entire tablet, a single row, or a single column of a single row. Such a lock will block attempts to take write locks at that or finer granularities. Thus, for example, a read lock taken at the row level will block attempts to write to that entire row or any column in that row. + +In addition to the above-mentioned levels of granularity, locks in DocDB can be taken at prefixes of the primary key columns, treating the hash columns as a single unit. For example, if you created a YSQL table via: + +```sql +CREATE TABLE test (h1 INT, h2 INT, r1 INT, r2 INT, v INT, w INT, PRIMARY KEY ((h1,h2) HASH, r1 ASC, r2 ASC)); +``` + +then any of the following objects could be locked: + +- the entire tablet +- all rows having h1=2, h2=3 +- all rows having h1=2, h2=3, r1=4 +- the row having h1=2, h2=3, r1=4, r2=5 +- column v of the row having h1=2, h2=3, r1=4, r2=5 + +With YCQL, granularities exist below the column level; for example, only one key of a column of map data type can be locked. + +## Efficiently detecting conflicts between locks of different granularities + +The straightforward way to handle locks of different granularities would be to have a map from lockable objects to lock types. However, this is too inefficient for detecting conflicts: attempting, for example, to add a lock at the tablet level would require checking for locks at every row and column in that tablet. + +To make conflict detection efficient, YugabyteDB stores extra information for each lockable object about any locks on sub-objects of it. In particular, instead of just taking a lock on _X_, it takes a normal lock on _X_ and also weaker versions of that lock on all objects that enclose _X_. The normal locks are called _strong_ locks and the weaker variants _weak_ locks. + +As an example, pretend YugabyteDB has only tablet- and row-level granularities. To take a serializable write lock at the row level (say on row _r_ of tablet _b_), it would take a strong write lock at the row level (on _r_) and a weak write lock at the tablet level (on _b_). To take a serializable read lock at the tablet level (assume also on _b_), YugabyteDB would just take a strong read lock at the tablet level (on _b_).
+ +Using the following conflict rules, YugabyteDB can decide if two original locks would conflict based only on whether or not their strong/weak locks at any lockable object would conflict: + +- two strong locks conflict if and only if they conflict ignoring their strength + - for example, serializable write conflicts with serializable read per the previous matrix +- two weak locks never conflict +- a strong lock conflicts with a weak lock if and only if they conflict ignoring their strength + +That is, for each lockable object that would have two locks, would they conflict under the above rules? There is no need to enumerate the sub-objects of any object. + +Consider our example with a serializable write lock at the row level and a serializable read lock at the tablet level. A conflict is detected at the tablet level because the strong read and the weak write locks on _b_ conflict because ordinary read and write locks conflict. + +What about a case involving two row-level snapshot isolation write locks on different rows in the same tablet? No conflict is detected because the tablet-level locks are weak and the strong row-level locks are on different rows. If they had involved the same row then a conflict would be detected because two strong snapshot isolation write locks conflict. + +Including the strong/weak distinction, the full conflict matrix becomes: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Strong Snapshot isolation writeWeak Snapshot isolation writeStrong Serializable writeWeak Serializable writeStrong Serializable readWeak Serializable read
Strong Snapshot isolation write✘ Conflict✘ Conflict✘ Conflict✘ Conflict✘ Conflict✘ Conflict
Weak Snapshot isolation write✘ Conflict✔ No conflict✘ Conflict✔ No conflict✘ Conflict✔ No conflict
Strong Serializable write✘ Conflict✘ Conflict✔ No conflict✔ No conflict✘ Conflict✘ Conflict
Weak Serializable write✘ Conflict✔ No conflict✔ No conflict✔ No conflict✘ Conflict✔ No conflict
Strong Serializable read✘ Conflict✘ Conflict✘ Conflict✘ Conflict✔ No conflict✔ No conflict
Weak Serializable read✘ Conflict✔ No conflict✘ Conflict✔ No conflict✔ No conflict✔ No conflict
diff --git a/docs/content/v2.25/architecture/transactions/read-committed.md b/docs/content/v2.25/architecture/transactions/read-committed.md new file mode 100644 index 000000000000..44e82c445b46 --- /dev/null +++ b/docs/content/v2.25/architecture/transactions/read-committed.md @@ -0,0 +1,1449 @@ +--- +title: Read Committed isolation level +headerTitle: Read Committed isolation level +linkTitle: Read Committed +description: Details about the Read Committed isolation level +menu: + preview: + identifier: architecture-read-committed + parent: architecture-acid-transactions + weight: 800 +type: docs +rightNav: + hideH4: true +--- + +Read Committed is one of the three isolation levels in PostgreSQL, and also its default. A unique property of this isolation level is that, for transactions running with this isolation, clients do not need to retry or handle serialization errors (40001) in application logic. The other two isolation levels (Serializable and Repeatable Read) require applications to have retry logic for serialization errors. Also, each statement in a read committed transactions works on a new latest snapshot of the database, implying that any data committed before the statement was issued, is visible to the statement. + +A read committed transaction in PostgreSQL doesn't raise serialization errors because it internally retries conflicting rows in the statement's execution as of the latest versions of those rows, as soon as conflicting concurrent transactions have finished. This mechanism allows single statements to work on an inconsistent snapshot (in other words, non-conflicting rows are read as of the statement's snapshot, but conflicting rows are re-attempted on the latest version of the row after the conflicting transactions are complete). + +YugabyteDB's Read Committed isolation provides slightly stronger guarantees than PostgreSQL's read committed, while providing the same semantics and benefits, that is, a user doesn't have to retry serialization errors in the application logic (modulo [limitation 2](#limitations) around `ysql_output_buffer_size` which is not of relevance for most OLTP workloads). In YugabyteDB, a read committed transaction retries the whole statement instead of retrying only the conflicting rows. This leads to a stronger guarantee where each statement in a YugabyteDB read committed transaction always uses a consistent snapshot of the database, while in PostgreSQL an inconsistent snapshot can be used for statements when conflicts are present. For a detailed example, see [Stronger guarantees in YugabyteDB's read committed isolation](#yugabytedb-s-implementation-with-a-stronger-guarantee). + +Note that retries for the statement in YugabyteDB's Read Committed isolation are limited to the per-session YSQL configuration parameter `yb_max_query_layer_retries`. To set it at the cluster level, use the `ysql_pg_conf_csv` TServer flag. If a serialization error isn't resolved within `yb_max_query_layer_retries`, the error will be returned to the client. + +{{< tip title="Enable Read Committed" >}} + +To enable Read Committed isolation, set the YB-TServer flag [yb_enable_read_committed_isolation](../../../reference/configuration/yb-tserver/#yb-enable-read-committed-isolation) to `true`. By default this flag is `false` and in this case the Read Committed isolation level of the YugabyteDB transactional layer falls back to the stricter Snapshot isolation (in which case `READ COMMITTED` and `READ UNCOMMITTED` of YSQL also in turn use Snapshot isolation). 
+ +Refer to [Usage](#usage) to start a Read Committed transaction after enabling the flag. +{{< /tip >}} + + +## Implementation and semantics (as in PostgreSQL) + +The following two key semantics set apart Read Committed isolation from Repeatable Read in PostgreSQL (refer [Read Committed level](https://www.postgresql.org/docs/13/transaction-iso.html#XACT-READ-COMMITTED)): + +1. Each statement should be able to read everything that was committed before the statement was issued. In other words, each statement runs on the latest snapshot of the database as of when the statement was issued. +1. Clients never face serialization errors (40001) in read committed isolation level. To achieve this, PostgreSQL re-evaluates statements for conflicting rows based on a set of rules as described below. + +To handle serialization errors in the database without surfacing them to the client, PostgreSQL takes a number of steps based on the statement type. + +### UPDATE, DELETE, SELECT FOR [UPDATE, SHARE, NO KEY UPDATE, KEY SHARE] + +* If the subject row is being updated or deleted by other concurrent transactions in a conflicting way, wait for the conflicting transactions to commit or rollback, and then perform validation steps. + +* If the subject row has been updated or deleted by other concurrent transactions in a conflicting way, perform validation steps. + +* If the subject row has been locked by other concurrent transactions in a conflicting way, wait for them to commit or rollback, and then perform validation steps. + +Note that two transactions are `concurrent` if their `read time` to `commit time` ranges overlap. If a transaction has not yet committed, the closing range is the current time. Also, for read committed isolation, there is a `read time` for each statement, and not one for the whole transaction. + +#### Validation steps + +The validation steps are as follows: + +1. Read the latest version of the conflicting row and lock it appropriately. The latest version could have a different primary key as well. PostgreSQL finds it by following the chain of updates for a row even across primary key changes. Note that locking is necessary so that another conflict isn't seen on this row while re-evaluating the row again and possibly updating/acquiring a lock on it in step 3. If the locking faces a conflict, it would wait and resume traversing the chain further once unblocked. +1. If the updated version of a row is deleted, ignore it. +1. Apply update, delete, or acquire lock on updated version of the row if the `WHERE` clause evaluates to `true` on the updated version of the row. + +### INSERT + +* `ON CONFLICT DO UPDATE`: if a conflict occurs, wait for the conflicting transactions to commit or rollback. + * If all conflicting transactions rollback, proceed as usual. + * On commit of any conflicting transaction, traverse the chain of updates, as described in validation step 1, and re-evaluate the latest version of the row for any conflict. If there is no conflict, insert the original row. Otherwise, perform the `DO UPDATE` part on the latest version of the row. +* `ON CONFLICT DO NOTHING`: if a conflict occurs, do not do anything. + +## YugabyteDB's implementation with a stronger guarantee + +Note that the implementation in PostgreSQL (discussed above) can theoretically lead to two different visible semantics: + +* A common case which uses an inconsistent snapshot of the database for the same statement's execution. 
+* A degenerate situation that is highly unlikely to be seen in practice, but is nevertheless possible and provides a stronger guarantee by using a consistent snapshot for the whole statement while still upholding the semantics of Read Committed isolation. + +### Common case in PostgreSQL + +```sql +CREATE TABLE test (k int primary key, v int); +INSERT INTO test VALUES (2, 5); +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +begin transaction isolation level read committed; +``` + + +
+ + +```sql +begin transaction isolation level read committed; +``` + +
+ +```sql +insert into test values (5, 5); +``` + +```output +INSERT 0 1 +``` + + +
+ +```sql +update test set v=10 where k=2; +``` + +```output +UPDATE 1 +``` + + +
+ + +```sql +update test set v=100 where v>=5; +``` + +```output +(waits) +``` + +
+ +```sql +COMMIT; +``` + + +
+ + +```output +UPDATE 1 +``` + +
+ + +```sql +select * from test; +``` + +
+ + +```output + k | v +---+----- + 5 | 5 + 2 | 100 +(2 rows) +``` + +
+ +As seen above, the UPDATE from transaction 2 first picks the latest snapshot of the database which only has the row (2, 5). The row satisfies the `UPDATE` statement's `WHERE` clause and hence the transaction 2 tries to update the value of `v` from 5 to 100. However, due to an existing conflicting write from transaction 1, it waits for transaction 1 to end. After transaction 1 commits, it re-reads the latest version of only the conflicting row, and re-evaluates the `WHERE` clause. The clause is still satisfied by the new row (2, 10) and so the value is updated to 100. Note that the newly inserted row (5, 5) isn't updated even though it satisfies the `WHERE` clause of transaction 2's `UPDATE`, because it was not part of the snapshot originally picked by transaction 2's `UPDATE` statement. + +So, to avoid serialization errors, PostgreSQL only retries the conflicting rows based on their latest versions, thereby allowing a single statement to run on an inconsistent snapshot. In other words, one snapshot is picked for the statement to read all data and process all non-conflicting rows, and a latest version is used for the conflicting rows. + +### The unlikely case in PostgreSQL + +The other degenerate scenario that can occur differs in the output of the `UPDATE` in transaction 2: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +update test set v=10 where k=2; +``` + +```output +UPDATE 1 +``` + + +
+ + +```sql +update test set v=100 where v>=5; +``` + +```output +(some processing before snapshot is picked, but feels like postgreSQL is waiting due to a conflict) +``` + +
+ +```sql +COMMIT; +``` + + +
+ + +```output +UPDATE 2 +``` + +
+ + +```sql +select * from test; +``` + +
+ + +```output + k | v +---+----- + 5 | 100 + 2 | 100 +(2 rows) +``` + +
+ +The preceding outcome can occur via the following unlikely circumstance: until Client 1 commits, PostgreSQL on Client 2 for some reason is busy or slow, and hasn't yet picked a snapshot for execution. Only after Client 1 commits does the transaction on Client 2 pick a snapshot based off the current time for the statement. This leads to both rows being read as part of the snapshot and updated without any observable conflicts. + +Both the `common case` and `unlikely` outcomes are valid and satisfy the semantics of the Read Committed isolation level. And theoretically, the user cannot figure out which one will be seen because the user cannot differentiate between a pause due to waiting for a conflicting transaction, or a pause due to the database just being busy or slow. Moreover, the `unlikely` case provides a stronger and more intuitive guarantee that the whole statement runs off a single snapshot. + +These two possibilities show that the client cannot have application logic that relies on the expectation that the common case always occurs. YugabyteDB implements Read Committed isolation by undoing and retrying a statement whenever serialization errors occur. This provides a stronger guarantee that each statement always works off just a single snapshot, and no inconsistency is allowed even in the case of some conflicting rows. This leads to YugabyteDB always returning output similar to the second outcome in the preceding example, which is also simpler to reason about. + +This might change in the future as per [#11573](https://github.com/yugabyte/yugabyte-db/issues/11573), if it gains interest. + +## Read restart errors + +[Read Restart errors](../read-restart-error) stem from clock skew, which is inherent in distributed databases due to the distribution of data across more than one physical node. PostgreSQL doesn't require defining semantics around read restart errors in read committed isolation because it is a single-node database without clock skew. + +In general, YugabyteDB has optimizations to resolve this error internally on a best-effort basis before forwarding it to the external client. However, for Read Committed isolation, YugabyteDB gives a stronger guarantee: no `read restart` errors will be thrown to the external client except when a statement's output exceeds `ysql_output_buffer_size` (the size of the output buffer between YSQL and the external client, which has a default of 256KB and is configurable). For most OLTP applications, this holds, as response sizes are usually within this limit. + +YugabyteDB chooses to provide this guarantee as most clients that use read committed with PostgreSQL don't have app-level retries for serialization errors. So, it helps to provide the same guarantee for `read restart` errors, which are unique to distributed databases. + +## Interaction with concurrency control + +Read Committed isolation faces the following limitations if using [Fail-on-Conflict](../concurrency-control/#fail-on-conflict) instead of the default [Wait-on-Conflict](../concurrency-control/#wait-on-conflict) concurrency control policy: + +* You may have to manually tune the exponential backoff parameters for performance, as described in [Performance tuning](#performance-tuning); a sketch of these session settings follows this list. +* Deadlock cycles will not be automatically detected and broken quickly. Instead, the `yb_max_query_layer_retries` YSQL configuration parameter will ensure that statements aren't stuck in deadlocks forever. +* There may be unfairness during contention due to the retry-backoff mechanism, resulting in high P99 latencies.
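+ +A minimal sketch of these session settings (the values shown are illustrative assumptions only, not recommendations; see [Performance tuning](#performance-tuning) for details): + +```sql +-- Cap the number of query layer retries attempted for a statement. +SET yb_max_query_layer_retries = 60; +-- Exponential backoff between retries, in milliseconds. +SET retry_min_backoff = 10; +SET retry_max_backoff = 1000; +SET retry_backoff_multiplier = 2; +```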
+ +The retries for serialization errors are done at the statement level. Each retry will use a newer snapshot of the database in anticipation that the conflicts might not occur. This is done because if the read time of the new snapshot is higher than the commit time of the earlier conflicting transaction T2, the conflicts with T2 would essentially be voided as T1's statement and T2 would no longer be "concurrent". + +## Usage + +To use Read Committed isolation, first set the YB-TServer flag `yb_enable_read_committed_isolation=true`; this maps the syntactic Read Committed isolation in YSQL to the Read Committed implementation in DocDB. (When set to `false`, syntactic Read Committed in YSQL is mapped to Snapshot isolation in DocDB, meaning it behaves as Repeatable Read.) + +Assuming the flag has been set, you can start a Read Committed transaction in the following ways: + +1. `START TRANSACTION isolation level read committed [read write | read only];` +1. `BEGIN [TRANSACTION] isolation level read committed [read write | read only];` +1. `BEGIN [TRANSACTION]; SET TRANSACTION ISOLATION LEVEL READ COMMITTED;` +1. `BEGIN [TRANSACTION]; SET SESSION CHARACTERISTICS AS TRANSACTION ISOLATION LEVEL READ COMMITTED;` + +## Examples + +Start by creating the table to be used in all of the examples, as follows: + +```sql +CREATE TABLE test (k int primary key, v int); +``` + +### SELECT behavior without explicit locking + +```sql +TRUNCATE TABLE test; +INSERT INTO test VALUES (1, 5); +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +begin transaction isolation level read committed; +``` + + +
+ + +```sql +begin transaction isolation level read committed; +``` + +
+ +```sql +select * from test where v=5; +``` + +```output + k | v +---+--- + 1 | 5 +(1 row) +``` + + +
+ + +```sql +insert into test values (2, 5); +``` + +```output +INSERT 0 1 +``` + +
+ +```sql +select * from test where v=5; +``` + +```output + k | v +---+--- + 1 | 5 +(1 row) +``` + + +
+ +```sql +insert into test values (3, 5); +``` + +```output +INSERT 0 1 +``` + + +
+ +```sql +select * from test where v=5; +``` + +```output + k | v +---+--- + 1 | 5 + 3 | 5 +(2 rows) +``` + + +
+ + +```sql +commit; +``` + +
+ +```sql +select * from test where v=5; +``` + +```output + k | v +---+--- + 1 | 5 + 2 | 5 + 3 | 5 +(3 rows) +``` + + +
+ +```sql +commit; +``` + + +
+ +### UPDATE behavior + +```sql +TRUNCATE TABLE test; +INSERT INTO test VALUES (0, 5), (1, 5), (2, 5), (3, 5), (4, 1); +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +begin transaction isolation level read committed; +``` + + +
+ + +```sql +begin transaction isolation level read committed; +``` + +
+ + +```sql +insert into test values (5, 5); +``` + +```output +INSERT 0 1 +``` + +
+ + +```sql +update test set v=10 where k=4; +``` + +```output +UPDATE 1 +``` + +
+ + +```sql +delete from test where k=3; +``` + +```output +DELETE 1 +``` + +
+ + +```sql +update test set v=10 where k=2; +``` + +```output +UPDATE 1 +``` + +
+ + +```sql +update test set v=1 where k=1; +``` + +```output +UPDATE 1 +``` + +
+ + +```sql +update test set k=10 where k=0; +``` + +```output +UPDATE 1 +``` + +
+ +```sql +update test set v=100 where v>=5; +``` + +```output +(waits) +``` + + +
+ + +```sql +commit; +``` + +
+ +```output +UPDATE 4 +``` + +```sql +select * from test; +``` + +```output + k | v +----+----- + 5 | 100 + 1 | 1 + 10 | 100 + 4 | 100 + 2 | 100 +(5 rows) +``` + + +
+ +```sql +commit; +``` + + +
+ +### SELECT FOR UPDATE behavior + +```sql +TRUNCATE TABLE test; +INSERT INTO test VALUES (0, 5), (1, 5), (2, 5), (3, 5), (4, 1); +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +begin transaction isolation level read committed; +``` + + +
+ + +```sql +begin transaction isolation level read committed; +``` + +
+ + +```sql +insert into test values (5, 5); +``` + +```output +INSERT 0 1 +``` + +
+ + +```sql +update test set v=10 where k=4; +``` + +```output +UPDATE 1 +``` + +
+ + +```sql +delete from test where k=3; +``` + +```output +DELETE 1 +``` + +
+ + +```sql +update test set v=10 where k=2; +``` + +```output +UPDATE 1 +``` + +
+ + +```sql +update test set v=1 where k=1; +``` + +```output +UPDATE 1 +``` + +
+ + +```sql +update test set k=10 where k=0; +``` + +```output +UPDATE 1 +``` + +
+ +```sql +select * from test where v>=5 for update; +``` + +```output +(waits) +``` + + +
+ + +```sql +commit; +``` + +
+ +```output + k | v +----+---- + 5 | 5 + 10 | 5 + 4 | 10 + 2 | 10 +(4 rows) +``` + + +
+ +```sql +commit; +``` + + +
+ +### INSERT behavior + +Insert a new key that has just been changed by another transaction, as follows: + +```sql +TRUNCATE TABLE test; +INSERT INTO test VALUES (1, 1); +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +begin transaction isolation level read committed; +``` + + +
+ + +```sql +begin transaction isolation level read committed; +``` + +
+ + +```sql +update test set k=2 where k=1; +``` + +```output +UPDATE 1 +``` + +
+ +```sql +insert into test values (2, 1); +``` + +```output +(waits) +``` + + +
+ + +```sql +commit; +``` + +
+ +```output +ERROR: duplicate key value violates unique constraint "test_pkey" +``` + + +
+ +```sql +rollback; +``` + + +
+ +Insert a new key that has just been changed by another transaction, with `ON CONFLICT`: + +```sql +TRUNCATE TABLE test; +INSERT INTO test VALUES (1, 1); +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +begin transaction isolation level read committed; +``` + + +
+ + +```sql +begin transaction isolation level read committed; +``` + +
+ + +```sql +update test set k=2 where k=1; +``` + +```output +UPDATE 1 +``` + +
+ +```sql +insert into test values (2, 1) on conflict (k) do update set v=100; +``` + +```output +(waits) +``` + + +
+ + +```sql +commit; +``` + +
+ +```output +INSERT 0 1 +``` + + +
+ +```sql +select * from test; +``` + +```output + k | v +---+----- + 2 | 100 +(1 row) +``` + + +
+ +```sql +commit; +``` + + +
+ +Insert an old key that has been removed by another transaction, as follows: + +```sql +TRUNCATE TABLE test; +INSERT INTO test VALUES (1, 1); +``` + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +begin transaction isolation level read committed; +``` + + +
+ + +```sql +begin transaction isolation level read committed; +``` + +
+ + +```sql +update test set k=2 where k=1; +``` + +```output +UPDATE 1 +``` + +
+ +```sql +insert into test values (1, 1); +``` + +```output +(waits) +``` + + +
+ + +```sql +commit; +``` + +
+ +```output +INSERT 0 1 +``` + + +
+ +```sql +select * from test; +``` + +```output + k | v +---+--- + 1 | 1 + 2 | 1 +(2 rows) +``` + + +
+ +```sql +commit; +``` + + +
+ +Insert an old key that has been removed by another transaction, with `ON CONFLICT`: + +```sql +TRUNCATE TABLE test; +INSERT INTO test VALUES (1, 1); +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Client 1 + + Client 2 +
+ +```sql +begin transaction isolation level read committed; +``` + + +
+ + +```sql +begin transaction isolation level read committed; +``` + +
+ + +```sql +update test set k=2 where k=1; +``` + +```output +UPDATE 1 +``` + +
+ +```sql +insert into test values (1, 1) on conflict (k) do update set v=100; +``` + +```output +(waits) +``` + + +
+ + +```sql +commit; +``` + +
+ +```output +INSERT 0 1 +``` + + +
+ +```sql +select * from test; +``` + +```output + k | v +---+----- + 1 | 1 + 2 | 1 +(2 rows) +``` + + +
+ +```sql +commit; +``` + + +
+ +## Cross-feature interaction + +Read Committed interacts with the following feature: + +* [Follower reads](../../../develop/build-global-apps/follower-reads/): When follower reads is enabled and the transaction block is explicitly marked `READ ONLY`, the read point for each statement in a read committed transaction is selected as `Now()` - `yb_follower_read_staleness_ms`. + +## Limitations + +* A `SET TRANSACTION ISOLATION LEVEL ...` statement immediately issued after `BEGIN;` or `BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;` will fail if the YB-TServer GFlag `yb_enable_read_committed_isolation=true`, and the following error will be issued: + + ```output + ERROR: SET TRANSACTION ISOLATION LEVEL must not be called in a subtransaction + ``` + + For more details, see [#12494](https://github.com/yugabyte/yugabyte-db/issues/12494). + +* Read restart and serialization errors are not internally handled in read committed isolation in the following circumstances: + * the query's response size exceeds the YB-TServer `ysql_output_buffer_size` flag, which has a default value of 256KB (see [#11572](https://github.com/yugabyte/yugabyte-db/issues/11572)). + * multiple semicolon-separated statements in a single query string are sent via the simple query protocol (see [#21833](https://github.com/yugabyte/yugabyte-db/issues/21833)). + * for statements other than the first one in a batch sent by the driver (except for [#21607](https://github.com/yugabyte/yugabyte-db/issues/21607) currently). + +* Non-transactional side-effects can occur more than once when a `conflict` or `read restart` occurs in functions or procedures in read committed isolation. This is because in read committed isolation, the retry logic in the database will undo all work done as part of that statement and re-attempt the whole client-issued statement. (See [#12958](https://github.com/yugabyte/yugabyte-db/issues/12958)) + +Read Committed isolation has the following additional limitations when `enable_wait_queues=false` (see [Wait-on-Conflict](../concurrency-control/#wait-on-conflict) and [Interaction with concurrency control](#interaction-with-concurrency-control)): + +* You may have to manually tune the exponential backoff parameters for performance, as described in [Performance tuning](#performance-tuning). +* Deadlock cycles will not be automatically detected and broken quickly. Instead, the `yb_max_query_layer_retries` YSQL configuration parameter will ensure that statements aren't stuck in deadlocks forever. +* There may be unfairness during contention due to the retry-backoff mechanism, resulting in high P99 latencies. + +## Considerations + +This isolation level allows both phantom and non-repeatable reads (as demonstrated in [SELECT behavior without explicit locking](#select-behavior-without-explicit-locking)). + +Adding this new isolation level does not affect the performance of existing isolation levels. + +### Performance tuning + +If a statement in the Read Committed isolation level faces a conflict, it is retried. If using [Fail-on-Conflict](../concurrency-control/#fail-on-conflict) concurrency control mode, the retries are done with exponential backoff until the statement times out or the `yb_max_query_layer_retries` are exhausted, whichever happens first. The following parameters control the backoff: + +* `retry_max_backoff` is the maximum backoff in milliseconds between retries. +* `retry_min_backoff` is the minimum backoff in milliseconds between retries. 
+* `retry_backoff_multiplier` is the multiplier used to calculate the next retry backoff. + +You can set these parameters on a per-session basis, or in the `ysql_pg_conf_csv` YB-TServer flag on cluster startup. + +If the [Wait-on-Conflict](../concurrency-control/#wait-on-conflict) concurrency control policy is enabled, there won't be a need to manually tune these parameters for performance. Statements will restart only when all conflicting transactions have committed or rolled back, instead of retrying with an exponential backoff. diff --git a/docs/content/v2.25/architecture/transactions/read-restart-error.md b/docs/content/v2.25/architecture/transactions/read-restart-error.md new file mode 100644 index 000000000000..aaad015c7d97 --- /dev/null +++ b/docs/content/v2.25/architecture/transactions/read-restart-error.md @@ -0,0 +1,117 @@ +--- +title: Read Restart error +headerTitle: Read Restart error +linkTitle: Read Restart error +description: Learn about the Read Restart error, which stems from data being distributed across more than one node. +menu: + preview: + identifier: architecture-read-restart-error + parent: architecture-acid-transactions + weight: 900 +type: docs +rightNav: + hideH4: true +--- + +The distributed nature of YugabyteDB means that clock skew can be present between different physical nodes in the database cluster. Given that YugabyteDB is a multi-version concurrency control (MVCC) database, this clock skew can sometimes result in an unresolvable ambiguity about whether a version of data should, or should not, be part of a read in snapshot-based transaction isolations (that is, repeatable read and read committed). There are multiple solutions for this problem, [each with their own challenges](https://www.yugabyte.com/blog/evolving-clock-sync-for-distributed-databases/). PostgreSQL doesn't require defining semantics around read restart errors because it is a single-node database without clock skew. + +Read restart errors are raised to maintain the _read-after-commit-visibility_ guarantee: any read query should see all data that was committed before the read query was issued (even in the presence of clock skew between nodes). In other words, read restart errors prevent the following stale read anomaly: + +1. First, user X commits some data, for which the database picks a commit timestamp, say commit_time. +2. Next, user X informs user Y about the commit via a channel outside the database, say a phone call. +3. Then, user Y issues a read that picks a read time, which is less than the prior commit_time due to clock skew. +4. As a consequence, without a read restart error, user Y gets an output without the data that user Y was informed about. + +YugabyteDB doesn't require atomic clocks, but instead allows a configurable setting for maximum clock skew. Time synchronization protocols such as NTP synchronize commodity hardware clocks periodically to keep the skew low and bounded. Additionally, YugabyteDB has optimizations to resolve this ambiguity internally on a best-effort basis. However, when it can't resolve the error internally, YugabyteDB outputs a `read restart` error to the external client, similar to the following: + +```output +ERROR: Query error: Restart read required +``` + +The following scenario describes in detail how clock skew can result in the above-mentioned ambiguity around data visibility: + +* Tokens 17, 29 are inserted into an empty tokens table. Then, all the tokens from the table are retrieved.
+ + The SQL commands for the scenario are as follows: + + ```sql + INSERT INTO tokens VALUES (17); + INSERT INTO tokens VALUES (29); + SELECT * FROM tokens; + ``` + +* The SELECT must return both 17 and 29. +* However, due to clock skew, the INSERT operation picks a commit time higher than the reference time, while the SELECT picks a lower read time and thus omits the prior INSERT from the result set. + +The following diagram shows the order of operations that describe this scenario in detail: + + ![Read Restart Error](/images/architecture/txn/read_restart_error.png) + + The cluster has three tablet servers, namely, `TSERVER 1`, `TSERVER 2`, and `TSERVER 3`. The data for the tokens table is hosted on TSERVER 2 and TSERVER 3. The query layer on TSERVER 1 is serving the SQL requests. + + Moreover, TSERVER 3's clock is running 5 units of time ahead of TSERVER 2's clock because of clock skew. + +1. An INSERT of token 29 is issued to YSQL on TSERVER 1. +2. This INSERT is routed to the tablet hosted on TSERVER 3. The operation picks a commit time of `T2=103` even though the reference clock reads `98`. +3. TSERVER 1 acknowledges the INSERT. +4. Now that the INSERT command is complete, the SELECT command is issued. +5. TSERVER 1 starts a distributed read. TSERVER 1 reads data from multiple shards on different physical YB-TServers in the cluster, namely TSERVER 2 and TSERVER 3. The read point that defines the snapshot of the database at which the data will be read, is picked on TSERVER 2 based on the safe time of that YB-TServer, namely `T1=101`. +6. TSERVER 1 gets back token 17 in the result set. +7. TSERVER 1 now issues a read operation to TSERVER 3 to retrieve records from the second shard. However, since the read time `T1=101` is less than the commit time of the prior insert `T2=103`, the record is not part of the read snapshot. +8. Thus, token 29 is omitted from the result set returned. + + More generally, this anomaly occurs whenever the commit time of a prior write operation `T2` is higher than the read time `T1` of a later read operation, thus violating the _read-after-commit-visibility_ guarantee. + +How does YugabyteDB prevent this clock skew anomaly? + +* First, note that the clock skew between all nodes in the cluster is always in a [max_clock_skew_usec](../../../reference/configuration/yb-tserver/#max-clock-skew-usec) bound due to clock synchronization algorithms. +* Recall that the read operation has a read time of `T1`. For records with a commit timestamp later than `T1` + `max_clock_skew`, the database can be sure that these records were written after the read was issued and exclude it from the results. For records with commit timestamp less than `T1`, the database can include the record in the results, even when the write is concurrent with the read. But for records with a commit timestamp between `T1` and `T1` + `max_clock_skew`, the database cannot determine whether the record should be included or not because: + + * The read operation cannot determine whether the record is committed strictly before the read was issued because of clock skew. Therefore, it cannot simply exclude the record from its output. + + * However, the read operation cannot simply include all records in this ambiguity window because a consistent snapshot must be returned. That is, the read cannot simply advance its read time on observing a record with higher timestamp since the read already returned records from an older snapshot thus far. 
Therefore, the read must be restarted from the beginning with the advanced timestamp. Thus, the name read restart error. + +* Whenever a read operation finds records with timestamp in the range `(T1, T1+max_clock_skew]`, to avoid breaking the strong guarantee that a reader should always be able to read what was committed earlier, and to read a consistent snapshot, the read operation raises a `Read restart` error to restart the read. + +## Troubleshooting + +You can handle and mitigate read restart errors using the following techniques: + +- {{}} Configure [highly accurate clocks](../../../deploy/manual-deployment/system-config#set-up-time-synchronization). +- Implement retry logic in the application. Application retries can help mitigate read restart errors. Moreover, a statement or a transaction may fail in other ways such as transaction conflicts or infrastructure failures. Therefore, a retry mechanism is strongly recommended for a cloud-native, distributed database such as YugabyteDB. + + While implementing application retries is the best long-term approach, there are a few short-term solutions you can use in the interim. +- Use SERIALIZABLE READ ONLY DEFERRABLE mode when running background reads. Read restart errors usually occur when the query is a SELECT statement with a large output footprint and there are concurrent writes that satisfy the SELECT statement. + + Using DEFERRABLE will avoid a read restart error altogether. However, the tradeoff is that the statement waits out the maximum permissible clock skew before reading the data (which is max_clock_skew_usec that has a default of 500ms). This is not an issue for large SELECT statements running in the background because latency is not a priority. + + Examples: + + Set transaction properties at the session level. + + ```sql + SET SESSION CHARACTERISTICS AS TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE; + SELECT * FROM large_table; + ``` + + Enclose the offending query within a transaction block. + + ```sql + BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE; + SELECT * FROM large_table; + COMMIT; + ``` + +- Using read only, deferrable transactions is not always feasible, either because the query is not read only, or the query is part of a read-write transaction, or because an additional 500ms of latency is not acceptable. In these cases, try increasing the value of `ysql_output_buffer_size`. + + This will enable YugabyteDB to retry the query internally on behalf of the user. As long as the output of a statement hasn't crossed ysql_output_buffer_size to result in flushing partial data to the external client, the YSQL query layer retries read restart errors for all statements in a Read Committed transaction block, for the first statement in a Repeatable Read transaction block, and for any standalone statement outside a transaction block. As a tradeoff, increasing the buffer size also increases the memory consumed by the YSQL backend processes, resulting in a higher risk of out-of-memory errors. + + Be aware that increasing `ysql_output_buffer_size` is not a silver bullet. For example, the COPY command can still raise a read restart error even though the command has a one line output. Increasing `ysql_output_buffer_size` is not useful in this scenario. The application must retry the COPY command instead. Another example is DMLs such as INSERT/UPDATE/DELETE. These do not have enough output to overflow the buffer size. 
However, when these statements are executed in the middle of a REPEATABLE READ transaction (e.g. BEGIN ISOLATION LEVEL REPEATABLE READ; ... INSERT ... COMMIT;), a read restart error cannot be retried internally by YugabyteDB. The onus is on the application to ROLLBACK and retry the transaction. +- In rare scenarios where neither latency nor memory can be compromised, but _read-after-commit-visibility_ guarantee is not a necessity, set `yb_read_after_commit_visibility` to `relaxed`. This option only affects pure reads. + + ```sql + SET yb_read_after_commit_visibility TO relaxed; + SELECT * FROM large_table; + ``` + + Please exercise caution when using this option. diff --git a/docs/content/v2.25/architecture/transactions/single-row-transactions.md b/docs/content/v2.25/architecture/transactions/single-row-transactions.md new file mode 100644 index 000000000000..49e34e80970a --- /dev/null +++ b/docs/content/v2.25/architecture/transactions/single-row-transactions.md @@ -0,0 +1,128 @@ +--- +title: Single-row transactions +headerTitle: Single-row transactions +linkTitle: Single-row transactions +description: Learn how YugabyteDB offers ACID semantics for mutations involving a single row or rows that are located within a single shard. +menu: + preview: + identifier: architecture-single-row-transactions + parent: architecture-acid-transactions + weight: 400 +type: docs +--- + +YugabyteDB offers [ACID](../../key-concepts/#acid) semantics for mutations involving a single row or rows that fall in the same shard (partition, tablet). These mutations incur only one network roundtrip between the distributed consensus peers. + +Even read-modify-write operations in a single row or single shard, such as the following, incur only one round trip in YugabyteDB: + +```sql + UPDATE table SET x = x + 1 WHERE ... + INSERT ... IF NOT EXISTS + UPDATE ... IF EXISTS +``` + +Note that this is unlike Apache Cassandra, which uses a concept called lightweight transactions to achieve correctness for these read-modify-write operations and incurs [4-network round trip latency](https://docs.datastax.com/en/cassandra/3.0/cassandra/dml/dmlLtwtTransactions.html). + +## Reading the latest data from a recently elected leader + +In a steady state, when the leader is appending and replicating log entries, the latest majority-replicated entry is exactly the committed one. However, it becomes more complicated right after a leader change. When a new leader is elected in a tablet, it appends a no-op entry to the tablet's Raft log and replicates it, as described in the Raft protocol. Before this no-op entry is replicated, the tablet is considered unavailable for reading up-to-date values and accepting read-modify-write operations. This is because the new tablet leader needs to be able to guarantee that all previous Raft-committed entries are applied to RocksDB and other persistent and in-memory data structures, and it is only possible after it is known that all entries in the new leader's log are committed. + +## Leader leases: reading the latest data in case of a network partition + +Leader leases are a mechanism for a tablet leader to establish its authority for a certain short time period to avoid the following inconsistency: + +* The leader is network-partitioned away from its followers. +* A new leader is elected. +* The client writes a new value and the new leader replicates it. +* The client reads a stale value from the old leader. 
+ +![A diagram showing a potential inconsistency in case of a network partition if leader leases are not present](/images/architecture/txn/leader_leases_network_partition.svg) + +The leader lease mechanism in YugabyteDB prevents this inconsistency, as follows: + +* With every leader-to-follower message (`AppendEntries` in Raft's terminology), whether replicating new entries or even an empty heartbeat message, the leader sends a leader lease request as a time interval (for example, "I want a 2-second lease"). The lease duration is usually a system wide parameter. For each peer, the leader also keeps track of the lease expiration time corresponding to each pending request (for example, time when the request was sent plus lease duration), which is stored in terms of what is known as local monotonic time ([CLOCK_MONOTONIC](https://linux.die.net/man/3/clock_gettime) in Linux). The leader considers itself as a special case of a peer for this purpose. Then, as the leader receives responses from followers, it maintains the majority-replicated watermark of these expiration times as stored at request sending time. The leader adopts this majority-replicated watermark as its lease expiration time and uses it when deciding whether it can serve consistent read requests or accept writes. + +* When a follower receives the previously described Raft RPC, it reads the value of its current monotonic clock, adds the provided lease interval to that, and remembers this lease expiration time, also in terms of its local monotonic time. If this follower becomes the new leader, it is not allowed to serve consistent reads or accept writes until any potential old leader's lease expires. + +* To guarantee that any new leader is aware of any old leader's lease expiration, another bit of logic is necessary. Each Raft group member records the latest expiration time of an old leader that it knows about (in terms of this server's local monotonic time). Whenever a server responds to a `RequestVote` RPC, it includes the largest remaining amount of time of any known old leader's lease in its response. This is handled similarly to the lease duration in a leader's `AppendEntries` request on the receiving server: at least this amount of time has to pass since the receipt of this request before the recipient can service up-to-date requests in case it becomes a leader. This part of the algorithm is needed so that it can be proven that a new leader will always know about any old leader's majority-replicated lease. This is analogous to Raft's correctness proof: there is always a server (the voter) that received a lease request from the old leader and voted for the new leader, because the two majorities must overlap. + + Note that there is no reliance on any kind of clock synchronization for this leader lease implementation, as only time intervals are sent over the network, and each server operates in terms of its local monotonic clock. The following are the only two requirements to the clock implementation: + +* Bounded monotonic clock drift rate between different servers. For example, if the standard Linux assumption of less than 500µs per second drift rate is used, it could be accounted for by multiplying all delays mentioned previously by 1.001. + +* The monotonic clock does not freeze. For example, if running takes place on a virtual machine which freezes temporarily, the hypervisor needs to refresh the virtual machine's clock from the hardware clock when it starts running again. 
+ +The leader lease mechanism guarantees that at any point in time there is at most one server in any tablet's Raft group that considers itself to be an up-to-date leader that is allowed to service consistent reads or accept write requests. + +## Safe timestamp assignment for a read request + +Every read request is assigned a particular multi-version concurrency control (MVCC) timestamp or hybrid time (for example, called `ht_read`), which allows write operations to the same set of keys to happen in parallel with reads. It is crucial, however, that the view of the database as of this timestamp is not updated by concurrently happening writes. That is, once `ht_read` is selected for a read request, no further writes to the same set of keys can be assigned timestamps earlier than or the same as `ht_read`. As has already been mentioned, strictly increasing hybrid times are assigned to Raft log entries of any given tablet. Therefore, one way to assign `ht_read` safely would be to use the hybrid time of the last committed record. As committed Raft log records are never overwritten by future leaders, and each new leader reads the last log entry and updates its hybrid time, all future records will have strictly later hybrid times. + +However, with this conservative timestamp assignment approach, `ht_read` can stay the same if there is no write workload on this particular tablet. This results in a client-observed anomaly if [time-to-live (TTL)](../../../api/ycql/dml_insert/#insert-a-row-with-expiration-time-using-the-using-ttl-clause) is being used: no expired values will disappear, as far as the client is concerned, until a new record is written to the tablet. Then, a lot of old expired values could suddenly disappear. To prevent this anomaly, the read timestamp needs to be assigned to be close to the current hybrid time (which is in its turn close to the physical time) to preserve natural TTL semantics. An attempt should be made to choose `ht_read` to be the latest possible timestamp for which it can be guaranteed that all future write operations in the tablet will have a strictly later hybrid time than that, even across leader changes. + +This requires an introduction of a concept of hybrid time leader leases, similarly to absolute-time leader leases discussed previously. With every Raft `AppendEntries` request to a follower, whether it is a regular request or an empty or heartbeat request, a tablet leader computes a hybrid time lease expiration time (for example, called `ht_lease_exp`), and sends that to the follower. `ht_lease_exp` is usually computed as current hybrid time plus a fixed configured duration (for example, 2 seconds). By replying, followers acknowledge the old leader's exclusive authority over assigning any hybrid times up to and including `ht_lease_exp`. Similarly to regular leases, these hybrid time leases are propagated on votes. The leader maintains a majority-replicated watermark and considers itself to have replicated a particular value of a hybrid time leader lease expiration if it sent that or a greater `ht_lease_exp` value to a majority of Raft group members. For this purpose, the leader is always considered to have replicated an infinite leader lease to itself. + +### Definition of safe time + +Suppose the current majority-replicated hybrid time leader lease expiration is called `replicated_ht_lease_exp`. Then the safe timestamp for a read request can be computed as the maximum of: + +* Last committed Raft entry's hybrid time. 
+* One of the following:
+  * If there are uncommitted entries in the Raft log, the minimum of the first uncommitted entry's hybrid time minus ε (where ε is the smallest possible difference in hybrid time) and `replicated_ht_lease_exp`.
+  * If there are no uncommitted entries in the Raft log, the minimum of the current hybrid time and `replicated_ht_lease_exp`.
+
+In other words, the last committed entry's hybrid time is always safe to read at, but for later hybrid times, the majority-replicated hybrid time leader lease is an upper bound. This is because it can only be guaranteed that no future leader will commit an entry with hybrid time earlier than `ht` if `ht` < `replicated_ht_lease_exp`.
+
+Note that when reading from a single tablet, there is no need to wait for the chosen `ht_read` to become safe to read at because it is chosen as such already. However, if a consistent view of data is to be read across multiple tablets, `ht_read` could be chosen on one of them, and there is a need to wait for that timestamp to become safe to read at on the second tablet. This typically happens very quickly, as the hybrid time on the second tablet's leader is instantly updated with the propagated hybrid time from the first tablet's leader, so in the common case it is only necessary to wait for pending Raft log entries with hybrid times earlier than `ht_read` to be committed.
+
+## Propagating safe time from leader to followers for follower-side reads
+
+YugabyteDB supports reads from followers to satisfy use cases that require an extremely low read latency, which can only be achieved by serving read requests from the data center closest to the client. This comes at the expense of potentially slightly stale results, a trade-off you have to make. Similarly to strongly-consistent leader-side reads, follower-side read operations also have to pick a safe read timestamp.
+
+As stated previously, "safe time to read at" means that no future writes are supposed to change the view of the data as of the read timestamp. However, only the leader is able to compute the safe read time using the algorithm described previously. Therefore, the latest safe time is propagated from leaders to followers on `AppendEntries` RPCs. For example, follower-side reads handled by a partitioned-away follower see a frozen snapshot of the data, in which values with a TTL do not appear to expire. When the partition is healed, the follower starts receiving updates from the leader and can return read results that are very close to being up-to-date.
+
+## Single shard transactions
+
+When a transaction needs to modify rows with keys `k1` and `k2` belonging to the same tablet (shard), the transaction is executed as a single-shard transaction, in which case atomicity is ensured by the fact that both updates are replicated as part of the same [Raft](../../docdb-replication/raft) log record. However, in the most general case, these keys would belong to different tablets, and that is the working assumption.
+
+## CRUD examples
+
+As single-row transactions do not have to update the transaction status table, their performance is much higher than that of [distributed transactions](../distributed-txns).
+
+`INSERT`, `UPDATE`, and `DELETE` single-row SQL statements map to single-row transactions.
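+
+For example, given a hypothetical table `orders (order_id INT PRIMARY KEY, status TEXT)`, each of the following statements touches exactly one row identified by its full primary key, so each executes as a single-row transaction:
+
+```sql
+-- Hypothetical table; each statement specifies the full primary key of one row.
+INSERT INTO orders (order_id, status) VALUES (101, 'placed');
+UPDATE orders SET status = 'shipped' WHERE order_id = 101;
+DELETE FROM orders WHERE order_id = 101;
+```
+
+The qualifying statement forms are described in the following sections.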
+
+### INSERT
+
+All single-row `INSERT` statements:
+
+```sql
+INSERT INTO table (columns) VALUES (values);
+```
+
+### UPDATE
+
+Single-row `UPDATE` statements that specify all primary keys:
+
+```sql
+UPDATE table SET column = <value> WHERE <all primary key values are specified>;
+```
+
+Single-row upsert statements using `INSERT` ... `ON CONFLICT`:
+
+```sql
+INSERT INTO table (columns) VALUES (values)
+    ON CONFLICT DO UPDATE
+    SET <column = value>;
+```
+
+If updates are performed on an existing row, they should match the set of values specified in the `INSERT` clause.
+
+### DELETE
+
+Single-row `DELETE` statements that specify all primary keys:
+
+```sql
+DELETE FROM table WHERE <all primary key values are specified>;
+```
+
diff --git a/docs/content/v2.25/architecture/transactions/transaction-priorities.md b/docs/content/v2.25/architecture/transactions/transaction-priorities.md
new file mode 100644
index 000000000000..664812de2ed9
--- /dev/null
+++ b/docs/content/v2.25/architecture/transactions/transaction-priorities.md
@@ -0,0 +1,193 @@
+---
+title: Transaction priorities in YugabyteDB YSQL
+headerTitle: Transaction priorities
+linkTitle: Transaction priorities
+description: Details about Transaction priorities in YSQL
+menu:
+  preview:
+    identifier: architecture-transaction-priorities
+    parent: architecture-acid-transactions
+    weight: 700
+type: docs
+---
+
+When using the [Fail-on-Conflict](../concurrency-control/#fail-on-conflict) concurrency control policy, transactions are assigned priorities that help decide which transactions should be aborted in case of conflict.
+
+There are two priority buckets, each having a priority range of [reals](https://www.postgresql.org/docs/15/datatype.html) in [0, 1] as follows:
+
+1. `High-priority` bucket: if the first statement in a transaction takes a `FOR UPDATE`/`FOR SHARE`/`FOR NO KEY UPDATE` explicit row lock using SELECT, it is assigned a priority from this bucket.
+
+2. `Normal-priority` bucket: all other transactions are assigned a priority from this bucket.
+
+Note that a transaction with any priority P1 from the high-priority bucket can abort a transaction with any priority P2 from the normal-priority bucket. For example, a transaction with priority 0.1 from the high-priority bucket can abort a transaction with priority 0.9 from the normal-priority bucket.
+
+Priorities are randomly chosen from the applicable bucket. However, you can use the following two YSQL parameters to control the priority assigned to transactions in a specific session:
+
+- `yb_transaction_priority_lower_bound`
+- `yb_transaction_priority_upper_bound`
+
+These parameters help set lower and upper bounds on the randomly-assigned priority that a transaction should receive from the applicable bucket. These parameters accept a value of `real` datatype in the range [0, 1]. Also note that the same bounds apply to both buckets.
+
+{{< note title="All single shard transactions have a priority of 1 in the normal-priority bucket." >}}
+{{< /note >}}
+
+The `yb_get_current_transaction_priority` function can be used to fetch the transaction priority of the current active transaction. It outputs a pair of the form `<priority> (<bucket>)`, where `<priority>` is a value of `real` datatype in [0, 1] with 9 decimal places of precision, and `<bucket>` is either `Normal` or `High`.
+
+{{< note title="Note">}}
+As an exception, if a transaction is assigned the highest priority possible, that is, a priority of 1 in the high-priority bucket, the function returns `highest priority transaction` without any real value.
+{{}} + +A transaction's priority is `0.000000000 (normal-priority transaction)` until a transaction is really started. + +## Examples + +The following examples demonstrate how to set priorities for your transactions and get the current transaction priority. + +1. Create a table and insert some data. + + ```sql + CREATE TABLE test (k INT PRIMARY KEY, v INT); + INSERT INTO test VALUES (1, 1); + ``` + +1. Set the lower and upper bound values for your transactions as follows: + + ```sql + SET yb_transaction_priority_lower_bound = 0.4; + SET yb_transaction_priority_upper_bound = 0.6; + ``` + +1. Create a transaction in the normal-priority bucket as follows: + + ```sql + BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; + SELECT yb_get_current_transaction_priority(); -- 0 due to an optimization which doesn't really start a real transaction internally unless a write occurs + ``` + + ```output + yb_get_current_transaction_priority + ------------------------------------------- + 0.000000000 (Normal priority transaction) + (1 row) + ``` + + ```sql + SELECT * FROM test; + ``` + + ```output + k | v + ---+--- + 1 | 1 + (1 row) + ``` + + ```sql + SELECT yb_get_current_transaction_priority(); -- still 0 due to the optimization which doesn't really start a real transaction internally unless a write occurs + ``` + + ```output + yb_get_current_transaction_priority + ------------------------------------------- + 0.000000000 (Normal priority transaction) + (1 row) + ``` + + ```sql + INSERT INTO test VALUES (2, '2'); -- perform a write which starts a real transaction + SELECT yb_get_current_transaction_priority(); -- non-zero now + ``` + + ```output + yb_get_current_transaction_priority + ------------------------------------------- + 0.537144608 (Normal priority transaction) + (1 row) + ``` + + ```sql + COMMIT; + ``` + +1. Create a transaction in the high-priority bucket as follows: + + ```sql + BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; + SELECT * FROM test WHERE k = 1 FOR UPDATE; -- starts a transaction in a high-priority bucket + ``` + + ```output + k | v + ---+--- + 1 | 1 + (1 row) + ``` + + ```sql + SELECT yb_get_current_transaction_priority(); + ``` + + ```output + yb_get_current_transaction_priority + ----------------------------------------- + 0.412004009 (High priority transaction) + (1 row) + ``` + + ```sql + COMMIT; + ``` + +1. Create a transaction with the highest priority + + ```sql + SET yb_transaction_priority_upper_bound = 1; + SET yb_transaction_priority_lower_bound = 1; + BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; + SELECT * FROM test WHERE k = 1 FOR UPDATE; + ``` + + ```output + k | v + ---+--- + 1 | 1 + (1 row) + ``` + + ```sql + SELECT yb_get_current_transaction_priority(); + ``` + + ```output + yb_get_current_transaction_priority + ------------------------------------- + Highest priority transaction + (1 row) + ``` + + ```sql + COMMIT; + ``` + +{{< note title="Internal representation of priorities" >}} + +Internally, both the normal and high-priority buckets are mapped to a `uint64_t` space. The 64 bit range is used by the two priority buckets as follows: + +1. Normal-priority bucket: `[yb::kRegularTxnLowerBound, yb::kRegularTxnUpperBound]`, that is, 0 to `uint32_t_max`-1 + +1. 
High-priority bucket: `[yb::kHighPriTxnLowerBound, yb::kHighPriTxnUpperBound]`, that is, `uint32_t_max` to `uint64_t_max` + +For ease of use, the bounds are expressed as a [0, 1] real range for each bucket in the lower or upper bound YSQL parameters and the `yb_get_current_transaction_priority` function. The [0, 1] real range map proportionally to the integer ranges for both buckets. In other words, the [0, 1] range in the normal-priority bucket maps to `[0, uint32_t_max-1]` and the [0, 1] range in the high-priority bucket maps to `[uint32_t_max, uint64_t_max]`. + +{{< /note >}} diff --git a/docs/content/v2.25/architecture/transactions/transactional-io-path.md b/docs/content/v2.25/architecture/transactions/transactional-io-path.md new file mode 100644 index 000000000000..1bb0103177b8 --- /dev/null +++ b/docs/content/v2.25/architecture/transactions/transactional-io-path.md @@ -0,0 +1,118 @@ +--- +title: Transactional I/O path +headerTitle: Transactional I/O path +linkTitle: Transactional I/O path +description: Learn how YugabyteDB manages the write path of a transaction. +aliases: + - /architecture/transactions/transactional-io-path/ +menu: + preview: + identifier: architecture-transactional-io-path + parent: architecture-acid-transactions + weight: 300 +type: docs +--- + +For an overview of common concepts used in YugabyteDB's implementation of distributed transactions, see [Distributed transactions](../distributed-txns/). + +The write path of a transaction is used for modifying multiple keys and the read path is used for reading a consistent combination of values from multiple tablets. + +## Write path + +The following diagram depicts the high-level steps of a distributed write-only transaction, not including +any conflict resolution: + +![Distributed write-only transaction](/images/architecture/txn/distributed_txn_write_path.png) + +### Client requests transaction + +The client sends a request to a YugabyteDB tablet server that requires a distributed transaction. The following example uses an extension to CQL: + + ```sql + START TRANSACTION; + UPDATE t1 SET v = 'v1' WHERE k = 'k1'; + UPDATE t2 SET v = 'v2' WHERE k = 'k2'; + COMMIT; + ``` + +The tablet server that receives the transactional write request becomes responsible for driving all the steps involved in this transaction. This orchestration of transaction steps is performed by a component called a transaction manager. Every transaction is handled by exactly one transaction manager. + +### Create a transaction record + +A transaction ID is assigned and a transaction status tablet is selected to keep track of a transaction status record that has the following fields: + +- Status that can be pending, committed, or aborted. +- Commit hybrid timestamp, if committed. +- List of IDs of participating tablets, if committed. + +It makes sense to select a transaction status tablet in a way such that the transaction manager's tablet server is also the leader of its Raft group, because this allows to cut the RPC latency on querying and updating the transaction status. But in the most general case, the transaction status tablet might not be hosted on the same tablet server that initiates the transaction. + +### Write provisional records + +Provisional records are written to tablets containing the rows that need to be modified. 
These provisional records contain the transaction ID, the values that need to be written, and the provisional hybrid timestamp, which is not the final commit timestamp and will in general be different for different provisional records within the same transaction. In contrast, there is only one commit hybrid timestamp for the entire transaction. + +As the provisional records are written, it is possible to encounter conflicts with other transactions. In this case, the transaction would have to be aborted and restarted. These restarts still happen transparently to the client up to a certain number of retries. + +### Commit the transaction + +When the transaction manager has written all the provisional records, it commits the transaction by sending an RPC request to the transaction status tablet. The commit operation can only succeed if the transaction has not yet been aborted due to conflicts. The atomicity and durability of the commit operation is guaranteed by the transaction status tablet's Raft group. Once the commit operation is complete, all provisional records immediately become visible to clients. + +The commit request the transaction manager sends to the status tablet includes the list of tablet IDs of all tablets that participate in the transaction. No new tablets can be added to this set by this point. The status tablet needs this information to orchestrate cleaning up provisional records in participating tablets. + +### Send the response back to client + +The YQL engine sends the response back to the client. If any client (either the same one or different) sends a read request for the keys that were written, the new values are guaranteed to be reflected in the response, because the transaction is already committed. This property of a database is sometimes called the "read your own writes" guarantee. + +### Asynchronously apply and clean up provisional records + +This step is coordinated by the transaction status tablet after it receives the commit message for our transaction and successfully replicates a change to the transaction's status in its Raft group. The transaction status tablet already knows what tablets are participating in this transaction, so it sends cleanup requests to them. Each participating tablet records a special "apply" record into its Raft log, containing the transaction ID and commit timestamp. When this record is Raft-replicated in the participating tablet, the tablet removes the provisional records belonging to the transaction, and writes regular records with the correct commit timestamp to its RocksDB databases. These records are virtually indistinguishable from those written by regular single-row operations. + +Once all participating tablets have successfully processed these apply requests, the status tablet can delete the transaction status record because all replicas of participating tablets that have not yet cleaned up provisional records (for example, slow followers) will do so based on information available locally within those tablets. The deletion of the status record happens by writing a special "applied everywhere" entry to the Raft log of the status tablet. Raft log entries belonging to this transaction will be cleaned up from the status tablet's Raft log as part of regular garbage-collection of old Raft logs soon after this point. + +## Read path + +YugabyteDB is a multiversion concurrency control (MVCC) database, which means it internally keeps track of multiple versions of the same value. Read operations do not take any locks. 
Instead, they rely on the MVCC timestamp in order to read a consistent snapshot of the data. A long-running read operation, either single-shard or cross-shard, can proceed concurrently with write operations modifying the same key. + +As described in [Single-row transactions](../single-row-transactions/), up-to-date reads are performed from a single tablet (shard), with the most recent value of a key being the value written by the last committed Raft log record known to the Raft leader. For reading multiple keys from different tablets, though, it must be ensured that the values read come from a recent consistent snapshot of the database. The following clarifies these properties of the selected snapshot: + +- Consistent snapshot: The snapshot must show any transaction's records fully, or not show them at all. It cannot contain half of the values written by a transaction and omit the other half. The snapshot consistency is ensured by performing all reads at a particular hybrid time (`ht_read`), and ignoring any records with later hybrid time. + +- Recent snapshot: The snapshot includes any value that any client might have already seen, which means all values that were written or read before this read operation was initiated. This also includes all previously written values that other components of the client application might have written to or read from the database. The client performing the current read might rely on the presence of those values in the result set because those other components of the client application might have communicated this data to the current client through asynchronous communication channels. To ensure the snapshot is recent, the read operation needs to be restarted when it is determined that the chosen hybrid time was too early, that is, there are some records that could have been written before the read operation was initiated but have a hybrid time later than the currently set `ht_read`. + +The following diagram depicts the process: + +![Distributed transaction read path diagram](/images/architecture/txn/distributed_txn_read_path.png) + +### Handle the client's request and initialize read transaction + +The client's request to either the YCQL or YSQL API arrives at the YQL engine of a tablet server. The YQL engine detects that the query requests rows from multiple tablets and starts a read-only transaction. A hybrid time `ht_read` is selected for the request, which could be either the current hybrid time on the YQL engine's tablet server or the [safe time](../single-row-transactions/#safe-timestamp-assignment-for-a-read-request) on one of the involved tablets. The latter case would reduce waiting for safe time for at least that tablet and is therefore better for performance. Typically, due to YugabyteDB load-balancing policy, the YQL engine receiving the request also hosts some of the tablets that the request is reading, allowing to implement the more performant second option without an additional RPC round-trip. + +In addition, a point in time called `global_limit` is selected, computed as `physical_time + max_clock_skew`, which helps determine if a particular record was definitely written after the read request had started. `max_clock_skew` is a globally-configured bound on clock skew between different YugabyteDB servers. + +### Read from all tablets at the specific hybrid time + +The YQL engine sends requests to all tablets from which the transaction needs to read. 
Each tablet waits for `ht_read` to become a safe time to read at, according to the [definition of safe time](../single-row-transactions/#definition-of-safe-time), and then starts executing its part of the read request from its local DocDB.
+
+When a tablet server sees a relevant record with a hybrid time `ht_record`, it executes the following logic:
+
+- If `ht_record` ≤ `ht_read`, include the record in the result.
+- If `ht_record` > `definitely_future_ht`, exclude the record from the result. `definitely_future_ht` means a hybrid time such that a record with a later hybrid time than that was definitely written after the read request had started. For now, `definitely_future_ht` can be assumed to be `global_limit`.
+- If `ht_read` < `ht_record` ≤ `definitely_future_ht`, it is not known if this record was written before or after the start of the read request. But it cannot be omitted from the result because if it was in fact written before the read request, this may produce a client-observed inconsistency. Therefore, the entire read operation must be restarted with an updated value of `ht_read` = `ht_record`.
+
+To prevent an infinite loop of these read restarts, a tablet-dependent hybrid time value `local_limit_tablet` is returned to the YQL engine, computed as the current safe time in this tablet. It is now known that any record (regular or provisional) written to this tablet with a hybrid time later than `local_limit_tablet` could not have possibly been written before the start of the read request. Therefore, the read transaction would not have to be restarted if a record with a hybrid time later than `local_limit_tablet` is observed in a later attempt to read from this tablet within the same transaction, and `definitely_future_ht` = `min(global_limit, local_limit_tablet)` is set on future attempts.
+
+### Tablets query the transaction status
+
+As each participating tablet reads from its local DocDB, it might encounter provisional records for which it does not yet know the final transaction status and commit time. In these cases, it would send a transaction status request to the transaction status tablet. If a transaction is committed, it is treated as if DocDB already contained permanent records with hybrid time equal to the transaction's commit time. The [cleanup](#asynchronously-apply-and-clean-up-provisional-records) of provisional records happens independently and asynchronously.
+
+### Tablets respond to YQL
+
+Each tablet's response to YQL contains the following information:
+
+- Whether or not read restart is required.
+- `local_limit_tablet` to be used to restrict future read restarts caused by this tablet.
+- The actual values that have been read from this tablet.
+
+### YQL sends the response to the client
+
+As soon as all read operations from all participating tablets succeed and it has been determined that there is no need to restart the read transaction, a response is sent to the client using the appropriate wire protocol.
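+
+As an illustration of the preceding steps, consider a hypothetical YSQL read transaction that touches the two keys written by the write-path example, which reside in different tablets:
+
+```sql
+-- Hypothetical read-only transaction; k1 and k2 live in different tablets.
+BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SELECT v FROM t1 WHERE k = 'k1';
+SELECT v FROM t2 WHERE k = 'k2';
+COMMIT;
+```
+
+Both `SELECT` statements are executed as of the same `ht_read`, so the response either includes both values written by the earlier distributed transaction or neither of them, possibly after a transparent read restart as described previously.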
diff --git a/docs/content/v2.25/architecture/transactions/transactions-overview.md b/docs/content/v2.25/architecture/transactions/transactions-overview.md new file mode 100644 index 000000000000..9a5eedb23f97 --- /dev/null +++ b/docs/content/v2.25/architecture/transactions/transactions-overview.md @@ -0,0 +1,61 @@ +--- +title: Fundamentals of Distributed Transactions +linkTitle: Fundamentals +description: The fundamental concepts behind distributed transactions +menu: + preview: + identifier: architecture-transactions-overview + parent: architecture-acid-transactions + weight: 100 +type: docs +--- + +A transaction is a sequence of operations performed as a single logical unit of work. The intermediate states of the database as a result of applying the operations inside a transaction are not visible to other concurrent transactions, and if a failure occurs that prevents the transaction from completing, then none of the steps affect the database. + +Note that all update operations inside DocDB are considered to be transactions, including operations that update only one row, as well as those that update multiple rows that reside on different nodes. If `autocommit` mode is enabled, each statement is executed as one transaction. + +Let us go over some of the fundamental concepts involved in making distributed transactions work. + +## Time synchronization + +A transaction in a YugabyteDB cluster may need to update multiple rows that span across nodes in a cluster. In order to be ACID-compliant, the various updates made by this transaction should be visible instantaneously as of a fixed time, irrespective of the node in the cluster that reads the update. To achieve this, the nodes of the cluster must agree on a global notion of time, which requires all nodes to have access to a highly-available and globally-synchronized clock. [TrueTime](https://cloud.google.com/spanner/docs/true-time-external-consistency), used by Google Cloud Spanner, is an example of such a clock with tight error bounds. However, this type of clock is not available in many deployments. Physical time clocks (or wall clocks) cannot be perfectly synchronized across nodes and cannot order events with the purpose to establish a causal relationship across nodes. + +### Hybrid logical clocks + +YugabyteDB uses hybrid logical clocks (HLC) based on the [hybrid time algorithm](http://users.ece.utexas.edu/~garg/pdslab/david/hybrid-time-tech-report-01.pdf), a distributed timestamp assignment algorithm that combines the advantages of local real-time (physical) clocks and Lamport clocks that track causal relationships. The hybrid time algorithm ensures that events connected by a causal chain of the form "A happens before B on the same server" or "A happens on one server, which then sends an RPC to another server, where B happens", always get assigned hybrid timestamps in an increasing order + +Each node in a YugabyteDB cluster first computes its HLC represented as a tuple (physical time component, logical component). HLCs generated on any node are strictly monotonic, and are compared as a tuple. When comparing two HLCs, the physical time component takes precedence over the logical component. + +* Physical time component: YugabyteDB uses the physical clock (`CLOCK_REALTIME` in Linux) of a node to initialize the physical time component of its HLC. Once initialized, the physical time component can only be updated to a higher value. 
+ +* Logical component: For a given physical time component, the logical component of the HLC is a monotonically increasing number that provides ordering of events happening in that same physical time. This is initially set to 0. If the physical time component is updated at any point, the logical component is reset to 0. + +On any RPC communication between two nodes, HLC values are exchanged. The node with the lower HLC updates its HLC to the higher value. If the physical time on a node exceeds the physical time component of its HLC, the latter is updated to the physical time and the logical component is set to 0. Thus, HLCs on a node are monotonically increasing. + +The same HLC is used to determine the read point in order to determine which updates should be visible to end clients. If an update has safely been replicated onto a majority of nodes, as per the Raft protocol, that update operation can be acknowledged as successful to the client and it is safe to serve all reads up to that HLC. This forms the foundation for [lockless multiversion concurrency control in YugabyteDB](#multi-version-concurrency-control). + +### Hybrid time use + +Multiple aspects of YugabyteDB's transaction model rely on hybrid time. + +Hybrid timestamps assigned to committed [Raft log entries](../../docdb-replication/raft#log-entries) in the same tablet always keep increasing, even if there are leader changes. This is because the new leader always has all committed entries from previous leaders, and it makes sure to update its hybrid clock with the timestamp of the last committed entry before appending new entries. This property simplifies the logic of selecting a safe hybrid time to select for single-tablet read requests. + +A request trying to read data from a tablet at a particular hybrid time needs to ensure that no changes happen in the tablet with timestamp values lower than the read timestamp, which could lead to an inconsistent result set. The need to read from a tablet at a particular timestamp arises during transactional reads across multiple tablets. This condition becomes easier to satisfy due to the fact that the read timestamp is chosen as the current hybrid time on the YB-TServer processing the read request, so hybrid time on the leader of the tablet being read from immediately becomes updated to a value that is at least as high as the read timestamp. Then the read request only has to wait for any relevant entries in the Raft queue with timestamp values lower than the read timestamp to be replicated and applied to RocksDB, and it can proceed with processing the read request after that. + +### Caveat + +The main downside is in certain scenarios where concurrent transactions try to perform conflicting updates. In these scenarios, the conflict resolution depends on the maximum clock skew in the cluster. This leads to a higher number of transaction conflicts or a higher latency of the transaction. + +## Multi-version concurrency control + +YugabyteDB maintains data consistency internally using multi-version concurrency control (MVCC) without the need to lock rows. Each transaction works on a version of the data in the database as of some hybrid timestamp that is derived from [Hybrid Logical Clock](#hybrid-logical-clocks). This prevents transactions from reading the intermediate updates made by concurrently-running transactions, some of which may be updating the same rows. Each transaction, however, can see its own updates, thereby providing transaction isolation for each database session. 
Using MVCC minimizes lock contention during the execution of multiple concurrent transactions. + +YugabyteDB implements MVCC and internally keeps track of multiple versions of values corresponding to the same key (for example, of a particular column in a particular row), as described in [Persistence on top of RocksDB](../../docdb/data-model). The last part of each key is a timestamp, which enables quick navigation to a particular version of a key in the RocksDB key-value store. + +## Provisional records + +YugabyteDB needs to store uncommitted values written by distributed transactions in a similar persistent data structure. However, they cannot be written to DocDB as regular values, because they would then become visible at different times to clients reading through different tablet servers, allowing a client to see a partially applied transaction and thus breaking atomicity. + +{{}} +To get a deeper understanding of the layout of the uncommited records, see [Provisional records](../distributed-txns#provisional-records) +{{}} diff --git a/docs/content/v2.25/architecture/yb-master.md b/docs/content/v2.25/architecture/yb-master.md new file mode 100644 index 000000000000..1e9e646af715 --- /dev/null +++ b/docs/content/v2.25/architecture/yb-master.md @@ -0,0 +1,77 @@ +--- +title: YB-Master service +headerTitle: YB-Master service +linkTitle: YB-Master +description: Learn how the YB-Master service manages tablet metadata and coordinates cluster configuration changes. +headcontent: Catalog information, tablet metadata, and cluster coordination +aliases: + - /preview/architecture/concepts/yb-master/ +menu: + preview: + identifier: architecture-concepts-yb-master + parent: architecture + weight: 1100 +type: docs +--- + +The YB-Master service keeps the system metadata and records, such as tables and the location of their tablets, users and roles with their associated permissions, and so on. + +The YB-Master service is also responsible for coordinating background operations, such as load-balancing or initiating replication of under-replicated data, as well as performing a variety of administrative operations such as creating, altering, and dropping tables. + +The YB-Master is [highly available](#high-availability), as it forms a Raft group with its peers, and it is not in the critical path of I/O against user tables. + +![master_overview](/images/architecture/master_overview.png) + +The YB-Master performs a number of important operations within the system. Some operations are performed throughout the lifetime of the universe, in the background, without impacting foreground read and write performance. + +## Administrative operations + +When one of these universe-wide operations is initiated, such as creating a new table, modifying an existing table, dropping (deleting) a table, or creating backups, the YB-Master ensures that the operation is successfully propagated and applied to all relevant tablets, regardless of the current state of the YB-TServer nodes hosting those tablets. + +This guarantee is crucial because if a YB-TServer fails while such an operation is in progress, it cannot cause the operation to be only partially applied, leaving the database in an inconsistent state. The YB-Master makes sure the operation is either fully applied everywhere or not applied at all, maintaining data integrity. 
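+
+For example, a schema change such as the following (using a hypothetical table) is coordinated by the YB-Master, which drives it to completion on every tablet of the table even if individual YB-TServers fail while the operation is in flight:
+
+```sql
+-- Hypothetical table; the YB-Master propagates the change to all of its tablets.
+ALTER TABLE orders ADD COLUMN shipped_at TIMESTAMP;
+```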
+ +## System metadata + +The YB-Master stores important system-wide metadata, which includes information about: + +- Namespaces (database names) +- Table information +- User roles and permissions + +This system metadata is crucial for managing and coordinating the entire YugabyteDB cluster. The YB-Master stores this system metadata in an internal table. This allows the metadata to be managed and accessed like any other table in the database. + +To ensure redundancy and prevent data loss, the system metadata is replicated across all YB-Master nodes using a replication protocol called Raft. This means that if one YB-Master fails, the others will still have the up-to-date system metadata. + +## Table creation + +The YB-Master leader validates the table schema and decides the desired number of tablets for the table and creates metadata for each of them. The table schema and the tablet metadata information is replicated to YB-Master Raft group. This ensures that the table creation can succeed even if the current YB-Master leader fails. After this, as the operation is asynchronous and can proceed even if the current YB-Master leader fails, the table creation API returns a success. + +## Tablet assignments + +The YB-Master component in YugabyteDB keeps track of all the tablets (data shards) and the YB-TServer nodes that are currently hosting them. It maintains a mapping of which tablets are stored on which YB-TServer nodes. + +When clients, such as the YugabyteDB query layer or applications using the YCQL (Cassandra-compatible) or YSQL (PostgreSQL-compatible) APIs, need to retrieve data, they can efficiently query the YB-Master to get this tablet-to-node mapping. The smart clients then cache (store) this mapping locally. + +By having the tablet-to-node mapping cached, the smart clients can communicate directly with the correct YB-TServer node that holds the required data, without needing to go through additional network hops or intermediate components. This direct communication allows for efficient data retrieval and query processing. + +## Load balancing + +The YB-Master leader places (at `CREATE TABLE` time) the tablets across YB-TServers to enforce any user-defined data placement constraints and ensure uniform load. In addition, during the lifetime of the universe, as nodes are added, fail, or become decommissioned, it continues to balance the load and enforce data placement constraints automatically. + +## Leader balancing + +Aside from ensuring that the number of tablets served by each YB-TServer is balanced across the universe, the YB-Masters also ensure that each node has a symmetric number of tablet leaders across nodes. This is also done for the followers. + +## Re-replication of data + +The YB-Master receives regular "heartbeat" signals from all the YB-TServer nodes in the cluster. These heartbeats allow the YB-Master to monitor the liveness (active state) of each YB-TServer. + +If the YB-Master detects that a YB-TServer has failed (stopped sending heartbeats), it keeps track of how long the node has been in a failed state. If this failure duration exceeds a predetermined threshold, the YB-Master initiates a process to replace the failed node. + +Specifically, the YB-Master identifies replacement YB-TServer nodes and re-replicates (copies) the tablet data from the failed node to the new nodes. This re-replication process ensures that the data remains available and redundant, even after a node failure. 
+ +However, the YB-Master carefully throttles (limits) the rate of re-replication to avoid impacting the ongoing, regular operations of the database cluster. This throttling prevents the re-replication from overloading the system and affecting the performance of user queries and other foreground activities. + +## High availability + +The YB-Master is not in the critical path of normal I/O operations, therefore its failure does not affect a functioning universe. Nevertheless, the YB-Master is a part of a Raft group with the peers running on different nodes. The number of peers is decided by the [replication factor](../key-concepts/#replication-factor-rf) of the [universe](../key-concepts/#universe). One of these peers is the active master and the others are active standbys. If the active master (the YB-Master leader) fails, these peers detect the leader failure and re-elect a new YB-Master leader which becomes the active master in seconds of the failure. diff --git a/docs/content/v2.25/architecture/yb-tserver.md b/docs/content/v2.25/architecture/yb-tserver.md new file mode 100644 index 000000000000..1a0196de436a --- /dev/null +++ b/docs/content/v2.25/architecture/yb-tserver.md @@ -0,0 +1,81 @@ +--- +title: YB-TServer service +headerTitle: YB-TServer service +linkTitle: YB-TServer +description: Learn how the YB-TServer service stores and serves application data using tablets (also known as shards). +headcontent: Serve application data and manage tablets +aliases: + - /preview/architecture/concepts/yb-tserver/ +menu: + preview: + identifier: architecture-concepts-yb-tserver + parent: architecture + weight: 1200 +type: docs +--- + +The YugabyteDB Tablet Server (YB-TServer) service is responsible for the input-output (I/O) of the end-user requests in a YugabyteDB cluster. Data for a table is split (sharded) into tablets. Each tablet is composed of one or more tablet peers, depending on the replication factor. Each YB-TServer hosts one or more tablet peers. + +The following diagram depicts a basic four-node YugabyteDB universe, with one table that has 4 tablets and a replication factor of 3: + +![TServer overview](/images/architecture/tserver_overview.png) + +The tablet-peers corresponding to each tablet hosted on different YB-TServers form a Raft group and replicate data between each other. The system shown in the preceding diagram includes sixteen independent Raft groups. For more information, see [Replication layer](../docdb-replication/). + +In each YB-TServer, cross-tablet intelligence is employed to maximize resource efficiency. There are multiple ways the YB-TServer coordinates operations across the tablets it hosts. + +## Server-global block cache + +The block cache is shared across different tablets in a given YB-TServer, leading to highly efficient memory utilization in cases when one tablet is read more often than others. For example, if one table has a read-heavy usage pattern compared to others, the block cache would automatically favor blocks of this table, as the block cache is global across all tablet peers. + +## Space amplification + +YugabyteDB's compactions are size-tiered. Size-tier compactions have the advantage of lower disk write (I/O) amplification when compared to level compactions. There may be a concern that size-tiered compactions have a higher space amplification (that it needs 50% space headroom). This is not true in YugabyteDB because each table is broken into several tablets and concurrent compactions across tablets are throttled to a specific maximum. 
The typical space amplification in YugabyteDB tends to be in the 10-20% range. + +## Throttled compactions + +The compactions are throttled across tablets in a given YB-TServer to prevent compaction storms. This prevents, for example, high foreground latencies during a compaction storm. + +The default policy ensures that doing a compaction is worthwhile. The algorithm tries to make sure that the files being compacted are not too disparate in terms of size. For example, it does not make sense to compact a 100GB file with a 1GB file to produce a 101GB file, because it would require a lot of unnecessary I/O for little gain. + +## Compaction queues + +Compactions are prioritized into large and small compactions with some prioritization to keep the system functional even in extreme I/O patterns. + +In addition to throttling controls for compactions, YugabyteDB does a variety of internal optimizations to minimize impact of compactions on foreground latencies. For example, a prioritized queue to give priority to small compactions over large compactions to make sure the number of SSTable files for any tablet stays as low as possible. + +## Manual compactions + +YugabyteDB allows compactions to be externally triggered on a table using the [`compact_table`](../../admin/yb-admin/#compact-table) command in the [yb-admin utility](../../admin/yb-admin/). This is useful when new data is no longer coming into the system for a table and you might want to reclaim disk space due to overwrites or deletes that have already happened, or due to TTL expiry. + +## Statistics-based full compactions to improve read performance + +YugabyteDB tracks the number of key-value pairs that are read at the DocDB level over a sliding period (dictated by the [auto_compact_stat_window_seconds](../../reference/configuration/yb-tserver#auto-compact-stat-window-seconds) YB-TServer flag). If YugabyteDB detects an overwhelming amount of the DocDB reads in a tablet are skipping over tombstoned and obsolete keys, then a full compaction will be triggered to remove the unnecessary keys. + +Once all of the following conditions are met in the sliding window, full compaction is automatically triggered on the tablet: + +- The ratio of obsolete (for example, deleted or removed due to TTL) versus active keys read reaches the threshold [auto_compact_percent_obsolete](../../reference/configuration/yb-tserver/#auto-compact-percent-obsolete). + +- Enough keys have been read ([auto_compact_min_obsolete_keys_found](../../reference/configuration/yb-tserver/#auto-compact-min-obsolete-keys-found)). + +While this feature is compatible with tables with TTL, YugabyteDB won't schedule compactions on tables with TTL if the [TTL file expiration](../../develop/learn/ttl-data-expiration-ycql/#efficient-data-expiration-for-ttl) feature is active. + +## Scheduled full compactions + + YugabyteDB allows full compactions overall data in a tablet to be scheduled automatically using the [scheduled_full_compaction_frequency_hours](../../reference/configuration/yb-tserver#scheduled-full-compaction-frequency-hours) and [scheduled_full_compaction_jitter_factor_percentage](../../reference/configuration/yb-tserver#scheduled-full-compaction-jitter-factor-percentage) YB-TServer flags. This can be useful for performance and disk space reclamation for workloads with a large number of overwrites or deletes on a regular basis. 
This can be used with tables with TTL as well but is not compatible with the [TTL file expiration](../../develop/learn/ttl-data-expiration-ycql/#efficient-data-expiration-for-ttl) feature. + +## Server-global memstore limit + +Server-global memstore limit tracks and enforces a global size across the memstores for different tablets. This is useful when there is a skew in the write rate across tablets. For example, if there are tablets belonging to multiple tables in a single YB-TServer and one of the tables gets a lot more writes than the other tables, the write-heavy table is allowed to grow much larger than it could if there was a per-tablet memory limit. This allows for good write efficiency. + +## Auto-sizing of block cache and memstore + +The block cache and memstores represent some of the larger memory-consuming components. Since these are global across all the tablet peers, this makes memory management and sizing of these components across a variety of workloads easy. Based on the RAM available on the system, the YB-TServer automatically gives a certain percentage of the total available memory to the block cache and another percentage to memstores. + +## Distributing tablet load uniformly across data disks + +On multi-SSD machines, the data (SSTable) and WAL (Raft write-ahead log) for various tablets of tables are evenly distributed across the attached disks on a per-table basis. This load distribution (also known as striping), ensures that each disk handles an even amount of load for each table. + +## High availability + +The failure of a YB-TServer hosting follower tablets has no impact on the write path. If there are any leader tablets present in a failed YB-TServer, the raft group for that tablet [elects](../docdb-replication/raft#leader-election) a new leader on a different YB-TServer. The unavailability window is approximately 3 seconds (assuming the default heartbeat interval of 500 ms) in the event of a failure of the tablet peer leader. diff --git a/docs/content/v2.25/benchmark/_index.md b/docs/content/v2.25/benchmark/_index.md new file mode 100644 index 000000000000..9854e35cb27f --- /dev/null +++ b/docs/content/v2.25/benchmark/_index.md @@ -0,0 +1,67 @@ +--- +title: Benchmark YugabyteDB +headerTitle: Benchmark YugabyteDB +linkTitle: Benchmark +description: Benchmark YugabyteDB using TPC-C, sysbench, YCSB and more. +aliases: + - /preview/benchmark/performance/ +type: indexpage +--- + +YugabyteDB is designed to provide high availability, scalability, and fault tolerance while providing simple interfaces via YSQL and YCQL APIs. However, to assess its true capabilities and to showcase its potential to handle real-world workloads, rigorous benchmarking is essential. + +Benchmarking is the process of evaluating the performance and capabilities of a system under specific workloads to gain insights into its scalability, resilience, and overall efficiency. This process involves simulating real-world usage scenarios using standardized workloads to understand how well the system performs, scales, and recovers from failures. It is crucial to understand the ability of YugabyteDB to handle various workloads, such as the TPC-C, YCSB, and sysbench benchmarks, which represent different aspects of a distributed database's performance. + +## TPC-C (Transaction Processing Performance Council - Benchmark C) + +[TPC-C](http://www.tpc.org/tpcc/) is a widely recognized benchmark for testing the performance of transactional database systems. 
It simulates a complex OLTP (Online Transaction Processing) workload that involves a mix of different transactions like order creation, payment processing, and stock level checking. Benchmarking YugabyteDB using TPC-C helps assess its ability to handle a high volume of concurrent transactions and maintain consistency and integrity. + +{{}} +To test performance for concurrent transactions with TPC-C, see [TPC-C](tpcc/). +{{}} + +## YCSB (Yahoo Cloud Serving Benchmark) + +[YCSB](https://github.com/brianfrankcooper/YCSB/wiki) is designed to evaluate the performance of databases under various read and write workloads, ranging from mostly read-heavy to write-heavy. Using YCSB, you can assess how well YugabyteDB handles different data access patterns and query loads, which is crucial for applications with diverse usage requirements. + +{{}} +To test performance using the Yahoo Cloud Serving Benchmark, see [YCSB](ycsb-ysql/). +{{}} + +## sysbench + +[sysbench](https://github.com/akopytov/sysbench) is a versatile benchmarking tool that covers a wide range of database workloads, including CPU, memory, disk I/O, and database operations. It helps measure the system's performance, stability, and scalability under different stress conditions, enabling you to identify potential bottlenecks and weaknesses. + +{{}} +To test performance using sysbench, see [Sysbench](sysbench-ysql/). +{{}} + +## Learn More + +{{}} + + {{}} + + {{}} + + {{}} + + {{}} + +{{}} diff --git a/docs/content/v2.25/benchmark/key-value-workload-ycql.md b/docs/content/v2.25/benchmark/key-value-workload-ycql.md new file mode 100644 index 000000000000..5def902cfa6b --- /dev/null +++ b/docs/content/v2.25/benchmark/key-value-workload-ycql.md @@ -0,0 +1,137 @@ +--- +title: Benchmark YCQL performance with key-value workloads +headerTitle: Key-value workload +linkTitle: Key-value workload +description: Benchmark YCQL performance with key-value workloads. +headcontent: Benchmark YCQL performance with key-value workloads +aliases: + - /preview/benchmark/performance/ + - /preview/benchmark/key-value-workload +menu: + preview: + identifier: key-value-workload-1-ycql + parent: benchmark + weight: 6 +type: docs +--- + +{{}} + +Use this benchmark to test the performance of YugabyteDB using a key-value workload. + +## Recommended configuration + +### Cluster configuration + +For this benchmark, you will set up a three-node YugabyteDB cluster with a replication factor of `3`. + +### Machine configuration + +- Amazon Web Services (AWS) + + - Instance type: i3.4xlarge + - Storage: 2 x 1.9 TB NVMe SSD (comes preconfigured with the instance) + +- Google Cloud Platform (GCP) + + - Instance type: n1-standard-16 + - Storage: 2 x 375 GB SSD + +- on-premises data center + + - Instance: 16 CPU cores + - Storage: 1 x 200 GB SSD (minimum) + - RAM size: 30 GB (minimum) + +### Benchmark tool + +We will use the [YugabyteDB Workload Generator](https://github.com/yugabyte/yb-sample-apps) to perform this benchmark. + +Download the YugabyteDB workload generator JAR file (`yb-sample-apps.jar`) as follows: + +{{% yb-sample-apps-path %}} + +To run the workload generator tool, you must have: + +- Java runtime or JDK installed. +- Set the environment variable $ENDPOINTS to the IP addresses (including hosts and ports) for the nodes of the cluster. + +```output +ENDPOINTS="X.X.X.X:9042,X.X.X.X:9042,X.X.X.X:9042" +``` + +## Run the write-heavy key-value workload + +Run the key-value workload with higher number of write threads (representing write-heavy workload). 
+ +Load 1B keys of 256 bytes each across 256 writer threads + +```sh +$ java -jar ./yb-sample-apps.jar \ + --workload CassandraKeyValue \ + --nodes $ENDPOINTS \ + --nouuid \ + --value_size 256 \ + --num_threads_read 0 \ + --num_threads_write 256 \ + --num_unique_keys 1000000000 +``` + +### Expected results + +Name | Observation +--------|------ +Write Ops/sec | ~90k +Write Latency | ~2.5-3.0 ms/op +CPU (User + Sys) | 60% + +## Run the read-heavy key-value workload + +Run the key-value workload with higher number of read threads (representing read-heavy workload). + +Load 1M keys of 256 bytes and access them with 256 reader threads. + +```sh +$ java -jar ./yb-sample-apps.jar \ + --workload CassandraKeyValue \ + --nodes $ENDPOINTS \ + --nouuid \ + --value_size 256 \ + --num_threads_read 256 \ + --num_threads_write 0 \ + --num_unique_keys 1000000 +``` + +### Expected results + +| Name | Observation | +| :--- | :---------- | +| (Read) Ops/sec | ~150k | +| (Read) Latency | ~1.66 ms/op | +| CPU (User + Sys) | 60% | + +## Batch write-heavy KV workload + +Run the key-value workload in batch mode and higher number of write threads (representing batched, write-heavy workload). + +Load 1B keys of 256 bytes each across 64 writer threads in batches of 25 each. + +```sh +$ java -jar ./yb-sample-apps.jar \ + --workload CassandraBatchKeyValue \ + --nodes $ENDPOINTS \ + --nouuid \ + --batch_size 25 \ + --value_size 256 \ + --num_threads_read 0 \ + --num_threads_write 64 \ + --num_unique_keys 1000000000 +``` + +### Expected results + +| Name | Observation | +| :--- | :---------- | +| (Batch Write) Ops/sec | ~140k | +| (Batch Write) Latency | ~9.0 ms/op | +| CPU (User + Sys) | 80% | diff --git a/docs/content/v2.25/benchmark/key-value-workload-ysql.md b/docs/content/v2.25/benchmark/key-value-workload-ysql.md new file mode 100644 index 000000000000..83de97263bda --- /dev/null +++ b/docs/content/v2.25/benchmark/key-value-workload-ysql.md @@ -0,0 +1,17 @@ +--- +title: Benchmark YSQL performance with key-value workloads +headerTitle: Key-value workload +linkTitle: Key-value workload +description: Benchmark YSQL performance with key-value workloads. +headcontent: Benchmark YSQL performance with key-value workloads +menu: + preview: + identifier: key-value-workload-2-ysql + parent: benchmark + weight: 6 +type: docs +private: true +--- + + +{{}} diff --git a/docs/content/v2.25/benchmark/large-datasets-ycql.md b/docs/content/v2.25/benchmark/large-datasets-ycql.md new file mode 100644 index 000000000000..a7360bde69ab --- /dev/null +++ b/docs/content/v2.25/benchmark/large-datasets-ycql.md @@ -0,0 +1,128 @@ +--- +title: Benchmark YCQL performance with large datasets +headerTitle: Large datasets +linkTitle: Large datasets +description: Benchmark YCQL performance with large datasets +headcontent: Benchmark YCQL performance with large datasets +menu: + preview: + identifier: large-datasets-1-ycql + parent: benchmark + weight: 7 +aliases: + - /benchmark/large-datasets/ +type: docs +--- + +{{}} + +Use this benchmark to understand the performance, failure, and scaling characteristics of YugabyteDB with a massive dataset (multiple TB per node). 
In order to accomplish that, you will do the following:
+
+- Load 30 billion key-value records
+- Each write operation inserts a single record
+- Perform a read-heavy workload that does *random reads* in the presence of some writes
+- Perform a read-heavy workload that does *reads of a subset of data* in the presence of some writes
+
+Each record is a key-value record of about 300 bytes.
+
+- **Key size:** 50 Bytes
+- **Value size:** 256 Bytes (chosen to be not very compressible)
+
+## Recommended configuration
+
+Note that the load tester was run from a separate machine in the same availability zone (AZ).
+
+### Machine types
+
+A machine in the Amazon Web Services (AWS) cloud with the following specifications was chosen: **32-vcpus, 240 GB RAM, 4 x 1.9TB nvme SSD**.
+
+- **Cloud:** AWS
+- **Node type:** i3.8xlarge
+
+### Cluster creation
+
+Create a standard four-node cluster, with a replication factor (RF) of `3`. Pass the following option to the YugabyteDB processes:
+
+```sh
+--yb_num_shards_per_tserver=20
+```
+
+The `yb_num_shards_per_tserver` flag was set to `20` (default value is `8`). This is done because the `i3.8xlarge` nodes have four disks. In the future, YugabyteDB will automatically pick better defaults for nodes with multiple disks.
+
+Create the `YCQL_ADDRS` environment variable using the `export` command:
+
+```sh
+$ export YCQL_ADDRS="<ip1>:9042,<ip2>:9042,<ip3>:9042,<ip4>:9042"
+```
+
+## Initial load phase
+
+The data was loaded at a steady rate over about 4 days using the `CassandraKeyValue` workload. To load the data, run the following command:
+
+```sh
+$ java -jar yb-sample-apps.jar \
+    --workload CassandraKeyValue \
+    --nouuid --nodes $YCQL_ADDRS \
+    --value_size 256 \
+    --num_unique_keys 30000000000 \
+    --num_writes 30000000000 \
+    --num_threads_write 256 \
+    --num_threads_read 1
+```
+
+### Write IOPS
+
+You should see a steady 85,000 inserts per second with write latencies of around 2.5 milliseconds. This is shown graphically below.
+
+![Load Phase Results](/images/benchmark/bench-large-dataset-inserts-1.png)
+
+### Data set size growth rate
+
+The graph below shows the steady growth in `SSTables` size at a node from `Sep 4` to `Sep 7`, beyond which it stabilizes at `6.5 TB`.
+
+![Load Phase Results](/images/benchmark/bench-large-dataset-inserts-2.png)
+
+## Final data set size
+
+The following illustration from the yb-master Admin UI shows the tablet servers, the number of tablets on each, the number of tablet leaders, and the size of the on-disk `SSTable` files.
+
+{{< note title="Note" >}}
+
+The uncompressed dataset size per node is 8 TB, while the compressed size is 6.5 TB. This is because the load generator generates random bytes, which are not very compressible.
+
+Real-world workloads generally have much more compressible data.
+
+{{< /note >}}
+
+![Load Phase Results](/images/benchmark/bench-large-dataset-inserts-3.png)
+
+## Expected results
+
+The results you see should be similar to the observations shown below.
+ +### Load phase results + +| Name | Observation | +| :--- | :---------- | +| Records inserted | 30 Billion | +| Size of each record | ~ 300 bytes | +| Time taken to insert data | 4.4 days | +| Sustained insert Rate | 85K inserts/second | +| Final dataset in cluster | 26TB across 4 nodes | +| Final dataset size per node | 6.5TB / node | + +### Read-heavy workload results + +| Name | Observation | +| :--- | :---------- | +| Random-data read heavy workload | 185K reads/sec and 1K writes/sec | +| Recent-data read heavy Workload | 385K reads/sec and 6.5K writes/sec | + +### Cluster expansion and induced failures + +- Expanded from four to five nodes in about eight hours + - Deliberately rate limited at `200 MB/sec` +- New node takes traffic as soon the first tablet arrives + - Pressure relieved from old nodes very quickly +- Induced one node failure in five-node cluster + - Cluster rebalanced in `2 hrs 10 minutes` diff --git a/docs/content/v2.25/benchmark/large-datasets-ysql.md b/docs/content/v2.25/benchmark/large-datasets-ysql.md new file mode 100644 index 000000000000..680709dc8e4e --- /dev/null +++ b/docs/content/v2.25/benchmark/large-datasets-ysql.md @@ -0,0 +1,17 @@ +--- +title: Benchmark YSQL performance with large datasets +headerTitle: Large datasets +linkTitle: Large datasets +description: Benchmark YSQL performance with large datasets +headcontent: Benchmark YSQL performance with large datasets +menu: + preview: + identifier: large-datasets-2-ysql + parent: benchmark + weight: 7 +type: docs +private: true +--- + + +{{}} diff --git a/docs/content/v2.25/benchmark/resilience/_index.md b/docs/content/v2.25/benchmark/resilience/_index.md new file mode 100644 index 000000000000..749875801389 --- /dev/null +++ b/docs/content/v2.25/benchmark/resilience/_index.md @@ -0,0 +1,39 @@ +--- +title: Benchmark resilience (fault tolerance) +headerTitle: Resilience +linkTitle: Resilience +description: Benchmark YugabyteDB's ability to withstand component failure. +headcontent: Benchmarking the ability to withstand component failure +menu: + preview: + identifier: resilience + parent: benchmark + weight: 21 +type: indexpage +--- + +Resiliency refers to the ability of a system to withstand and recover from failures or disruptions, whether they are caused by software bugs, hardware issues, network problems, or external events. A resilient system is designed to absorb the impact of failures and continue operating, even if at a degraded level, without experiencing a complete outage. + +In YugabyteDB, resiliency is achieved through various techniques, including the following: + +- **Fault tolerance**. Replicating tablets on to multiple nodes with one acting as a leader and others as followers. If the leader fails, a new leader is automatically elected, ensuring continuous availability. This replication and fault-tolerant architecture allows the database to withstand the failure of individual nodes, or even entire datacenters without losing data or service availability. +- **Consistency guarantees**. Raft-based consensus ensures full ACID (Atomicity, Consistency, Isolation, Durability) transactions, even across multiple tablets and datacenters. This consistency model helps maintain data integrity and coherence, even in the face of failures or network partitions. +- **Self healing**. YugabyteDB automatically detects and recovers from failures, such as node crashes, disk failures, or network partitions. 
It can automatically repair and rebalance the cluster by re-replicating data and redistributing tablet leaders to maintain optimal performance and resilience.
+- **Elasticity**. YugabyteDB can dynamically adjust the number of replicas and the distribution of data across the cluster, ensuring that the system can handle changes in load and resource requirements. This scalability and elasticity help maintain the overall resilience and availability of the database, even as the workload and infrastructure requirements change over time.
+- **Backup and disaster recovery (DR)**. YugabyteDB provides built-in backup and DR capabilities, allowing you to create consistent snapshots of the data and restore it in the event of a major failure or disaster. These backup and DR features help ensure the long-term resilience and recoverability of the database, even in the face of large-scale failures or catastrophic events.
+
+## Jepsen test
+
+[Jepsen](https://jepsen.io/) testing is a methodology and set of tools used to rigorously test the fault tolerance and correctness of distributed systems, particularly databases and other data storage systems. Jepsen deliberately injects faults into the system, such as network partitions, process crashes, disk failures, and other types of failures.
+
+Jepsen employs a rigorous verification process, which includes generating complex, realistic workloads, carefully monitoring the system's behavior, and analyzing the results to identify any inconsistencies or violations of the specified properties.
+
+YugabyteDB passes 99.9% of the Jepsen tests.
+
+{{}}
+For more details, see [Jepsen test results](jepsen-testing/).
+{{}}
+
+## Learn more
+
+- [Resiliency, high availability, and fault tolerance](../../explore/fault-tolerance/)
diff --git a/docs/content/v2.25/benchmark/resilience/jepsen-testing.md b/docs/content/v2.25/benchmark/resilience/jepsen-testing.md
new file mode 100644
index 000000000000..384d95edf24a
--- /dev/null
+++ b/docs/content/v2.25/benchmark/resilience/jepsen-testing.md
@@ -0,0 +1,160 @@
+---
+title: Jepsen testing
+headerTitle: Jepsen testing
+linkTitle: Jepsen testing
+description: Description of Jepsen testing.
+menu:
+  preview:
+    identifier: jepsen-testing-1
+    parent: resilience
+    weight: 13
+aliases:
+  - /preview/benchmark/resilience/jepsen-testing-ysql/
+  - /preview/benchmark/resilience/jepsen-testing-ycql/
+type: docs
+---
+
+This page describes the Jepsen tests that YugabyteDB runs daily for each currently available major release.
+
+## Scenarios
+
+### Bank
+
+This test simulates money transfers between accounts; it uses a table T with schema `(id int PRIMARY KEY, balance bigint)`. The workload performs transactional transfers between accounts - `(UPDATE T SET balance = balance - x WHERE id = ?; UPDATE T SET balance = balance + x WHERE id = ?;)` - as well as reads of the whole table. The grand total is expected to remain the same, both in the short and long term.
+
+![Load Phase Results](/images/benchmark/jepsen/jepsen-1-bank.png)
+
+```plpgsql
+BEGIN TRANSACTION;
+UPDATE T SET balance = balance + x WHERE id = ?;
+UPDATE T SET balance = balance - x WHERE id = ?;
+COMMIT;
+```
+
+### Bank-contention (YSQL)
+
+In addition to UPDATE transactions, there are also INSERT or INSERT/DELETE transactions, all operating under the assumption that the overall `SUM(balance)` must remain consistent. To simplify Jepsen's scenario, the test uses 5 keys for UPDATEs and 5 for random INSERTs and DELETEs.
+
+![Load Phase Results](/images/benchmark/jepsen/jepsen-2-bank-contention.png)
+
+```plpgsql
+-- updates
+BEGIN TRANSACTION;
+UPDATE T SET balance = balance + x WHERE id = ?;
+UPDATE T SET balance = balance - x WHERE id = ?;
+COMMIT;
+
+-- inserts
+BEGIN TRANSACTION;
+INSERT INTO T (id, balance) VALUES (?, ?);
+UPDATE T SET balance = balance - x WHERE id = ?;
+COMMIT;
+
+-- deletes
+BEGIN TRANSACTION;
+SELECT balance FROM T WHERE id = ?;
+UPDATE T SET balance = balance + x WHERE id = ?;
+DELETE FROM T WHERE id = ?;
+COMMIT;
+```
+
+### Counter
+
+This test uses a table T with schema `(id int PRIMARY KEY, count int)` with a single row, with the workload consisting of concurrent increments `(UPDATE T SET count = count + ? WHERE id = 0)` and reads. At any given time, the value column is expected to be no more than the number of increments issued, and no less than the number of increments that succeeded.
+
+The test uses the int column type for YSQL and the counter type for YCQL.
+
+![Load Phase Results](/images/benchmark/jepsen/jepsen-3-counter.png)
+
+### Set
+
+This test uses a table T with schema `(id int PRIMARY KEY, val int, grp int)`. Values are inserted into this table concurrently with reads of the entire table. After an insert operation succeeds, or when an element is first observed in a read operation (whichever occurs first), all subsequent reads are expected to include that inserted element.
+
+For YCQL, the table also includes a count column, and the workload permits duplicate inserts.
+
+![Load Phase Results](/images/benchmark/jepsen/jepsen-4-set.png)
+
+```plpgsql
+INSERT INTO T (id, val, grp) VALUES (?, ?, ?);
+SELECT val FROM T where id = 0;
+```
+
+### Long fork
+
+The long-fork test uses a table T with schema `(key int PRIMARY KEY, key2 int, val int)`, where individual workers execute either single-row inserts or multi-row reads. The expectation is that the read results are serializable. This means that for two write operations, W1 and W2, it should not be possible for a read operation R1 to observe write W1 but not W2, while another read operation R2 observes W2 but not W1.
+
+For more information, see the [Jepsen tests documentation](https://jepsen-io.github.io/jepsen/jepsen.tests.long-fork.html).
+
+![Load Phase Results](/images/benchmark/jepsen/jepsen-5-long-fork.png)
+
+### Default value
+
+This scenario entails concurrent Data Definition Language (DDL) and Data Manipulation Language (DML) operations, simulating a migration scenario. Typically, a column with `DEFAULT 0` is added, and the inserts and reads verify that its value is actually zero and not null.
+
+![Load Phase Results](/images/benchmark/jepsen/jepsen-6-default-value.png)
+
+### Single Key ACID
+
+The test uses a table T with schema `(id int PRIMARY KEY, val int)` with a fixed number of rows, each row having several worker threads assigned to it. Each worker can either read the row, update the row, or perform a compare-and-set `UPDATE T SET val = ? WHERE id = ? AND val = ?`; worker groups for different rows are completely independent of each other. The checker makes sure that the resulting operations history is linearizable, that is, reads observe previous writes and writes don't disappear.
+
+![Load Phase Results](/images/benchmark/jepsen/jepsen-7-single-key-acid.png)
+
+### Multi Key ACID
+
+This test is similar to the single-key ACID test, but uses a composite primary key on table T with schema `(k1 int, k2 int, val int, PRIMARY KEY (k1, k2))` and UPSERTs (for YSQL, `INSERT .. ON CONFLICT DO UPDATE SET`) instead of compare-and-set.
+
+```plpgsql
+INSERT INTO T (k1, k2, val) VALUES (?, ?, ?) ON CONFLICT (k1, k2) DO UPDATE SET val = ?;
+SELECT k1, val FROM T where k2 = ? and k1 = ?;
+```
+
+### Append
+
+In addition to a usual integer primary key, the table schema for this test uses a few text columns that hold comma-separated integers. Workers perform small transactions - a mix of concatenating updates like `(UPDATE T SET txt = CONCAT(txt, ',', ?) WHERE id = ?)` and single-row reads. The checker then verifies the history, looking for various serializable isolation anomalies (these are complex and are abbreviated as G0, G1, and G2; see [Generalized Isolation Level Definitions](http://pmg.csail.mit.edu/papers/icde00.pdf)).
+
+Unlike other tests, this one uses several identical tables rather than just one. For more information about this test, see [CMU Quarantine Tech Talks: Black-box Isolation Checking with Elle](https://www.youtube.com/watch?v=OPJ_IcdSqig) (Kyle Kingsbury, Jepsen.io).
+
+The tests utilize geo-partitioning, different row-level locking modes, and all isolation levels currently supported in YugabyteDB.
+
+![Load Phase Results](/images/benchmark/jepsen/jepsen-8-append.png)
+
+## Nemesis
+
+During the daily testing, an [LXC configuration](https://linuxcontainers.org/lxc/introduction/) is used with a 5-node setup. The original nemesis list is not significantly expanded, except for the inclusion of clock skew by default during the initialization of yb-tserver/yb-master processes. VM nemeses (AWS VM restart/start/stop, volume detach) and some OS-level stress nemeses (using stress-ng, network slowdown) are covered in the YugabyteDB stress framework.
+
+The complete nemeses list includes the following:
+
+### Software clock skew
+
+YugabyteDB has flags that allow software clock skew to be injected. This nemesis assigns random clock skew values to each yb-tserver or yb-master process in scenarios where a process restart is used.
+
+### Restart yb-master or yb-tserver process
+
+This nemesis uses the `ps` utility to identify yb-master or yb-tserver processes, and then uses the `kill -9` command to forcefully stop and restart them.
+
+### Pause yb-master or yb-tserver process
+
+Instead of killing the process, this nemesis sends it a STOP signal using the same `kill` utility, which can surface tricky behavior in the interactions between nodes.
+
+### Network partitioning
+
+This nemesis uses the `iptables` utility to drop connectivity between testing nodes.
+
+```shell
+# drop connections from the current node to 10.20.30.40 and 10.20.30.41
+iptables -A INPUT -s 10.20.30.40,10.20.30.41 -j DROP -w
+
+# heal everything
+iptables -F -w
+iptables -X -w
+```
+
+### Combination of kill, partition, and pause nemeses
+
+The following illustration describes an example of how nemeses work in combination during the test. Notice that there are periods with no nemesis active, so the test can execute and achieve some successful operations (txn ok). The test fails (txn fail) if an additional check shows that the number of write or read operations is zero.
+
+![Load Phase Results](/images/benchmark/jepsen/jepsen-9-nemesis-combine.png)
+
+## Known issues
+
+The latest information about Jepsen-related issues is tracked in our [GitHub issues](https://github.com/yugabyte/yugabyte-db/issues/10052).
diff --git a/docs/content/v2.25/benchmark/scalability/_index.md b/docs/content/v2.25/benchmark/scalability/_index.md
new file mode 100644
index 000000000000..5d6728f8e7a0
--- /dev/null
+++ b/docs/content/v2.25/benchmark/scalability/_index.md
@@ -0,0 +1,28 @@
+---
+title: Benchmark scalability
+headerTitle: Scalability
+linkTitle: Scalability
+description: Benchmark scalability of queries and datasets in YugabyteDB.
+headcontent: Benchmark adjusting capacity to meet demand
+menu:
+  preview:
+    identifier: scalability
+    parent: benchmark
+    weight: 20
+type: indexpage
+---
+
diff --git a/docs/content/v2.25/benchmark/scalability/scaling-queries-ycql.md b/docs/content/v2.25/benchmark/scalability/scaling-queries-ycql.md
new file mode 100644
index 000000000000..435d6ca4b1e1
--- /dev/null
+++ b/docs/content/v2.25/benchmark/scalability/scaling-queries-ycql.md
@@ -0,0 +1,90 @@
+---
+title: Benchmark scaling YCQL queries
+headerTitle: Scaling YCQL queries
+linkTitle: Scaling queries
+description: Benchmark scaling YCQL queries in YugabyteDB.
+menu:
+  preview:
+    identifier: scaling-queries-2-ycql
+    parent: scalability
+    weight: 11
+type: docs
+---
+
+{{}}
+
+As a part of our efforts to push the limits of the systems you build, Yugabyte ran some large cluster benchmarks to scale YugabyteDB to millions of reads and writes per second while retaining low latencies. This topic covers the details of our 50-node cluster benchmarks. [Results of the earlier benchmark tests performed on a 25-node cluster](https://forum.yugabyte.com/t/large-cluster-perf-1-25-nodes/58) are available in the Yugabyte Community forum.
+
+![YCQL key-value workload](/images/benchmark/scalability/key-value-workload-ycql.png)
+Writes are replicated with an RF of `3` and strong consistency; reads are strongly consistent, leader-only reads.
+
+The graph above shows how you can achieve linear scalability with YugabyteDB. The read and write throughput doubles when the cluster size doubles from 25 to 50 nodes, while the latencies remain low, in the order of a couple of milliseconds.
+
+This test was performed in [Google Cloud Platform (GCP)](https://cloud.google.com/gcp/). Because YugabyteDB is a cloud-native database, it can deliver similar performance results on other public clouds and in on-premises data centers.
+
+The following sections cover the experimental setup and the details of the read and write performance metrics.
+
+## Benchmark setup
+
+- 50 compute instances in Google Cloud Platform
+- Each instance is an `n1-standard-16` [N1 standard machine type](https://cloud.google.com/compute/docs/machine-types#n1_standard_machine_types) with:
+  - 16 virtual CPUs
+  - Intel® Xeon® CPU @ 2.20GHz
+  - 60 GB RAM
+  - 2 x 375 GB direct attached SSD
+- Replication factor (RF) = `3`
+- YugabyteDB version: `0.9.1.0`. All configuration flags are default on the YugabyteDB nodes.
+
+The workload was generated using a multi-threaded Cassandra key-value sample application that was run from `n1-highcpu-32` machines. The key and value sizes used were 40 and 16 bytes, respectively.
+
+### Reads
+
+YugabyteDB performs strongly consistent reads by default. For details, see [Read IO path (single shard)](../../../explore/linear-scalability/scaling-reads/). Below is the summary of the performance metrics observed during a 100% read workload:
+
+- **2.6 million read operations per second**, summed across the YugabyteDB nodes.
+- **0.2 millisecond average latency** per read on the server side.
+- **65% CPU usage**, averaged across the YugabyteDB nodes.
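+
+The reads were driven by the multi-threaded Cassandra key-value sample application mentioned above (the open-source [YugabyteDB workload generator](https://github.com/yugabyte/yb-sample-apps), linked again in Next steps). As a rough, illustrative sketch only - the exact thread count and key count for this run are not recorded here, `$YCQL_ADDRS` is assumed to hold the comma-separated `host:port` list of the cluster nodes, and only the 16-byte value size is taken from the setup described above - a read-heavy invocation looks along these lines:
+
+```sh
+$ java -jar ./yb-sample-apps.jar \
+    --workload CassandraKeyValue \
+    --nodes $YCQL_ADDRS \
+    --nouuid \
+    --value_size 16 \
+    --num_threads_read 256 \
+    --num_threads_write 0 \
+    --num_unique_keys 1000000
+```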
+
+#### 50-node cluster - read IOPS and latency across the nodes
+
+The graphs below were captured for one hour of the run. The operations per second are summed across all the nodes, while the latency is the average across them. Note that the throughput and latency metrics are very steady over the entire time window.
+
+![Total YCQL operations per second and YCQL operations latency](/images/benchmark/scalability/total-cql-ops-per-sec-reads.png)
+
+#### 50-node cluster - CPU and memory during the read benchmark
+
+The two graphs below show the corresponding CPU and memory (RAM) usage during that time interval.
+
+![CPU and memory usage](/images/benchmark/scalability/cpu-usage-reads-ycql.png)
+
+### Writes
+
+YugabyteDB performs strongly consistent writes, with a replication factor (RF) of `3` in this case. For details, see [Scaling writes](../../../explore/linear-scalability/scaling-writes/). Below is the summary of the performance metrics observed during a 100% write workload:
+
+- **1.2 million write operations per second**, summed across the YugabyteDB nodes.
+- **3.1 millisecond average latency** per write operation on the server side.
+- **75% CPU usage**, averaged across the YugabyteDB nodes.
+
+The graphs below cover twelve hours of the run. This is a much longer time interval than the read benchmark because write performance issues often appear only after some time, when latency spikes caused by background flushes and compactions start to show up.
+
+#### 50-node cluster - write IOPS and latency across the nodes
+
+The two graphs below show the write operations per second (summed across all the nodes) and the average write latency over those twelve hours. Note that these are the logical writes that the application issued; each write is replicated three times internally by the database using the [Raft consensus protocol](https://raft.github.io/) based on the replication factor (RF) of `3`.
+
+![Total YCQL operations per second and YCQL operation latency](/images/benchmark/scalability/total-cql-ops-per-sec-writes-ycql.png)
+
+#### 50-node cluster - CPU and memory during the write benchmark
+
+The two graphs below show the corresponding CPU and RAM usage for those twelve hours, averaged across all the YugabyteDB nodes.
+
+![CPU usage](/images/benchmark/scalability/cpu-usage-writes-ycql.png)
+
+## Next steps
+
+You can visit the [YugabyteDB workload generator](https://github.com/yugabyte/yb-sample-apps) GitHub repository to try out more experiments on your own local setups. After you set up a cluster and test your favorite application, share your feedback and suggestions with other users on the [YugabyteDB Community Slack]({{}}).
+
+## Learn more
+
+- [YugabyteDB architecture](../../../architecture/)
+- [Scaling reads](../../../explore/linear-scalability/scaling-reads)
+- [Scaling writes](../../../explore/linear-scalability/scaling-writes)
\ No newline at end of file
diff --git a/docs/content/v2.25/benchmark/scalability/scaling-queries-ysql.md b/docs/content/v2.25/benchmark/scalability/scaling-queries-ysql.md
new file mode 100644
index 000000000000..ce7c884eee72
--- /dev/null
+++ b/docs/content/v2.25/benchmark/scalability/scaling-queries-ysql.md
@@ -0,0 +1,150 @@
+---
+title: Benchmark scaling YSQL queries
+headerTitle: Scaling YSQL queries
+linkTitle: Scaling queries
+description: Benchmark scaling YSQL queries in YugabyteDB
+menu:
+  preview:
+    identifier: scaling-queries-1-ysql
+    parent: scalability
+    weight: 11
+type: docs
+---
+
+{{}}
+
+There are a number of well-known experiments where eventually-consistent NoSQL databases were scaled out to perform millions of inserts and queries. Here, you do the same using YSQL, the Yugabyte SQL API that is PostgreSQL-compatible, strongly consistent, and supports distributed SQL. We created a 100-node YugabyteDB cluster, ran single-row INSERT and SELECT workloads with high concurrency – each for an hour – and measured the sustained performance (throughput and latency). This topic details the results of this experiment and highlights the key aspects of the YugabyteDB architecture that make it fit for such high-volume ingest workloads. Although this topic describes the results of benchmark tests performed by Yugabyte, you can use the following steps to perform your own benchmarks on the scalability of queries in your YugabyteDB clusters.
+
+## Database cluster setup
+
+While YugabyteDB can be deployed across multiple availability zones or regions, this benchmark focused on the aggregate performance of a 100-node cluster. Therefore, all 100 nodes were deployed on the Amazon Web Services (AWS) cloud in the US West (Oregon) region (`us-west-2`) and in a single availability zone (`us-west-2a`). Each instance was of type `c5.4xlarge` (16 vCPUs). The cluster is summarized as follows:
+
+- **Cluster name:** MillionOps
+- **Cloud:** Amazon Web Services
+- **Region:** Oregon (`us-west-2`)
+- **Zone:** `us-west-2a`
+- **Number of nodes:** 100
+- **Instance type:** [`c5.4xlarge`](https://aws.amazon.com/ec2/instance-types/c5/) (16 vCPUs)
+- **Disk on each node:** 1TB EBS SSD (`gp2`)
+- **Replication Factor (RF):** `3`
+- **Consistency level:** Strong consistency for both writes and reads
+
+## Benchmark setup
+
+The benchmark application was an open-source Java program. The application's database workload does multi-threaded, single-row `INSERT` and `SELECT` statements against a table that has a key and a value column. The size of each row was 64 bytes. The insert and select benchmarks were run for one hour each in order to measure the sustained throughput and latency.
+
+The benchmark application was run on six instances of eight cores each. Note that the workload could not be consolidated onto fewer, more powerful instances because the benchmark machines were hitting their maximum network bandwidth. Each of these benchmark instances was prepared as follows.
+
+Java 8 was installed using the following commands:
+
+```sh
+$ sudo apt update
+$ sudo apt install default-jre
+```
+
+The [YugabyteDB workload generator](https://github.com/yugabyte/yb-sample-apps) was downloaded onto these machines as follows:
+
+{{% yb-sample-apps-path %}}
+
+This benchmark program can take a list of servers in the database cluster, and then perform random operations across these servers. In order to do this, we set up an environment variable with the list of comma-separated `host:port` entries of the 100 database servers as follows:
+
+```sh
+$ export YSQL_NODES=node-1-ip-addr:5433,node-2-ip-addr:5433,...
+```
+
+## Benchmarking the INSERT workload
+
+The first step was to run an INSERT benchmark (using the `SqlInserts` workload generator) on this 100-node cluster. The following command was run on each of the benchmark instances.
+
+```sh
+java -jar ~/yb-sample-apps-no-table-drop.jar \
+    --workload SqlInserts \
+    --nodes $YSQL_NODES \
+    --num_unique_keys 5000000000 \
+    --num_threads_write 400 \
+    --num_threads_read 0 \
+    --uuid 00000000-0000-0000-0000-00000000000n
+```
+
+The table on which the benchmark was run had the following basic schema.
+
+```plpgsql
+CREATE TABLE table_name (k varchar PRIMARY KEY, v varchar);
+```
+
+This workload performed a number of INSERTs using prepared statements, as follows:
+
+```plpgsql
+INSERT INTO table_name (k, v) VALUES (?, ?);
+```
+
+Note a few points about the benchmark setup.
+
+- **Each benchmark program writes a unique set of rows.** The `uuid` parameter forms a prefix of the row key. It is set differently (by varying the value of `n` from `1` to `6`) on each benchmark instance to ensure that each instance writes separate keys.
+
+- **A total of 30 billion unique rows will be inserted upon completion.** Each benchmark program proceeds to write out 5 billion keys, and there are six such programs running in parallel.
+
+- **There are 2400 concurrent clients performing inserts.** Each benchmark program uses 400 write threads, and there are six such programs running concurrently.
+
+The following illustration describes the write throughput on this cluster while the benchmark was in progress. The write throughput was 1.26 million inserts per second.
+
+![Total YSQL operations per second](/images/benchmark/scalability/total-ysql-ops-per-sec.png)
+
+The corresponding average insert latency across all the 100 nodes was 1.66 milliseconds (ms), as shown in the following illustration. Note that each insert is replicated three ways to make the cluster fault tolerant.
+
+![YSQL operations latency](/images/benchmark/scalability/cpu-usage-2.png)
+
+The average CPU usage across the nodes in the cluster was about 78%, as shown in the following graph:
+
+![CPU usage](/images/benchmark/scalability/cpu-usage.png)
+
+## Benchmarking the SELECT workload
+
+The following command was run for the SELECT workload.
+
+```sh
+java -jar ~/yb-sample-apps-no-table-drop.jar \
+    --workload SqlInserts \
+    --nodes $YSQL_NODES \
+    --max_written_key 500000000 \
+    --num_writes 0 \
+    --num_reads 50000000000 \
+    --num_threads_write 0 \
+    --num_threads_read 400 \
+    --read_only \
+    --uuid 00000000-0000-0000-0000-00000000000n
+```
+
+The SELECT workload looks up random rows in the table that the INSERT workload (described in the previous section) populated. Each SELECT query is performed using prepared statements, as follows:
+
+```plpgsql
+SELECT * FROM table_name WHERE k=?;
+```
+
+There are 2,400 concurrent clients issuing SELECT statements: each benchmark program uses 400 read threads, and there are six programs running in parallel. Each read operation randomly selects one row from the 500 million rows written by its own benchmark program, for a total pool of 3 billion rows across the six programs.
+
+The following illustration describes the read throughput on this cluster while the benchmark was in progress. The read throughput was 2.8 million selects per second. YugabyteDB reads are strongly consistent by default and that is the setting used for this benchmark. Additional throughput can be achieved by allowing timeline-consistent reads from follower replicas (see [Architecture for horizontal write scaling](#architecture-for-horizontal-write-scaling)).
+
+![Total YSQL operations per second](/images/benchmark/scalability/total-ysql-ops-per-sec-2.png)
+
+The corresponding average select latency across all the 100 nodes was 0.56 ms, as shown in the following illustration:
+
+![YSQL operations latency](/images/benchmark/scalability/ysql-op-latency-2.png)
+
+The average CPU usage across the nodes in the cluster was about 64%, as shown in the following graph.
+ +![CPU usage](/images/benchmark/scalability/cpu-usage-2.png) + +## Architecture for horizontal write scaling + +The following illustration describes the architecture of a YugabyteDB cluster. The YB-TServer service is responsible for managing the data in the cluster while the YB-Master service manages the system configuration of the cluster. YB-TServer automatically shards every table into a number of shards (also known as tablets). Given the replication factor (RF) of `3` for the cluster, each tablet is represented as a Raft group of three replicas with one replica considered the leader and other two replicas considered as followers. In a 100-node cluster, each of these three replicas are automatically stored on exactly three (out of 100) different nodes where each node can be thought of as representing an independent fault domain. YB-Master automatically balances the total number of leader and follower replicas on all the nodes so that no single node becomes a bottleneck and every node contributes its fair share to incoming client requests. The end result is strong write consistency (by ensuring writes are committed at a majority of replicas) and tunable read consistency (by serving strong reads from leaders and timeline-consistent reads from followers), irrespective of the number of nodes in the cluster. + +![CPU usage](/images/benchmark/scalability/yugabytedb-cluster.png) + +To those new to the Raft consensus protocol, a basic explanation is that it is a protocol with which a cluster of nodes can agree on values. Currently, it is arguably the most popular distributed consensus protocol. Business-critical cloud-native systems like `etcd` (the configuration store for Kubernetes) and `consul` (HashiCorp's popular service discovery solution) are built on Raft as a foundation. YugabyteDB uses Raft for both leader election as well as the actual data replication. The benefits of YugabyteDB's use of Raft including rapid scaling (with fully-automatic rebalancing) are highlighted in the Yugabyte blog on ['How Does the Raft Consensus-Based Replication Protocol Work in YugabyteDB?'](https://www.yugabyte.com/blog/how-does-the-raft-consensus-based-replication-protocol-work-in-yugabyte-db/). Raft is tightly integrated with a high-performance document store (extended from RocksDB) to deliver on the promise of massive write scalability combined with strong consistency and low latency. + +## Next steps + +Refer to the [YugabyteDB workload generator](https://github.com/yugabyte/yb-sample-apps) GitHub repository to try out more experiments on your own local setups. After you set up a cluster and test an application of your choice, share your feedback and suggestions with other users on the [YugabyteDB Community Slack]({{}}). diff --git a/docs/content/v2.25/benchmark/sysbench-ysql.md b/docs/content/v2.25/benchmark/sysbench-ysql.md new file mode 100644 index 000000000000..6c253094dd2a --- /dev/null +++ b/docs/content/v2.25/benchmark/sysbench-ysql.md @@ -0,0 +1,230 @@ +--- +title: Benchmark YSQL performance using sysbench +headerTitle: sysbench +linkTitle: sysbench +description: Benchmark YSQL performance using sysbench. +headcontent: Benchmark YSQL performance using sysbench +menu: + preview: + identifier: sysbench-ysql + parent: benchmark + weight: 5 +aliases: + - /benchmark/sysbench/ +type: docs +--- + +sysbench is a popular tool for benchmarking databases like PostgreSQL and MySQL, as well as system capabilities like CPU, memory, and I/O. 
The [YugabyteDB version of sysbench](https://github.com/yugabyte/sysbench) is forked from the [official version](https://github.com/akopytov/sysbench) with a few modifications to better reflect YugabyteDB's distributed nature. + +## Running the benchmark + +### Prerequisites + +To ensure the recommended hardware requirements are met and the database is correctly configured before benchmarking, review the [deployment checklist](../../deploy/checklist/). + +Make sure you have the [YSQL shell](../../api/ysqlsh/) `ysqlsh` exported to the `PATH` variable. + +```sh +$ export PATH=$PATH:/path/to/ysqlsh +``` + +### Install sysbench + +Install sysbench on a machine which satisfies the Prerequisites using one of +the following options: + + + +
+
+ +Install sysbench using the following steps: + +```sh +$ cd $HOME +$ git clone https://github.com/yugabyte/sysbench.git +$ cd sysbench +$ ./autogen.sh && ./configure --with-pgsql && make -j && sudo make install +``` + +
+ +
+ +{{< note title="Note" >}} + +RHEL package is only for EL8 + +{{< /note >}} + +```sh +wget https://github.com/yugabyte/sysbench/releases/download/1.0.0-yb/sysbench-1.0.0-1.el8.x86_64.rpm + +sudo yum install -y sysbench-1.0.0-1.el8.x86_64.rpm +``` + +
+
+ +{{< note title="Note" >}} + +The MacOS package is only for Apple Silicon. + +{{< /note >}} + +```sh +brew install postgresql@14 wget + +wget https://github.com/yugabyte/sysbench/releases/download/1.0.0-yb/Sysbench.pkg + +sudo installer -pkg Sysbench.pkg -target / +``` + +
+ +
+ +This installs the sysbench utility in `/usr/local/bin`. + +### Start YugabyteDB + +Start your YugabyteDB cluster by following the steps in [Manual deployment](../../deploy/manual-deployment/). + +{{< tip title="Tip" >}} +You will need the IP addresses of the nodes in the cluster for the next step. +{{< /tip>}} + +### Run individual workloads + +You can choose to run the following workloads individually: + +* oltp_read_only +* oltp_read_write +* oltp_multi_insert +* oltp_update_index +* oltp_update_non_index +* oltp_delete + +Before starting the workload, load the data as follows: + +```sh +sysbench \ + --pgsql-host= \ + --tables=20 \ + --table_size=5000000 \ + --range_key_partitioning=false \ + --serial_cache_size=1000 \ + --create_secondary=true \ + --pgsql-db=yugabyte \ + --pgsql-user=yugabyte \ + --db-driver=pgsql \ + --pgsql-port=5433 \ + prepare + +``` + +Run a workload as follows: + +```sh +sysbench \ + --pgsql-host= \ + --tables=20 \ + --table_size=5000000 \ + --range_key_partitioning=false \ + --serial_cache_size=1000 \ + --create_secondary=true \ + --pgsql-db=yugabyte \ + --pgsql-user=yugabyte \ + --db-driver=pgsql \ + --pgsql-port=5433 \ + --time=1800 \ + --warmup-time=300 \ + --num_rows_in_insert=10 \ + --point_selects=10 \ + --index_updates=10 \ + --non_index_updates=10 \ + --range_selects=false \ + --thread-init-timeout=90 \ + --threads=60 \ + run + +``` + +## Expected results + +The following results are for a 3-node cluster running YBDB version {{< yb-version version="preview" format="short">}}, with each node running on a c5.2xlarge AWS instance (8 cores, 16 GiB of RAM), all in the same AZ, with a replication factor of 3 and TLS enabled. + +### 10 tables each with 100k rows + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+| Workload | Benchmark throughput (txns/sec) | Benchmark latency (ms) - avg | Per-query throughput (queries/sec) | Per-query latency (ms) - avg | Queries executed in each transaction |
+| :--- | ---: | ---: | ---: | ---: | :--- |
+| oltp_read_only | 4616.32 | 13 | 46163.2 | 1.3 | 10 point selects |
+| oltp_read_write | 245.49 | 97.76 | 7855.68 | 3.05 | 10 point selects, 10 index updates, 10 non-index updates, 1 insert, 1 delete |
+| oltp_multi_insert | 585.66 | 40.98 | 5856.6 | 4.09 | 10 inserts |
+| oltp_update_index | 259.64 | 92.43 | 2596.4 | 9.43 | 10 index updates |
+ +The _Queries executed in each transaction_ column shows the individual queries that are executed as part of each sysbench transaction, for each workload. These queries impact the overall transaction performance and are key to understanding the workload distribution for different sysbench benchmarks. diff --git a/docs/content/v2.25/benchmark/tpcc/_index.md b/docs/content/v2.25/benchmark/tpcc/_index.md new file mode 100644 index 000000000000..20ed8e5f09ba --- /dev/null +++ b/docs/content/v2.25/benchmark/tpcc/_index.md @@ -0,0 +1,66 @@ +--- +title: TPC-C Benchmark on YugabyteDB +headerTitle: TPC-C Benchmark on YugabyteDB +linkTitle: TPC-C +description: Benchmark YugabyteDB using TPC-C. +aliases: + - /benchmark/tpcc + - /benchmark/tpcc-ysql + - /preview/benchmark/tpcc-ysql/ +menu: + preview: + identifier: tpcc + parent: benchmark + weight: 4 +type: indexpage +--- + +[TPC-C](http://www.tpc.org/tpcc/) is a popular online transaction processing benchmark that provides metrics you can use to evaluate the performance of YugabyteDB for concurrent transactions of different types and complexity, and which are either executed online or queued for deferred execution. Developed by the Transaction Processing Performance Council (TPC), it simulates a complete computing environment where a population of users execute transactions against a database. + +{{}} +All benchmarks were run on a single-region YugabyteDB cluster running on {{}}, except 150K warehouses, which was run on [v2.11](/preview/releases/ybdb-releases/end-of-life/v2.11/). +{{}} + +## Running the benchmark + +Conducting an accurate TPC-C benchmark requires aligning your test environment with your production landscape. Begin by assessing your anticipated workload in terms of IOPS and projected data volume. These estimates will guide you in selecting an appropriate cluster configuration that closely mirrors your operational requirements. + +After you've identified a cluster specification that matches your needs, apply the TPC-C workload recommended for that particular setup. The goal is to validate that the cluster can sustain the expected transaction throughput—measured in tpmC with a high degree of efficiency, typically exceeding 99.5%. This high-efficiency rate ensures that the cluster meets the benchmark's demands with minimal resource overhead, indicating its readiness to handle your real-world, high-volume transactional workloads. + +{{}} +For information on cluster specification/workload and how to run the TPC-C against a local or a YugabyteDB Aeon cluster, see [Running TPC-C](running-tpcc/). +{{}} + +## Scale out + +YugabyteDB exhibits exemplary scalability under the TPC-C workload, demonstrating a linear growth in performance as the cluster expands. The accompanying graph illustrates this linear scalability, showing how YugabyteDB's transaction throughput—quantified in tpmC increases in direct proportion to the number of nodes added to the cluster. + +![Horizontal scaling](/images/benchmark/tpcc-horizontal.png) + +{{}} +To see how effectively YugabyteDB handles the TPC-C workload while scaling out, see [Testing horizontal scaling](horizontal-scaling/). +{{}} + +## High scale workloads + +YugabyteDB's robust performance in the TPC-C benchmark, particularly when scaled to a high number of warehouses, serves as a compelling testament to its prowess in handling high-volume transaction processing workloads. 
By excelling in this industry-standard test, which simulates complex, concurrent transactions across a vast, distributed dataset, YugabyteDB has effectively demonstrated its ability to manage the intense demands of large-scale OLTP environments. + +{{}} +To see how well YugabyteDB handles extremely high workloads, see [Testing high scale workloads](high-scale-workloads/). +{{}} + +## Max scale tested + +In our testing, YugabyteDB was able to process 1M tpmC with 150,000 warehouses at an efficiency of 99.8% on an RF3 cluster of 75 c5d.12xlarge machines with a total data size of 50TB. + +{{}} +The 150K warehouses benchmark was run on [v2.11](/preview/releases/ybdb-releases/end-of-life/v2.11/). +{{}} + +| Warehouses | TPMC | Efficiency(%) | Nodes | Connections | New Order Latency | Machine Type (vCPUs) | +| ---------: | :--- | :-----------: | :---: | ----------- | :---------------: | :--------------------- | +| 150,000 | 1M | 99.30 | 75 | 9000 | 123.33 ms | c5d.12xlarge (48) | + +{{}} +To know more about this accomplishment, see [Largest benchmark](./high-scale-workloads/#largest-benchmark). +{{}} diff --git a/docs/content/v2.25/benchmark/tpcc/high-scale-workloads.md b/docs/content/v2.25/benchmark/tpcc/high-scale-workloads.md new file mode 100644 index 000000000000..9342cee4c80a --- /dev/null +++ b/docs/content/v2.25/benchmark/tpcc/high-scale-workloads.md @@ -0,0 +1,303 @@ +--- +title: Testing high scale workloads of TPC-C benchmark +headerTitle: Testing high scale workloads of TPC-C benchmark +linkTitle: Testing high scale workloads +headcontent: Understand how YugabyteDB performs with high scale workloads +menu: + preview: + identifier: tpcc-high-scale + parent: tpcc + weight: 300 +type: docs +rightNav: + hideH3: true +--- + +Workloads in TPC-C are defined by the number of warehouses the benchmark run will simulate. We will explore how YugabyteDb performs as the number of warehouses is increased. + +## Get TPC-C binaries + +First, you need the benchmark binaries. To download the TPC-C binaries, run the following commands: + +```sh +$ wget https://github.com/yugabyte/tpcc/releases/latest/download/tpcc.tar.gz +$ tar -zxvf tpcc.tar.gz +$ cd tpcc +``` + +## Client machine + +The client machine is where the benchmark is run from. An 8vCPU machine with at least 16GB memory is recommended. The following instance types are recommended for the client machine. + +| vCPU | AWS | AZURE | GCP | +| ---- | ----------------------- | ---------------------------- | ------------------------- | +| 8 | {{}} | {{}} | {{}} | + +## Cluster setup + +The following cloud provider instance types are recommended for this test. + +| vCPU | AWS | AZURE | GCP | +| ---- | ------------------------ | ----------------------------- | --------------------------- | +| 2 | {{}} | {{}} | {{}} | +| 8 | {{}} | {{}} | {{}} | +| 16 | {{}} | {{}} | {{}} | + + +{{